/// <summary>
/// Builds the input history that is handed to the custom fitness functions
/// and stores it in m_UserInputHistoryCustomFitness.
///
/// Time series sources: the series is rebuilt as a list of single-value
/// entries - column 0 of every training row, followed by columns 1..n of the
/// last training row.
/// All other sources: the last entry of m_UserInputHistory is reused as-is
/// (no copy is made).
/// </summary>
/// <param name="TrainingData">Training data the history is derived from</param>
private void PrepareFitnessInputHistory(GPTrainingData TrainingData)
{
    if (!TrainingData.TimeSeriesSource)
    {
        // Not a time series: share the last historical input set
        m_UserInputHistoryCustomFitness = m_UserInputHistory[m_UserInputHistory.Count - 1];
        return;
    }

    int RowCount = TrainingData.Rows;
    int ColumnCount = TrainingData.Columns;

    // One entry per row, plus one for each remaining value of the last row
    List<List<double>> Series = new List<List<double>>(RowCount + ColumnCount - 1);
    m_UserInputHistoryCustomFitness = Series;

    // Leading value of every row
    for (int Index = 0; Index < RowCount; Index++)
    {
        List<double> Sample = new List<double>(1);
        Series.Add(Sample);
        Sample.Add(TrainingData[Index, 0]);
    }

    // Tail of the series: the remaining values of the last row
    for (int Offset = 1; Offset < ColumnCount; Offset++)
    {
        List<double> Sample = new List<double>(1);
        Series.Add(Sample);
        Sample.Add(TrainingData[RowCount - 1, Offset]);
    }
}
/// <summary>
/// Reshapes the time series held in column 0 of Training into a tabular
/// training set: every output row holds InputDimension consecutive series
/// values as inputs and a single objective value taken PredictionDistance
/// steps after the last input.
/// </summary>
/// <param name="InputDimension">Number of consecutive series values used as inputs per row</param>
/// <param name="PredictionDistance">Distance from the last input of a row to its objective value</param>
/// <returns>A new training set flagged as a time series source</returns>
private GPTrainingData ConvertTSToModeling(int InputDimension, int PredictionDistance)
{
    GPTrainingData Converted = new GPTrainingData();
    Converted.TimeSeriesSource = true;

    // Distance from the first input of a row to that row's objective value
    int ObjectiveOffset = InputDimension + PredictionDistance - 1;

    // Rows that cannot supply a full input window plus an objective are dropped
    int RowCount = Training.Rows - ObjectiveOffset;
    Converted.ConstructStorage(RowCount, InputDimension, 1);

    for (int Start = 0; Start < RowCount; Start++)
    {
        // Inputs: the window of series values beginning at Start
        for (int Lag = 0; Lag < InputDimension; Lag++)
        {
            Converted[Start, Lag] = Training[Start + Lag, 0];
        }

        // Objective: the series value the window is meant to predict
        Converted.ObjectiveRow(Start)[0] = Training[Start + ObjectiveOffset, 0];
    }

    return Converted;
}
/// <summary>
/// Rebuilds m_UserInputHistory from Training.HistoricalDataSets as nested
/// generic lists, one list per historical set.
///
/// Only the rows of the last historical set are copied. Row r is copied once
/// and that same list instance is appended to set r and to every later set,
/// so the sets share their row objects instead of each holding a copy.
///
/// TODO: the same values still live in the training data as well; eventually
/// only one copy should be kept.
/// </summary>
/// <param name="Training">Training data supplying the historical data sets</param>
private void TransformHistoricalInputs(GPTrainingData Training)
{
    int SetCount = Training.HistoricalDataSets.Length;
    m_UserInputHistory = new List<List<List<double>>>(SetCount);

    // One (initially empty) list per historical set
    foreach (double[][] Source in Training.HistoricalDataSets)
    {
        m_UserInputHistory.Add(new List<List<double>>(Source.Length));
    }

    // The last set supplies every row
    double[][] Latest = Training.HistoricalDataSets[SetCount - 1];
    for (int Row = 0; Row < Latest.Length; Row++)
    {
        // Copy the row a single time...
        List<double> Shared = new List<double>(Latest[Row].Length);
        foreach (double Value in Latest[Row])
        {
            Shared.Add(Value);
        }

        // ...and reference it from set Row onwards
        for (int Target = Row; Target < m_UserInputHistory.Count; Target++)
        {
            m_UserInputHistory[Target].Add(Shared);
        }
    }
}
/// <summary>
/// Releases the population, fitness and training references held by this
/// object, stops the fitness processing threads and then requests a garbage
/// collection.
///
/// Safe to call more than once: the fitness object is only shut down while
/// it is still referenced.
/// </summary>
public void ForceCleanup()
{
    m_Population = null;

    // m_Fitness is cleared here, so without this check a second call would
    // dereference null and throw a NullReferenceException.
    if (m_Fitness != null)
    {
        m_Fitness.TerminateProcessingThreads();
        m_Fitness = null;
    }

    m_Training = null;

    // Explicit collection is the stated purpose of this method
    System.GC.Collect();
}
/// <summary>
/// Estimates an upper bound for the error a program can produce on the
/// training data: the sum of the absolute values of the first objective of
/// every row, scaled by a factor of 10. The scaling exists because programs
/// can return odd constants that push their error past the plain sum.
/// </summary>
/// <param name="Training">Training data</param>
/// <returns>Sum of absolute objective values multiplied by 10</returns>
private double ComputeMaximumError(GPTrainingData Training)
{
    // Head-room applied on top of the plain sum
    const double SafetyFactor = 10.0;

    double AbsoluteSum = 0.0;
    int RowCount = Training.Rows;
    for (int Index = 0; Index < RowCount; Index++)
    {
        AbsoluteSum += Math.Abs(Training.ObjectiveRow(Index)[0]);
    }

    return AbsoluteSum * SafetyFactor;
}
/// <summary>
/// Scans the first objective value of every training row and records the
/// mean, smallest and largest value in m_TrainingAverage, m_TrainingMin and
/// m_TrainingMax.
/// </summary>
/// <param name="Training">Training data; row 0 is read unconditionally to seed min and max</param>
private void ComputeTrainingStats(GPTrainingData Training)
{
    // Seed both extremes with the first objective value
    double Seed = Training.ObjectiveRow(0)[0];
    m_TrainingMax = Seed;
    m_TrainingMin = Seed;

    double Sum = 0.0;
    for (int Index = 0; Index < Training.Rows; Index++)
    {
        double Objective = Training.ObjectiveRow(Index)[0];

        Sum += Objective;
        m_TrainingMax = Math.Max(m_TrainingMax, Objective);
        m_TrainingMin = Math.Min(m_TrainingMin, Objective);
    }

    m_TrainingAverage = Sum / Training.Rows;
}
/// <summary>
/// Scans the first objective value of every training row and reports the
/// mean, smallest and largest value through the ref parameters.
/// </summary>
/// <param name="Training">Training data; row 0 is read unconditionally to seed min and max</param>
/// <param name="Average">Receives the mean of the objective values</param>
/// <param name="Min">Receives the smallest objective value</param>
/// <param name="Max">Receives the largest objective value</param>
private void ComputeTrainingStats(GPTrainingData Training, ref double Average, ref double Min, ref double Max)
{
    // Seed both extremes with the first objective value
    double Seed = Training.ObjectiveRow(0)[0];
    Max = Seed;
    Min = Seed;

    double Sum = 0.0;
    for (int Index = 0; Index < Training.Rows; Index++)
    {
        double Objective = Training.ObjectiveRow(Index)[0];

        Sum += Objective;
        Max = Math.Max(Max, Objective);
        Min = Math.Min(Min, Objective);
    }

    Average = Sum / Training.Rows;
}
/// <summary>
/// Creates the fitness object: stores the settings, picks the fitness
/// selection strategy, derives error and statistics values from the training
/// data, allocates result storage, prepares the input histories and starts
/// the processing threads.
/// </summary>
/// <param name="Config">Modeling configuration</param>
/// <param name="TrainingData">Reference to the training data</param>
/// <param name="Tolerance">Allowable tolerance around a resulting value for exact matching</param>
/// <param name="UseInputHistory">True, if the InputHistory parameter is in use</param>
public GPFitness(GPModelerServer Config, GPTrainingData TrainingData, double Tolerance, bool UseInputHistory)
{
    m_TrainingData = TrainingData;
    m_Config = Config;
    m_UseInputHistory = UseInputHistory;
    m_Tolerance = Tolerance;

    // Selection strategy: single objective unless the profile asks for SPEA2
    if (!Config.Profile.SPEA2MultiObjective)
    {
        m_FitnessSelection = new GPFitnessSingle(Config.Profile.PopulationSize);
    }
    else
    {
        m_FitnessSelection = new GPFitnessSPEA2(Config.Profile.PopulationSize);
    }

    // The maximum possible error is needed for the adaptive parsimony pressure
    m_MaximumError = ComputeMaximumError(TrainingData);
    ComputeTrainingStats(TrainingData);

    // Storage for the fitness measures, sized to the configured population
    InitializeStorage(Config.Profile.PopulationSize);
    m_PrevPopulationSize = Config.Profile.PopulationSize;

    // Convert the training data's historical inputs into the list form
    // that programs work with
    TransformHistoricalInputs(TrainingData);

    // Custom fitness functions have no time step during fitness computation,
    // so they get their own input history
    PrepareFitnessInputHistory(TrainingData);

    // Processing threads, requested with the machine's processor count
    InitializeProcessingThreads(Environment.ProcessorCount);
}
/// <summary>
/// Builds an input history from the training data and returns it through
/// InputHistory.
///
/// Time series sources: the series is rebuilt as a list of single-value
/// entries - column 0 of every training row, followed by columns 1..n of the
/// last training row.
/// All other sources: every row of Training.InputData is copied, one list
/// per row, holding Training.Columns values.
/// </summary>
/// <param name="Training">Training data the history is derived from</param>
/// <param name="InputHistory">Replaced with the newly built history</param>
private void PrepareInputHistory(GPTrainingData Training, ref List<List<double>> InputHistory)
{
    int RowCount = Training.Rows;
    int ColumnCount = Training.Columns;

    if (!Training.TimeSeriesSource)
    {
        // Plain data set: copy just the inputs, leaving out the prediction column
        InputHistory = new List<List<double>>(RowCount);
        for (int Row = 0; Row < RowCount; Row++)
        {
            List<double> Inputs = new List<double>(ColumnCount);
            InputHistory.Add(Inputs);
            for (int Column = 0; Column < ColumnCount; Column++)
            {
                Inputs.Add(Training.InputData[Row][Column]);
            }
        }

        return;
    }

    // Time series: one entry per row, plus one for each remaining value of
    // the last row
    InputHistory = new List<List<double>>(RowCount + ColumnCount - 1);

    // Leading value of every row
    for (int Index = 0; Index < RowCount; Index++)
    {
        List<double> Sample = new List<double>(1);
        InputHistory.Add(Sample);
        Sample.Add(Training[Index, 0]);
    }

    // Tail of the series: the remaining values of the last row
    for (int Offset = 1; Offset < ColumnCount; Offset++)
    {
        List<double> Sample = new List<double>(1);
        InputHistory.Add(Sample);
        Sample.Add(Training[RowCount - 1, Offset]);
    }
}
/// <summary>
/// Parameterless constructor. Allocates an empty GPTrainingData instance and
/// stores it in m_Training.
/// </summary>
public GPModelingData()
{
    // Start out with an empty training data container
    this.m_Training = new GPTrainingData();
}
/// <summary>
/// Decides whether the modeling run should stop. The run is done when any of
/// the following holds, tested in this order:
///   1. the generation limit is enabled and Generation + 1 has reached it,
///   2. the hits criterion is enabled and BestHits equals the number of training rows,
///   3. the raw fitness criterion is enabled and BestFitness lies strictly
///      within +/- GPEnums.RESULTS_TOLERANCE of zero,
///   4. the session has been aborted.
/// </summary>
/// <param name="BestFitness">Fitness of the best program so far</param>
/// <param name="BestHits">Number of hits of the best program so far</param>
/// <param name="Generation">Current generation (presumably zero-based, since Generation + 1 is compared to the limit - confirm with caller)</param>
/// <param name="Training">Training data; only its row count is used</param>
/// <returns>True if the run should terminate, false to keep going</returns>
private bool IsModelRunDone(double BestFitness, int BestHits, int Generation, GPTrainingData Training)
{
    // Short-circuit evaluation keeps the original test order: later
    // criteria are only looked at when the earlier ones did not fire.
    return
        // Generations exhausted
        ((Generation + 1) >= m_Profile.m_maxNumber && m_Profile.m_useMaxNumber)
        // Every training row is a hit
        || (m_Profile.m_useHitsMaxed && BestHits == Training.Rows)
        // Fitness minimized, within tolerance
        || (m_Profile.m_useRawFitness0
            && BestFitness < GPEnums.RESULTS_TOLERANCE
            && BestFitness > -GPEnums.RESULTS_TOLERANCE)
        // User abort
        || m_AbortSession;
}