/// <summary>
/// The default judgement whether the current readout unit outperforms the so far best readout unit.
/// </summary>
/// <param name="taskType">Type of the task</param>
/// <param name="current">Current readout unit</param>
/// <param name="best">For now the best readout unit</param>
public static bool IsBetter(CommonEnums.TaskType taskType, ReadoutUnit current, ReadoutUnit best)
{
    //Prediction or hybrid task type: the combined precision error decides directly
    if (taskType != CommonEnums.TaskType.Classification)
    {
        return (current.CombinedPrecisionError < best.CombinedPrecisionError);
    }
    //Classification task: primary criterion is the combined binary error
    if (current.CombinedBinaryError < best.CombinedBinaryError)
    {
        return (true);
    }
    if (current.CombinedBinaryError == best.CombinedBinaryError)
    {
        //Tie on the combined binary error: secondary criteria in priority order
        if (current.TestingBinErrorStat.TotalErrStat.Sum < best.TestingBinErrorStat.TotalErrStat.Sum)
        {
            return (true);
        }
        if (current.TrainingBinErrorStat.TotalErrStat.Sum < best.TrainingBinErrorStat.TotalErrStat.Sum)
        {
            return (true);
        }
        if (current.CombinedPrecisionError < best.CombinedPrecisionError)
        {
            return (true);
        }
    }
    //Current unit is not better
    return (false);
}
/// <summary>
/// The deep copy constructor.
/// </summary>
/// <param name="source">Source instance</param>
public ReadoutUnit(ReadoutUnit source)
{
    //Deep clone of the network (kept null when the source has none)
    Network = source.Network == null ? null : source.Network.DeepClone();
    //Deep copies of the error statistics (each kept null when the source has none)
    TrainingErrorStat = source.TrainingErrorStat == null ? null : new BasicStat(source.TrainingErrorStat);
    TrainingBinErrorStat = source.TrainingBinErrorStat == null ? null : new BinErrStat(source.TrainingBinErrorStat);
    TestingErrorStat = source.TestingErrorStat == null ? null : new BasicStat(source.TestingErrorStat);
    TestingBinErrorStat = source.TestingBinErrorStat == null ? null : new BinErrStat(source.TestingBinErrorStat);
    OutputWeightsStat = source.OutputWeightsStat == null ? null : new BasicStat(source.OutputWeightsStat);
    //Plain value copies
    CombinedPrecisionError = source.CombinedPrecisionError;
    CombinedBinaryError = source.CombinedBinaryError;
    return;
}
/// <summary>
/// Prepares trained readout unit for specified output field and task.
/// Runs up to RegressionAttempts independent trainings, each of up to RegressionAttemptEpochs
/// iterations, and returns a deep clone of the best unit found (default judgement IsBetter,
/// or the external controller callback when one is given).
/// </summary>
/// <param name="taskType">Type of the task</param>
/// <param name="readoutUnitIdx">Index of the readout unit (informative only)</param>
/// <param name="outputFieldName">Name of the corresponding output field (informative only)</param>
/// <param name="foldNum">Current fold number</param>
/// <param name="numOfFolds">Total number of the folds</param>
/// <param name="refBinDistr">Reference bin distribution (if task type is Classification)</param>
/// <param name="trainingPredictorsCollection">Collection of the predictors for training</param>
/// <param name="trainingIdealOutputsCollection">Collection of ideal outputs for training. Note that the double array always has only one member.</param>
/// <param name="testingPredictorsCollection">Collection of the predictors for testing</param>
/// <param name="testingIdealOutputsCollection">Collection of ideal outputs for testing. Note that the double array always has only one member.</param>
/// <param name="rand">Random object to be used</param>
/// <param name="readoutUnitSettings">Readout unit configuration parameters</param>
/// <param name="controller">Regression controller</param>
/// <param name="controllerUserObject">An user object to be passed to controller</param>
/// <returns>Prepared readout unit</returns>
public static ReadoutUnit CreateTrained(CommonEnums.TaskType taskType,
                                        int readoutUnitIdx,
                                        string outputFieldName,
                                        int foldNum,
                                        int numOfFolds,
                                        BinDistribution refBinDistr,
                                        List<double[]> trainingPredictorsCollection,
                                        List<double[]> trainingIdealOutputsCollection,
                                        List<double[]> testingPredictorsCollection,
                                        List<double[]> testingIdealOutputsCollection,
                                        System.Random rand,
                                        ReadoutLayerSettings.ReadoutUnitSettings readoutUnitSettings,
                                        RegressionCallbackDelegate controller = null,
                                        Object controllerUserObject = null
                                        )
{
    ReadoutUnit bestReadoutUnit = new ReadoutUnit();
    //Regression attempts
    bool stopRegression = false;
    for (int regrAttemptNumber = 1; regrAttemptNumber <= readoutUnitSettings.RegressionAttempts; regrAttemptNumber++)
    {
        //Create network and trainer (fresh pair for every attempt)
        INonRecurrentNetwork net;
        INonRecurrentNetworkTrainer trainer;
        CreateNetAndTreainer(readoutUnitSettings,
                             trainingPredictorsCollection,
                             trainingIdealOutputsCollection,
                             rand,
                             out net,
                             out trainer
                             );
        //Reference binary distribution
        //Iterate training cycles
        for (int epoch = 1; epoch <= readoutUnitSettings.RegressionAttemptEpochs; epoch++)
        {
            trainer.Iteration();
            List<double[]> trainingComputedOutputsCollection = null;
            List<double[]> testingComputedOutputsCollection = null;
            //Compute current error statistics after training iteration
            //currReadoutUnit shares the live network instance; it is deep-cloned below before adoption
            ReadoutUnit currReadoutUnit = new ReadoutUnit();
            currReadoutUnit.Network = net;
            currReadoutUnit.TrainingErrorStat = net.ComputeBatchErrorStat(trainingPredictorsCollection, trainingIdealOutputsCollection, out trainingComputedOutputsCollection);
            if (taskType == CommonEnums.TaskType.Classification)
            {
                currReadoutUnit.TrainingBinErrorStat = new BinErrStat(refBinDistr, trainingComputedOutputsCollection, trainingIdealOutputsCollection);
                currReadoutUnit.CombinedBinaryError = currReadoutUnit.TrainingBinErrorStat.TotalErrStat.Sum;
                //currReadoutUnit.CombinedBinaryError = currReadoutUnit.TrainingBinErrorStat.ProportionalErr;
            }
            currReadoutUnit.CombinedPrecisionError = currReadoutUnit.TrainingErrorStat.ArithAvg;
            if (testingPredictorsCollection != null && testingPredictorsCollection.Count > 0)
            {
                //Combined errors take the worse of the training and testing figures
                currReadoutUnit.TestingErrorStat = net.ComputeBatchErrorStat(testingPredictorsCollection, testingIdealOutputsCollection, out testingComputedOutputsCollection);
                currReadoutUnit.CombinedPrecisionError = Math.Max(currReadoutUnit.CombinedPrecisionError, currReadoutUnit.TestingErrorStat.ArithAvg);
                if (taskType == CommonEnums.TaskType.Classification)
                {
                    currReadoutUnit.TestingBinErrorStat = new BinErrStat(refBinDistr, testingComputedOutputsCollection, testingIdealOutputsCollection);
                    currReadoutUnit.CombinedBinaryError = Math.Max(currReadoutUnit.CombinedBinaryError, currReadoutUnit.TestingBinErrorStat.TotalErrStat.Sum);
                    //currReadoutUnit.CombinedBinaryError = Math.Max(currReadoutUnit.CombinedBinaryError, currReadoutUnit.TestingBinErrorStat.ProportionalErr);
                }
            }
            //Current results processing
            bool better = false, stopTrainingCycle = false;
            //Result first initialization
            //NOTE(review): -1 is presumably the CombinedPrecisionError sentinel set by the
            //parameterless constructor to mark a not-yet-initialized best unit - TODO confirm
            if (bestReadoutUnit.CombinedPrecisionError == -1)
            {
                //Adopt current regression results
                bestReadoutUnit = currReadoutUnit.DeepClone();
            }
            //Perform call back if it is defined
            RegressionControlOutArgs cbOut = null;
            if (controller != null)
            {
                //Evaluation of the improvement is driven externally
                RegressionControlInArgs cbIn = new RegressionControlInArgs();
                cbIn.TaskType = taskType;
                cbIn.ReadoutUnitIdx = readoutUnitIdx;
                cbIn.OutputFieldName = outputFieldName;
                cbIn.FoldNum = foldNum;
                cbIn.NumOfFolds = numOfFolds;
                cbIn.RegrAttemptNumber = regrAttemptNumber;
                cbIn.RegrMaxAttempts = readoutUnitSettings.RegressionAttempts;
                cbIn.Epoch = epoch;
                cbIn.MaxEpochs = readoutUnitSettings.RegressionAttemptEpochs;
                cbIn.TrainingPredictorsCollection = trainingPredictorsCollection;
                cbIn.TrainingIdealOutputsCollection = trainingIdealOutputsCollection;
                cbIn.TrainingComputedOutputsCollection = trainingComputedOutputsCollection;
                cbIn.TestingPredictorsCollection = testingPredictorsCollection;
                cbIn.TestingIdealOutputsCollection = testingIdealOutputsCollection;
                cbIn.TestingComputedOutputsCollection = testingComputedOutputsCollection;
                cbIn.CurrReadoutUnit = currReadoutUnit;
                cbIn.BestReadoutUnit = bestReadoutUnit;
                cbIn.UserObject = controllerUserObject;
                cbOut = controller(cbIn);
                //Controller fully decides improvement and both stop conditions
                better = cbOut.CurrentIsBetter;
                stopTrainingCycle = cbOut.StopCurrentAttempt;
                stopRegression = cbOut.StopRegression;
            }
            else
            {
                //Default implementation
                better = IsBetter(taskType, currReadoutUnit, bestReadoutUnit);
            }
            //Best?
            if (better)
            {
                //Adopt current regression results
                bestReadoutUnit = currReadoutUnit.DeepClone();
            }
            //Training stop conditions
            if (stopTrainingCycle || stopRegression)
            {
                break;
            }
        }
        //Regression stop conditions
        if (stopRegression)
        {
            break;
        }
    }
    //Create statistics of the best network weights
    bestReadoutUnit.OutputWeightsStat = bestReadoutUnit.Network.ComputeWeightsStat();
    return (bestReadoutUnit);
}
/// <summary>
/// Builds readout layer.
/// Prepares prediction clusters containing trained readout units.
/// Performs x-fold cross-validation: for each output field (cluster) the shuffled data is
/// divided into numOfFolds sub-bundles and one readout unit is trained per fold, using the
/// remaining folds as the training set.
/// </summary>
/// <param name="predictorsCollection">Collection of all available predictors</param>
/// <param name="idealOutputsCollection">Collection of all available desired outputs related to predictors</param>
/// <param name="regressionController">Regression controller delegate</param>
/// <param name="regressionControllerData">An user object</param>
/// <returns>Validation bundle pairing the computed vectors with the ideal outputs</returns>
public ValidationBundle Build(List<double[]> predictorsCollection,
                              List<double[]> idealOutputsCollection,
                              ReadoutUnit.RegressionCallbackDelegate regressionController,
                              Object regressionControllerData
                              )
{
    //Allocation of computed vectors for validation bundle
    List<double[]> computedVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    for (int i = 0; i < idealOutputsCollection.Count; i++)
    {
        computedVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
    }
    //Test dataset size
    //NOTE(review): the ArgumentException paramName "TestDataSetSize" matches no actual
    //parameter of this method - presumably it names the originating settings value; verify
    if (_settings.TestDataRatio > MaxRatioOfTestData)
    {
        throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio);
    if (testDataSetLength < MinLengthOfTestDataset)
    {
        throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    //Number of folds
    int numOfFolds = _settings.NumOfFolds;
    if (numOfFolds <= 0)
    {
        //Auto setup: as many folds as test-sized partitions fit into the data, capped by MaxNumOfFolds
        numOfFolds = idealOutputsCollection.Count / testDataSetLength;
        if (numOfFolds > MaxNumOfFolds)
        {
            numOfFolds = MaxNumOfFolds;
        }
    }
    //Create shuffled copy of the data
    TimeSeriesBundle shuffledData = new TimeSeriesBundle(predictorsCollection, idealOutputsCollection);
    shuffledData.Shuffle(_rand);
    //Data inspection, preparation of datasets and training of ReadoutUnits
    //Clusters of readout units (one cluster for each output field)
    for (int clusterIdx = 0; clusterIdx < _settings.OutputFieldNameCollection.Count; clusterIdx++)
    {
        _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
        List<double[]> idealValueCollection = new List<double[]>(idealOutputsCollection.Count);
        BinDistribution refBinDistr = null;
        if (_taskType == CommonEnums.TaskType.Classification)
        {
            //Reference binary distribution is relevant only for classification task
            refBinDistr = new BinDistribution();
        }
        //Transformation to a single value vectors and data analysis
        foreach (double[] idealVector in shuffledData.OutputVectorCollection)
        {
            //Pick this cluster's component out of the multi-field ideal vector
            double[] value = new double[1];
            value[0] = idealVector[clusterIdx];
            idealValueCollection.Add(value);
            if (_taskType == CommonEnums.TaskType.Classification)
            {
                //Reference binary distribution is relevant only for classification task
                refBinDistr.Update(value);
            }
        }
        List<TimeSeriesBundle> subBundleCollection = null;
        //Datasets preparation is depending on the task type
        if (_taskType == CommonEnums.TaskType.Classification)
        {
            //Classification task
            subBundleCollection = DivideSamplesForClassification(shuffledData.InputVectorCollection,
                                                                 idealValueCollection,
                                                                 refBinDistr,
                                                                 testDataSetLength
                                                                 );
        }
        else
        {
            //Prediction or Hybrid task
            subBundleCollection = DivideSamplesForPredictionOrHybrid(shuffledData.InputVectorCollection,
                                                                     idealValueCollection,
                                                                     testDataSetLength
                                                                     );
        }
        //Readout units in the cluster
        for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
        {
            //Build training samples: every sub-bundle except the current fold (the fold itself is the test set)
            List<double[]> trainingPredictorsCollection = new List<double[]>();
            List<double[]> trainingIdealValueCollection = new List<double[]>();
            for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
            {
                if (bundleIdx != foldIdx)
                {
                    trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                    trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                }
            }
            //Call training regression for the single readout unit
            _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_taskType,
                                                                                clusterIdx,
                                                                                _settings.OutputFieldNameCollection[clusterIdx],
                                                                                foldIdx + 1,
                                                                                numOfFolds,
                                                                                refBinDistr,
                                                                                trainingPredictorsCollection,
                                                                                trainingIdealValueCollection,
                                                                                subBundleCollection[foldIdx].InputVectorCollection,
                                                                                subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                _rand,
                                                                                _settings.ReadoutUnitCfg,
                                                                                regressionController,
                                                                                regressionControllerData
                                                                                );
        }//foldIdx
        //Cluster error statistics & data for validation bundle
        //Compute uses the just-trained cluster, so the whole dataset is re-evaluated here
        ClusterErrStatistics ces = new ClusterErrStatistics(_taskType, numOfFolds, refBinDistr);
        for (int sampleIdx = 0; sampleIdx < idealOutputsCollection.Count; sampleIdx++)
        {
            double value = Compute(predictorsCollection[sampleIdx], clusterIdx);
            ces.Update(value, idealOutputsCollection[sampleIdx][clusterIdx]);
            computedVectorCollection[sampleIdx][clusterIdx] = value;
        }
        _clusterErrStatisticsCollection.Add(ces);
    }//clusterIdx
    return (new ValidationBundle(computedVectorCollection, idealOutputsCollection));
}