Beispiel #1
0
        /// <summary>
        /// The default implementation of a judgement if the current readout unit is better than for now the best readout unit
        /// </summary>
        /// <param name="taskType">Type of the task</param>
        /// <param name="current">Current readout unit</param>
        /// <param name="best">For now the best readout unit</param>
        public static bool IsBetter(CommonEnums.TaskType taskType, ReadoutUnit current, ReadoutUnit best)
        {
            switch (taskType)
            {
            case CommonEnums.TaskType.Classification:
                if (current.CombinedBinaryError < best.CombinedBinaryError)
                {
                    return(true);
                }
                else if (current.CombinedBinaryError == best.CombinedBinaryError)
                {
                    if (current.TestingBinErrorStat.TotalErrStat.Sum < best.TestingBinErrorStat.TotalErrStat.Sum)
                    {
                        return(true);
                    }
                    else if (current.TrainingBinErrorStat.TotalErrStat.Sum < best.TrainingBinErrorStat.TotalErrStat.Sum)
                    {
                        return(true);
                    }
                    else if (current.CombinedPrecisionError < best.CombinedPrecisionError)
                    {
                        return(true);
                    }
                }
                return(false);

            default:
                //Forecast task type
                return(current.CombinedPrecisionError < best.CombinedPrecisionError);
            }
        }
Beispiel #2
0
 /// <summary>
 /// Resets the readout layer to its initial untrained state.
 /// </summary>
 public void Reset()
 {
     _predictorFeatureFilterCollection = null;
     _outputFeatureFilterCollection    = null;
     _predictorsMapper      = null;
     _readoutUnitCollection = new ReadoutUnit[ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count];
     for (int i = 0; i < ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count; i++)
     {
         ReadoutUnitSettings cfg = ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection[i];
         _readoutUnitCollection[i] = new ReadoutUnit(i, cfg, ReadoutLayerCfg.TaskDefaultsCfg);
     }
     _oneTakesAllGroupCollection = null;
     if (ReadoutLayerCfg.OneTakesAllGroupsCfg != null)
     {
         _oneTakesAllGroupCollection = new OneTakesAllGroup[ReadoutLayerCfg.OneTakesAllGroupsCfg.OneTakesAllGroupCfgCollection.Count];
         for (int i = 0; i < ReadoutLayerCfg.OneTakesAllGroupsCfg.OneTakesAllGroupCfgCollection.Count; i++)
         {
             OneTakesAllGroupSettings cfg = ReadoutLayerCfg.OneTakesAllGroupsCfg.OneTakesAllGroupCfgCollection[i];
             _oneTakesAllGroupCollection[i] = new OneTakesAllGroup(i, cfg, ReadoutLayerCfg.GetOneTakesAllGroupMemberRUnitIndexes(cfg.Name));
         }
     }
     Trained = false;
     ResetProgressTracking();
     return;
 }
Beispiel #3
0
        //Static methods
        /// <summary>
        /// Builds report string containing information about the regression progress.
        /// It is usually called from the RegressionControl user implementation.
        /// </summary>
        /// <param name="inArgs">>Contains all the necessary information to control the regression.</param>
        /// <param name="bestReadoutUnit">Currently the best readout unit.</param>
        /// <param name="margin">Specifies how many spaces to be at the begining of the row.</param>
        /// <returns>Built text report</returns>
        public static string GetProgressReport(ReadoutUnit.RegressionControlInArgs inArgs,
                                               ReadoutUnit bestReadoutUnit,
                                               int margin = 0
                                               )
        {
            //Build progress text message
            StringBuilder progressText = new StringBuilder();

            progressText.Append(new string(' ', margin));
            progressText.Append("OutputField: ");
            progressText.Append(inArgs.OutputFieldName);
            progressText.Append(", Fold/Attempt/Epoch: ");
            progressText.Append(inArgs.FoldNum.ToString().PadLeft(inArgs.NumOfFolds.ToString().Length, '0') + "/");
            progressText.Append(inArgs.RegrAttemptNumber.ToString().PadLeft(inArgs.RegrMaxAttempts.ToString().Length, '0') + "/");
            progressText.Append(inArgs.Epoch.ToString().PadLeft(inArgs.MaxEpochs.ToString().Length, '0'));
            progressText.Append(", DSet-Sizes: (");
            progressText.Append(inArgs.CurrReadoutUnit.TrainingErrorStat.NumOfSamples.ToString() + ", ");
            progressText.Append(inArgs.CurrReadoutUnit.TestingErrorStat.NumOfSamples.ToString() + ")");
            progressText.Append(", Best-Train: ");
            progressText.Append(bestReadoutUnit.TrainingErrorStat.ArithAvg.ToString("E3", CultureInfo.InvariantCulture));
            if (inArgs.TaskType == CommonEnums.TaskType.Classification)
            {
                //Append binary errors
                progressText.Append("/" + bestReadoutUnit.TrainingBinErrorStat.TotalErrStat.Sum.ToString(CultureInfo.InvariantCulture));
                progressText.Append("/" + bestReadoutUnit.TrainingBinErrorStat.BinValErrStat[1].Sum.ToString(CultureInfo.InvariantCulture));
            }
            progressText.Append(", Best-Test: ");
            progressText.Append(bestReadoutUnit.TestingErrorStat.ArithAvg.ToString("E3", CultureInfo.InvariantCulture));
            if (inArgs.TaskType == CommonEnums.TaskType.Classification)
            {
                //Append binary errors
                progressText.Append("/" + bestReadoutUnit.TestingBinErrorStat.TotalErrStat.Sum.ToString(CultureInfo.InvariantCulture));
                progressText.Append("/" + bestReadoutUnit.TestingBinErrorStat.BinValErrStat[1].Sum.ToString(CultureInfo.InvariantCulture));
            }
            progressText.Append(", Curr-Train: ");
            progressText.Append(inArgs.CurrReadoutUnit.TrainingErrorStat.ArithAvg.ToString("E3", CultureInfo.InvariantCulture));
            if (inArgs.TaskType == CommonEnums.TaskType.Classification)
            {
                //Append binary errors
                progressText.Append("/" + inArgs.CurrReadoutUnit.TrainingBinErrorStat.TotalErrStat.Sum.ToString(CultureInfo.InvariantCulture));
                progressText.Append("/" + inArgs.CurrReadoutUnit.TrainingBinErrorStat.BinValErrStat[1].Sum.ToString(CultureInfo.InvariantCulture));
            }
            progressText.Append(", Curr-Test: ");
            progressText.Append(inArgs.CurrReadoutUnit.TestingErrorStat.ArithAvg.ToString("E3", CultureInfo.InvariantCulture));
            if (inArgs.TaskType == CommonEnums.TaskType.Classification)
            {
                //Append binary errors
                progressText.Append("/" + inArgs.CurrReadoutUnit.TestingBinErrorStat.TotalErrStat.Sum.ToString(CultureInfo.InvariantCulture));
                progressText.Append("/" + inArgs.CurrReadoutUnit.TestingBinErrorStat.BinValErrStat[1].Sum.ToString(CultureInfo.InvariantCulture));
            }
            progressText.Append($" [{bestReadoutUnit.TrainerInfoMessage}]");
            return(progressText.ToString());
        }
Beispiel #4
0
 /// <summary>
 /// The deep copy constructor.
 /// </summary>
 /// <param name="source">Source instance</param>
 public ReadoutUnit(ReadoutUnit source)
 {
     Network = null;
     if (source.Network != null)
     {
         Network = source.Network.DeepClone();
     }
     TrainerInfoMessage = source.TrainerInfoMessage;
     TrainingErrorStat  = null;
     if (source.TrainingErrorStat != null)
     {
         TrainingErrorStat = new BasicStat(source.TrainingErrorStat);
     }
     TrainingBinErrorStat = null;
     if (source.TrainingBinErrorStat != null)
     {
         TrainingBinErrorStat = new BinErrStat(source.TrainingBinErrorStat);
     }
     TestingErrorStat = null;
     if (source.TestingErrorStat != null)
     {
         TestingErrorStat = new BasicStat(source.TestingErrorStat);
     }
     TestingBinErrorStat = null;
     if (source.TestingBinErrorStat != null)
     {
         TestingBinErrorStat = new BinErrStat(source.TestingBinErrorStat);
     }
     OutputWeightsStat = null;
     if (source.OutputWeightsStat != null)
     {
         OutputWeightsStat = new BasicStat(source.OutputWeightsStat);
     }
     CombinedPrecisionError = source.CombinedPrecisionError;
     CombinedBinaryError    = source.CombinedBinaryError;
     return;
 }
Beispiel #5
0
        /// <summary>
        /// Prepares trained readout unit for specified output field and task.
        /// </summary>
        /// <param name="taskType">Type of the task</param>
        /// <param name="readoutUnitIdx">Index of the readout unit (informative only)</param>
        /// <param name="foldNum">Current fold number</param>
        /// <param name="numOfFolds">Total number of the folds</param>
        /// <param name="refBinDistr">Reference bin distribution (if task type is Classification)</param>
        /// <param name="trainingPredictorsCollection">Collection of the predictors for training</param>
        /// <param name="trainingIdealOutputsCollection">Collection of ideal outputs for training. Note that the double array always has only one member.</param>
        /// <param name="testingPredictorsCollection">Collection of the predictors for testing</param>
        /// <param name="testingIdealOutputsCollection">Collection of ideal outputs for testing. Note that the double array always has only one member.</param>
        /// <param name="rand">Random object to be used</param>
        /// <param name="readoutUnitSettings">Readout unit configuration parameters</param>
        /// <param name="controller">Regression controller</param>
        /// <param name="controllerUserObject">An user object to be passed to controller</param>
        /// <returns>Prepared readout unit</returns>
        public static ReadoutUnit CreateTrained(CommonEnums.TaskType taskType,
                                                int readoutUnitIdx,
                                                int foldNum,
                                                int numOfFolds,
                                                BinDistribution refBinDistr,
                                                List <double[]> trainingPredictorsCollection,
                                                List <double[]> trainingIdealOutputsCollection,
                                                List <double[]> testingPredictorsCollection,
                                                List <double[]> testingIdealOutputsCollection,
                                                Random rand,
                                                ReadoutLayerSettings.ReadoutUnitSettings readoutUnitSettings,
                                                RegressionCallbackDelegate controller = null,
                                                Object controllerUserObject           = null
                                                )
        {
            ReadoutUnit bestReadoutUnit = null;
            //Regression attempts
            bool stopRegression = false;

            for (int regrAttemptNumber = 1; regrAttemptNumber <= readoutUnitSettings.RegressionAttempts; regrAttemptNumber++)
            {
                //Create network and trainer
                CreateNetAndTreainer(readoutUnitSettings,
                                     trainingPredictorsCollection,
                                     trainingIdealOutputsCollection,
                                     rand,
                                     out INonRecurrentNetwork net,
                                     out INonRecurrentNetworkTrainer trainer
                                     );
                //Reference binary distribution
                //Iterate training cycles
                for (int epoch = 1; epoch <= readoutUnitSettings.RegressionAttemptEpochs; epoch++)
                {
                    trainer.Iteration();
                    List <double[]> testingComputedOutputsCollection = null;
                    //Compute current error statistics after training iteration
                    ReadoutUnit currReadoutUnit = new ReadoutUnit();
                    currReadoutUnit.Network           = net;
                    currReadoutUnit.TrainingErrorStat = net.ComputeBatchErrorStat(trainingPredictorsCollection, trainingIdealOutputsCollection, out List <double[]> trainingComputedOutputsCollection);
                    if (taskType == CommonEnums.TaskType.Classification)
                    {
                        currReadoutUnit.TrainingBinErrorStat = new BinErrStat(refBinDistr, trainingComputedOutputsCollection, trainingIdealOutputsCollection);
                        currReadoutUnit.CombinedBinaryError  = currReadoutUnit.TrainingBinErrorStat.TotalErrStat.Sum;
                        //currReadoutUnit.CombinedBinaryError = currReadoutUnit.TrainingBinErrorStat.ProportionalErr;
                    }
                    currReadoutUnit.CombinedPrecisionError = currReadoutUnit.TrainingErrorStat.ArithAvg;
                    if (testingPredictorsCollection != null && testingPredictorsCollection.Count > 0)
                    {
                        currReadoutUnit.TestingErrorStat       = net.ComputeBatchErrorStat(testingPredictorsCollection, testingIdealOutputsCollection, out testingComputedOutputsCollection);
                        currReadoutUnit.CombinedPrecisionError = Math.Max(currReadoutUnit.CombinedPrecisionError, currReadoutUnit.TestingErrorStat.ArithAvg);
                        if (taskType == CommonEnums.TaskType.Classification)
                        {
                            currReadoutUnit.TestingBinErrorStat = new BinErrStat(refBinDistr, testingComputedOutputsCollection, testingIdealOutputsCollection);
                            currReadoutUnit.CombinedBinaryError = Math.Max(currReadoutUnit.CombinedBinaryError, currReadoutUnit.TestingBinErrorStat.TotalErrStat.Sum);
                            //currReadoutUnit.CombinedBinaryError = Math.Max(currReadoutUnit.CombinedBinaryError, currReadoutUnit.TestingBinErrorStat.ProportionalErr);
                        }
                    }
                    //Current results processing
                    bool better = false, stopTrainingCycle = false;
                    //Result first initialization
                    if (bestReadoutUnit == null)
                    {
                        //Adopt current regression results
                        bestReadoutUnit = currReadoutUnit.DeepClone();
                    }
                    //Perform call back if it is defined
                    if (controller != null)
                    {
                        //Evaluation of the improvement is driven externally
                        RegressionControlInArgs cbIn = new RegressionControlInArgs
                        {
                            TaskType                          = taskType,
                            ReadoutUnitIdx                    = readoutUnitIdx,
                            OutputFieldName                   = readoutUnitSettings.Name,
                            FoldNum                           = foldNum,
                            NumOfFolds                        = numOfFolds,
                            RegrAttemptNumber                 = regrAttemptNumber,
                            RegrMaxAttempts                   = readoutUnitSettings.RegressionAttempts,
                            Epoch                             = epoch,
                            MaxEpochs                         = readoutUnitSettings.RegressionAttemptEpochs,
                            TrainingPredictorsCollection      = trainingPredictorsCollection,
                            TrainingIdealOutputsCollection    = trainingIdealOutputsCollection,
                            TrainingComputedOutputsCollection = trainingComputedOutputsCollection,
                            TestingPredictorsCollection       = testingPredictorsCollection,
                            TestingIdealOutputsCollection     = testingIdealOutputsCollection,
                            TestingComputedOutputsCollection  = testingComputedOutputsCollection,
                            CurrReadoutUnit                   = currReadoutUnit,
                            BestReadoutUnit                   = bestReadoutUnit,
                            UserObject                        = controllerUserObject
                        };
                        //Call external controller
                        RegressionControlOutArgs cbOut = controller(cbIn);
                        //Pick up results
                        better            = cbOut.CurrentIsBetter;
                        stopTrainingCycle = cbOut.StopCurrentAttempt;
                        stopRegression    = cbOut.StopRegression;
                    }
                    else
                    {
                        //Default implementation
                        better = IsBetter(taskType, currReadoutUnit, bestReadoutUnit);
                    }
                    //Best?
                    if (better)
                    {
                        //Adopt current regression results
                        bestReadoutUnit = currReadoutUnit.DeepClone();
                    }
                    //Training stop conditions
                    if (stopTrainingCycle || stopRegression)
                    {
                        break;
                    }
                }//epoch
                //Regression stop conditions
                if (stopRegression)
                {
                    break;
                }
            }//regrAttemptNumber
            //Create statistics of the best network weights
            bestReadoutUnit.OutputWeightsStat = bestReadoutUnit.Network.ComputeWeightsStat();
            return(bestReadoutUnit);
        }
Beispiel #6
0
        /// <summary>
        /// Builds readout layer.
        /// Prepares prediction clusters containing trained readout units.
        /// </summary>
        /// <param name="predictorsCollection">Collection of predictors</param>
        /// <param name="idealOutputsCollection">Collection of desired outputs related to predictors</param>
        /// <param name="regressionController">Regression controller delegate</param>
        /// <param name="regressionControllerData">An user object</param>
        /// <returns>Returned ValidationBundle is something like a protocol.
        /// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values.
        /// This is the pesimistic approach. Real results on unseen data could be better due to the clustering synergy.
        /// </returns>
        public ValidationBundle Build(List <double[]> predictorsCollection,
                                      List <double[]> idealOutputsCollection,
                                      ReadoutUnit.RegressionCallbackDelegate regressionController,
                                      Object regressionControllerData
                                      )
        {
            //Random object
            Random rand = new Random(0);
            //Allocation of computed and ideal vectors for validation bundle
            List <double[]> validationComputedVectorCollection = new List <double[]>(idealOutputsCollection.Count);
            List <double[]> validationIdealVectorCollection    = new List <double[]>(idealOutputsCollection.Count);

            for (int i = 0; i < idealOutputsCollection.Count; i++)
            {
                validationComputedVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
                validationIdealVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
            }
            //Test dataset size
            if (_settings.TestDataRatio > MaxRatioOfTestData)
            {
                throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);

            if (testDataSetLength < MinLengthOfTestDataset)
            {
                throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            //Number of folds
            int numOfFolds = _settings.NumOfFolds;

            if (numOfFolds <= 0)
            {
                //Auto setup
                numOfFolds = idealOutputsCollection.Count / testDataSetLength;
                if (numOfFolds > MaxNumOfFolds)
                {
                    numOfFolds = MaxNumOfFolds;
                }
            }
            //Create shuffled copy of the data
            TimeSeriesBundle shuffledData = new TimeSeriesBundle(predictorsCollection, idealOutputsCollection);

            shuffledData.Shuffle(rand);
            //Data inspection, preparation of datasets and training of ReadoutUnits
            //Clusters of readout units (one cluster for each output field)
            for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
            {
                _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
                List <double[]> idealValueCollection = new List <double[]>(idealOutputsCollection.Count);
                BinDistribution refBinDistr          = null;
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Reference binary distribution is relevant only for classification task
                    refBinDistr = new BinDistribution(_dataRange.Mid);
                }
                //Transformation to a single value vectors and data analysis
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    double[] value = new double[1];
                    value[0] = idealVector[clusterIdx];
                    idealValueCollection.Add(value);
                    if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                    {
                        //Reference binary distribution is relevant only for classification task
                        refBinDistr.Update(value);
                    }
                }
                List <TimeSeriesBundle> subBundleCollection = null;
                //Datasets preparation is depending on the task type
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Classification task
                    subBundleCollection = DivideSamplesForClassificationTask(shuffledData.InputVectorCollection,
                                                                             idealValueCollection,
                                                                             refBinDistr,
                                                                             testDataSetLength
                                                                             );
                }
                else
                {
                    //Forecast task
                    subBundleCollection = DivideSamplesForForecastTask(shuffledData.InputVectorCollection,
                                                                       idealValueCollection,
                                                                       testDataSetLength
                                                                       );
                }
                //Best predicting unit per each fold in the cluster.
                ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
                int arrayPos             = 0;
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    //Build training samples
                    List <double[]> trainingPredictorsCollection = new List <double[]>();
                    List <double[]> trainingIdealValueCollection = new List <double[]>();
                    for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
                    {
                        if (bundleIdx != foldIdx)
                        {
                            trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                            trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                        }
                    }
                    //Call training regression to get the best fold's readout unit.
                    //The best unit becomes to be the predicting cluster member.
                    _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                        clusterIdx,
                                                                                        foldIdx + 1,
                                                                                        numOfFolds,
                                                                                        refBinDistr,
                                                                                        trainingPredictorsCollection,
                                                                                        trainingIdealValueCollection,
                                                                                        subBundleCollection[foldIdx].InputVectorCollection,
                                                                                        subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                        rand,
                                                                                        _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                        regressionController,
                                                                                        regressionControllerData
                                                                                        );
                    //Cluster error statistics & data for validation bundle (pesimistic approach)
                    for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
                    {
                        double value = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                        ces.Update(value, subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                        validationIdealVectorCollection[arrayPos][clusterIdx]    = subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0];
                        validationComputedVectorCollection[arrayPos][clusterIdx] = value;
                        ++arrayPos;
                    }
                } //foldIdx
                _clusterErrStatisticsCollection.Add(ces);
            }     //clusterIdx
            //Validation bundle is returned.
            return(new ValidationBundle(validationComputedVectorCollection, validationIdealVectorCollection));
        }
Beispiel #7
0
        /// <summary>
        /// Builds trained readout layer.
        /// </summary>
        /// <param name="dataBundle">Collection of input predictors and associated desired output values</param>
        /// <param name="predictorsMapper">Optional specific mapping of predictors to readout units</param>
        /// <param name="controller">Optional external regression controller</param>
        /// <returns>Results of the regression</returns>
        public RegressionOverview Build(VectorBundle dataBundle,
                                        PredictorsMapper predictorsMapper = null,
                                        TrainedNetworkBuilder.RegressionControllerDelegate controller = null
                                        )
        {
            //Basic checks
            int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
            int numOfOutputs    = dataBundle.OutputVectorCollection[0].Length;

            if (numOfPredictors == 0)
            {
                throw new InvalidOperationException($"Number of predictors must be greater tham 0.");
            }
            if (numOfOutputs != Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count)
            {
                throw new InvalidOperationException($"Incorrect length of output vectors.");
            }
            //Predictors mapper (specified or default)
            _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
            //Allocation and preparation of feature filters
            //Predictors
            _predictorFeatureFilterCollection = new FeatureFilterBase[numOfPredictors];
            Parallel.For(0, _predictorFeatureFilterCollection.Length, nrmIdx =>
            {
                _predictorFeatureFilterCollection[nrmIdx] = new RealFeatureFilter(DataRange, true, true);
                for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
                {
                    //Adjust filter
                    _predictorFeatureFilterCollection[nrmIdx].Update(dataBundle.InputVectorCollection[pairIdx][nrmIdx]);
                }
            });
            //Output values
            _outputFeatureFilterCollection = new FeatureFilterBase[numOfOutputs];
            Parallel.For(0, _outputFeatureFilterCollection.Length, nrmIdx =>
            {
                _outputFeatureFilterCollection[nrmIdx] = FeatureFilterFactory.Create(DataRange, Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[nrmIdx].TaskCfg.FeatureFilterCfg);
                for (int pairIdx = 0; pairIdx < dataBundle.OutputVectorCollection.Count; pairIdx++)
                {
                    //Adjust output normalizer
                    _outputFeatureFilterCollection[nrmIdx].Update(dataBundle.OutputVectorCollection[pairIdx][nrmIdx]);
                }
            });
            //Data normalization
            //Allocation
            double[][] normalizedPredictorsCollection   = new double[dataBundle.InputVectorCollection.Count][];
            double[][] normalizedIdealOutputsCollection = new double[dataBundle.OutputVectorCollection.Count][];
            //Normalization
            Parallel.For(0, dataBundle.InputVectorCollection.Count, pairIdx =>
            {
                //Predictors
                double[] predictors = new double[numOfPredictors];
                for (int i = 0; i < numOfPredictors; i++)
                {
                    if (_predictorsMapper.PredictorGeneralSwitchCollection[i])
                    {
                        predictors[i] = _predictorFeatureFilterCollection[i].ApplyFilter(dataBundle.InputVectorCollection[pairIdx][i]);
                    }
                    else
                    {
                        predictors[i] = double.NaN;
                    }
                }
                normalizedPredictorsCollection[pairIdx] = predictors;
                //Outputs
                double[] outputs = new double[numOfOutputs];
                for (int i = 0; i < numOfOutputs; i++)
                {
                    outputs[i] = _outputFeatureFilterCollection[i].ApplyFilter(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
                normalizedIdealOutputsCollection[pairIdx] = outputs;
            });

            //Random object initialization
            Random rand = new Random(0);
            //Create shuffled copy of the data
            VectorBundle shuffledData = new VectorBundle(normalizedPredictorsCollection, normalizedIdealOutputsCollection);

            shuffledData.Shuffle(rand);

            //Building of readout units
            for (int unitIdx = 0; unitIdx < Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count; unitIdx++)
            {
                List <double[]> idealValueCollection = new List <double[]>(shuffledData.OutputVectorCollection.Count);
                //Transformation of ideal vectors to a single value vectors
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    double[] value = new double[1];
                    value[0] = idealVector[unitIdx];
                    idealValueCollection.Add(value);
                }
                List <double[]> readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[unitIdx].Name, shuffledData.InputVectorCollection);
                VectorBundle    readoutUnitDataBundle            = new VectorBundle(readoutUnitInputVectorCollection, idealValueCollection);
                TrainedNetworkClusterBuilder readoutUnitBuilder  = new TrainedNetworkClusterBuilder(Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[unitIdx].Name,
                                                                                                    Settings.GetReadoutUnitNetworksCollection(unitIdx),
                                                                                                    DataRange,
                                                                                                    Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[unitIdx].TaskCfg.Type == ReadoutUnit.TaskType.Classification ? BinBorder : double.NaN,
                                                                                                    rand,
                                                                                                    controller
                                                                                                    );
                //Register notification
                readoutUnitBuilder.RegressionEpochDone += OnRegressionEpochDone;
                //Build trained readout unit. Trained unit becomes to be the predicting cluster member
                _readoutUnitCollection[unitIdx] = new ReadoutUnit(unitIdx,
                                                                  readoutUnitBuilder.Build(readoutUnitDataBundle,
                                                                                           Settings.TestDataRatio,
                                                                                           Settings.Folds,
                                                                                           Settings.Repetitions,
                                                                                           new FeatureFilterBase[] { _outputFeatureFilterCollection[unitIdx] }
                                                                                           )
                                                                  );
            }//unitIdx

            //Readout layer is trained and ready
            Trained = true;
            return(new RegressionOverview(ReadoutUnitErrStatCollection));
        }
Beispiel #8
0
 /// <summary>
 /// The deep copy constructor.
 /// </summary>
 /// <param name="source">Source instance</param>
 public ReadoutUnit(ReadoutUnit source)
 {
     Index          = source.Index;
     NetworkCluster = source.NetworkCluster.DeepClone();
     return;
 }
Beispiel #9
0
        /// <summary>
        /// Builds readout layer.
        /// Prepares prediction clusters containing trained readout units.
        /// </summary>
        /// <param name="dataBundle">Collection of input predictors and associated desired output values</param>
        /// <param name="regressionController">Regression controller delegate</param>
        /// <param name="regressionControllerData">An user object</param>
        /// <param name="predictorsMapper">Optional specific mapping of predictors to readout units</param>
        /// <returns>Returned ResultComparativeBundle is something like a protocol.
        /// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values.
        /// This is the pesimistic approach. Real results on unseen data could be better due to the clustering synergy.
        /// </returns>
        public ResultComparativeBundle Build(VectorBundle dataBundle,
                                             ReadoutUnit.RegressionCallbackDelegate regressionController,
                                             Object regressionControllerData,
                                             PredictorsMapper predictorsMapper = null
                                             )
        {
            //Basic checks
            int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
            int numOfOutputs    = dataBundle.OutputVectorCollection[0].Length;

            if (numOfPredictors == 0)
            {
                throw new Exception("Number of predictors must be greater tham 0.");
            }
            if (numOfOutputs != _settings.ReadoutUnitCfgCollection.Count)
            {
                throw new Exception("Incorrect number of ideal output values in the vector.");
            }

            //Normalization of predictors and output data collections
            //Allocation of normalizers
            _predictorNormalizerCollection = new Normalizer[numOfPredictors];
            for (int i = 0; i < numOfPredictors; i++)
            {
                _predictorNormalizerCollection[i] = new Normalizer(DataRange, NormalizerDefaultReserve, true, false);
            }
            _outputNormalizerCollection = new Normalizer[numOfOutputs];
            for (int i = 0; i < numOfOutputs; i++)
            {
                bool classificationTask = (_settings.ReadoutUnitCfgCollection[i].TaskType == CommonEnums.TaskType.Classification);
                _outputNormalizerCollection[i] = new Normalizer(DataRange,
                                                                classificationTask ? 0 : NormalizerDefaultReserve,
                                                                classificationTask ? false : true,
                                                                false
                                                                );
            }
            //Normalizers adjustment
            for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
            {
                //Checks
                if (dataBundle.InputVectorCollection[pairIdx].Length != numOfPredictors)
                {
                    throw new Exception("Inconsistent number of predictors in the predictors collection.");
                }
                if (dataBundle.OutputVectorCollection[pairIdx].Length != numOfOutputs)
                {
                    throw new Exception("Inconsistent number of values in the ideal values collection.");
                }
                //Adjust predictors normalizers
                for (int i = 0; i < numOfPredictors; i++)
                {
                    _predictorNormalizerCollection[i].Adjust(dataBundle.InputVectorCollection[pairIdx][i]);
                }
                //Adjust outputs normalizers
                for (int i = 0; i < numOfOutputs; i++)
                {
                    _outputNormalizerCollection[i].Adjust(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
            }
            //Data normalization
            //Allocation
            List <double[]> predictorsCollection   = new List <double[]>(dataBundle.InputVectorCollection.Count);
            List <double[]> idealOutputsCollection = new List <double[]>(dataBundle.OutputVectorCollection.Count);

            //Normalization
            for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
            {
                //Predictors
                double[] predictors = new double[numOfPredictors];
                for (int i = 0; i < numOfPredictors; i++)
                {
                    predictors[i] = _predictorNormalizerCollection[i].Normalize(dataBundle.InputVectorCollection[pairIdx][i]);
                }
                predictorsCollection.Add(predictors);
                //Outputs
                double[] outputs = new double[numOfOutputs];
                for (int i = 0; i < numOfOutputs; i++)
                {
                    outputs[i] = _outputNormalizerCollection[i].Normalize(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
                idealOutputsCollection.Add(outputs);
            }
            //Data processing
            //Random object initialization
            Random rand = new Random(0);

            //Predictors mapper (specified or default)
            _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
            //Allocation of computed and ideal vectors for result comparative bundle
            List <double[]> validationComputedVectorCollection = new List <double[]>(idealOutputsCollection.Count);
            List <double[]> validationIdealVectorCollection    = new List <double[]>(idealOutputsCollection.Count);

            for (int i = 0; i < idealOutputsCollection.Count; i++)
            {
                validationComputedVectorCollection.Add(new double[numOfOutputs]);
                validationIdealVectorCollection.Add(new double[numOfOutputs]);
            }
            //Test dataset size
            if (_settings.TestDataRatio > MaxRatioOfTestData)
            {
                throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);

            if (testDataSetLength < MinLengthOfTestDataset)
            {
                throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            //Number of folds
            int numOfFolds = _settings.NumOfFolds;

            if (numOfFolds <= 0)
            {
                //Auto setup
                numOfFolds = idealOutputsCollection.Count / testDataSetLength;
                if (numOfFolds > MaxNumOfFolds)
                {
                    numOfFolds = MaxNumOfFolds;
                }
            }
            //Create shuffled copy of the data
            VectorBundle shuffledData = new VectorBundle(predictorsCollection, idealOutputsCollection);

            shuffledData.Shuffle(rand);
            //Data inspection, preparation of datasets and training of ReadoutUnits
            //Clusters of readout units (one cluster for each output field)
            for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
            {
                _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
                List <double[]> idealValueCollection = new List <double[]>(idealOutputsCollection.Count);
                BinDistribution refBinDistr          = null;
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Reference binary distribution is relevant only for classification task
                    refBinDistr = new BinDistribution(DataRange.Mid);
                }
                //Transformation to a single value vectors and data analysis
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    double[] value = new double[1];
                    value[0] = idealVector[clusterIdx];
                    idealValueCollection.Add(value);
                    if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                    {
                        //Reference binary distribution is relevant only for classification task
                        refBinDistr.Update(value);
                    }
                }
                List <VectorBundle> subBundleCollection = null;
                List <double[]>     readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(_settings.ReadoutUnitCfgCollection[clusterIdx].Name, shuffledData.InputVectorCollection);
                //Datasets preparation is depending on the task type
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Classification task
                    subBundleCollection = DivideSamplesForClassificationTask(readoutUnitInputVectorCollection,
                                                                             idealValueCollection,
                                                                             refBinDistr,
                                                                             testDataSetLength
                                                                             );
                }
                else
                {
                    //Forecast task
                    subBundleCollection = DivideSamplesForForecastTask(readoutUnitInputVectorCollection,
                                                                       idealValueCollection,
                                                                       testDataSetLength
                                                                       );
                }
                //Find best unit per each fold in the cluster.
                ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
                int arrayPos             = 0;
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    //Build training samples
                    List <double[]> trainingPredictorsCollection = new List <double[]>();
                    List <double[]> trainingIdealValueCollection = new List <double[]>();
                    for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
                    {
                        if (bundleIdx != foldIdx)
                        {
                            trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                            trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                        }
                    }
                    //Call training regression to get the best fold's readout unit.
                    //The best unit becomes to be the predicting cluster member.
                    _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                        clusterIdx,
                                                                                        foldIdx + 1,
                                                                                        numOfFolds,
                                                                                        refBinDistr,
                                                                                        trainingPredictorsCollection,
                                                                                        trainingIdealValueCollection,
                                                                                        subBundleCollection[foldIdx].InputVectorCollection,
                                                                                        subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                        rand,
                                                                                        _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                        regressionController,
                                                                                        regressionControllerData
                                                                                        );
                    //Cluster error statistics & data for validation bundle (pesimistic approach)
                    for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
                    {
                        double nrmComputedValue = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                        double natComputedValue = _outputNormalizerCollection[clusterIdx].Naturalize(nrmComputedValue);
                        double natIdealValue    = _outputNormalizerCollection[clusterIdx].Naturalize(subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                        ces.Update(nrmComputedValue,
                                   subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0],
                                   natComputedValue,
                                   natIdealValue);
                        validationIdealVectorCollection[arrayPos][clusterIdx]    = natIdealValue;
                        validationComputedVectorCollection[arrayPos][clusterIdx] = natComputedValue;
                        ++arrayPos;
                    }
                } //foldIdx
                _clusterErrStatisticsCollection.Add(ces);
            }     //clusterIdx
            //Validation bundle is returned.
            return(new ResultComparativeBundle(validationComputedVectorCollection, validationIdealVectorCollection));
        }