コード例 #1
0
        /// <summary>
        /// Splits this bundle into a collection of smaller folds (sub-bundles) suitable for cross-validation.
        /// Every sample of this bundle is assigned to exactly one of the returned folds.
        /// </summary>
        /// <param name="foldDataRatio">The requested ratio of the samples constituting a single fold (sub-bundle). A value greater than MaxRatioOfFoldData is reduced to MaxRatioOfFoldData.</param>
        /// <param name="binBorder">When NaN (the default), samples are split sequentially. When specified, all the output features are considered to be binary features within one "one takes all" group (a value greater than or equal to binBorder means bin 1) and the function keeps balanced ratios of 0 and 1 for every output feature in every fold.</param>
        /// <returns>A collection of the created folds.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the bundle contains fewer than 2 samples, when the single binary output has fewer than 2 samples
        /// in bin 0 or bin 1, or when an output feature of the "one takes all" group has no bin 1 sample at all.
        /// </exception>
        /// <exception cref="ArgumentException">Thrown when, for more than one binary output, a sample does not have exactly one output feature in bin 1.</exception>
        public List<VectorBundle> Folderize(double foldDataRatio, double binBorder = double.NaN)
        {
            if (OutputVectorCollection.Count < 2)
            {
                throw new InvalidOperationException($"Insufficient number of samples ({OutputVectorCollection.Count.ToString(CultureInfo.InvariantCulture)}).");
            }
            List<VectorBundle> foldCollection = new List<VectorBundle>();

            //Fold data ratio basic correction
            if (foldDataRatio > MaxRatioOfFoldData)
            {
                foldDataRatio = MaxRatioOfFoldData;
            }
            //Preliminary fold size estimation (at least one sample per fold)
            int foldSize = Math.Max(1, (int)Math.Round(OutputVectorCollection.Count * foldDataRatio, 0));
            //Preliminary number of folds (the binary branches below may lower it)
            int numOfFolds = (int)Math.Round((double)OutputVectorCollection.Count / foldSize);

            //Folds creation
            if (double.IsNaN(binBorder))
            {
                //No binary output -> simple sequential split
                int samplesPos = 0;
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    VectorBundle fold = new VectorBundle();
                    for (int i = 0; i < foldSize && samplesPos < OutputVectorCollection.Count; i++)
                    {
                        fold.InputVectorCollection.Add(InputVectorCollection[samplesPos]);
                        fold.OutputVectorCollection.Add(OutputVectorCollection[samplesPos]);
                        ++samplesPos;
                    }
                    foldCollection.Add(fold);
                }
                //Remaining samples are dealt out to the folds in round-robin order
                for (int i = 0; i < OutputVectorCollection.Count - samplesPos; i++)
                {
                    int foldIdx = i % foldCollection.Count;
                    foldCollection[foldIdx].InputVectorCollection.Add(InputVectorCollection[samplesPos + i]);
                    foldCollection[foldIdx].OutputVectorCollection.Add(OutputVectorCollection[samplesPos + i]);
                }
            }//Indifferent output
            else
            {
                //Binary outputs -> keep balanced ratios of outputs
                int numOfOutputs = OutputVectorCollection[0].Length;
                if (numOfOutputs == 1)
                {
                    //Special case: there is only one binary output
                    //Investigation of the output data metrics
                    BinDistribution refBinDistr = new BinDistribution(binBorder);
                    refBinDistr.Update(OutputVectorCollection, 0);
                    int min01 = Math.Min(refBinDistr.NumOf[0], refBinDistr.NumOf[1]);
                    if (min01 < 2)
                    {
                        throw new InvalidOperationException("Insufficient bin 0 or 1 samples (less than 2).");
                    }
                    //Every fold has to receive at least one sample of the rarer bin
                    if (numOfFolds > min01)
                    {
                        numOfFolds = min01;
                    }
                    //Scan data and collect the sample indexes of both bins
                    int[] bin0SampleIdxs = new int[refBinDistr.NumOf[0]];
                    int bin0SamplesPos = 0;
                    int[] bin1SampleIdxs = new int[refBinDistr.NumOf[1]];
                    int bin1SamplesPos = 0;
                    for (int i = 0; i < OutputVectorCollection.Count; i++)
                    {
                        if (OutputVectorCollection[i][0] >= refBinDistr.BinBorder)
                        {
                            bin1SampleIdxs[bin1SamplesPos++] = i;
                        }
                        else
                        {
                            bin0SampleIdxs[bin0SamplesPos++] = i;
                        }
                    }
                    //Determine distributions of 0 and 1 for one fold
                    int bundleBin0Count = Math.Max(1, refBinDistr.NumOf[0] / numOfFolds);
                    int bundleBin1Count = Math.Max(1, refBinDistr.NumOf[1] / numOfFolds);
                    //Bundles creation
                    bin0SamplesPos = 0;
                    bin1SamplesPos = 0;
                    for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                    {
                        VectorBundle fold = new VectorBundle();
                        //Bin 0
                        for (int i = 0; i < bundleBin0Count; i++)
                        {
                            fold.InputVectorCollection.Add(InputVectorCollection[bin0SampleIdxs[bin0SamplesPos]]);
                            fold.OutputVectorCollection.Add(OutputVectorCollection[bin0SampleIdxs[bin0SamplesPos]]);
                            ++bin0SamplesPos;
                        }
                        //Bin 1
                        for (int i = 0; i < bundleBin1Count; i++)
                        {
                            fold.InputVectorCollection.Add(InputVectorCollection[bin1SampleIdxs[bin1SamplesPos]]);
                            fold.OutputVectorCollection.Add(OutputVectorCollection[bin1SampleIdxs[bin1SamplesPos]]);
                            ++bin1SamplesPos;
                        }
                        foldCollection.Add(fold);
                    }
                    //Remaining samples of each bin are dealt out to the folds in round-robin order
                    for (int i = 0; i < bin0SampleIdxs.Length - bin0SamplesPos; i++)
                    {
                        int foldIdx = i % foldCollection.Count;
                        foldCollection[foldIdx].InputVectorCollection.Add(InputVectorCollection[bin0SampleIdxs[bin0SamplesPos + i]]);
                        foldCollection[foldIdx].OutputVectorCollection.Add(OutputVectorCollection[bin0SampleIdxs[bin0SamplesPos + i]]);
                    }
                    for (int i = 0; i < bin1SampleIdxs.Length - bin1SamplesPos; i++)
                    {
                        int foldIdx = i % foldCollection.Count;
                        foldCollection[foldIdx].InputVectorCollection.Add(InputVectorCollection[bin1SampleIdxs[bin1SamplesPos + i]]);
                        foldCollection[foldIdx].OutputVectorCollection.Add(OutputVectorCollection[bin1SampleIdxs[bin1SamplesPos + i]]);
                    }
                }//Only 1 binary output
                else
                {
                    //There is more than 1 binary output - "one takes all" approach
                    //Investigation of the output data metrics
                    //Collect bin 1 sample indexes and check "one takes all" consistency for every output feature
                    List<int>[] outBin1SampleIdxs = new List<int>[numOfOutputs];
                    for (int i = 0; i < numOfOutputs; i++)
                    {
                        outBin1SampleIdxs[i] = new List<int>();
                    }
                    for (int sampleIdx = 0; sampleIdx < OutputVectorCollection.Count; sampleIdx++)
                    {
                        int numOf1 = 0;
                        for (int outFeatureIdx = 0; outFeatureIdx < numOfOutputs; outFeatureIdx++)
                        {
                            if (OutputVectorCollection[sampleIdx][outFeatureIdx] >= binBorder)
                            {
                                outBin1SampleIdxs[outFeatureIdx].Add(sampleIdx);
                                ++numOf1;
                            }
                        }
                        if (numOf1 != 1)
                        {
                            throw new ArgumentException($"Data are inconsistent on data index {sampleIdx.ToString(CultureInfo.InvariantCulture)}. Output vector has {numOf1.ToString(CultureInfo.InvariantCulture)} feature(s) having bin value 1.", nameof(binBorder));
                        }
                    }
                    //Determine max possible number of folds
                    //(limited by the rarer of the two bins of every output feature)
                    int maxNumOfFolds = OutputVectorCollection.Count;
                    for (int outFeatureIdx = 0; outFeatureIdx < numOfOutputs; outFeatureIdx++)
                    {
                        int outFeatureMaxFolds = Math.Min(outBin1SampleIdxs[outFeatureIdx].Count, OutputVectorCollection.Count - outBin1SampleIdxs[outFeatureIdx].Count);
                        maxNumOfFolds = Math.Min(outFeatureMaxFolds, maxNumOfFolds);
                    }
                    //An output feature without any bin 1 sample would lead to zero folds
                    //and to a division by zero during the samples distribution below
                    if (maxNumOfFolds < 1)
                    {
                        throw new InvalidOperationException("Insufficient bin 1 samples. Every output feature has to have bin value 1 in at least one sample.");
                    }
                    //Correct the number of folds to be created
                    if (numOfFolds > maxNumOfFolds)
                    {
                        numOfFolds = maxNumOfFolds;
                    }
                    //Create the folds
                    for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                    {
                        foldCollection.Add(new VectorBundle());
                    }
                    //Samples distribution: bin 1 samples of every output feature are dealt out in round-robin order
                    for (int outFeatureIdx = 0; outFeatureIdx < numOfOutputs; outFeatureIdx++)
                    {
                        for (int bin1SampleRefIdx = 0; bin1SampleRefIdx < outBin1SampleIdxs[outFeatureIdx].Count; bin1SampleRefIdx++)
                        {
                            int foldIdx = bin1SampleRefIdx % foldCollection.Count;
                            int dataIdx = outBin1SampleIdxs[outFeatureIdx][bin1SampleRefIdx];
                            foldCollection[foldIdx].AddPair(InputVectorCollection[dataIdx], OutputVectorCollection[dataIdx]);
                        }
                    }
                } //More binary outputs
            }     //Binary output

            return foldCollection;
        }
コード例 #2
0
ファイル: ReadoutUnit.cs プロジェクト: krishnanpc/NET
        /// <summary>
        /// Prepares a trained readout unit for the specified output field and task.
        /// Performs up to RegressionAttempts attempts, each of up to RegressionAttemptEpochs training epochs,
        /// evaluates the network after every epoch and returns a clone of the best evaluated state.
        /// </summary>
        /// <param name="taskType">Type of the task</param>
        /// <param name="readoutUnitIdx">Index of the readout unit (informative only)</param>
        /// <param name="foldNum">Current fold number</param>
        /// <param name="numOfFolds">Total number of the folds</param>
        /// <param name="refBinDistr">Reference bin distribution (if task type is Classification)</param>
        /// <param name="trainingPredictorsCollection">Collection of the predictors for training</param>
        /// <param name="trainingIdealOutputsCollection">Collection of ideal outputs for training. Note that the double array always has only one member.</param>
        /// <param name="testingPredictorsCollection">Collection of the predictors for testing. May be null or empty; testing statistics are then not computed.</param>
        /// <param name="testingIdealOutputsCollection">Collection of ideal outputs for testing. Note that the double array always has only one member.</param>
        /// <param name="rand">Random object to be used</param>
        /// <param name="readoutUnitSettings">Readout unit configuration parameters</param>
        /// <param name="controller">Regression controller. When null, the default IsBetter evaluation is used.</param>
        /// <param name="controllerUserObject">A user object to be passed to the controller</param>
        /// <returns>Prepared readout unit</returns>
        /// <exception cref="InvalidOperationException">Thrown when no training epoch was performed, so there is no readout unit to return.</exception>
        public static ReadoutUnit CreateTrained(CommonEnums.TaskType taskType,
                                                int readoutUnitIdx,
                                                int foldNum,
                                                int numOfFolds,
                                                BinDistribution refBinDistr,
                                                List<double[]> trainingPredictorsCollection,
                                                List<double[]> trainingIdealOutputsCollection,
                                                List<double[]> testingPredictorsCollection,
                                                List<double[]> testingIdealOutputsCollection,
                                                Random rand,
                                                ReadoutLayerSettings.ReadoutUnitSettings readoutUnitSettings,
                                                RegressionCallbackDelegate controller = null,
                                                Object controllerUserObject = null
                                                )
        {
            ReadoutUnit bestReadoutUnit = null;
            //Regression attempts
            bool stopRegression = false;

            for (int regrAttemptNumber = 1; regrAttemptNumber <= readoutUnitSettings.RegressionAttempts; regrAttemptNumber++)
            {
                //Create network and trainer
                CreateNetAndTreainer(readoutUnitSettings,
                                     trainingPredictorsCollection,
                                     trainingIdealOutputsCollection,
                                     rand,
                                     out INonRecurrentNetwork net,
                                     out INonRecurrentNetworkTrainer trainer
                                     );
                //Iterate training cycles
                for (int epoch = 1; epoch <= readoutUnitSettings.RegressionAttemptEpochs; epoch++)
                {
                    trainer.Iteration();
                    List<double[]> testingComputedOutputsCollection = null;
                    //Compute current error statistics after training iteration
                    ReadoutUnit currReadoutUnit = new ReadoutUnit();
                    currReadoutUnit.Network = net;
                    currReadoutUnit.TrainingErrorStat = net.ComputeBatchErrorStat(trainingPredictorsCollection, trainingIdealOutputsCollection, out List<double[]> trainingComputedOutputsCollection);
                    if (taskType == CommonEnums.TaskType.Classification)
                    {
                        currReadoutUnit.TrainingBinErrorStat = new BinErrStat(refBinDistr, trainingComputedOutputsCollection, trainingIdealOutputsCollection);
                        currReadoutUnit.CombinedBinaryError = currReadoutUnit.TrainingBinErrorStat.TotalErrStat.Sum;
                    }
                    currReadoutUnit.CombinedPrecisionError = currReadoutUnit.TrainingErrorStat.ArithAvg;
                    if (testingPredictorsCollection != null && testingPredictorsCollection.Count > 0)
                    {
                        //Combined errors are the worse (greater) of the training and testing errors
                        currReadoutUnit.TestingErrorStat = net.ComputeBatchErrorStat(testingPredictorsCollection, testingIdealOutputsCollection, out testingComputedOutputsCollection);
                        currReadoutUnit.CombinedPrecisionError = Math.Max(currReadoutUnit.CombinedPrecisionError, currReadoutUnit.TestingErrorStat.ArithAvg);
                        if (taskType == CommonEnums.TaskType.Classification)
                        {
                            currReadoutUnit.TestingBinErrorStat = new BinErrStat(refBinDistr, testingComputedOutputsCollection, testingIdealOutputsCollection);
                            currReadoutUnit.CombinedBinaryError = Math.Max(currReadoutUnit.CombinedBinaryError, currReadoutUnit.TestingBinErrorStat.TotalErrStat.Sum);
                        }
                    }
                    //Current results processing
                    bool better = false, stopTrainingCycle = false;
                    //Result first initialization
                    if (bestReadoutUnit == null)
                    {
                        //Adopt current regression results
                        bestReadoutUnit = currReadoutUnit.DeepClone();
                    }
                    //Perform call back if it is defined
                    if (controller != null)
                    {
                        //Evaluation of the improvement is driven externally
                        RegressionControlInArgs cbIn = new RegressionControlInArgs
                        {
                            TaskType = taskType,
                            ReadoutUnitIdx = readoutUnitIdx,
                            OutputFieldName = readoutUnitSettings.Name,
                            FoldNum = foldNum,
                            NumOfFolds = numOfFolds,
                            RegrAttemptNumber = regrAttemptNumber,
                            RegrMaxAttempts = readoutUnitSettings.RegressionAttempts,
                            Epoch = epoch,
                            MaxEpochs = readoutUnitSettings.RegressionAttemptEpochs,
                            TrainingPredictorsCollection = trainingPredictorsCollection,
                            TrainingIdealOutputsCollection = trainingIdealOutputsCollection,
                            TrainingComputedOutputsCollection = trainingComputedOutputsCollection,
                            TestingPredictorsCollection = testingPredictorsCollection,
                            TestingIdealOutputsCollection = testingIdealOutputsCollection,
                            TestingComputedOutputsCollection = testingComputedOutputsCollection,
                            CurrReadoutUnit = currReadoutUnit,
                            BestReadoutUnit = bestReadoutUnit,
                            UserObject = controllerUserObject
                        };
                        //Call external controller
                        RegressionControlOutArgs cbOut = controller(cbIn);
                        //Pick up results
                        better = cbOut.CurrentIsBetter;
                        stopTrainingCycle = cbOut.StopCurrentAttempt;
                        stopRegression = cbOut.StopRegression;
                    }
                    else
                    {
                        //Default implementation
                        better = IsBetter(taskType, currReadoutUnit, bestReadoutUnit);
                    }
                    //Best?
                    if (better)
                    {
                        //Adopt current regression results
                        bestReadoutUnit = currReadoutUnit.DeepClone();
                    }
                    //Training stop conditions
                    if (stopTrainingCycle || stopRegression)
                    {
                        break;
                    }
                }//epoch
                //Regression stop conditions
                if (stopRegression)
                {
                    break;
                }
            }//regrAttemptNumber
            //Without at least one performed epoch there is nothing to return
            //(happens when RegressionAttempts or RegressionAttemptEpochs is less than 1)
            if (bestReadoutUnit == null)
            {
                throw new InvalidOperationException("No training epoch was performed. RegressionAttempts and RegressionAttemptEpochs have to be greater than 0.");
            }
            //Create statistics of the best network weights
            bestReadoutUnit.OutputWeightsStat = bestReadoutUnit.Network.ComputeWeightsStat();
            return bestReadoutUnit;
        }
コード例 #3
0
ファイル: ReadoutLayer.cs プロジェクト: krishnanpc/NET
        /// <summary>
        /// Builds the readout layer.
        /// For every configured readout unit (output field) a cluster is prepared, containing one trained
        /// readout unit per fold.
        /// </summary>
        /// <param name="predictorsCollection">Collection of predictors</param>
        /// <param name="idealOutputsCollection">Collection of desired outputs related to predictors</param>
        /// <param name="regressionController">Regression controller delegate passed to the training of every readout unit</param>
        /// <param name="regressionControllerData">A user object passed to the regression controller</param>
        /// <returns>
        /// The returned ValidationBundle is something like a protocol: fold by fold (unit by unit) it records
        /// the values computed on the fold's own testing samples together with the corresponding ideal values.
        /// This is the pessimistic approach. Real results on unseen data could be better due to the clustering synergy.
        /// </returns>
        public ValidationBundle Build(List<double[]> predictorsCollection,
                                      List<double[]> idealOutputsCollection,
                                      ReadoutUnit.RegressionCallbackDelegate regressionController,
                                      Object regressionControllerData
                                      )
        {
            //Random generator with a fixed seed
            Random rand = new Random(0);
            //Pre-allocate zeroed computed and ideal vectors of the validation bundle
            int sampleCount = idealOutputsCollection.Count;
            List<double[]> computedVectors = new List<double[]>(sampleCount);
            List<double[]> idealVectors = new List<double[]>(sampleCount);
            for (int row = 0; row < sampleCount; row++)
            {
                computedVectors.Add(new double[idealOutputsCollection[0].Length]);
                idealVectors.Add(new double[idealOutputsCollection[0].Length]);
            }
            //Check the requested size of the test dataset
            if (_settings.TestDataRatio > MaxRatioOfTestData)
            {
                throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            int testDataSetLength = (int)Math.Round(sampleCount * _settings.TestDataRatio, 0);
            if (testDataSetLength < MinLengthOfTestDataset)
            {
                throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            //Number of folds: taken from settings, a non-positive value means automatic setup
            int numOfFolds = _settings.NumOfFolds;
            if (numOfFolds <= 0)
            {
                numOfFolds = Math.Min(sampleCount / testDataSetLength, MaxNumOfFolds);
            }
            //Work on shuffled data
            TimeSeriesBundle shuffledData = new TimeSeriesBundle(predictorsCollection, idealOutputsCollection);
            shuffledData.Shuffle(rand);
            //One cluster of readout units for each output field
            for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
            {
                _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
                List<double[]> singleValueCollection = new List<double[]>(sampleCount);
                var unitCfg = _settings.ReadoutUnitCfgCollection[clusterIdx];
                bool isClassification = unitCfg.TaskType == CommonEnums.TaskType.Classification;
                //Reference binary distribution is relevant only for the classification task
                BinDistribution refBinDistr = isClassification ? new BinDistribution(_dataRange.Mid) : null;
                //Extract this cluster's ideal value of every sample as a single-value vector
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    double[] singleValue = { idealVector[clusterIdx] };
                    singleValueCollection.Add(singleValue);
                    if (isClassification)
                    {
                        refBinDistr.Update(singleValue);
                    }
                }
                //The way of dividing samples into sub-bundles depends on the task type
                List<TimeSeriesBundle> subBundles;
                if (isClassification)
                {
                    subBundles = DivideSamplesForClassificationTask(shuffledData.InputVectorCollection,
                                                                    singleValueCollection,
                                                                    refBinDistr,
                                                                    testDataSetLength
                                                                    );
                }
                else
                {
                    subBundles = DivideSamplesForForecastTask(shuffledData.InputVectorCollection,
                                                              singleValueCollection,
                                                              testDataSetLength
                                                              );
                }
                //Train the best predicting unit for each fold in the cluster
                ClusterErrStatistics clusterStat = new ClusterErrStatistics(unitCfg.TaskType, numOfFolds, refBinDistr);
                int validationRow = 0;
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    //Training samples are all the sub-bundles except the one serving as the testing fold
                    List<double[]> trainingPredictors = new List<double[]>();
                    List<double[]> trainingIdealValues = new List<double[]>();
                    for (int bundleIdx = 0; bundleIdx < subBundles.Count; bundleIdx++)
                    {
                        if (bundleIdx == foldIdx)
                        {
                            continue;
                        }
                        trainingPredictors.AddRange(subBundles[bundleIdx].InputVectorCollection);
                        trainingIdealValues.AddRange(subBundles[bundleIdx].OutputVectorCollection);
                    }
                    //The best unit found by the regression becomes the cluster member of this fold
                    _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(unitCfg.TaskType,
                                                                                        clusterIdx,
                                                                                        foldIdx + 1,
                                                                                        numOfFolds,
                                                                                        refBinDistr,
                                                                                        trainingPredictors,
                                                                                        trainingIdealValues,
                                                                                        subBundles[foldIdx].InputVectorCollection,
                                                                                        subBundles[foldIdx].OutputVectorCollection,
                                                                                        rand,
                                                                                        unitCfg,
                                                                                        regressionController,
                                                                                        regressionControllerData
                                                                                        );
                    //Cluster error statistics and data for the validation bundle (pessimistic approach)
                    ReadoutUnit foldUnit = _clusterCollection[clusterIdx][foldIdx];
                    TimeSeriesBundle testingBundle = subBundles[foldIdx];
                    for (int sampleIdx = 0; sampleIdx < testingBundle.OutputVectorCollection.Count; sampleIdx++)
                    {
                        double idealValue = testingBundle.OutputVectorCollection[sampleIdx][0];
                        double computedValue = foldUnit.Network.Compute(testingBundle.InputVectorCollection[sampleIdx])[0];
                        clusterStat.Update(computedValue, idealValue);
                        idealVectors[validationRow][clusterIdx] = idealValue;
                        computedVectors[validationRow][clusterIdx] = computedValue;
                        ++validationRow;
                    }
                } //foldIdx
                _clusterErrStatisticsCollection.Add(clusterStat);
            }     //clusterIdx
            //Validation bundle is returned
            return new ValidationBundle(computedVectors, idealVectors);
        }
コード例 #4
0
ファイル: ReadoutLayer.cs プロジェクト: krishnanpc/NET
 /// <summary>
 /// Creates an empty instance ready to be updated.
 /// </summary>
 /// <param name="taskType">Type of the task; the binary error statistics are created only for Classification</param>
 /// <param name="numOfReadoutUnits">Number of the readout units stored in the NumOfReadoutUnits property</param>
 /// <param name="refBinDistr">Reference bin distribution passed to the binary error statistics (used only for the Classification task)</param>
 public ClusterErrStatistics(CommonEnums.TaskType taskType, int numOfReadoutUnits, BinDistribution refBinDistr)
 {
     TaskType = taskType;
     NumOfReadoutUnits = numOfReadoutUnits;
     PrecissionErrStat = new BasicStat();
     //Binary error statistics make sense only for the classification task
     BinaryErrStat = TaskType == CommonEnums.TaskType.Classification
                     ? new BinErrStat(refBinDistr)
                     : null;
 }
コード例 #5
0
ファイル: ReadoutLayer.cs プロジェクト: krishnanpc/NET
        /// <summary>
        /// Divides the samples into bundles so that every bundle receives the same base number of
        /// samples from each of the two bins (first ideal value below the bin border / at or above it).
        /// Samples left over after the even division are handed out one by one, round-robin,
        /// starting from the first bundle (bin 0 leftovers first, then bin 1 leftovers).
        /// </summary>
        /// <param name="predictorsCollection">Predictor vectors; accessed with the same indexes as idealValueCollection</param>
        /// <param name="idealValueCollection">Ideal output vectors; only the first element of each vector is inspected</param>
        /// <param name="refBinDistr">Supplies the bin border and the per-bin sample counts (NumOf[0], NumOf[1]) used to size the index buffers</param>
        /// <param name="bundleSize">Requested bundle size; the number of bundles is the number of samples divided by it (integer division)</param>
        /// <returns>Collection of the created bundles</returns>
        private List <TimeSeriesBundle> DivideSamplesForClassificationTask(List <double[]> predictorsCollection,
                                                                           List <double[]> idealValueCollection,
                                                                           BinDistribution refBinDistr,
                                                                           int bundleSize
                                                                           )
        {
            int bundleCount = idealValueCollection.Count / bundleSize;
            List <TimeSeriesBundle> bundles = new List <TimeSeriesBundle>(bundleCount);

            //Sort the sample indexes into the two bins
            int[] lowIdxs  = new int[refBinDistr.NumOf[0]];
            int[] highIdxs = new int[refBinDistr.NumOf[1]];
            int lowPos     = 0;
            int highPos    = 0;
            int sampleIdx  = 0;
            foreach (double[] idealVector in idealValueCollection)
            {
                //The comparison is intentionally kept as ">=": a NaN value fails it and lands in bin 0
                if (idealVector[0] >= refBinDistr.BinBorder)
                {
                    highIdxs[highPos] = sampleIdx;
                    ++highPos;
                }
                else
                {
                    lowIdxs[lowPos] = sampleIdx;
                    ++lowPos;
                }
                ++sampleIdx;
            }

            //Base number of samples of each bin per bundle (at least one of each)
            int lowPerBundle  = Math.Max(1, lowIdxs.Length / bundleCount);
            int highPerBundle = Math.Max(1, highIdxs.Length / bundleCount);
            if (lowPerBundle * bundleCount > lowIdxs.Length)
            {
                throw new Exception("Insufficient bin 0 samples");
            }
            if (highPerBundle * bundleCount > highIdxs.Length)
            {
                throw new Exception("Insufficient bin 1 samples");
            }

            //Bundles creation: the cursors now walk through the collected indexes
            lowPos  = 0;
            highPos = 0;
            while (bundles.Count < bundleCount)
            {
                TimeSeriesBundle bundle = new TimeSeriesBundle();
                //Bin 0 share
                for (int lowEnd = lowPos + lowPerBundle; lowPos < lowEnd; lowPos++)
                {
                    int idx = lowIdxs[lowPos];
                    bundle.InputVectorCollection.Add(predictorsCollection[idx]);
                    bundle.OutputVectorCollection.Add(idealValueCollection[idx]);
                }
                //Bin 1 share
                for (int highEnd = highPos + highPerBundle; highPos < highEnd; highPos++)
                {
                    int idx = highIdxs[highPos];
                    bundle.InputVectorCollection.Add(predictorsCollection[idx]);
                    bundle.OutputVectorCollection.Add(idealValueCollection[idx]);
                }
                bundles.Add(bundle);
            }

            //Leftover bin 0 samples go round-robin to the bundles, starting from the first one
            int targetIdx = 0;
            for (; lowPos < lowIdxs.Length; lowPos++)
            {
                int idx = lowIdxs[lowPos];
                bundles[targetIdx].InputVectorCollection.Add(predictorsCollection[idx]);
                bundles[targetIdx].OutputVectorCollection.Add(idealValueCollection[idx]);
                targetIdx = (targetIdx + 1) % bundles.Count;
            }
            //Leftover bin 1 samples, again starting from the first bundle
            targetIdx = 0;
            for (; highPos < highIdxs.Length; highPos++)
            {
                int idx = highIdxs[highPos];
                bundles[targetIdx].InputVectorCollection.Add(predictorsCollection[idx]);
                bundles[targetIdx].OutputVectorCollection.Add(idealValueCollection[idx]);
                targetIdx = (targetIdx + 1) % bundles.Count;
            }
            return(bundles);
        }
コード例 #6
0
ファイル: VectorBundle.cs プロジェクト: thild/NET
        //Methods
        /// <summary>
        /// Splits this bundle to a collection of smaller bundles.
        /// </summary>
        /// <remarks>
        /// The number of sub-bundles is the number of samples divided by subBundleSize (integer division).
        /// Samples left over after the even division are handed out one by one, round-robin, starting from
        /// the first sub-bundle, so the resulting sub-bundles can differ in size from subBundleSize.
        /// The balanced (binary) mode is used only when binBorder is specified and the first output vector
        /// has length 1. Otherwise the samples are split sequentially in their stored order.
        /// An empty bundle yields an empty collection.
        /// </remarks>
        /// <param name="subBundleSize">Sub-bundle size (must be greater than 0)</param>
        /// <param name="binBorder">If specified and there is only one output value, method will keep balanced number of output values GE to binBorder in the each sub-bundle</param>
        /// <returns>Collection of extracted sub-bundles</returns>
        /// <exception cref="ArgumentOutOfRangeException">subBundleSize is not greater than 0.</exception>
        /// <exception cref="InvalidOperationException">
        /// The bundle contains fewer samples than subBundleSize, or (balanced mode) one of the bins
        /// does not contain at least one sample for every sub-bundle.
        /// </exception>
        public List <VectorBundle> Split(int subBundleSize, double binBorder = double.NaN)
        {
            if (subBundleSize <= 0)
            {
                //Zero would end in DivideByZeroException, negative in a misleading list capacity error
                throw new ArgumentOutOfRangeException(nameof(subBundleSize), subBundleSize, "Sub-bundle size must be greater than 0.");
            }
            int numOfSamples = OutputVectorCollection.Count;
            if (numOfSamples == 0)
            {
                //Nothing to split
                return(new List <VectorBundle>());
            }
            int numOfBundles = numOfSamples / subBundleSize;
            if (numOfBundles == 0)
            {
                //Without this check the divisions and the round-robin modulus below would divide by zero
                throw new InvalidOperationException($"Insufficient number of samples ({numOfSamples}) for the sub-bundle size {subBundleSize}.");
            }
            List <VectorBundle> bundleCollection = new List <VectorBundle>(numOfBundles);

            if (!double.IsNaN(binBorder) && OutputVectorCollection[0].Length == 1)
            {
                //Balanced split
                //NOTE(review): Update is expected to count the values below / GE to binBorder into NumOf[0] / NumOf[1] - confirm
                BinDistribution refBinDistr = new BinDistribution(binBorder);
                refBinDistr.Update(OutputVectorCollection, 0);
                //Scan: collect sample indexes per bin
                int[] bin0SampleIdxs = new int[refBinDistr.NumOf[0]];
                int   bin0SamplesPos = 0;
                int[] bin1SampleIdxs = new int[refBinDistr.NumOf[1]];
                int   bin1SamplesPos = 0;
                for (int i = 0; i < numOfSamples; i++)
                {
                    if (OutputVectorCollection[i][0] >= refBinDistr.BinBorder)
                    {
                        bin1SampleIdxs[bin1SamplesPos++] = i;
                    }
                    else
                    {
                        bin0SampleIdxs[bin0SamplesPos++] = i;
                    }
                }
                //Division: base number of samples of each bin per sub-bundle (at least one of each)
                int bundleBin0Count = Math.Max(1, bin0SampleIdxs.Length / numOfBundles);
                int bundleBin1Count = Math.Max(1, bin1SampleIdxs.Length / numOfBundles);
                if (bundleBin0Count * numOfBundles > bin0SampleIdxs.Length)
                {
                    throw new InvalidOperationException("Insufficient bin 0 samples");
                }
                if (bundleBin1Count * numOfBundles > bin1SampleIdxs.Length)
                {
                    throw new InvalidOperationException("Insufficient bin 1 samples");
                }
                //Bundles creation
                bin0SamplesPos = 0;
                bin1SamplesPos = 0;
                for (int bundleNum = 0; bundleNum < numOfBundles; bundleNum++)
                {
                    VectorBundle bundle = new VectorBundle();
                    //Bin 0
                    for (int i = 0; i < bundleBin0Count; i++, bin0SamplesPos++)
                    {
                        int sampleIdx = bin0SampleIdxs[bin0SamplesPos];
                        bundle.InputVectorCollection.Add(InputVectorCollection[sampleIdx]);
                        bundle.OutputVectorCollection.Add(OutputVectorCollection[sampleIdx]);
                    }
                    //Bin 1
                    for (int i = 0; i < bundleBin1Count; i++, bin1SamplesPos++)
                    {
                        int sampleIdx = bin1SampleIdxs[bin1SamplesPos];
                        bundle.InputVectorCollection.Add(InputVectorCollection[sampleIdx]);
                        bundle.OutputVectorCollection.Add(OutputVectorCollection[sampleIdx]);
                    }
                    bundleCollection.Add(bundle);
                }
                //Remaining samples: round-robin from the first sub-bundle, bin 0 first, then bin 1
                for (int i = 0; i < bin0SampleIdxs.Length - bin0SamplesPos; i++)
                {
                    int bundleIdx = i % bundleCollection.Count;
                    int sampleIdx = bin0SampleIdxs[bin0SamplesPos + i];
                    bundleCollection[bundleIdx].InputVectorCollection.Add(InputVectorCollection[sampleIdx]);
                    bundleCollection[bundleIdx].OutputVectorCollection.Add(OutputVectorCollection[sampleIdx]);
                }
                for (int i = 0; i < bin1SampleIdxs.Length - bin1SamplesPos; i++)
                {
                    int bundleIdx = i % bundleCollection.Count;
                    int sampleIdx = bin1SampleIdxs[bin1SamplesPos + i];
                    bundleCollection[bundleIdx].InputVectorCollection.Add(InputVectorCollection[sampleIdx]);
                    bundleCollection[bundleIdx].OutputVectorCollection.Add(OutputVectorCollection[sampleIdx]);
                }
            }
            else
            {
                //Simple sequential split
                //Bundles creation
                int samplesPos = 0;
                for (int bundleNum = 0; bundleNum < numOfBundles; bundleNum++)
                {
                    VectorBundle bundle = new VectorBundle();
                    for (int i = 0; i < subBundleSize && samplesPos < numOfSamples; i++, samplesPos++)
                    {
                        bundle.InputVectorCollection.Add(InputVectorCollection[samplesPos]);
                        bundle.OutputVectorCollection.Add(OutputVectorCollection[samplesPos]);
                    }
                    bundleCollection.Add(bundle);
                }
                //Remaining samples: round-robin from the first sub-bundle
                for (int i = 0; i < numOfSamples - samplesPos; i++)
                {
                    int bundleIdx = i % bundleCollection.Count;
                    bundleCollection[bundleIdx].InputVectorCollection.Add(InputVectorCollection[samplesPos + i]);
                    bundleCollection[bundleIdx].OutputVectorCollection.Add(OutputVectorCollection[samplesPos + i]);
                }
            }
            return(bundleCollection);
        }
コード例 #7
0
ファイル: ReadoutLayer.cs プロジェクト: lulzzz/NET
        /// <summary>
        /// Builds readout layer.
        /// Normalizes the given data, divides it into sub-bundles (folds) and, for every configured
        /// readout unit (output field), prepares a prediction cluster containing one trained readout unit per fold.
        /// </summary>
        /// <param name="dataBundle">Collection of input predictors and associated desired output values</param>
        /// <param name="regressionController">Regression controller delegate (handed over to ReadoutUnit.CreateTrained)</param>
        /// <param name="regressionControllerData">A user object (handed over to ReadoutUnit.CreateTrained together with the delegate)</param>
        /// <param name="predictorsMapper">Optional specific mapping of predictors to readout units. When null, a default PredictorsMapper(numOfPredictors) is created.</param>
        /// <returns>Returned ResultComparativeBundle is something like a protocol.
        /// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values, both in the natural (de-normalized) form.
        /// Every unit is evaluated on the samples of the fold that was left out of its training collections.
        /// This is the pessimistic approach. Real results on unseen data could be better due to the clustering synergy.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when there are no predictors, when the number of output values does not match the number of
        /// configured readout units, or when the vectors in the bundle have inconsistent lengths.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown when the configured test data ratio exceeds MaxRatioOfTestData or when the resulting
        /// number of test samples is less than MinLengthOfTestDataset.
        /// </exception>
        public ResultComparativeBundle Build(VectorBundle dataBundle,
                                             ReadoutUnit.RegressionCallbackDelegate regressionController,
                                             Object regressionControllerData,
                                             PredictorsMapper predictorsMapper = null
                                             )
        {
            //Basic checks
            //The first pair determines the expected vector lengths (an empty bundle fails right here on the [0] access)
            int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
            int numOfOutputs    = dataBundle.OutputVectorCollection[0].Length;

            if (numOfPredictors == 0)
            {
                throw new Exception("Number of predictors must be greater tham 0.");
            }
            if (numOfOutputs != _settings.ReadoutUnitCfgCollection.Count)
            {
                throw new Exception("Incorrect number of ideal output values in the vector.");
            }

            //Normalization of predictors and output data collections
            //Allocation of normalizers (one per predictor and one per output field)
            _predictorNormalizerCollection = new Normalizer[numOfPredictors];
            for (int i = 0; i < numOfPredictors; i++)
            {
                _predictorNormalizerCollection[i] = new Normalizer(DataRange, NormalizerDefaultReserve, true, false);
            }
            _outputNormalizerCollection = new Normalizer[numOfOutputs];
            for (int i = 0; i < numOfOutputs; i++)
            {
                //Classification outputs get a normalizer with zero reserve and the third ctor argument switched off
                //NOTE(review): meaning of the boolean Normalizer ctor arguments is not visible here - confirm
                bool classificationTask = (_settings.ReadoutUnitCfgCollection[i].TaskType == CommonEnums.TaskType.Classification);
                _outputNormalizerCollection[i] = new Normalizer(DataRange,
                                                                classificationTask ? 0 : NormalizerDefaultReserve,
                                                                classificationTask ? false : true,
                                                                false
                                                                );
            }
            //Normalizers adjustment (first pass over the data; must be completed before any value is normalized)
            for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
            {
                //Checks: every pair has to have the same vector lengths as the first one
                if (dataBundle.InputVectorCollection[pairIdx].Length != numOfPredictors)
                {
                    throw new Exception("Inconsistent number of predictors in the predictors collection.");
                }
                if (dataBundle.OutputVectorCollection[pairIdx].Length != numOfOutputs)
                {
                    throw new Exception("Inconsistent number of values in the ideal values collection.");
                }
                //Adjust predictors normalizers
                for (int i = 0; i < numOfPredictors; i++)
                {
                    _predictorNormalizerCollection[i].Adjust(dataBundle.InputVectorCollection[pairIdx][i]);
                }
                //Adjust outputs normalizers
                for (int i = 0; i < numOfOutputs; i++)
                {
                    _outputNormalizerCollection[i].Adjust(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
            }
            //Data normalization (second pass over the data)
            //Allocation
            List <double[]> predictorsCollection   = new List <double[]>(dataBundle.InputVectorCollection.Count);
            List <double[]> idealOutputsCollection = new List <double[]>(dataBundle.OutputVectorCollection.Count);

            //Normalization into new arrays (vectors in dataBundle are not modified)
            for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
            {
                //Predictors
                double[] predictors = new double[numOfPredictors];
                for (int i = 0; i < numOfPredictors; i++)
                {
                    predictors[i] = _predictorNormalizerCollection[i].Normalize(dataBundle.InputVectorCollection[pairIdx][i]);
                }
                predictorsCollection.Add(predictors);
                //Outputs
                double[] outputs = new double[numOfOutputs];
                for (int i = 0; i < numOfOutputs; i++)
                {
                    outputs[i] = _outputNormalizerCollection[i].Normalize(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
                idealOutputsCollection.Add(outputs);
            }
            //Data processing
            //Random object initialization
            //Fixed seed: the same sequence of random numbers is produced on every call.
            //The instance is shared by the shuffle and by all CreateTrained calls below, so the order of those calls matters.
            Random rand = new Random(0);

            //Predictors mapper (specified or default)
            _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
            //Allocation of computed and ideal vectors for result comparative bundle
            //One zero-initialized vector per sample; the vectors are filled in the fold loop below
            List <double[]> validationComputedVectorCollection = new List <double[]>(idealOutputsCollection.Count);
            List <double[]> validationIdealVectorCollection    = new List <double[]>(idealOutputsCollection.Count);

            for (int i = 0; i < idealOutputsCollection.Count; i++)
            {
                validationComputedVectorCollection.Add(new double[numOfOutputs]);
                validationIdealVectorCollection.Add(new double[numOfOutputs]);
            }
            //Test dataset size
            if (_settings.TestDataRatio > MaxRatioOfTestData)
            {
                throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);

            if (testDataSetLength < MinLengthOfTestDataset)
            {
                throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            //Number of folds
            int numOfFolds = _settings.NumOfFolds;

            if (numOfFolds <= 0)
            {
                //Auto setup: as many folds as test datasets fit into the data, limited by MaxNumOfFolds
                //NOTE(review): relies on MinLengthOfTestDataset being at least 1, otherwise this divides by zero - confirm
                numOfFolds = idealOutputsCollection.Count / testDataSetLength;
                if (numOfFolds > MaxNumOfFolds)
                {
                    numOfFolds = MaxNumOfFolds;
                }
            }
            //NOTE(review): an explicitly configured numOfFolds is not checked against the number of sub-bundles
            //created below; subBundleCollection[foldIdx] fails if there are fewer sub-bundles than folds - confirm
            //Create shuffled copy of the data
            VectorBundle shuffledData = new VectorBundle(predictorsCollection, idealOutputsCollection);

            shuffledData.Shuffle(rand);
            //Data inspection, preparation of datasets and training of ReadoutUnits
            //Clusters of readout units (one cluster for each output field)
            //NOTE(review): _clusterCollection is expected to be allocated elsewhere with one slot per readout unit cfg - confirm
            for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
            {
                _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
                List <double[]> idealValueCollection = new List <double[]>(idealOutputsCollection.Count);
                BinDistribution refBinDistr          = null;
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Reference binary distribution is relevant only for classification task
                    //(stays null for other task types and is passed on as null)
                    refBinDistr = new BinDistribution(DataRange.Mid);
                }
                //Transformation to a single value vectors and data analysis
                //Only this cluster's output field is extracted from every (shuffled) ideal vector
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    double[] value = new double[1];
                    value[0] = idealVector[clusterIdx];
                    idealValueCollection.Add(value);
                    if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                    {
                        //Reference binary distribution is relevant only for classification task
                        refBinDistr.Update(value);
                    }
                }
                List <VectorBundle> subBundleCollection = null;
                //Predictors selected by the mapper for this readout unit (looked up by the unit's name)
                List <double[]>     readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(_settings.ReadoutUnitCfgCollection[clusterIdx].Name, shuffledData.InputVectorCollection);
                //Datasets preparation is depending on the task type
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Classification task
                    subBundleCollection = DivideSamplesForClassificationTask(readoutUnitInputVectorCollection,
                                                                             idealValueCollection,
                                                                             refBinDistr,
                                                                             testDataSetLength
                                                                             );
                }
                else
                {
                    //Forecast task
                    subBundleCollection = DivideSamplesForForecastTask(readoutUnitInputVectorCollection,
                                                                       idealValueCollection,
                                                                       testDataSetLength
                                                                       );
                }
                //Find best unit per each fold in the cluster.
                ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
                //Position in the validation vectors; restarts for every cluster so all clusters write the same rows
                int arrayPos             = 0;
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    //Build training samples: all sub-bundles except the one belonging to the current fold
                    List <double[]> trainingPredictorsCollection = new List <double[]>();
                    List <double[]> trainingIdealValueCollection = new List <double[]>();
                    for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
                    {
                        if (bundleIdx != foldIdx)
                        {
                            trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                            trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                        }
                    }
                    //Call training regression to get the best fold's readout unit.
                    //The best unit becomes to be the predicting cluster member.
                    //The fold's own sub-bundle is passed separately from the training collections
                    //NOTE(review): presumably it serves as the testing data inside CreateTrained - confirm
                    _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                        clusterIdx,
                                                                                        foldIdx + 1,
                                                                                        numOfFolds,
                                                                                        refBinDistr,
                                                                                        trainingPredictorsCollection,
                                                                                        trainingIdealValueCollection,
                                                                                        subBundleCollection[foldIdx].InputVectorCollection,
                                                                                        subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                        rand,
                                                                                        _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                        regressionController,
                                                                                        regressionControllerData
                                                                                        );
                    //Cluster error statistics & data for validation bundle (pessimistic approach)
                    //The trained unit is evaluated on the samples of its own fold
                    for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
                    {
                        //Normalized computed value converted back to the natural range together with the ideal value
                        double nrmComputedValue = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                        double natComputedValue = _outputNormalizerCollection[clusterIdx].Naturalize(nrmComputedValue);
                        double natIdealValue    = _outputNormalizerCollection[clusterIdx].Naturalize(subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                        ces.Update(nrmComputedValue,
                                   subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0],
                                   natComputedValue,
                                   natIdealValue);
                        validationIdealVectorCollection[arrayPos][clusterIdx]    = natIdealValue;
                        validationComputedVectorCollection[arrayPos][clusterIdx] = natComputedValue;
                        ++arrayPos;
                    }
                } //foldIdx
                //NOTE(review): when the first numOfFolds sub-bundles do not contain all samples, the validation
                //vectors from arrayPos onwards keep their initial zero values - confirm this is intended
                //NOTE(review): statistics are appended on every call; whether the collection is reset between builds is not visible here
                _clusterErrStatisticsCollection.Add(ces);
            }     //clusterIdx
            //Validation bundle is returned.
            return(new ResultComparativeBundle(validationComputedVectorCollection, validationIdealVectorCollection));
        }