示例#1
0
        /// <summary>
        /// Builds the cluster chain.
        /// All chained clusters are trained within the same repeated crossvalidation scheme and are
        /// then finalized and added into the resulting chain.
        /// </summary>
        /// <remarks>
        /// In every repetition the clusters are processed in their configured order. For each testing fold,
        /// the outputs computed by the freshly trained networks are concatenated to the input vectors of
        /// a shallow copy of the originally folderized fold, and this extended fold is handed over to the next cluster.
        /// </remarks>
        /// <param name="dataBundle">The data bundle for training (the method works on its shallow copy).</param>
        /// <param name="filters">The filters to be used to denormalize outputs.</param>
        /// <returns>The built chain containing the finalized clusters.</returns>
        public TNRNetClusterChain Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
        {
            //Resulting chain
            TNRNetClusterChain builtChain = new TNRNetClusterChain(_chainName, _clusterChainCfg.Output);
            //One (so far empty) cluster per cluster configuration
            int numOfClusterCfgs = _clusterChainCfg.ClusterCfgCollection.Count;
            List<TNRNetCluster> clusters = new List<TNRNetCluster>(numOfClusterCfgs);
            for (int cfgIdx = 0; cfgIdx < numOfClusterCfgs; cfgIdx++)
            {
                var cfg = _clusterChainCfg.ClusterCfgCollection[cfgIdx];
                TNRNetCluster emptyCluster = new TNRNetCluster(_chainName,
                                                               cfg.Output,
                                                               cfg.TrainingGroupWeight,
                                                               cfg.TestingGroupWeight,
                                                               cfg.SamplesWeight,
                                                               cfg.NumericalPrecisionWeight,
                                                               cfg.MisrecognizedFalseWeight,
                                                               cfg.UnrecognizedTrueWeight
                                                               );
                clusters.Add(emptyCluster);
            }
            //Binary border passed to the folderization (NaN when the chain output is Real)
            double binBorder;
            if (_clusterChainCfg.Output == TNRNet.OutputType.Real)
            {
                binBorder = double.NaN;
            }
            else
            {
                binBorder = builtChain.OutputDataRange.Mid;
            }
            //Working copy of the data (it is reshuffled between the repetitions)
            VectorBundle workingData = dataBundle.CreateShallowCopy();
            var crossvalidationCfg = _clusterChainCfg.CrossvalidationCfg;

            //Training of the cluster members
            ResetProgressTracking();
            for (_repetitionIdx = 0; _repetitionIdx < crossvalidationCfg.Repetitions; _repetitionIdx++)
            {
                //Folderization of the working data
                List<VectorBundle> originalFolds = workingData.Folderize(crossvalidationCfg.FoldDataRatio, binBorder);
                int availableFolds = originalFolds.Count;
                //A non-positive requested number of folds means "use all available folds"
                _numOfFoldsPerRepetition = Math.Min(crossvalidationCfg.Folds > 0 ? crossvalidationCfg.Folds : availableFolds, availableFolds);

                //The first cluster works on a copy of the original folds
                List<VectorBundle> clusterInputFolds = CopyFolds(originalFolds);
                for (_clusterIdx = 0; _clusterIdx < clusters.Count; _clusterIdx++)
                {
                    var clusterCfg = _clusterChainCfg.ClusterCfgCollection[_clusterIdx];
                    var netCfgs = clusterCfg.ClusterNetConfigurations;
                    //Extended folds collected here become the input folds of the following cluster
                    List<VectorBundle> foldsForNextCluster = new List<VectorBundle>(_numOfFoldsPerRepetition);
                    for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
                    {
                        VectorBundle testingFold = clusterInputFolds[_testingFoldIdx];
                        //Training data consists of all the folds except the testing one
                        VectorBundle trainingData = new VectorBundle();
                        for (int k = 0; k < clusterInputFolds.Count; k++)
                        {
                            if (k == _testingFoldIdx)
                            {
                                continue;
                            }
                            trainingData.Add(clusterInputFolds[k]);
                        }
                        //The extended fold always starts from the originally folderized data
                        VectorBundle extendedFold = originalFolds[_testingFoldIdx].CreateShallowCopy();
                        for (_netCfgIdx = 0; _netCfgIdx < netCfgs.Count; _netCfgIdx++)
                        {
                            TNRNetBuilder netBuilder = new TNRNetBuilder(_chainName,
                                                                         netCfgs[_netCfgIdx],
                                                                         clusterCfg.Output,
                                                                         trainingData,
                                                                         testingFold,
                                                                         _rand,
                                                                         _controller
                                                                         );
                            //Forward the progress notifications of the network builder
                            netBuilder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                            //The trained network becomes a member of the current cluster
                            TNRNet trainedNet = netBuilder.Build();
                            //Scope identifier combines the repetition and the testing fold indexes
                            int netScopeID = (_repetitionIdx * NetScopeDelimiterCoeff) + _testingFoldIdx;
                            clusters[_clusterIdx].AddMember(trainedNet, netScopeID, testingFold, filters);
                            //Concatenate the network outputs to the inputs prepared for the next cluster
                            var testingInputs = testingFold.InputVectorCollection;
                            var extendedInputs = extendedFold.InputVectorCollection;
                            for (int s = 0; s < testingInputs.Count; s++)
                            {
                                double[] netOutputs = trainedNet.Network.Compute(testingInputs[s]);
                                extendedInputs[s] = extendedInputs[s].Concat(netOutputs);
                            }
                        }//_netCfgIdx
                        foldsForNextCluster.Add(extendedFold);
                    }//_testingFoldIdx
                    //The following cluster continues with the extended folds
                    clusterInputFolds = foldsForNextCluster;
                }//_clusterIdx

                bool isLastRepetition = _repetitionIdx >= crossvalidationCfg.Repetitions - 1;
                if (!isLastRepetition)
                {
                    //Another repetition follows, so reshuffle the data
                    workingData.Shuffle(_rand);
                }
            }//_repetitionIdx

            //Finalize the clusters and put them into the chain
            foreach (TNRNetCluster cluster in clusters)
            {
                cluster.FinalizeCluster();
                builtChain.AddCluster(cluster);
            }
            return builtChain;
        }
示例#2
0
文件: ReadoutLayer.cs 项目: thild/NET
        /// <summary>
        /// Builds trained readout layer.
        /// Prepares the feature filters, normalizes the data, and trains one readout unit
        /// for every configured readout unit using a shuffled copy of the normalized data.
        /// </summary>
        /// <param name="dataBundle">Collection of input predictors and associated desired output values</param>
        /// <param name="predictorsMapper">Optional specific mapping of predictors to readout units</param>
        /// <param name="controller">Optional external regression controller</param>
        /// <returns>Results of the regression</returns>
        /// <exception cref="ArgumentNullException">The data bundle is null.</exception>
        /// <exception cref="ArgumentException">The data bundle contains no input or no output vectors.</exception>
        /// <exception cref="InvalidOperationException">
        /// The predictors vector is empty or the length of the output vector does not match
        /// the number of configured readout units.
        /// </exception>
        public RegressionOverview Build(VectorBundle dataBundle,
                                        PredictorsMapper predictorsMapper = null,
                                        TrainedNetworkBuilder.RegressionControllerDelegate controller = null
                                        )
        {
            //Basic checks
            if (dataBundle == null)
            {
                throw new ArgumentNullException(nameof(dataBundle));
            }
            //The first vectors are used below to determine the dimensions, so they have to exist
            if (dataBundle.InputVectorCollection.Count == 0 || dataBundle.OutputVectorCollection.Count == 0)
            {
                throw new ArgumentException("Data bundle must contain at least one sample.", nameof(dataBundle));
            }
            int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
            int numOfOutputs    = dataBundle.OutputVectorCollection[0].Length;

            if (numOfPredictors == 0)
            {
                throw new InvalidOperationException("Number of predictors must be greater than 0.");
            }
            if (numOfOutputs != Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count)
            {
                throw new InvalidOperationException("Incorrect length of output vectors.");
            }
            //Predictors mapper (specified or default)
            _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
            //Allocation and preparation of feature filters
            //Predictors (each parallel iteration owns exactly one filter, so no element is shared)
            _predictorFeatureFilterCollection = new FeatureFilterBase[numOfPredictors];
            Parallel.For(0, _predictorFeatureFilterCollection.Length, nrmIdx =>
            {
                _predictorFeatureFilterCollection[nrmIdx] = new RealFeatureFilter(DataRange, true, true);
                for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
                {
                    //Adjust filter
                    _predictorFeatureFilterCollection[nrmIdx].Update(dataBundle.InputVectorCollection[pairIdx][nrmIdx]);
                }
            });
            //Output values (filter type is given by the readout unit's task configuration)
            _outputFeatureFilterCollection = new FeatureFilterBase[numOfOutputs];
            Parallel.For(0, _outputFeatureFilterCollection.Length, nrmIdx =>
            {
                _outputFeatureFilterCollection[nrmIdx] = FeatureFilterFactory.Create(DataRange, Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[nrmIdx].TaskCfg.FeatureFilterCfg);
                for (int pairIdx = 0; pairIdx < dataBundle.OutputVectorCollection.Count; pairIdx++)
                {
                    //Adjust output normalizer
                    _outputFeatureFilterCollection[nrmIdx].Update(dataBundle.OutputVectorCollection[pairIdx][nrmIdx]);
                }
            });
            //Data normalization
            //Allocation
            double[][] normalizedPredictorsCollection   = new double[dataBundle.InputVectorCollection.Count][];
            double[][] normalizedIdealOutputsCollection = new double[dataBundle.OutputVectorCollection.Count][];
            //Normalization (each parallel iteration writes only its own row)
            Parallel.For(0, dataBundle.InputVectorCollection.Count, pairIdx =>
            {
                //Predictors
                double[] predictors = new double[numOfPredictors];
                for (int i = 0; i < numOfPredictors; i++)
                {
                    if (_predictorsMapper.PredictorGeneralSwitchCollection[i])
                    {
                        predictors[i] = _predictorFeatureFilterCollection[i].ApplyFilter(dataBundle.InputVectorCollection[pairIdx][i]);
                    }
                    else
                    {
                        //Generally switched-off predictor is marked by NaN
                        predictors[i] = double.NaN;
                    }
                }
                normalizedPredictorsCollection[pairIdx] = predictors;
                //Outputs
                double[] outputs = new double[numOfOutputs];
                for (int i = 0; i < numOfOutputs; i++)
                {
                    outputs[i] = _outputFeatureFilterCollection[i].ApplyFilter(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
                normalizedIdealOutputsCollection[pairIdx] = outputs;
            });

            //Random object initialization (fixed seed 0)
            Random rand = new Random(0);
            //Create shuffled copy of the data (the given dataBundle itself is not shuffled here)
            VectorBundle shuffledData = new VectorBundle(normalizedPredictorsCollection, normalizedIdealOutputsCollection);

            shuffledData.Shuffle(rand);

            //Building of readout units
            for (int unitIdx = 0; unitIdx < Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count; unitIdx++)
            {
                var unitCfg = Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[unitIdx];
                List<double[]> idealValueCollection = new List<double[]>(shuffledData.OutputVectorCollection.Count);
                //Transformation of ideal vectors to a single value vectors
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    idealValueCollection.Add(new double[] { idealVector[unitIdx] });
                }
                List<double[]> readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(unitCfg.Name, shuffledData.InputVectorCollection);
                VectorBundle readoutUnitDataBundle = new VectorBundle(readoutUnitInputVectorCollection, idealValueCollection);
                //Binary border is relevant for the classification task only
                TrainedNetworkClusterBuilder readoutUnitBuilder = new TrainedNetworkClusterBuilder(unitCfg.Name,
                                                                                                   Settings.GetReadoutUnitNetworksCollection(unitIdx),
                                                                                                   DataRange,
                                                                                                   unitCfg.TaskCfg.Type == ReadoutUnit.TaskType.Classification ? BinBorder : double.NaN,
                                                                                                   rand,
                                                                                                   controller
                                                                                                   );
                //Register notification
                readoutUnitBuilder.RegressionEpochDone += OnRegressionEpochDone;
                //Build trained readout unit. Trained unit becomes to be the predicting cluster member
                _readoutUnitCollection[unitIdx] = new ReadoutUnit(unitIdx,
                                                                  readoutUnitBuilder.Build(readoutUnitDataBundle,
                                                                                           Settings.TestDataRatio,
                                                                                           Settings.Folds,
                                                                                           Settings.Repetitions,
                                                                                           new FeatureFilterBase[] { _outputFeatureFilterCollection[unitIdx] }
                                                                                           )
                                                                  );
            }//unitIdx

            //Readout layer is trained and ready
            Trained = true;
            return new RegressionOverview(ReadoutUnitErrStatCollection);
        }
示例#3
0
        /// <summary>
        /// Builds the cluster.
        /// The member networks are trained using repeated crossvalidation: in every repetition the data is
        /// folderized, and for every testing fold one network per configured network configuration is trained
        /// on the remaining folds and added into the cluster.
        /// </summary>
        /// <param name="dataBundle">The data bundle for training (the method works on its shallow copy).</param>
        /// <param name="filters">The filters to be used to denormalize outputs.</param>
        /// <returns>The finalized cluster.</returns>
        public TNRNetCluster Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
        {
            //Working copy of the data (it is reshuffled between the repetitions)
            VectorBundle workingData = dataBundle.CreateShallowCopy();
            //Resulting cluster of trained networks
            TNRNetCluster builtCluster = new TNRNetCluster(_clusterName,
                                                           _clusterCfg.Output,
                                                           _clusterCfg.TrainingGroupWeight,
                                                           _clusterCfg.TestingGroupWeight,
                                                           _clusterCfg.SamplesWeight,
                                                           _clusterCfg.NumericalPrecisionWeight,
                                                           _clusterCfg.MisrecognizedFalseWeight,
                                                           _clusterCfg.UnrecognizedTrueWeight
                                                           );

            //Training of the cluster members
            ResetProgressTracking();
            for (_repetitionIdx = 0; _repetitionIdx < _crossvalidationCfg.Repetitions; _repetitionIdx++)
            {
                //Binary border passed to the folderization (NaN when the cluster output is Real)
                double binBorder;
                if (_clusterCfg.Output == TNRNet.OutputType.Real)
                {
                    binBorder = double.NaN;
                }
                else
                {
                    binBorder = builtCluster.OutputDataRange.Mid;
                }
                //Folderization of the working data
                List<VectorBundle> folds = workingData.Folderize(_crossvalidationCfg.FoldDataRatio, binBorder);
                int availableFolds = folds.Count;
                //A non-positive requested number of folds means "use all available folds"
                _numOfFoldsPerRepetition = Math.Min(_crossvalidationCfg.Folds > 0 ? _crossvalidationCfg.Folds : availableFolds, availableFolds);

                for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
                {
                    VectorBundle testingFold = folds[_testingFoldIdx];
                    //Training data consists of all the folds except the testing one
                    VectorBundle trainingData = new VectorBundle();
                    for (int k = 0; k < folds.Count; k++)
                    {
                        if (k == _testingFoldIdx)
                        {
                            continue;
                        }
                        trainingData.Add(folds[k]);
                    }
                    //One trained network per network configuration
                    for (_netCfgIdx = 0; _netCfgIdx < _clusterCfg.ClusterNetConfigurations.Count; _netCfgIdx++)
                    {
                        TNRNetBuilder netBuilder = new TNRNetBuilder(_clusterName,
                                                                     _clusterCfg.ClusterNetConfigurations[_netCfgIdx],
                                                                     _clusterCfg.Output,
                                                                     trainingData,
                                                                     testingFold,
                                                                     _rand,
                                                                     _controller
                                                                     );
                        //Forward the progress notifications of the network builder
                        netBuilder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                        //The trained network becomes a member of the cluster
                        TNRNet trainedNet = netBuilder.Build();
                        //Scope identifier combines the repetition and the testing fold indexes
                        int netScopeID = (_repetitionIdx * NetScopeDelimiterCoeff) + _testingFoldIdx;
                        builtCluster.AddMember(trainedNet, netScopeID, testingFold, filters);
                    }//_netCfgIdx
                }//_testingFoldIdx

                bool isLastRepetition = _repetitionIdx >= _crossvalidationCfg.Repetitions - 1;
                if (!isLastRepetition)
                {
                    //Another repetition follows, so reshuffle the data
                    workingData.Shuffle(_rand);
                }
            }//_repetitionIdx

            //Make the cluster operable and hand it over
            builtCluster.FinalizeCluster();
            return builtCluster;
        }
示例#4
0
        /// <summary>
        /// Builds computation cluster of trained networks.
        /// For every repetition the data is split into sub-bundles; for every testing fold and every
        /// network configuration one network is trained on the remaining sub-bundles, added into the cluster
        /// together with its weight, and evaluated on the testing fold to update the cluster error statistics.
        /// </summary>
        /// <param name="dataBundle">Data to be used for training. Note that the bundle itself is reshuffled between repetitions.</param>
        /// <param name="testDataRatio">Ratio of test data to be used (determines fold size)</param>
        /// <param name="numOfFolds">Requested number of testing folds (determines number of cluster members). Value LE 0 causes automatic setup. </param>
        /// <param name="repetitions">Defines how many times the generation of folds will be repeated. </param>
        /// <param name="outputFeatureFilterCollection">Output feature filters to be used for output data denormalization.</param>
        /// <returns>The built cluster of trained networks.</returns>
        /// <exception cref="ArgumentNullException">The data bundle is null.</exception>
        /// <exception cref="ArgumentException">
        /// The test data ratio is greater than the allowed maximum or it leads to fewer test samples than the allowed minimum.
        /// </exception>
        public TrainedNetworkCluster Build(VectorBundle dataBundle,
                                           double testDataRatio,
                                           int numOfFolds,
                                           int repetitions,
                                           FeatureFilterBase[] outputFeatureFilterCollection
                                           )
        {
            if (dataBundle == null)
            {
                throw new ArgumentNullException(nameof(dataBundle));
            }
            //Test fold size
            if (testDataRatio > MaxRatioOfTestData)
            {
                throw new ArgumentException($"Test data ratio is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", nameof(testDataRatio));
            }
            int testDataSetLength = (int)Math.Round(dataBundle.OutputVectorCollection.Count * testDataRatio, 0);

            if (testDataSetLength < MinLengthOfTestDataset)
            {
                throw new ArgumentException($"Num of resulting test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", nameof(testDataRatio));
            }
            //Number of folds
            if (numOfFolds <= 0)
            {
                //Auto setup
                numOfFolds = dataBundle.OutputVectorCollection.Count / testDataSetLength;
            }
            //Cluster of trained networks
            //The number of members is computed before numOfFolds is possibly reduced inside the loop below
            int numOfMembers = numOfFolds * _networkSettingsCollection.Count * repetitions;
            TrainedNetworkCluster cluster = new TrainedNetworkCluster(_clusterName, numOfMembers, _dataRange, _binBorder);

            for (int cycle = 0; cycle < repetitions; cycle++)
            {
                //Data split to folds
                List<VectorBundle> subBundleCollection = dataBundle.Split(testDataSetLength, _binBorder);
                //The split can produce fewer sub-bundles than the requested number of folds
                numOfFolds = Math.Min(numOfFolds, subBundleCollection.Count);
                //Train collection of networks for each fold in the cluster.
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    VectorBundle testingData = subBundleCollection[foldIdx];
                    for (int netCfgIdx = 0; netCfgIdx < _networkSettingsCollection.Count; netCfgIdx++)
                    {
                        //Prepare training data bundle (all the sub-bundles except the testing one).
                        //NOTE(review): it is rebuilt for every network configuration as in the original code;
                        //it could be built once per fold if the network builder does not modify it - confirm.
                        VectorBundle trainingData = new VectorBundle();
                        for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
                        {
                            if (bundleIdx != foldIdx)
                            {
                                trainingData.Add(subBundleCollection[bundleIdx]);
                            }
                        }
                        TrainedNetworkBuilder netBuilder = new TrainedNetworkBuilder(_clusterName,
                                                                                     _networkSettingsCollection[netCfgIdx],
                                                                                     (cycle * numOfFolds) + foldIdx + 1,
                                                                                     repetitions * numOfFolds,
                                                                                     netCfgIdx + 1,
                                                                                     _networkSettingsCollection.Count,
                                                                                     trainingData,
                                                                                     testingData,
                                                                                     _binBorder,
                                                                                     _rand,
                                                                                     _controller
                                                                                     );
                        //Register notification
                        netBuilder.RegressionEpochDone += OnRegressionEpochDone;
                        //Build trained network. Trained network becomes to be the cluster member
                        var member = netBuilder.Build();
                        cluster.Members.Add(member);
                        //Set member's weight proportionally to train/test number of samples ratio
                        cluster.Weights.Add((double)testingData.InputVectorCollection.Count / (double)trainingData.InputVectorCollection.Count);
                        //Update cluster error statistics (pessimistic approach)
                        for (int sampleIdx = 0; sampleIdx < testingData.OutputVectorCollection.Count; sampleIdx++)
                        {
                            double[] nrmComputedValues = member.Network.Compute(testingData.InputVectorCollection[sampleIdx]);
                            double[] nrmIdealValues = testingData.OutputVectorCollection[sampleIdx];
                            for (int i = 0; i < nrmComputedValues.Length; i++)
                            {
                                //Denormalized (natural) counterparts of the computed and ideal values
                                double naturalComputedValue = outputFeatureFilterCollection[i].ApplyReverse(nrmComputedValues[i]);
                                double naturalIdealValue    = outputFeatureFilterCollection[i].ApplyReverse(nrmIdealValues[i]);
                                cluster.ErrorStats.Update(nrmComputedValues[i],
                                                          nrmIdealValues[i],
                                                          naturalComputedValue,
                                                          naturalIdealValue
                                                          );
                            } //i
                        }     //sampleIdx
                    }         //netCfgIdx
                }             //foldIdx
                if (cycle < repetitions - 1)
                {
                    //Reshuffle data
                    dataBundle.Shuffle(_rand);
                }
            }
            //Return built cluster
            return cluster;
        }
示例#5
0
        /// <summary>
        /// Builds readout layer.
        /// Prepares prediction clusters containing trained readout units.
        /// </summary>
        /// <param name="dataBundle">Collection of input predictors and associated desired output values</param>
        /// <param name="regressionController">Regression controller delegate</param>
        /// <param name="regressionControllerData">An user object</param>
        /// <param name="predictorsMapper">Optional specific mapping of predictors to readout units</param>
        /// <returns>Returned ResultComparativeBundle is something like a protocol.
        /// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values.
        /// This is the pesimistic approach. Real results on unseen data could be better due to the clustering synergy.
        /// </returns>
        public ResultComparativeBundle Build(VectorBundle dataBundle,
                                             ReadoutUnit.RegressionCallbackDelegate regressionController,
                                             Object regressionControllerData,
                                             PredictorsMapper predictorsMapper = null
                                             )
        {
            //Basic checks
            int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
            int numOfOutputs    = dataBundle.OutputVectorCollection[0].Length;

            if (numOfPredictors == 0)
            {
                throw new Exception("Number of predictors must be greater tham 0.");
            }
            if (numOfOutputs != _settings.ReadoutUnitCfgCollection.Count)
            {
                throw new Exception("Incorrect number of ideal output values in the vector.");
            }

            //Normalization of predictors and output data collections
            //Allocation of normalizers
            _predictorNormalizerCollection = new Normalizer[numOfPredictors];
            for (int i = 0; i < numOfPredictors; i++)
            {
                _predictorNormalizerCollection[i] = new Normalizer(DataRange, NormalizerDefaultReserve, true, false);
            }
            _outputNormalizerCollection = new Normalizer[numOfOutputs];
            for (int i = 0; i < numOfOutputs; i++)
            {
                bool classificationTask = (_settings.ReadoutUnitCfgCollection[i].TaskType == CommonEnums.TaskType.Classification);
                _outputNormalizerCollection[i] = new Normalizer(DataRange,
                                                                classificationTask ? 0 : NormalizerDefaultReserve,
                                                                classificationTask ? false : true,
                                                                false
                                                                );
            }
            //Normalizers adjustment
            for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
            {
                //Checks
                if (dataBundle.InputVectorCollection[pairIdx].Length != numOfPredictors)
                {
                    throw new Exception("Inconsistent number of predictors in the predictors collection.");
                }
                if (dataBundle.OutputVectorCollection[pairIdx].Length != numOfOutputs)
                {
                    throw new Exception("Inconsistent number of values in the ideal values collection.");
                }
                //Adjust predictors normalizers
                for (int i = 0; i < numOfPredictors; i++)
                {
                    _predictorNormalizerCollection[i].Adjust(dataBundle.InputVectorCollection[pairIdx][i]);
                }
                //Adjust outputs normalizers
                for (int i = 0; i < numOfOutputs; i++)
                {
                    _outputNormalizerCollection[i].Adjust(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
            }
            //Data normalization
            //Allocation
            List <double[]> predictorsCollection   = new List <double[]>(dataBundle.InputVectorCollection.Count);
            List <double[]> idealOutputsCollection = new List <double[]>(dataBundle.OutputVectorCollection.Count);

            //Normalization
            for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
            {
                //Predictors
                double[] predictors = new double[numOfPredictors];
                for (int i = 0; i < numOfPredictors; i++)
                {
                    predictors[i] = _predictorNormalizerCollection[i].Normalize(dataBundle.InputVectorCollection[pairIdx][i]);
                }
                predictorsCollection.Add(predictors);
                //Outputs
                double[] outputs = new double[numOfOutputs];
                for (int i = 0; i < numOfOutputs; i++)
                {
                    outputs[i] = _outputNormalizerCollection[i].Normalize(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
                idealOutputsCollection.Add(outputs);
            }
            //Data processing
            //Random object initialization
            Random rand = new Random(0);

            //Predictors mapper (specified or default)
            _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
            //Allocation of computed and ideal vectors for result comparative bundle
            List <double[]> validationComputedVectorCollection = new List <double[]>(idealOutputsCollection.Count);
            List <double[]> validationIdealVectorCollection    = new List <double[]>(idealOutputsCollection.Count);

            for (int i = 0; i < idealOutputsCollection.Count; i++)
            {
                validationComputedVectorCollection.Add(new double[numOfOutputs]);
                validationIdealVectorCollection.Add(new double[numOfOutputs]);
            }
            //Test dataset size
            if (_settings.TestDataRatio > MaxRatioOfTestData)
            {
                throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);

            if (testDataSetLength < MinLengthOfTestDataset)
            {
                throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            //Number of folds
            int numOfFolds = _settings.NumOfFolds;

            if (numOfFolds <= 0)
            {
                //Auto setup
                numOfFolds = idealOutputsCollection.Count / testDataSetLength;
                if (numOfFolds > MaxNumOfFolds)
                {
                    numOfFolds = MaxNumOfFolds;
                }
            }
            //Create shuffled copy of the data
            VectorBundle shuffledData = new VectorBundle(predictorsCollection, idealOutputsCollection);

            shuffledData.Shuffle(rand);
            //Data inspection, preparation of datasets and training of ReadoutUnits
            //Clusters of readout units (one cluster for each output field)
            for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
            {
                _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
                List <double[]> idealValueCollection = new List <double[]>(idealOutputsCollection.Count);
                BinDistribution refBinDistr          = null;
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Reference binary distribution is relevant only for classification task
                    refBinDistr = new BinDistribution(DataRange.Mid);
                }
                //Transformation to a single value vectors and data analysis
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    double[] value = new double[1];
                    value[0] = idealVector[clusterIdx];
                    idealValueCollection.Add(value);
                    if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                    {
                        //Reference binary distribution is relevant only for classification task
                        refBinDistr.Update(value);
                    }
                }
                List <VectorBundle> subBundleCollection = null;
                List <double[]>     readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(_settings.ReadoutUnitCfgCollection[clusterIdx].Name, shuffledData.InputVectorCollection);
                //Datasets preparation is depending on the task type
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Classification task
                    subBundleCollection = DivideSamplesForClassificationTask(readoutUnitInputVectorCollection,
                                                                             idealValueCollection,
                                                                             refBinDistr,
                                                                             testDataSetLength
                                                                             );
                }
                else
                {
                    //Forecast task
                    subBundleCollection = DivideSamplesForForecastTask(readoutUnitInputVectorCollection,
                                                                       idealValueCollection,
                                                                       testDataSetLength
                                                                       );
                }
                //Find best unit per each fold in the cluster.
                ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
                int arrayPos             = 0;
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    //Build training samples
                    List <double[]> trainingPredictorsCollection = new List <double[]>();
                    List <double[]> trainingIdealValueCollection = new List <double[]>();
                    for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
                    {
                        if (bundleIdx != foldIdx)
                        {
                            trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                            trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                        }
                    }
                    //Run the training regression to get the best readout unit of the fold.
                    //The best unit becomes the predicting member of the cluster.
                    _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                        clusterIdx,
                                                                                        foldIdx + 1,
                                                                                        numOfFolds,
                                                                                        refBinDistr,
                                                                                        trainingPredictorsCollection,
                                                                                        trainingIdealValueCollection,
                                                                                        subBundleCollection[foldIdx].InputVectorCollection,
                                                                                        subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                        rand,
                                                                                        _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                        regressionController,
                                                                                        regressionControllerData
                                                                                        );
                    //Cluster error statistics & data for validation bundle (pessimistic approach)
                    for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
                    {
                        double nrmComputedValue = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                        double natComputedValue = _outputNormalizerCollection[clusterIdx].Naturalize(nrmComputedValue);
                        double natIdealValue    = _outputNormalizerCollection[clusterIdx].Naturalize(subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                        ces.Update(nrmComputedValue,
                                   subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0],
                                   natComputedValue,
                                   natIdealValue);
                        validationIdealVectorCollection[arrayPos][clusterIdx]    = natIdealValue;
                        validationComputedVectorCollection[arrayPos][clusterIdx] = natComputedValue;
                        ++arrayPos;
                    }
                } //foldIdx
                _clusterErrStatisticsCollection.Add(ces);
            }     //clusterIdx
            //Validation bundle is returned.
            return(new ResultComparativeBundle(validationComputedVectorCollection, validationIdealVectorCollection));
        }
示例#6
0
        /// <summary>
        /// Builds the trained readout layer.
        /// </summary>
        /// <remarks>
        /// The build updates the predictor and output feature filters with every training value,
        /// applies the filters to get normalized data, shuffles the normalized data and then builds
        /// the readout units one by one. When "One Takes All" groups are defined, the groups using
        /// the cluster chain decision method are built afterwards on the results computed by
        /// the already built readout units.
        /// </remarks>
        /// <param name="dataBundle">The data to be used for training.</param>
        /// <param name="predictorsMapper">The mapper of specific predictors to readout units (optional). When not specified, a default mapper is created.</param>
        /// <param name="controller">The build process controller (optional).</param>
        /// <param name="randomizerSeek">Specifies the random number generator initial seed (optional). A value greater than or equal to 0 will always ensure the same initialization. A negative value creates the generator without an explicit seed.</param>
        /// <returns>The results of training.</returns>
        /// <exception cref="InvalidOperationException">The readout layer is already built, the number of predictors is 0 or the length of the output vectors does not match the number of configured readout units.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="dataBundle"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="dataBundle"/> contains no samples or its numbers of input and output vectors differ.</exception>
        public RegressionOverview Build(VectorBundle dataBundle,
                                        PredictorsMapper predictorsMapper = null,
                                        TNRNetBuilder.BuildControllerDelegate controller = null,
                                        int randomizerSeek = 0
                                        )
        {
            if (Trained)
            {
                throw new InvalidOperationException("Readout layer is already built.");
            }
            //Basic checks
            if (dataBundle == null)
            {
                throw new ArgumentNullException(nameof(dataBundle));
            }
            int numOfSamples = dataBundle.InputVectorCollection.Count;
            if (numOfSamples == 0)
            {
                //The first vector is used below to determine the vector lengths
                throw new ArgumentException("Data bundle does not contain any samples.", nameof(dataBundle));
            }
            if (dataBundle.OutputVectorCollection.Count != numOfSamples)
            {
                //Input and output vectors are processed in pairs
                throw new ArgumentException("Data bundle contains different number of input and output vectors.", nameof(dataBundle));
            }
            int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
            int numOfOutputs = dataBundle.OutputVectorCollection[0].Length;
            var readoutUnitCfgCollection = ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection;

            if (numOfPredictors == 0)
            {
                throw new InvalidOperationException("Number of predictors must be greater than 0.");
            }
            if (numOfOutputs != readoutUnitCfgCollection.Count)
            {
                throw new InvalidOperationException("Incorrect length of output vectors.");
            }
            //Predictors mapper (specified or default)
            _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
            //Allocation and preparation of feature filters
            //Predictors (each filter is touched by exactly one parallel iteration)
            _predictorFeatureFilterCollection = new FeatureFilterBase[numOfPredictors];
            Parallel.For(0, _predictorFeatureFilterCollection.Length, nrmIdx =>
            {
                _predictorFeatureFilterCollection[nrmIdx] = new RealFeatureFilter(InternalDataRange, true, true);
                for (int pairIdx = 0; pairIdx < numOfSamples; pairIdx++)
                {
                    //Adjust filter
                    _predictorFeatureFilterCollection[nrmIdx].Update(dataBundle.InputVectorCollection[pairIdx][nrmIdx]);
                }
            });
            //Output values (each filter is touched by exactly one parallel iteration)
            _outputFeatureFilterCollection = new FeatureFilterBase[numOfOutputs];
            Parallel.For(0, _outputFeatureFilterCollection.Length, nrmIdx =>
            {
                _outputFeatureFilterCollection[nrmIdx] = FeatureFilterFactory.Create(InternalDataRange, readoutUnitCfgCollection[nrmIdx].TaskCfg.FeatureFilterCfg);
                for (int pairIdx = 0; pairIdx < numOfSamples; pairIdx++)
                {
                    //Adjust output filter
                    _outputFeatureFilterCollection[nrmIdx].Update(dataBundle.OutputVectorCollection[pairIdx][nrmIdx]);
                }
            });
            //Data normalization
            //Allocation
            double[][] normalizedPredictorsCollection = new double[numOfSamples][];
            double[][] normalizedIdealOutputsCollection = new double[numOfSamples][];
            //Normalization (each parallel iteration writes only its own array slot)
            Parallel.For(0, numOfSamples, pairIdx =>
            {
                //Predictors
                double[] predictors = new double[numOfPredictors];
                for (int i = 0; i < numOfPredictors; i++)
                {
                    if (_predictorsMapper.PredictorGeneralSwitchCollection[i])
                    {
                        predictors[i] = _predictorFeatureFilterCollection[i].ApplyFilter(dataBundle.InputVectorCollection[pairIdx][i]);
                    }
                    else
                    {
                        //Predictor is switched off by the mapper
                        predictors[i] = double.NaN;
                    }
                }
                normalizedPredictorsCollection[pairIdx] = predictors;
                //Outputs
                double[] outputs = new double[numOfOutputs];
                for (int i = 0; i < numOfOutputs; i++)
                {
                    outputs[i] = _outputFeatureFilterCollection[i].ApplyFilter(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
                normalizedIdealOutputsCollection[pairIdx] = outputs;
            });

            //Random object initialization
            Random rand = (randomizerSeek < 0 ? new Random() : new Random(randomizerSeek));
            //Create shuffled copy of the data
            VectorBundle shuffledData = new VectorBundle(normalizedPredictorsCollection, normalizedIdealOutputsCollection);

            shuffledData.Shuffle(rand);

            //"One Takes All" groups input data space initialization
            //The space is allocated only when there are groups to consume it
            List<CompositeResult[]> allReadoutUnitResults = null;

            if (_oneTakesAllGroupCollection != null)
            {
                allReadoutUnitResults = new List<CompositeResult[]>(shuffledData.InputVectorCollection.Count);
                for (int i = 0; i < shuffledData.InputVectorCollection.Count; i++)
                {
                    allReadoutUnitResults.Add(new CompositeResult[readoutUnitCfgCollection.Count]);
                }
            }

            ResetProgressTracking();
            //Building of readout units
            for (_buildReadoutUnitIdx = 0; _buildReadoutUnitIdx < readoutUnitCfgCollection.Count; _buildReadoutUnitIdx++)
            {
                List<double[]> idealValueCollection = new List<double[]>(shuffledData.OutputVectorCollection.Count);
                //Transformation of ideal vectors to a single value vectors
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    idealValueCollection.Add(new double[] { idealVector[_buildReadoutUnitIdx] });
                }
                List<double[]> readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(readoutUnitCfgCollection[_buildReadoutUnitIdx].Name, shuffledData.InputVectorCollection);
                VectorBundle readoutUnitDataBundle = new VectorBundle(readoutUnitInputVectorCollection, idealValueCollection);
                var readoutUnit = _readoutUnitCollection[_buildReadoutUnitIdx];
                readoutUnit.ReadoutUnitBuildProgressChanged += OnReadoutUnitBuildProgressChanged;
                try
                {
                    readoutUnit.Build(readoutUnitDataBundle,
                                      _outputFeatureFilterCollection[_buildReadoutUnitIdx],
                                      rand,
                                      controller
                                      );
                }
                finally
                {
                    //Detach the handler so a failed and repeated build does not cause duplicate progress notifications
                    readoutUnit.ReadoutUnitBuildProgressChanged -= OnReadoutUnitBuildProgressChanged;
                }
                //Add unit's all computed results into the input data for "One Takes All" groups
                if (_oneTakesAllGroupCollection != null)
                {
                    for (int sampleIdx = 0; sampleIdx < readoutUnitDataBundle.InputVectorCollection.Count; sampleIdx++)
                    {
                        allReadoutUnitResults[sampleIdx][_buildReadoutUnitIdx] = readoutUnit.Compute(readoutUnitDataBundle.InputVectorCollection[sampleIdx]);
                    }
                }
            }//unitIdx

            //One Takes All groups build
            if (_oneTakesAllGroupCollection != null)
            {
                foreach (OneTakesAllGroup otaGroup in _oneTakesAllGroupCollection)
                {
                    //Only the group having inner probabilistic cluster has to be built
                    if (otaGroup.DecisionMethod == OneTakesAllGroup.OneTakesAllDecisionMethod.ClusterChain)
                    {
                        BinFeatureFilter[] groupFilters = new BinFeatureFilter[otaGroup.NumOfMemberClasses];
                        for (int i = 0; i < otaGroup.NumOfMemberClasses; i++)
                        {
                            groupFilters[i] = (BinFeatureFilter)_outputFeatureFilterCollection[otaGroup.MemberReadoutUnitIndexCollection[i]];
                        }
                        ++_buildOTAGroupIdx;
                        otaGroup.OTAGBuildProgressChanged += OnOTAGBuildProgressChanged;
                        try
                        {
                            otaGroup.Build(allReadoutUnitResults, shuffledData.OutputVectorCollection, groupFilters, rand, controller);
                        }
                        finally
                        {
                            //Detach the handler so a failed and repeated build does not cause duplicate progress notifications
                            otaGroup.OTAGBuildProgressChanged -= OnOTAGBuildProgressChanged;
                        }
                    }
                }
            }

            //Readout layer is trained and ready
            Trained = true;
            return new RegressionOverview(ReadoutUnitErrStatCollection);
        }