/// <summary>
/// Builds the cluster chain.
/// </summary>
/// <param name="dataBundle">The data bundle for training.</param>
/// <param name="filters">The filters to be used to denormalize outputs.</param>
public TNRNetClusterChain Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
{
    //The chain to be built
    TNRNetClusterChain chain = new TNRNetClusterChain(_chainName, _clusterChainCfg.Output);
    //Instantiate the chained clusters
    List<TNRNetCluster> chainClusters = new List<TNRNetCluster>(_clusterChainCfg.ClusterCfgCollection.Count);
    for (int clusterIdx = 0; clusterIdx < _clusterChainCfg.ClusterCfgCollection.Count; clusterIdx++)
    {
        //Cluster
        chainClusters.Add(new TNRNetCluster(_chainName,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].Output,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].TrainingGroupWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].TestingGroupWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].SamplesWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].NumericalPrecisionWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].MisrecognizedFalseWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].UnrecognizedTrueWeight
                                            )
                          );
    }
    //Common cross-validation configuration
    double boolBorder = _clusterChainCfg.Output == TNRNet.OutputType.Real ? double.NaN : chain.OutputDataRange.Mid;
    VectorBundle localDataBundle = dataBundle.CreateShallowCopy();
    //Members' training
    ResetProgressTracking();
    for (_repetitionIdx = 0; _repetitionIdx < _clusterChainCfg.CrossvalidationCfg.Repetitions; _repetitionIdx++)
    {
        //Split the data into folds
        List<VectorBundle> foldCollection = localDataBundle.Folderize(_clusterChainCfg.CrossvalidationCfg.FoldDataRatio, boolBorder);
        _numOfFoldsPerRepetition = Math.Min(_clusterChainCfg.CrossvalidationCfg.Folds <= 0 ? foldCollection.Count : _clusterChainCfg.CrossvalidationCfg.Folds, foldCollection.Count);
        List<VectorBundle> currentClusterFoldCollection = CopyFolds(foldCollection);
        List<VectorBundle> nextClusterFoldCollection = new List<VectorBundle>(foldCollection.Count);
        //For each cluster
        for (_clusterIdx = 0; _clusterIdx < chainClusters.Count; _clusterIdx++)
        {
            //Train the networks for each testing fold.
            for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
            {
                //Prepare the training data bundle
                VectorBundle trainingData = new VectorBundle();
                for (int foldIdx = 0; foldIdx < currentClusterFoldCollection.Count; foldIdx++)
                {
                    if (foldIdx != _testingFoldIdx)
                    {
                        trainingData.Add(currentClusterFoldCollection[foldIdx]);
                    }
                }
                VectorBundle nextClusterUpdatedDataFold = foldCollection[_testingFoldIdx].CreateShallowCopy();
                for (_netCfgIdx = 0; _netCfgIdx < _clusterChainCfg.ClusterCfgCollection[_clusterIdx].ClusterNetConfigurations.Count; _netCfgIdx++)
                {
                    TNRNetBuilder netBuilder = new TNRNetBuilder(_chainName,
                                                                 _clusterChainCfg.ClusterCfgCollection[_clusterIdx].ClusterNetConfigurations[_netCfgIdx],
                                                                 _clusterChainCfg.ClusterCfgCollection[_clusterIdx].Output,
                                                                 trainingData,
                                                                 currentClusterFoldCollection[_testingFoldIdx],
                                                                 _rand,
                                                                 _controller
                                                                 );
                    //Register notification
                    netBuilder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                    //Build the trained network. The trained network becomes a cluster member.
                    TNRNet tn = netBuilder.Build();
                    int netScopeID = _repetitionIdx * NetScopeDelimiterCoeff + _testingFoldIdx;
                    chainClusters[_clusterIdx].AddMember(tn, netScopeID, currentClusterFoldCollection[_testingFoldIdx], filters);
                    //Update the input data in the data fold for the next cluster
                    for (int sampleIdx = 0; sampleIdx < currentClusterFoldCollection[_testingFoldIdx].InputVectorCollection.Count; sampleIdx++)
                    {
                        double[] computedNetData = tn.Network.Compute(currentClusterFoldCollection[_testingFoldIdx].InputVectorCollection[sampleIdx]);
                        nextClusterUpdatedDataFold.InputVectorCollection[sampleIdx] = nextClusterUpdatedDataFold.InputVectorCollection[sampleIdx].Concat(computedNetData);
                    }
                }//netCfgIdx
                //Add the updated data fold for the next cluster
                nextClusterFoldCollection.Add(nextClusterUpdatedDataFold);
            }//testingFoldIdx
            //Switch the fold collections
            currentClusterFoldCollection = nextClusterFoldCollection;
            nextClusterFoldCollection = new List<VectorBundle>(currentClusterFoldCollection.Count);
        }//clusterIdx
        if (_repetitionIdx < _clusterChainCfg.CrossvalidationCfg.Repetitions - 1)
        {
            //Reshuffle the data
            localDataBundle.Shuffle(_rand);
        }
    }//repetitionIdx
    //Make the clusters operable and add them into the chain
    for (int clusterIdx = 0; clusterIdx < chainClusters.Count; clusterIdx++)
    {
        chainClusters[clusterIdx].FinalizeCluster();
        chain.AddCluster(chainClusters[clusterIdx]);
    }
    //Return the built chain
    return chain;
}
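//A minimal usage sketch of the chain build above (illustrative only). The builder type name
//"TNRNetClusterChainBuilder", the constructor signature and the names "chainCfg", "trainingBundle"
//and "outputFilters" are assumptions, not taken from this file; one denormalization filter per
//output feature is expected.
//
//  TNRNetClusterChainBuilder builder = new TNRNetClusterChainBuilder("MyChain", chainCfg, new Random(0), null);
//  TNRNetClusterChain chain = builder.Build(trainingBundle, outputFilters);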
/// <summary>
/// Builds the trained readout layer.
/// </summary>
/// <param name="dataBundle">Collection of input predictors and associated desired output values.</param>
/// <param name="predictorsMapper">Optional specific mapping of predictors to readout units.</param>
/// <param name="controller">Optional external regression controller.</param>
/// <returns>Results of the regression.</returns>
public RegressionOverview Build(VectorBundle dataBundle,
                                PredictorsMapper predictorsMapper = null,
                                TrainedNetworkBuilder.RegressionControllerDelegate controller = null
                                )
{
    //Basic checks
    int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
    int numOfOutputs = dataBundle.OutputVectorCollection[0].Length;
    if (numOfPredictors == 0)
    {
        throw new InvalidOperationException("Number of predictors must be greater than 0.");
    }
    if (numOfOutputs != Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count)
    {
        throw new InvalidOperationException("Incorrect length of output vectors.");
    }
    //Predictors mapper (specified or default)
    _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
    //Allocation and preparation of the feature filters
    //Predictors
    _predictorFeatureFilterCollection = new FeatureFilterBase[numOfPredictors];
    Parallel.For(0, _predictorFeatureFilterCollection.Length, nrmIdx =>
    {
        _predictorFeatureFilterCollection[nrmIdx] = new RealFeatureFilter(DataRange, true, true);
        for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
        {
            //Adjust the filter
            _predictorFeatureFilterCollection[nrmIdx].Update(dataBundle.InputVectorCollection[pairIdx][nrmIdx]);
        }
    });
    //Output values
    _outputFeatureFilterCollection = new FeatureFilterBase[numOfOutputs];
    Parallel.For(0, _outputFeatureFilterCollection.Length, nrmIdx =>
    {
        _outputFeatureFilterCollection[nrmIdx] = FeatureFilterFactory.Create(DataRange, Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[nrmIdx].TaskCfg.FeatureFilterCfg);
        for (int pairIdx = 0; pairIdx < dataBundle.OutputVectorCollection.Count; pairIdx++)
        {
            //Adjust the output normalizer
            _outputFeatureFilterCollection[nrmIdx].Update(dataBundle.OutputVectorCollection[pairIdx][nrmIdx]);
        }
    });
    //Data normalization
    //Allocation
    double[][] normalizedPredictorsCollection = new double[dataBundle.InputVectorCollection.Count][];
    double[][] normalizedIdealOutputsCollection = new double[dataBundle.OutputVectorCollection.Count][];
    //Normalization
    Parallel.For(0, dataBundle.InputVectorCollection.Count, pairIdx =>
    {
        //Predictors
        double[] predictors = new double[numOfPredictors];
        for (int i = 0; i < numOfPredictors; i++)
        {
            if (_predictorsMapper.PredictorGeneralSwitchCollection[i])
            {
                predictors[i] = _predictorFeatureFilterCollection[i].ApplyFilter(dataBundle.InputVectorCollection[pairIdx][i]);
            }
            else
            {
                predictors[i] = double.NaN;
            }
        }
        normalizedPredictorsCollection[pairIdx] = predictors;
        //Outputs
        double[] outputs = new double[numOfOutputs];
        for (int i = 0; i < numOfOutputs; i++)
        {
            outputs[i] = _outputFeatureFilterCollection[i].ApplyFilter(dataBundle.OutputVectorCollection[pairIdx][i]);
        }
        normalizedIdealOutputsCollection[pairIdx] = outputs;
    });
    //Random object initialization
    Random rand = new Random(0);
    //Create a shuffled copy of the data
    VectorBundle shuffledData = new VectorBundle(normalizedPredictorsCollection, normalizedIdealOutputsCollection);
    shuffledData.Shuffle(rand);
    //Building of the readout units
    for (int unitIdx = 0; unitIdx < Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count; unitIdx++)
    {
        List<double[]> idealValueCollection = new List<double[]>(shuffledData.OutputVectorCollection.Count);
        //Transformation of the ideal vectors to single-value vectors
        foreach (double[] idealVector in shuffledData.OutputVectorCollection)
        {
            double[] value = new double[1];
            value[0] = idealVector[unitIdx];
            idealValueCollection.Add(value);
        }
        List<double[]> readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[unitIdx].Name, shuffledData.InputVectorCollection);
        VectorBundle readoutUnitDataBundle = new VectorBundle(readoutUnitInputVectorCollection, idealValueCollection);
        TrainedNetworkClusterBuilder readoutUnitBuilder = new TrainedNetworkClusterBuilder(Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[unitIdx].Name,
                                                                                           Settings.GetReadoutUnitNetworksCollection(unitIdx),
                                                                                           DataRange,
                                                                                           Settings.ReadoutUnitsCfg.ReadoutUnitCfgCollection[unitIdx].TaskCfg.Type == ReadoutUnit.TaskType.Classification ? BinBorder : double.NaN,
                                                                                           rand,
                                                                                           controller
                                                                                           );
        //Register notification
        readoutUnitBuilder.RegressionEpochDone += OnRegressionEpochDone;
        //Build the trained readout unit. The trained unit becomes a predicting cluster member.
        _readoutUnitCollection[unitIdx] = new ReadoutUnit(unitIdx,
                                                          readoutUnitBuilder.Build(readoutUnitDataBundle,
                                                                                   Settings.TestDataRatio,
                                                                                   Settings.Folds,
                                                                                   Settings.Repetitions,
                                                                                   new FeatureFilterBase[] { _outputFeatureFilterCollection[unitIdx] }
                                                                                   )
                                                          );
    }//unitIdx
    //The readout layer is trained and ready
    Trained = true;
    return new RegressionOverview(ReadoutUnitErrStatCollection);
}
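//A minimal usage sketch of the readout layer build above (illustrative only). The variable
//names "readoutLayer" and "dataBundle" are assumptions, not taken from this file; null
//arguments fall back to the default predictors mapping and to no external regression controller.
//
//  RegressionOverview overview = readoutLayer.Build(dataBundle, predictorsMapper: null, controller: null);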
/// <summary>
/// Builds the cluster.
/// </summary>
/// <param name="dataBundle">The data bundle for training.</param>
/// <param name="filters">The filters to be used to denormalize outputs.</param>
public TNRNetCluster Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
{
    VectorBundle localDataBundle = dataBundle.CreateShallowCopy();
    //Cluster of trained networks
    TNRNetCluster cluster = new TNRNetCluster(_clusterName,
                                              _clusterCfg.Output,
                                              _clusterCfg.TrainingGroupWeight,
                                              _clusterCfg.TestingGroupWeight,
                                              _clusterCfg.SamplesWeight,
                                              _clusterCfg.NumericalPrecisionWeight,
                                              _clusterCfg.MisrecognizedFalseWeight,
                                              _clusterCfg.UnrecognizedTrueWeight
                                              );
    //Members' training
    ResetProgressTracking();
    for (_repetitionIdx = 0; _repetitionIdx < _crossvalidationCfg.Repetitions; _repetitionIdx++)
    {
        //Split the data into folds
        List<VectorBundle> foldCollection = localDataBundle.Folderize(_crossvalidationCfg.FoldDataRatio,
                                                                      _clusterCfg.Output == TNRNet.OutputType.Real ? double.NaN : cluster.OutputDataRange.Mid
                                                                      );
        _numOfFoldsPerRepetition = Math.Min(_crossvalidationCfg.Folds <= 0 ? foldCollection.Count : _crossvalidationCfg.Folds, foldCollection.Count);
        //Train the collection of networks for each testing fold.
        for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
        {
            //Prepare the training data bundle
            VectorBundle trainingData = new VectorBundle();
            for (int foldIdx = 0; foldIdx < foldCollection.Count; foldIdx++)
            {
                if (foldIdx != _testingFoldIdx)
                {
                    trainingData.Add(foldCollection[foldIdx]);
                }
            }
            for (_netCfgIdx = 0; _netCfgIdx < _clusterCfg.ClusterNetConfigurations.Count; _netCfgIdx++)
            {
                TNRNetBuilder netBuilder = new TNRNetBuilder(_clusterName,
                                                             _clusterCfg.ClusterNetConfigurations[_netCfgIdx],
                                                             _clusterCfg.Output,
                                                             trainingData,
                                                             foldCollection[_testingFoldIdx],
                                                             _rand,
                                                             _controller
                                                             );
                //Register notification
                netBuilder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                //Build the trained network. The trained network becomes a cluster member.
                TNRNet tn = netBuilder.Build();
                //Build a unique network scope identifier
                int netScopeID = _repetitionIdx * NetScopeDelimiterCoeff + _testingFoldIdx;
                //Add the trained network to the cluster
                cluster.AddMember(tn, netScopeID, foldCollection[_testingFoldIdx], filters);
            }//netCfgIdx
        }//testingFoldIdx
        if (_repetitionIdx < _crossvalidationCfg.Repetitions - 1)
        {
            //Reshuffle the data
            localDataBundle.Shuffle(_rand);
        }
    }//repetitionIdx
    //Make the cluster operable
    cluster.FinalizeCluster();
    //Return the built cluster
    return cluster;
}
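//A minimal usage sketch of the cluster build above (illustrative only). The builder type name
//"TNRNetClusterBuilder" and the variable names are assumptions, not taken from this file; the
//filters array is expected to match the output features of the training data.
//
//  TNRNetCluster cluster = clusterBuilder.Build(trainingBundle, outputFilters);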
/// <summary>
/// Builds a computation cluster of trained networks.
/// </summary>
/// <param name="dataBundle">Data to be used for training.</param>
/// <param name="testDataRatio">Ratio of test data to be used (determines the fold size).</param>
/// <param name="numOfFolds">Requested number of testing folds (determines the number of cluster members). A value less than or equal to 0 causes an automatic setup.</param>
/// <param name="repetitions">Defines how many times the generation of folds is repeated.</param>
/// <param name="outputFeatureFilterCollection">Output feature filters to be used for the denormalization of output data.</param>
public TrainedNetworkCluster Build(VectorBundle dataBundle,
                                   double testDataRatio,
                                   int numOfFolds,
                                   int repetitions,
                                   FeatureFilterBase[] outputFeatureFilterCollection
                                   )
{
    //Test fold size
    if (testDataRatio > MaxRatioOfTestData)
    {
        throw new ArgumentException($"Test data ratio is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "testDataRatio");
    }
    int testDataSetLength = (int)Math.Round(dataBundle.OutputVectorCollection.Count * testDataRatio, 0);
    if (testDataSetLength < MinLengthOfTestDataset)
    {
        throw new ArgumentException($"Number of resulting test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "testDataRatio");
    }
    //Number of folds
    if (numOfFolds <= 0)
    {
        //Auto setup
        numOfFolds = dataBundle.OutputVectorCollection.Count / testDataSetLength;
    }
    //Cluster of trained networks
    int numOfMembers = numOfFolds * _networkSettingsCollection.Count * repetitions;
    TrainedNetworkCluster cluster = new TrainedNetworkCluster(_clusterName, numOfMembers, _dataRange, _binBorder);
    for (int cycle = 0; cycle < repetitions; cycle++)
    {
        //Split the data into folds
        List<VectorBundle> subBundleCollection = dataBundle.Split(testDataSetLength, _binBorder);
        numOfFolds = Math.Min(numOfFolds, subBundleCollection.Count);
        //Train a collection of networks for each fold in the cluster.
        for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
        {
            for (int netCfgIdx = 0; netCfgIdx < _networkSettingsCollection.Count; netCfgIdx++)
            {
                //Prepare the training data bundle
                VectorBundle trainingData = new VectorBundle();
                for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
                {
                    if (bundleIdx != foldIdx)
                    {
                        trainingData.Add(subBundleCollection[bundleIdx]);
                    }
                }
                TrainedNetworkBuilder netBuilder = new TrainedNetworkBuilder(_clusterName,
                                                                             _networkSettingsCollection[netCfgIdx],
                                                                             (cycle * numOfFolds) + foldIdx + 1,
                                                                             repetitions * numOfFolds,
                                                                             netCfgIdx + 1,
                                                                             _networkSettingsCollection.Count,
                                                                             trainingData,
                                                                             subBundleCollection[foldIdx],
                                                                             _binBorder,
                                                                             _rand,
                                                                             _controller
                                                                             );
                //Register notification
                netBuilder.RegressionEpochDone += OnRegressionEpochDone;
                //Build the trained network. The trained network becomes a cluster member.
                cluster.Members.Add(netBuilder.Build());
                //Set the member's weight proportionally to the ratio of test samples to training samples
                cluster.Weights.Add((double)subBundleCollection[foldIdx].InputVectorCollection.Count / (double)trainingData.InputVectorCollection.Count);
                //Update the cluster error statistics (pessimistic approach)
                for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
                {
                    double[] nrmComputedValues = cluster.Members.Last().Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx]);
                    for (int i = 0; i < nrmComputedValues.Length; i++)
                    {
                        double naturalComputedValue = outputFeatureFilterCollection[i].ApplyReverse(nrmComputedValues[i]);
                        double naturalIdealValue = outputFeatureFilterCollection[i].ApplyReverse(subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][i]);
                        cluster.ErrorStats.Update(nrmComputedValues[i],
                                                  subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][i],
                                                  naturalComputedValue,
                                                  naturalIdealValue
                                                  );
                    }//i
                }//sampleIdx
            }//netCfgIdx
        }//foldIdx
        if (cycle < repetitions - 1)
        {
            //Reshuffle the data
            dataBundle.Shuffle(_rand);
        }
    }
    //Return the built cluster
    return cluster;
}
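//A minimal usage sketch of the cluster build above (illustrative only), mirroring the call made
//by the readout layer earlier in this section: test data ratio, number of folds (0 = automatic
//setup), number of repetitions and one output feature filter per output value. The variable
//names are assumptions, not taken from this file.
//
//  TrainedNetworkCluster cluster = clusterBuilder.Build(unitDataBundle, 0.1d, 0, 1, new FeatureFilterBase[] { outputFilter });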
/// <summary>
/// Builds the readout layer.
/// Prepares prediction clusters containing trained readout units.
/// </summary>
/// <param name="dataBundle">Collection of input predictors and associated desired output values.</param>
/// <param name="regressionController">Regression controller delegate.</param>
/// <param name="regressionControllerData">A user object passed to the regression controller.</param>
/// <param name="predictorsMapper">Optional specific mapping of predictors to readout units.</param>
/// <returns>The returned ResultComparativeBundle works like a protocol: it records, fold by fold
/// (unit by unit), the predicted values and the corresponding ideal values. This is the pessimistic
/// approach; real results on unseen data could be better due to the clustering synergy.
/// </returns>
public ResultComparativeBundle Build(VectorBundle dataBundle,
                                     ReadoutUnit.RegressionCallbackDelegate regressionController,
                                     Object regressionControllerData,
                                     PredictorsMapper predictorsMapper = null
                                     )
{
    //Basic checks
    int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
    int numOfOutputs = dataBundle.OutputVectorCollection[0].Length;
    if (numOfPredictors == 0)
    {
        throw new Exception("Number of predictors must be greater than 0.");
    }
    if (numOfOutputs != _settings.ReadoutUnitCfgCollection.Count)
    {
        throw new Exception("Incorrect number of ideal output values in the vector.");
    }
    //Normalization of the predictors and output data collections
    //Allocation of the normalizers
    _predictorNormalizerCollection = new Normalizer[numOfPredictors];
    for (int i = 0; i < numOfPredictors; i++)
    {
        _predictorNormalizerCollection[i] = new Normalizer(DataRange, NormalizerDefaultReserve, true, false);
    }
    _outputNormalizerCollection = new Normalizer[numOfOutputs];
    for (int i = 0; i < numOfOutputs; i++)
    {
        bool classificationTask = (_settings.ReadoutUnitCfgCollection[i].TaskType == CommonEnums.TaskType.Classification);
        _outputNormalizerCollection[i] = new Normalizer(DataRange,
                                                        classificationTask ? 0 : NormalizerDefaultReserve,
                                                        !classificationTask,
                                                        false
                                                        );
    }
    //Normalizers adjustment
    for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
    {
        //Checks
        if (dataBundle.InputVectorCollection[pairIdx].Length != numOfPredictors)
        {
            throw new Exception("Inconsistent number of predictors in the predictors collection.");
        }
        if (dataBundle.OutputVectorCollection[pairIdx].Length != numOfOutputs)
        {
            throw new Exception("Inconsistent number of values in the ideal values collection.");
        }
        //Adjust the predictor normalizers
        for (int i = 0; i < numOfPredictors; i++)
        {
            _predictorNormalizerCollection[i].Adjust(dataBundle.InputVectorCollection[pairIdx][i]);
        }
        //Adjust the output normalizers
        for (int i = 0; i < numOfOutputs; i++)
        {
            _outputNormalizerCollection[i].Adjust(dataBundle.OutputVectorCollection[pairIdx][i]);
        }
    }
    //Data normalization
    //Allocation
    List<double[]> predictorsCollection = new List<double[]>(dataBundle.InputVectorCollection.Count);
    List<double[]> idealOutputsCollection = new List<double[]>(dataBundle.OutputVectorCollection.Count);
    //Normalization
    for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
    {
        //Predictors
        double[] predictors = new double[numOfPredictors];
        for (int i = 0; i < numOfPredictors; i++)
        {
            predictors[i] = _predictorNormalizerCollection[i].Normalize(dataBundle.InputVectorCollection[pairIdx][i]);
        }
        predictorsCollection.Add(predictors);
        //Outputs
        double[] outputs = new double[numOfOutputs];
        for (int i = 0; i < numOfOutputs; i++)
        {
            outputs[i] = _outputNormalizerCollection[i].Normalize(dataBundle.OutputVectorCollection[pairIdx][i]);
        }
        idealOutputsCollection.Add(outputs);
    }
    //Data processing
    //Random object initialization
    Random rand = new Random(0);
    //Predictors mapper (specified or default)
    _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
    //Allocation of the computed and ideal vectors for the result comparative bundle
    List<double[]> validationComputedVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    List<double[]> validationIdealVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    for (int i = 0; i < idealOutputsCollection.Count; i++)
    {
        validationComputedVectorCollection.Add(new double[numOfOutputs]);
        validationIdealVectorCollection.Add(new double[numOfOutputs]);
    }
    //Test dataset size
    if (_settings.TestDataRatio > MaxRatioOfTestData)
    {
        throw new ArgumentException($"Test data ratio is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);
    if (testDataSetLength < MinLengthOfTestDataset)
    {
        throw new ArgumentException($"Number of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    //Number of folds
    int numOfFolds = _settings.NumOfFolds;
    if (numOfFolds <= 0)
    {
        //Auto setup
        numOfFolds = idealOutputsCollection.Count / testDataSetLength;
        if (numOfFolds > MaxNumOfFolds)
        {
            numOfFolds = MaxNumOfFolds;
        }
    }
    //Create a shuffled copy of the data
    VectorBundle shuffledData = new VectorBundle(predictorsCollection, idealOutputsCollection);
    shuffledData.Shuffle(rand);
    //Data inspection, preparation of the datasets and training of the ReadoutUnits
    //Clusters of readout units (one cluster for each output field)
    for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
    {
        _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
        List<double[]> idealValueCollection = new List<double[]>(idealOutputsCollection.Count);
        BinDistribution refBinDistr = null;
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //The reference binary distribution is relevant only for the classification task
            refBinDistr = new BinDistribution(DataRange.Mid);
        }
        //Transformation to single-value vectors and data analysis
        foreach (double[] idealVector in shuffledData.OutputVectorCollection)
        {
            double[] value = new double[1];
            value[0] = idealVector[clusterIdx];
            idealValueCollection.Add(value);
            if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
            {
                //The reference binary distribution is relevant only for the classification task
                refBinDistr.Update(value);
            }
        }
        List<VectorBundle> subBundleCollection = null;
        List<double[]> readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(_settings.ReadoutUnitCfgCollection[clusterIdx].Name, shuffledData.InputVectorCollection);
        //Dataset preparation depends on the task type
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Classification task
            subBundleCollection = DivideSamplesForClassificationTask(readoutUnitInputVectorCollection,
                                                                     idealValueCollection,
                                                                     refBinDistr,
                                                                     testDataSetLength
                                                                     );
        }
        else
        {
            //Forecast task
            subBundleCollection = DivideSamplesForForecastTask(readoutUnitInputVectorCollection,
                                                               idealValueCollection,
                                                               testDataSetLength
                                                               );
        }
        //Find the best unit for each fold in the cluster.
        ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
        int arrayPos = 0;
        for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
        {
            //Build the training samples
            List<double[]> trainingPredictorsCollection = new List<double[]>();
            List<double[]> trainingIdealValueCollection = new List<double[]>();
            for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
            {
                if (bundleIdx != foldIdx)
                {
                    trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                    trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                }
            }
            //Call the training regression to get the best fold's readout unit.
            //The best unit becomes the predicting cluster member.
            _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                clusterIdx,
                                                                                foldIdx + 1,
                                                                                numOfFolds,
                                                                                refBinDistr,
                                                                                trainingPredictorsCollection,
                                                                                trainingIdealValueCollection,
                                                                                subBundleCollection[foldIdx].InputVectorCollection,
                                                                                subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                rand,
                                                                                _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                regressionController,
                                                                                regressionControllerData
                                                                                );
            //Cluster error statistics & data for the validation bundle (pessimistic approach)
            for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
            {
                double nrmComputedValue = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                double natComputedValue = _outputNormalizerCollection[clusterIdx].Naturalize(nrmComputedValue);
                double natIdealValue = _outputNormalizerCollection[clusterIdx].Naturalize(subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                ces.Update(nrmComputedValue, subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0], natComputedValue, natIdealValue);
                validationIdealVectorCollection[arrayPos][clusterIdx] = natIdealValue;
                validationComputedVectorCollection[arrayPos][clusterIdx] = natComputedValue;
                ++arrayPos;
            }
        }//foldIdx
        _clusterErrStatisticsCollection.Add(ces);
    }//clusterIdx
    //The validation bundle is returned.
    return new ResultComparativeBundle(validationComputedVectorCollection, validationIdealVectorCollection);
}
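//A minimal usage sketch of the protocol-style build above (illustrative only). The variable
//names "readoutLayer", "dataBundle", "myRegressionController" and "userData" are assumptions;
//the returned bundle pairs computed and ideal vectors fold by fold, so it can be fed directly
//into an error analysis.
//
//  ResultComparativeBundle protocol = readoutLayer.Build(dataBundle, myRegressionController, userData, predictorsMapper: null);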
/// <summary>
/// Builds the trained readout layer.
/// </summary>
/// <param name="dataBundle">The data to be used for training.</param>
/// <param name="predictorsMapper">The mapper of specific predictors to readout units (optional).</param>
/// <param name="controller">The build process controller (optional).</param>
/// <param name="randomizerSeek">Specifies the initial seed of the random number generator (optional). A value greater than or equal to 0 always ensures the same initialization.</param>
/// <returns>The results of training.</returns>
public RegressionOverview Build(VectorBundle dataBundle,
                                PredictorsMapper predictorsMapper = null,
                                TNRNetBuilder.BuildControllerDelegate controller = null,
                                int randomizerSeek = 0
                                )
{
    if (Trained)
    {
        throw new InvalidOperationException("Readout layer is already built.");
    }
    //Basic checks
    int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
    int numOfOutputs = dataBundle.OutputVectorCollection[0].Length;
    if (numOfPredictors == 0)
    {
        throw new InvalidOperationException("Number of predictors must be greater than 0.");
    }
    if (numOfOutputs != ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count)
    {
        throw new InvalidOperationException("Incorrect length of output vectors.");
    }
    //Predictors mapper (specified or default)
    _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
    //Allocation and preparation of the feature filters
    //Predictors
    _predictorFeatureFilterCollection = new FeatureFilterBase[numOfPredictors];
    Parallel.For(0, _predictorFeatureFilterCollection.Length, nrmIdx =>
    {
        _predictorFeatureFilterCollection[nrmIdx] = new RealFeatureFilter(InternalDataRange, true, true);
        for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
        {
            //Adjust the filter
            _predictorFeatureFilterCollection[nrmIdx].Update(dataBundle.InputVectorCollection[pairIdx][nrmIdx]);
        }
    });
    //Output values
    _outputFeatureFilterCollection = new FeatureFilterBase[numOfOutputs];
    Parallel.For(0, _outputFeatureFilterCollection.Length, nrmIdx =>
    {
        _outputFeatureFilterCollection[nrmIdx] = FeatureFilterFactory.Create(InternalDataRange, ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection[nrmIdx].TaskCfg.FeatureFilterCfg);
        for (int pairIdx = 0; pairIdx < dataBundle.OutputVectorCollection.Count; pairIdx++)
        {
            //Adjust the output normalizer
            _outputFeatureFilterCollection[nrmIdx].Update(dataBundle.OutputVectorCollection[pairIdx][nrmIdx]);
        }
    });
    //Data normalization
    //Allocation
    double[][] normalizedPredictorsCollection = new double[dataBundle.InputVectorCollection.Count][];
    double[][] normalizedIdealOutputsCollection = new double[dataBundle.OutputVectorCollection.Count][];
    //Normalization
    Parallel.For(0, dataBundle.InputVectorCollection.Count, pairIdx =>
    {
        //Predictors
        double[] predictors = new double[numOfPredictors];
        for (int i = 0; i < numOfPredictors; i++)
        {
            if (_predictorsMapper.PredictorGeneralSwitchCollection[i])
            {
                predictors[i] = _predictorFeatureFilterCollection[i].ApplyFilter(dataBundle.InputVectorCollection[pairIdx][i]);
            }
            else
            {
                predictors[i] = double.NaN;
            }
        }
        normalizedPredictorsCollection[pairIdx] = predictors;
        //Outputs
        double[] outputs = new double[numOfOutputs];
        for (int i = 0; i < numOfOutputs; i++)
        {
            outputs[i] = _outputFeatureFilterCollection[i].ApplyFilter(dataBundle.OutputVectorCollection[pairIdx][i]);
        }
        normalizedIdealOutputsCollection[pairIdx] = outputs;
    });
    //Random object initialization
    Random rand = (randomizerSeek < 0 ? new Random() : new Random(randomizerSeek));
    //Create a shuffled copy of the data
    VectorBundle shuffledData = new VectorBundle(normalizedPredictorsCollection, normalizedIdealOutputsCollection);
    shuffledData.Shuffle(rand);
    //Initialization of the input data space for the "One Takes All" groups
    List<CompositeResult[]> allReadoutUnitResults = new List<CompositeResult[]>(shuffledData.InputVectorCollection.Count);
    if (_oneTakesAllGroupCollection != null)
    {
        for (int i = 0; i < shuffledData.InputVectorCollection.Count; i++)
        {
            allReadoutUnitResults.Add(new CompositeResult[ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count]);
        }
    }
    ResetProgressTracking();
    //Building of the readout units
    for (_buildReadoutUnitIdx = 0; _buildReadoutUnitIdx < ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count; _buildReadoutUnitIdx++)
    {
        List<double[]> idealValueCollection = new List<double[]>(shuffledData.OutputVectorCollection.Count);
        //Transformation of the ideal vectors to single-value vectors
        foreach (double[] idealVector in shuffledData.OutputVectorCollection)
        {
            double[] value = new double[1];
            value[0] = idealVector[_buildReadoutUnitIdx];
            idealValueCollection.Add(value);
        }
        List<double[]> readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection[_buildReadoutUnitIdx].Name, shuffledData.InputVectorCollection);
        VectorBundle readoutUnitDataBundle = new VectorBundle(readoutUnitInputVectorCollection, idealValueCollection);
        _readoutUnitCollection[_buildReadoutUnitIdx].ReadoutUnitBuildProgressChanged += OnReadoutUnitBuildProgressChanged;
        _readoutUnitCollection[_buildReadoutUnitIdx].Build(readoutUnitDataBundle,
                                                           _outputFeatureFilterCollection[_buildReadoutUnitIdx],
                                                           rand,
                                                           controller
                                                           );
        //Add all of the unit's computed results into the input data for the "One Takes All" groups
        if (_oneTakesAllGroupCollection != null)
        {
            for (int sampleIdx = 0; sampleIdx < readoutUnitDataBundle.InputVectorCollection.Count; sampleIdx++)
            {
                allReadoutUnitResults[sampleIdx][_buildReadoutUnitIdx] = _readoutUnitCollection[_buildReadoutUnitIdx].Compute(readoutUnitDataBundle.InputVectorCollection[sampleIdx]);
            }
        }
    }//buildReadoutUnitIdx
    //Build of the "One Takes All" groups
    if (_oneTakesAllGroupCollection != null)
    {
        foreach (OneTakesAllGroup group in _oneTakesAllGroupCollection)
        {
            //Only a group having an inner probabilistic cluster chain has to be built
            if (group.DecisionMethod == OneTakesAllGroup.OneTakesAllDecisionMethod.ClusterChain)
            {
                BinFeatureFilter[] groupFilters = new BinFeatureFilter[group.NumOfMemberClasses];
                for (int i = 0; i < group.NumOfMemberClasses; i++)
                {
                    groupFilters[i] = (BinFeatureFilter)_outputFeatureFilterCollection[group.MemberReadoutUnitIndexCollection[i]];
                }
                ++_buildOTAGroupIdx;
                group.OTAGBuildProgressChanged += OnOTAGBuildProgressChanged;
                group.Build(allReadoutUnitResults, shuffledData.OutputVectorCollection, groupFilters, rand, controller);
            }
        }
    }
    //The readout layer is trained and ready
    Trained = true;
    return new RegressionOverview(ReadoutUnitErrStatCollection);
}
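//A minimal usage sketch of the build above (illustrative only; the variable names "readoutLayer"
//and "dataBundle" are assumptions). A randomizerSeek >= 0 makes the whole build deterministic,
//while a negative value uses a time-dependent Random, per the branch above.
//
//  RegressionOverview overview = readoutLayer.Build(dataBundle, predictorsMapper: null, controller: null, randomizerSeek: 0);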