/// <summary>
/// Builds the cluster.
/// </summary>
/// <remarks>
/// Works on a shallow copy of the given bundle. In every crossvalidation repetition the data
/// is split to folds and, for every testing fold, one network per configured network
/// configuration is trained on the remaining folds and added into the cluster.
/// The working data is reshuffled between repetitions and the cluster is finalized at the end.
/// </remarks>
/// <param name="dataBundle">The data bundle for training.</param>
/// <param name="filters">The filters to be used to denormalize outputs.</param>
/// <returns>The finalized cluster.</returns>
public TNRNetCluster Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
{
    //Working copy of the data (it is the copy that gets reshuffled between repetitions)
    VectorBundle workBundle = dataBundle.CreateShallowCopy();
    //The cluster to be filled by the trained networks
    TNRNetCluster cluster = new TNRNetCluster(_clusterName,
                                              _clusterCfg.Output,
                                              _clusterCfg.TrainingGroupWeight,
                                              _clusterCfg.TestingGroupWeight,
                                              _clusterCfg.SamplesWeight,
                                              _clusterCfg.NumericalPrecisionWeight,
                                              _clusterCfg.MisrecognizedFalseWeight,
                                              _clusterCfg.UnrecognizedTrueWeight
                                              );
    //Training of the members
    //Note: loop counters are instance fields, they are set here and shared with the rest of the class.
    ResetProgressTracking();
    for (_repetitionIdx = 0; _repetitionIdx < _crossvalidationCfg.Repetitions; _repetitionIdx++)
    {
        //Border passed to the folds generator (NaN when the output is of the Real type)
        double binBorder;
        if (_clusterCfg.Output == TNRNet.OutputType.Real)
        {
            binBorder = double.NaN;
        }
        else
        {
            binBorder = cluster.OutputDataRange.Mid;
        }
        //Division of the data into the folds
        List<VectorBundle> folds = workBundle.Folderize(_crossvalidationCfg.FoldDataRatio, binBorder);
        //Number of the testing folds to be processed (non-positive setting means all of them)
        int requestedFolds = _crossvalidationCfg.Folds;
        if (requestedFolds <= 0)
        {
            requestedFolds = folds.Count;
        }
        _numOfFoldsPerRepetition = Math.Min(requestedFolds, folds.Count);
        //Each processed fold plays the role of the testing data once
        for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
        {
            VectorBundle testingFold = folds[_testingFoldIdx];
            //Training data = all the folds except the testing one
            VectorBundle trainingBundle = new VectorBundle();
            for (int i = 0; i < folds.Count; i++)
            {
                if (i == _testingFoldIdx)
                {
                    continue;
                }
                trainingBundle.Add(folds[i]);
            }
            //One trained network per network configuration
            for (_netCfgIdx = 0; _netCfgIdx < _clusterCfg.ClusterNetConfigurations.Count; _netCfgIdx++)
            {
                TNRNetBuilder builder = new TNRNetBuilder(_clusterName,
                                                          _clusterCfg.ClusterNetConfigurations[_netCfgIdx],
                                                          _clusterCfg.Output,
                                                          trainingBundle,
                                                          testingFold,
                                                          _rand,
                                                          _controller
                                                          );
                //Forward the build progress notifications
                builder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                //The trained network becomes a member of the cluster
                TNRNet trainedNet = builder.Build();
                //Scope identifier composed of the repetition and the testing fold indexes
                int scopeID = _repetitionIdx * NetScopeDelimiterCoeff + _testingFoldIdx;
                cluster.AddMember(trainedNet, scopeID, testingFold, filters);
            }//_netCfgIdx
        }//_testingFoldIdx
        //No reshuffle after the last repetition
        bool isLastRepetition = !(_repetitionIdx < _crossvalidationCfg.Repetitions - 1);
        if (!isLastRepetition)
        {
            workBundle.Shuffle(_rand);
        }
    }//_repetitionIdx
    //Make the cluster operable
    cluster.FinalizeCluster();
    return cluster;
}
/// <summary>
/// Builds the cluster chain.
/// </summary>
/// <remarks>
/// One cluster is instantiated per cluster configuration. In every crossvalidation repetition
/// the data is split to folds and the clusters are trained one after another. For each testing
/// fold, the outputs computed by the just trained networks are appended to the input vectors of
/// a copy of the original fold, and these extended folds are the data of the next cluster.
/// Finally, all the clusters are finalized and added into the chain.
/// </remarks>
/// <param name="dataBundle">The data bundle for training.</param>
/// <param name="filters">The filters to be used to denormalize outputs.</param>
/// <returns>The chain containing the finalized clusters.</returns>
public TNRNetClusterChain Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
{
    //The resulting chain
    TNRNetClusterChain chain = new TNRNetClusterChain(_chainName, _clusterChainCfg.Output);
    //Empty clusters, one per cluster configuration
    List<TNRNetCluster> clusters = new List<TNRNetCluster>(_clusterChainCfg.ClusterCfgCollection.Count);
    for (int i = 0; i < _clusterChainCfg.ClusterCfgCollection.Count; i++)
    {
        var cfg = _clusterChainCfg.ClusterCfgCollection[i];
        TNRNetCluster emptyCluster = new TNRNetCluster(_chainName,
                                                       cfg.Output,
                                                       cfg.TrainingGroupWeight,
                                                       cfg.TestingGroupWeight,
                                                       cfg.SamplesWeight,
                                                       cfg.NumericalPrecisionWeight,
                                                       cfg.MisrecognizedFalseWeight,
                                                       cfg.UnrecognizedTrueWeight
                                                       );
        clusters.Add(emptyCluster);
    }
    //Border passed to the folds generator (NaN when the output is of the Real type)
    double boolBorder;
    if (_clusterChainCfg.Output == TNRNet.OutputType.Real)
    {
        boolBorder = double.NaN;
    }
    else
    {
        boolBorder = chain.OutputDataRange.Mid;
    }
    //Working copy of the data (it is the copy that gets reshuffled between repetitions)
    VectorBundle workBundle = dataBundle.CreateShallowCopy();
    //Training of the members
    //Note: loop counters are instance fields, they are set here and shared with the rest of the class.
    ResetProgressTracking();
    for (_repetitionIdx = 0; _repetitionIdx < _clusterChainCfg.CrossvalidationCfg.Repetitions; _repetitionIdx++)
    {
        //Division of the data into the folds
        List<VectorBundle> baseFolds = workBundle.Folderize(_clusterChainCfg.CrossvalidationCfg.FoldDataRatio, boolBorder);
        //Number of the testing folds to be processed (non-positive setting means all of them)
        int requestedFolds = _clusterChainCfg.CrossvalidationCfg.Folds;
        if (requestedFolds <= 0)
        {
            requestedFolds = baseFolds.Count;
        }
        _numOfFoldsPerRepetition = Math.Min(requestedFolds, baseFolds.Count);
        //Folds used by the cluster being trained and folds being prepared for its successor
        List<VectorBundle> activeFolds = CopyFolds(baseFolds);
        List<VectorBundle> upcomingFolds = new List<VectorBundle>(baseFolds.Count);
        //Clusters are trained in the chain order
        for (_clusterIdx = 0; _clusterIdx < clusters.Count; _clusterIdx++)
        {
            //Each processed fold plays the role of the testing data once
            for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
            {
                //Training data = all the active folds except the testing one
                VectorBundle trainingBundle = new VectorBundle();
                for (int i = 0; i < activeFolds.Count; i++)
                {
                    if (i == _testingFoldIdx)
                    {
                        continue;
                    }
                    trainingBundle.Add(activeFolds[i]);
                }
                //Next cluster's fold starts as a copy of the original (not the active) fold
                VectorBundle extendedFold = baseFolds[_testingFoldIdx].CreateShallowCopy();
                //One trained network per network configuration of the current cluster
                for (_netCfgIdx = 0;
                     _netCfgIdx < _clusterChainCfg.ClusterCfgCollection[_clusterIdx].ClusterNetConfigurations.Count;
                     _netCfgIdx++
                     )
                {
                    var clusterCfg = _clusterChainCfg.ClusterCfgCollection[_clusterIdx];
                    TNRNetBuilder builder = new TNRNetBuilder(_chainName,
                                                              clusterCfg.ClusterNetConfigurations[_netCfgIdx],
                                                              clusterCfg.Output,
                                                              trainingBundle,
                                                              activeFolds[_testingFoldIdx],
                                                              _rand,
                                                              _controller
                                                              );
                    //Forward the build progress notifications
                    builder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                    //The trained network becomes a member of the current cluster
                    TNRNet trainedNet = builder.Build();
                    //Scope identifier composed of the repetition and the testing fold indexes
                    int scopeID = _repetitionIdx * NetScopeDelimiterCoeff + _testingFoldIdx;
                    clusters[_clusterIdx].AddMember(trainedNet, scopeID, activeFolds[_testingFoldIdx], filters);
                    //Append the network's outputs to the inputs prepared for the next cluster
                    for (int s = 0; s < activeFolds[_testingFoldIdx].InputVectorCollection.Count; s++)
                    {
                        double[] netOutputs = trainedNet.Network.Compute(activeFolds[_testingFoldIdx].InputVectorCollection[s]);
                        extendedFold.InputVectorCollection[s] = extendedFold.InputVectorCollection[s].Concat(netOutputs);
                    }
                }//_netCfgIdx
                //Extended fold is ready for the next cluster
                upcomingFolds.Add(extendedFold);
            }//_testingFoldIdx
            //Prepared folds become the active ones, start collecting a new set
            activeFolds = upcomingFolds;
            upcomingFolds = new List<VectorBundle>(activeFolds.Count);
        }//_clusterIdx
        //No reshuffle after the last repetition
        bool isLastRepetition = !(_repetitionIdx < _clusterChainCfg.CrossvalidationCfg.Repetitions - 1);
        if (!isLastRepetition)
        {
            workBundle.Shuffle(_rand);
        }
    }//_repetitionIdx
    //Make the clusters operable and put them into the chain (in the chain order)
    foreach (TNRNetCluster trainedCluster in clusters)
    {
        trainedCluster.FinalizeCluster();
        chain.AddCluster(trainedCluster);
    }
    return chain;
}
/// <summary>
/// Builds a computation cluster of trained networks.
/// </summary>
/// <remarks>
/// In every repetition the data is split to sub-bundles (folds). For every processed fold one
/// network per network configuration is trained on the remaining folds and tested on the fold.
/// Each built network is added into the cluster together with its weight (test samples count
/// divided by training samples count) and the cluster error statistics are updated from the
/// network's outputs computed on the testing fold.
/// Note that between repetitions Shuffle is invoked directly on the passed
/// <paramref name="dataBundle"/>; no copy of the bundle is made here.
/// </remarks>
/// <param name="dataBundle">Data to be used for training.</param>
/// <param name="testDataRatio">Ratio of test data to be used (determines fold size).</param>
/// <param name="numOfFolds">Requested number of testing folds (determines number of cluster members). Value LE 0 causes automatic setup.</param>
/// <param name="repetitions">Defines how many times the generation of folds will be repeated.</param>
/// <param name="outputFeatureFilterCollection">Output feature filters to be used for output data denormalization.</param>
/// <returns>The built cluster.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="dataBundle"/> or <paramref name="outputFeatureFilterCollection"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="testDataRatio"/> is greater than the allowed maximum or it leads to too few test samples.
/// </exception>
public TrainedNetworkCluster Build(VectorBundle dataBundle,
                                   double testDataRatio,
                                   int numOfFolds,
                                   int repetitions,
                                   FeatureFilterBase[] outputFeatureFilterCollection
                                   )
{
    //Fail fast on missing arguments (otherwise the failure would come as a NullReferenceException,
    //in case of the filters not before the first network is already trained)
    if (dataBundle == null)
    {
        throw new ArgumentNullException(nameof(dataBundle));
    }
    if (outputFeatureFilterCollection == null)
    {
        throw new ArgumentNullException(nameof(outputFeatureFilterCollection));
    }
    //Test fold size
    if (testDataRatio > MaxRatioOfTestData)
    {
        throw new ArgumentException($"Test data ratio is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", nameof(testDataRatio));
    }
    int testDataSetLength = (int)Math.Round(dataBundle.OutputVectorCollection.Count * testDataRatio, 0);
    if (testDataSetLength < MinLengthOfTestDataset)
    {
        throw new ArgumentException($"Num of resulting test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", nameof(testDataRatio));
    }
    //Number of folds
    if (numOfFolds <= 0)
    {
        //Auto setup
        numOfFolds = dataBundle.OutputVectorCollection.Count / testDataSetLength;
    }
    //Cluster of trained networks
    //Note: numOfMembers is computed before numOfFolds is possibly lowered inside the loop below.
    int numOfMembers = numOfFolds * _networkSettingsCollection.Count * repetitions;
    TrainedNetworkCluster cluster = new TrainedNetworkCluster(_clusterName, numOfMembers, _dataRange, _binBorder);
    for (int cycle = 0; cycle < repetitions; cycle++)
    {
        //Data split to folds
        List<VectorBundle> subBundleCollection = dataBundle.Split(testDataSetLength, _binBorder);
        //Never process more folds than were really created
        numOfFolds = Math.Min(numOfFolds, subBundleCollection.Count);
        //Train collection of networks for each fold in the cluster.
        for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
        {
            VectorBundle testingData = subBundleCollection[foldIdx];
            //Prepare training data bundle (all the folds except the testing one).
            //It does not depend on the network configuration so it is built once per fold.
            VectorBundle trainingData = new VectorBundle();
            for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
            {
                if (bundleIdx != foldIdx)
                {
                    trainingData.Add(subBundleCollection[bundleIdx]);
                }
            }
            for (int netCfgIdx = 0; netCfgIdx < _networkSettingsCollection.Count; netCfgIdx++)
            {
                TrainedNetworkBuilder netBuilder = new TrainedNetworkBuilder(_clusterName,
                                                                             _networkSettingsCollection[netCfgIdx],
                                                                             (cycle * numOfFolds) + foldIdx + 1,
                                                                             repetitions * numOfFolds,
                                                                             netCfgIdx + 1,
                                                                             _networkSettingsCollection.Count,
                                                                             trainingData,
                                                                             testingData,
                                                                             _binBorder,
                                                                             _rand,
                                                                             _controller
                                                                             );
                //Register notification
                netBuilder.RegressionEpochDone += OnRegressionEpochDone;
                //Build trained network. Trained network becomes the cluster member
                var member = netBuilder.Build();
                cluster.Members.Add(member);
                //Set member's weight proportionally to test/train number of samples ratio
                cluster.Weights.Add((double)testingData.InputVectorCollection.Count / (double)trainingData.InputVectorCollection.Count);
                //Update cluster error statistics (pessimistic approach)
                for (int sampleIdx = 0; sampleIdx < testingData.OutputVectorCollection.Count; sampleIdx++)
                {
                    double[] nrmComputedValues = member.Network.Compute(testingData.InputVectorCollection[sampleIdx]);
                    for (int i = 0; i < nrmComputedValues.Length; i++)
                    {
                        //Normalized values converted back by the corresponding output filter
                        double naturalComputedValue = outputFeatureFilterCollection[i].ApplyReverse(nrmComputedValues[i]);
                        double naturalIdealValue = outputFeatureFilterCollection[i].ApplyReverse(testingData.OutputVectorCollection[sampleIdx][i]);
                        cluster.ErrorStats.Update(nrmComputedValues[i],
                                                  testingData.OutputVectorCollection[sampleIdx][i],
                                                  naturalComputedValue,
                                                  naturalIdealValue
                                                  );
                    }//i
                }//sampleIdx
            }//netCfgIdx
        }//foldIdx
        if (cycle < repetitions - 1)
        {
            //Reshuffle data (invoked on the caller's bundle)
            dataBundle.Shuffle(_rand);
        }
    }
    //Return built cluster
    return cluster;
}