private void TestDataBundleFolderization(string dataFile, int numOfClasses)
{
    //Load csv data
    CsvDataHolder csvData = new CsvDataHolder(dataFile);
    //Convert csv data to a VectorBundle
    VectorBundle vectorData = VectorBundle.Load(csvData, numOfClasses);
    double binBorder = 0.5d;
    double[] foldDataRatios = { -1d, 0d, 0.1d, 0.5d, 0.75d, 1d, 2d };
    Console.WriteLine($"Folderization test of {dataFile}. NumOfSamples={vectorData.InputVectorCollection.Count.ToString(CultureInfo.InvariantCulture)}, NumOfFoldDataRatios={foldDataRatios.Length.ToString(CultureInfo.InvariantCulture)}");
    foreach (double foldDataRatio in foldDataRatios)
    {
        Console.WriteLine($"  Testing fold data ratio = {foldDataRatio.ToString(CultureInfo.InvariantCulture)}");
        List<VectorBundle> folds = vectorData.Folderize(foldDataRatio, binBorder);
        Console.WriteLine($"    Number of resulting folds = {folds.Count.ToString(CultureInfo.InvariantCulture)}");
        for (int foldIdx = 0; foldIdx < folds.Count; foldIdx++)
        {
            int numOfFoldSamples = folds[foldIdx].InputVectorCollection.Count;
            Console.WriteLine($"      FoldIdx={foldIdx.ToString(CultureInfo.InvariantCulture),-4} FoldSize={numOfFoldSamples.ToString(CultureInfo.InvariantCulture),-4}");
            int[] classesBin1Counts = new int[numOfClasses];
            classesBin1Counts.Populate(0);
            for (int sampleIdx = 0; sampleIdx < numOfFoldSamples; sampleIdx++)
            {
                for (int classIdx = 0; classIdx < numOfClasses; classIdx++)
                {
                    if (folds[foldIdx].OutputVectorCollection[sampleIdx][classIdx] >= binBorder)
                    {
                        ++classesBin1Counts[classIdx];
                    }
                }
            }
            Console.WriteLine($"        Number of positive samples per class");
            for (int classIdx = 0; classIdx < numOfClasses; classIdx++)
            {
                Console.WriteLine($"          ClassID={classIdx.ToString(CultureInfo.InvariantCulture),-3}, Bin1Samples={classesBin1Counts[classIdx].ToString(CultureInfo.InvariantCulture)}");
            }
        }
        Console.ReadLine();
    }
    return;
}
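//--------------------------------------------------------------------------------
//Usage sketch (illustrative only): how the folderization test above might be invoked.
//The csv file path and the number of classes are hypothetical placeholder values,
//not part of the original code.
//
//  TestDataBundleFolderization("./Data/ExampleClassificationData.csv", 3);
//
//Folderize splits the bundle into folds whose size is driven by foldDataRatio;
//the out-of-range ratios in the test (-1, 0, 2) presumably exercise the method's
//default/limit handling. For each fold the test then reports how many samples
//per class fall into bin 1 (output value >= binBorder).
//--------------------------------------------------------------------------------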
/// <summary>
/// Builds the cluster chain.
/// </summary>
/// <param name="dataBundle">The data bundle for training.</param>
/// <param name="filters">The filters to be used to denormalize outputs.</param>
public TNRNetClusterChain Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
{
    //The chain to be built
    TNRNetClusterChain chain = new TNRNetClusterChain(_chainName, _clusterChainCfg.Output);
    //Instantiate chained clusters
    List<TNRNetCluster> chainClusters = new List<TNRNetCluster>(_clusterChainCfg.ClusterCfgCollection.Count);
    for (int clusterIdx = 0; clusterIdx < _clusterChainCfg.ClusterCfgCollection.Count; clusterIdx++)
    {
        //Cluster
        chainClusters.Add(new TNRNetCluster(_chainName,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].Output,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].TrainingGroupWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].TestingGroupWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].SamplesWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].NumericalPrecisionWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].MisrecognizedFalseWeight,
                                            _clusterChainCfg.ClusterCfgCollection[clusterIdx].UnrecognizedTrueWeight
                                            )
                          );
    }
    //Common crossvalidation configuration
    double boolBorder = _clusterChainCfg.Output == TNRNet.OutputType.Real ? double.NaN : chain.OutputDataRange.Mid;
    VectorBundle localDataBundle = dataBundle.CreateShallowCopy();
    //Members' training
    ResetProgressTracking();
    for (_repetitionIdx = 0; _repetitionIdx < _clusterChainCfg.CrossvalidationCfg.Repetitions; _repetitionIdx++)
    {
        //Split data into folds
        List<VectorBundle> foldCollection = localDataBundle.Folderize(_clusterChainCfg.CrossvalidationCfg.FoldDataRatio, boolBorder);
        _numOfFoldsPerRepetition = Math.Min(_clusterChainCfg.CrossvalidationCfg.Folds <= 0 ? foldCollection.Count : _clusterChainCfg.CrossvalidationCfg.Folds, foldCollection.Count);
        List<VectorBundle> currentClusterFoldCollection = CopyFolds(foldCollection);
        List<VectorBundle> nextClusterFoldCollection = new List<VectorBundle>(foldCollection.Count);
        //For each cluster
        for (_clusterIdx = 0; _clusterIdx < chainClusters.Count; _clusterIdx++)
        {
            //Train networks for each testing fold.
            for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
            {
                //Prepare training data bundle
                VectorBundle trainingData = new VectorBundle();
                for (int foldIdx = 0; foldIdx < currentClusterFoldCollection.Count; foldIdx++)
                {
                    if (foldIdx != _testingFoldIdx)
                    {
                        trainingData.Add(currentClusterFoldCollection[foldIdx]);
                    }
                }
                VectorBundle nextClusterUpdatedDataFold = foldCollection[_testingFoldIdx].CreateShallowCopy();
                for (_netCfgIdx = 0; _netCfgIdx < _clusterChainCfg.ClusterCfgCollection[_clusterIdx].ClusterNetConfigurations.Count; _netCfgIdx++)
                {
                    TNRNetBuilder netBuilder = new TNRNetBuilder(_chainName,
                                                                 _clusterChainCfg.ClusterCfgCollection[_clusterIdx].ClusterNetConfigurations[_netCfgIdx],
                                                                 _clusterChainCfg.ClusterCfgCollection[_clusterIdx].Output,
                                                                 trainingData,
                                                                 currentClusterFoldCollection[_testingFoldIdx],
                                                                 _rand,
                                                                 _controller
                                                                 );
                    //Register notification
                    netBuilder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                    //Build the trained network. The trained network becomes a cluster member.
                    TNRNet tn = netBuilder.Build();
                    int netScopeID = _repetitionIdx * NetScopeDelimiterCoeff + _testingFoldIdx;
                    chainClusters[_clusterIdx].AddMember(tn, netScopeID, currentClusterFoldCollection[_testingFoldIdx], filters);
                    //Update input data in the data fold for the next cluster
                    for (int sampleIdx = 0; sampleIdx < currentClusterFoldCollection[_testingFoldIdx].InputVectorCollection.Count; sampleIdx++)
                    {
                        double[] computedNetData = tn.Network.Compute(currentClusterFoldCollection[_testingFoldIdx].InputVectorCollection[sampleIdx]);
                        nextClusterUpdatedDataFold.InputVectorCollection[sampleIdx] = nextClusterUpdatedDataFold.InputVectorCollection[sampleIdx].Concat(computedNetData);
                    }
                }//netCfgIdx
                //Add updated data fold for the next cluster
                nextClusterFoldCollection.Add(nextClusterUpdatedDataFold);
            }//testingFoldIdx
            //Switch fold collections
            currentClusterFoldCollection = nextClusterFoldCollection;
            nextClusterFoldCollection = new List<VectorBundle>(currentClusterFoldCollection.Count);
        }//clusterIdx
        if (_repetitionIdx < _clusterChainCfg.CrossvalidationCfg.Repetitions - 1)
        {
            //Reshuffle the data
            localDataBundle.Shuffle(_rand);
        }
    }//repetitionIdx
    //Make the clusters operable and add them into the chain
    for (int clusterIdx = 0; clusterIdx < chainClusters.Count; clusterIdx++)
    {
        chainClusters[clusterIdx].FinalizeCluster();
        chain.AddCluster(chainClusters[clusterIdx]);
    }
    //Return the built chain
    return chain;
}
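//--------------------------------------------------------------------------------
//Usage sketch (illustrative only): a hedged example of calling the chain builder's
//Build method. The local names (chainBuilder, trainingBundle, outputFilters) are
//hypothetical; only the Build(dataBundle, filters) call itself comes from the code above.
//
//  TNRNetClusterChain trainedChain = chainBuilder.Build(trainingBundle, outputFilters);
//
//Build repeatedly folderizes the data, trains one network per configuration for every
//testing fold, concatenates each trained network's outputs onto the fold's input vectors
//as extra inputs for the next cluster in the chain, and finally returns the chain with
//all clusters finalized (operable).
//--------------------------------------------------------------------------------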
/// <summary>
/// Builds the cluster.
/// </summary>
/// <param name="dataBundle">The data bundle for training.</param>
/// <param name="filters">The filters to be used to denormalize outputs.</param>
public TNRNetCluster Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
{
    VectorBundle localDataBundle = dataBundle.CreateShallowCopy();
    //Cluster of trained networks
    TNRNetCluster cluster = new TNRNetCluster(_clusterName,
                                              _clusterCfg.Output,
                                              _clusterCfg.TrainingGroupWeight,
                                              _clusterCfg.TestingGroupWeight,
                                              _clusterCfg.SamplesWeight,
                                              _clusterCfg.NumericalPrecisionWeight,
                                              _clusterCfg.MisrecognizedFalseWeight,
                                              _clusterCfg.UnrecognizedTrueWeight
                                              );
    //Members' training
    ResetProgressTracking();
    for (_repetitionIdx = 0; _repetitionIdx < _crossvalidationCfg.Repetitions; _repetitionIdx++)
    {
        //Split data into folds
        List<VectorBundle> foldCollection = localDataBundle.Folderize(_crossvalidationCfg.FoldDataRatio, _clusterCfg.Output == TNRNet.OutputType.Real ? double.NaN : cluster.OutputDataRange.Mid);
        _numOfFoldsPerRepetition = Math.Min(_crossvalidationCfg.Folds <= 0 ? foldCollection.Count : _crossvalidationCfg.Folds, foldCollection.Count);
        //Train the collection of networks for each testing fold.
        for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
        {
            //Prepare training data bundle
            VectorBundle trainingData = new VectorBundle();
            for (int foldIdx = 0; foldIdx < foldCollection.Count; foldIdx++)
            {
                if (foldIdx != _testingFoldIdx)
                {
                    trainingData.Add(foldCollection[foldIdx]);
                }
            }
            for (_netCfgIdx = 0; _netCfgIdx < _clusterCfg.ClusterNetConfigurations.Count; _netCfgIdx++)
            {
                TNRNetBuilder netBuilder = new TNRNetBuilder(_clusterName,
                                                             _clusterCfg.ClusterNetConfigurations[_netCfgIdx],
                                                             _clusterCfg.Output,
                                                             trainingData,
                                                             foldCollection[_testingFoldIdx],
                                                             _rand,
                                                             _controller
                                                             );
                //Register notification
                netBuilder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                //Build the trained network. The trained network becomes a cluster member.
                TNRNet tn = netBuilder.Build();
                //Build a unique network scope identifier
                int netScopeID = _repetitionIdx * NetScopeDelimiterCoeff + _testingFoldIdx;
                //Add the trained network to the cluster
                cluster.AddMember(tn, netScopeID, foldCollection[_testingFoldIdx], filters);
            }//netCfgIdx
        }//testingFoldIdx
        if (_repetitionIdx < _crossvalidationCfg.Repetitions - 1)
        {
            //Reshuffle the data
            localDataBundle.Shuffle(_rand);
        }
    }//repetitionIdx
    //Make the cluster operable
    cluster.FinalizeCluster();
    //Return the built cluster
    return cluster;
}
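//--------------------------------------------------------------------------------
//Usage sketch (illustrative only): a hedged example of calling the cluster builder's
//Build method. The local names (clusterBuilder, trainingBundle, outputFilters) are
//hypothetical; only the Build(dataBundle, filters) call itself comes from the code above.
//
//  TNRNetCluster trainedCluster = clusterBuilder.Build(trainingBundle, outputFilters);
//
//In contrast to the chain builder, this variant trains a single cluster: for every
//repetition it folderizes the data, trains one network per configuration on the
//non-testing folds, adds each trained network as a cluster member scored on its
//testing fold, and returns the finalized cluster.
//--------------------------------------------------------------------------------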