Beispiel #1
0
        /// <summary>
        /// Trains the member networks and assembles them into a finalized cluster.
        /// </summary>
        /// <remarks>
        /// For every crossvalidation repetition the data is split into folds. Each processed fold is used once
        /// as the testing fold while the remaining folds form the training data, and one network is trained
        /// per configured network configuration. Reshuffling between repetitions is invoked on a shallow copy
        /// of the given bundle, not on the given bundle instance itself.
        /// </remarks>
        /// <param name="dataBundle">The data bundle for training.</param>
        /// <param name="filters">The filters to be used to denormalize outputs.</param>
        /// <returns>The cluster containing all trained networks, after FinalizeCluster has been called on it.</returns>
        public TNRNetCluster Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
        {
            //Working copy of the data (it is the copy that gets reshuffled)
            VectorBundle workingBundle = dataBundle.CreateShallowCopy();
            //The cluster to be filled with trained networks
            TNRNetCluster result = new TNRNetCluster(
                _clusterName,
                _clusterCfg.Output,
                _clusterCfg.TrainingGroupWeight,
                _clusterCfg.TestingGroupWeight,
                _clusterCfg.SamplesWeight,
                _clusterCfg.NumericalPrecisionWeight,
                _clusterCfg.MisrecognizedFalseWeight,
                _clusterCfg.UnrecognizedTrueWeight);

            //Training of the members
            //Note: the loop counters are instance fields, so they stay visible outside of this method
            ResetProgressTracking();
            for (_repetitionIdx = 0; _repetitionIdx < _crossvalidationCfg.Repetitions; _repetitionIdx++)
            {
                //Border passed to the fold split: NaN for the Real output, otherwise the middle of the output range
                double splitBorder;
                if (_clusterCfg.Output == TNRNet.OutputType.Real)
                {
                    splitBorder = double.NaN;
                }
                else
                {
                    splitBorder = result.OutputDataRange.Mid;
                }
                //Split the data to folds
                List<VectorBundle> folds = workingBundle.Folderize(_crossvalidationCfg.FoldDataRatio, splitBorder);
                //Non-positive number of requested folds means "use all available folds"
                int requestedFolds = _crossvalidationCfg.Folds <= 0 ? folds.Count : _crossvalidationCfg.Folds;
                _numOfFoldsPerRepetition = Math.Min(requestedFolds, folds.Count);
                //Each processed fold plays the role of the testing fold once
                for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
                {
                    VectorBundle testingFold = folds[_testingFoldIdx];
                    //Training data consists of all the folds except the testing one
                    VectorBundle trainingData = new VectorBundle();
                    for (int foldIdx = 0; foldIdx < folds.Count; foldIdx++)
                    {
                        if (foldIdx == _testingFoldIdx)
                        {
                            continue;
                        }
                        trainingData.Add(folds[foldIdx]);
                    }
                    //One trained network per network configuration
                    for (_netCfgIdx = 0; _netCfgIdx < _clusterCfg.ClusterNetConfigurations.Count; _netCfgIdx++)
                    {
                        TNRNetBuilder netBuilder = new TNRNetBuilder(
                            _clusterName,
                            _clusterCfg.ClusterNetConfigurations[_netCfgIdx],
                            _clusterCfg.Output,
                            trainingData,
                            testingFold,
                            _rand,
                            _controller);
                        //Forward the build progress notifications
                        netBuilder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                        //The trained network becomes a member of the cluster
                        TNRNet trainedNet = netBuilder.Build();
                        //Scope identifier combines the repetition and the testing fold
                        int netScopeID = _repetitionIdx * NetScopeDelimiterCoeff + _testingFoldIdx;
                        result.AddMember(trainedNet, netScopeID, testingFold, filters);
                    }//netCfgIdx
                }//testingFoldIdx
                //Reshuffle the data unless this was the last repetition
                if (_repetitionIdx + 1 < _crossvalidationCfg.Repetitions)
                {
                    workingBundle.Shuffle(_rand);
                }
            }//repetitionIdx
            //Make the cluster operable
            result.FinalizeCluster();
            return result;
        }
        /// <summary>
        /// Trains the networks of all chained clusters and assembles the clusters into a chain.
        /// </summary>
        /// <remarks>
        /// Within every crossvalidation repetition the clusters are processed one after another. The first
        /// cluster works with a copy of the freshly split folds. For every following cluster, the input vectors
        /// of each testing fold are the original fold's input vectors extended by the outputs of every network
        /// the preceding cluster trained with that fold as its testing fold.
        /// </remarks>
        /// <param name="dataBundle">The data bundle for training.</param>
        /// <param name="filters">The filters to be used to denormalize outputs.</param>
        /// <returns>The chain containing all finalized clusters, in the order of their configurations.</returns>
        public TNRNetClusterChain Build(VectorBundle dataBundle, FeatureFilterBase[] filters)
        {
            //The resulting chain
            TNRNetClusterChain chain = new TNRNetClusterChain(_chainName, _clusterChainCfg.Output);
            //One (still empty) cluster per cluster configuration
            List<TNRNetCluster> clusters = new List<TNRNetCluster>(_clusterChainCfg.ClusterCfgCollection.Count);
            for (int cfgIdx = 0; cfgIdx < _clusterChainCfg.ClusterCfgCollection.Count; cfgIdx++)
            {
                var cfg = _clusterChainCfg.ClusterCfgCollection[cfgIdx];
                TNRNetCluster emptyCluster = new TNRNetCluster(
                    _chainName,
                    cfg.Output,
                    cfg.TrainingGroupWeight,
                    cfg.TestingGroupWeight,
                    cfg.SamplesWeight,
                    cfg.NumericalPrecisionWeight,
                    cfg.MisrecognizedFalseWeight,
                    cfg.UnrecognizedTrueWeight);
                clusters.Add(emptyCluster);
            }
            //Border passed to the fold split: NaN for the Real output, otherwise the middle of the chain's output range
            double boolBorder;
            if (_clusterChainCfg.Output == TNRNet.OutputType.Real)
            {
                boolBorder = double.NaN;
            }
            else
            {
                boolBorder = chain.OutputDataRange.Mid;
            }

            //Working copy of the data (it is the copy that gets reshuffled)
            VectorBundle workingBundle = dataBundle.CreateShallowCopy();

            //Training of the members
            //Note: the loop counters are instance fields, so they stay visible outside of this method
            ResetProgressTracking();
            for (_repetitionIdx = 0; _repetitionIdx < _clusterChainCfg.CrossvalidationCfg.Repetitions; _repetitionIdx++)
            {
                //Split the data to folds
                List<VectorBundle> originalFolds = workingBundle.Folderize(_clusterChainCfg.CrossvalidationCfg.FoldDataRatio, boolBorder);
                //Non-positive number of requested folds means "use all available folds"
                int requestedFolds = _clusterChainCfg.CrossvalidationCfg.Folds <= 0 ? originalFolds.Count : _clusterChainCfg.CrossvalidationCfg.Folds;
                _numOfFoldsPerRepetition = Math.Min(requestedFolds, originalFolds.Count);

                //Folds feeding the currently processed cluster
                List<VectorBundle> inputFolds = CopyFolds(originalFolds);
                for (_clusterIdx = 0; _clusterIdx < clusters.Count; _clusterIdx++)
                {
                    var clusterCfg = _clusterChainCfg.ClusterCfgCollection[_clusterIdx];
                    TNRNetCluster currentCluster = clusters[_clusterIdx];
                    //Folds to be handed over to the next cluster
                    List<VectorBundle> outputFolds = new List<VectorBundle>(inputFolds.Count);
                    //Each processed fold plays the role of the testing fold once
                    for (_testingFoldIdx = 0; _testingFoldIdx < _numOfFoldsPerRepetition; _testingFoldIdx++)
                    {
                        VectorBundle testingFold = inputFolds[_testingFoldIdx];
                        //Training data consists of all the input folds except the testing one
                        VectorBundle trainingData = new VectorBundle();
                        for (int foldIdx = 0; foldIdx < inputFolds.Count; foldIdx++)
                        {
                            if (foldIdx == _testingFoldIdx)
                            {
                                continue;
                            }
                            trainingData.Add(inputFolds[foldIdx]);
                        }
                        //The fold for the next cluster always starts from the original (not the already extended) fold
                        VectorBundle extendedFold = originalFolds[_testingFoldIdx].CreateShallowCopy();
                        //One trained network per network configuration
                        for (_netCfgIdx = 0; _netCfgIdx < clusterCfg.ClusterNetConfigurations.Count; _netCfgIdx++)
                        {
                            TNRNetBuilder netBuilder = new TNRNetBuilder(
                                _chainName,
                                clusterCfg.ClusterNetConfigurations[_netCfgIdx],
                                clusterCfg.Output,
                                trainingData,
                                testingFold,
                                _rand,
                                _controller);
                            //Forward the build progress notifications
                            netBuilder.NetworkBuildProgressChanged += OnNetworkBuildProgressChanged;
                            //The trained network becomes a member of the current cluster
                            TNRNet trainedNet = netBuilder.Build();
                            //Scope identifier combines the repetition and the testing fold
                            int netScopeID = _repetitionIdx * NetScopeDelimiterCoeff + _testingFoldIdx;
                            currentCluster.AddMember(trainedNet, netScopeID, testingFold, filters);
                            //Append the network's outputs on the testing fold to the inputs for the next cluster
                            for (int sampleIdx = 0; sampleIdx < testingFold.InputVectorCollection.Count; sampleIdx++)
                            {
                                double[] netOutputs = trainedNet.Network.Compute(testingFold.InputVectorCollection[sampleIdx]);
                                extendedFold.InputVectorCollection[sampleIdx] = extendedFold.InputVectorCollection[sampleIdx].Concat(netOutputs);
                            }
                        }//netCfgIdx
                        outputFolds.Add(extendedFold);
                    }//testingFoldIdx
                    //The extended folds become the input of the next cluster
                    inputFolds = outputFolds;
                }//clusterIdx
                //Reshuffle the data unless this was the last repetition
                if (_repetitionIdx + 1 < _clusterChainCfg.CrossvalidationCfg.Repetitions)
                {
                    workingBundle.Shuffle(_rand);
                }
            }//repetitionIdx
            //Make the clusters operable and put them into the chain
            foreach (TNRNetCluster trainedCluster in clusters)
            {
                trainedCluster.FinalizeCluster();
                chain.AddCluster(trainedCluster);
            }
            return chain;
        }
Beispiel #3
0
        /// <summary>
        /// Builds computation cluster of trained networks
        /// </summary>
        /// <remarks>
        /// For every repetition the data is split into folds. Each processed fold is used once as the testing
        /// fold while the remaining folds form the training data, and one network is trained per network
        /// settings item. Between repetitions, Shuffle is called directly on the given data bundle instance.
        /// </remarks>
        /// <param name="dataBundle">Data to be used for training</param>
        /// <param name="testDataRatio">Ratio of test data to be used (determines fold size)</param>
        /// <param name="numOfFolds">Requested number of testing folds (determines number of cluster members). Value LE 0 causes automatic setup. </param>
        /// <param name="repetitions">Defines how many times the generation of folds will be repeated. </param>
        /// <param name="outputFeatureFilterCollection">Output feature filters to be used for output data denormalization.</param>
        /// <returns>The cluster holding the trained networks, their weights and the accumulated error statistics.</returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="dataBundle"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="testDataRatio"/> exceeds MaxRatioOfTestData or when the resulting number
        /// of test samples is less than MinLengthOfTestDataset.
        /// </exception>
        public TrainedNetworkCluster Build(VectorBundle dataBundle,
                                           double testDataRatio,
                                           int numOfFolds,
                                           int repetitions,
                                           FeatureFilterBase[] outputFeatureFilterCollection
                                           )
        {
            if (dataBundle == null)
            {
                throw new ArgumentNullException(nameof(dataBundle));
            }
            //Test fold size
            if (testDataRatio > MaxRatioOfTestData)
            {
                throw new ArgumentException($"Test data ratio is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", nameof(testDataRatio));
            }
            int testDataSetLength = (int)Math.Round(dataBundle.OutputVectorCollection.Count * testDataRatio, 0);

            if (testDataSetLength < MinLengthOfTestDataset)
            {
                throw new ArgumentException($"Num of resulting test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", nameof(testDataRatio));
            }
            //Number of folds
            if (numOfFolds <= 0)
            {
                //Auto setup
                numOfFolds = dataBundle.OutputVectorCollection.Count / testDataSetLength;
            }
            //Cluster of trained networks
            int numOfMembers = numOfFolds * _networkSettingsCollection.Count * repetitions;
            TrainedNetworkCluster cluster = new TrainedNetworkCluster(_clusterName, numOfMembers, _dataRange, _binBorder);

            for (int cycle = 0; cycle < repetitions; cycle++)
            {
                //Data split to folds
                List<VectorBundle> subBundleCollection = dataBundle.Split(testDataSetLength, _binBorder);
                //The split may deliver fewer folds than requested
                numOfFolds = Math.Min(numOfFolds, subBundleCollection.Count);
                //Train collection of networks for each fold in the cluster.
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    VectorBundle testingFold = subBundleCollection[foldIdx];
                    for (int netCfgIdx = 0; netCfgIdx < _networkSettingsCollection.Count; netCfgIdx++)
                    {
                        //Prepare training data bundle (all the folds except the testing one)
                        VectorBundle trainingData = new VectorBundle();
                        for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
                        {
                            if (bundleIdx != foldIdx)
                            {
                                trainingData.Add(subBundleCollection[bundleIdx]);
                            }
                        }
                        TrainedNetworkBuilder netBuilder = new TrainedNetworkBuilder(_clusterName,
                                                                                     _networkSettingsCollection[netCfgIdx],
                                                                                     (cycle * numOfFolds) + foldIdx + 1,
                                                                                     repetitions * numOfFolds,
                                                                                     netCfgIdx + 1,
                                                                                     _networkSettingsCollection.Count,
                                                                                     trainingData,
                                                                                     testingFold,
                                                                                     _binBorder,
                                                                                     _rand,
                                                                                     _controller
                                                                                     );
                        //Register notification
                        netBuilder.RegressionEpochDone += OnRegressionEpochDone;
                        //Build trained network. Trained network becomes the cluster member
                        var trainedMember = netBuilder.Build();
                        cluster.Members.Add(trainedMember);
                        //Set member's weight proportionally to test/train number of samples ratio
                        cluster.Weights.Add((double)testingFold.InputVectorCollection.Count / (double)trainingData.InputVectorCollection.Count);
                        //Update cluster error statistics using the testing fold samples (pessimistic approach)
                        for (int sampleIdx = 0; sampleIdx < testingFold.OutputVectorCollection.Count; sampleIdx++)
                        {
                            double[] nrmComputedValues = trainedMember.Network.Compute(testingFold.InputVectorCollection[sampleIdx]);
                            for (int i = 0; i < nrmComputedValues.Length; i++)
                            {
                                //The i-th filter converts the i-th output value by its ApplyReverse method
                                double nrmIdealValue = testingFold.OutputVectorCollection[sampleIdx][i];
                                double naturalComputedValue = outputFeatureFilterCollection[i].ApplyReverse(nrmComputedValues[i]);
                                double naturalIdealValue = outputFeatureFilterCollection[i].ApplyReverse(nrmIdealValue);
                                cluster.ErrorStats.Update(nrmComputedValues[i],
                                                          nrmIdealValue,
                                                          naturalComputedValue,
                                                          naturalIdealValue
                                                          );
                            }//i
                        }//sampleIdx
                    }//netCfgIdx
                }//foldIdx
                if (cycle < repetitions - 1)
                {
                    //Reshuffle data (called on the given bundle instance)
                    dataBundle.Shuffle(_rand);
                }
            }
            //Return built cluster
            return cluster;
        }