示例#1
0
        /// <summary>
        /// Exercises the BinFeatureFilter: feeds it random binary samples and then
        /// prints the results of its forward (ApplyFilter) and reverse (ApplyReverse) transformations.
        /// </summary>
        private void TestBinFeatureFilter()
        {
            BinFeatureFilter filter = new BinFeatureFilter(Interval.IntZP1, new BinFeatureFilterSettings());
            Random           rand   = new Random();

            //Feed the filter 200 random binary samples.
            //Bug fix: Random.Next(minValue, maxValue) has an EXCLUSIVE upper bound,
            //so the original Next(0, 1) always produced 0 and the filter never saw a 1.
            //Next(0, 2) yields both 0 and 1.
            for (int i = 0; i < 200; i++)
            {
                filter.Update((double)rand.Next(0, 2));
            }

            //Forward transformation of both binary values
            Console.WriteLine($"{filter.GetType().Name} ApplyFilter");
            for (int i = 0; i <= 1; i++)
            {
                Console.WriteLine($"    {i.ToString(CultureInfo.InvariantCulture),-20} {filter.ApplyFilter(i)}");
            }

            //Reverse transformation sampled evenly across the filter's output range
            Console.WriteLine($"{filter.GetType().Name} ApplyReverse");
            int pieces = 10;

            for (int i = 0; i <= pieces; i++)
            {
                double value = (double)i * (1d / pieces);
                Console.WriteLine($"    {value.ToString(CultureInfo.InvariantCulture),-20} {filter.ApplyReverse(value)}");
            }
            //Keep the console window open until the user presses Enter
            Console.ReadLine();
        }
示例#2
0
        /// <summary>
        /// Trains the network cluster to perform classification task and then verifies its performance.
        /// </summary>
        /// <param name="name">The name of a classification task.</param>
        /// <param name="trainDataFile">The name of a csv datafile containing the training data.</param>
        /// <param name="verifyDataFile">The name of a csv datafile containing the verification data.</param>
        /// <param name="numOfClasses">The number of classes.</param>
        /// <param name="foldDataRatio">Specifies what part of training data is reserved for testing. It determines the size of data fold and also number of networks within the cluster.</param>
        private void PerformClassification(string name, string trainDataFile, string verifyDataFile, int numOfClasses, double foldDataRatio)
        {
            _log.Write($"{name} classification performed by the Probabilistic cluster chain ({numOfClasses.ToString(CultureInfo.InvariantCulture)} classes).");
            //Load csv data and create vector bundles
            _log.Write($"Loading {trainDataFile}...");
            CsvDataHolder trainCsvData = new CsvDataHolder(trainDataFile);
            VectorBundle  trainData    = VectorBundle.Load(trainCsvData, numOfClasses);

            _log.Write($"Loading {verifyDataFile}...");
            CsvDataHolder verifyCsvData = new CsvDataHolder(verifyDataFile);
            VectorBundle  verifyData    = VectorBundle.Load(verifyCsvData, numOfClasses);

            //Input data standardization
            //Allocation and preparation of the input feature filters
            FeatureFilterBase[] inputFeatureFilters = PrepareInputFeatureFilters(trainData);
            //Standardize training input data
            StandardizeInputVectors(trainData, inputFeatureFilters);
            //Standardize verification input data
            StandardizeInputVectors(verifyData, inputFeatureFilters);
            //Output data
            //Output data is already in the 0/1 form requested by the SoftMax activation so we don't
            //need to modify it. We only allocate the binary feature filters requested by the cluster chain builder.
            FeatureFilterBase[] outputFeatureFilters = new BinFeatureFilter[numOfClasses];
            for (int i = 0; i < numOfClasses; i++)
            {
                outputFeatureFilters[i] = new BinFeatureFilter(Interval.IntZP1);
            }
            //Cluster chain configuration (we will have two chained clusters)
            //Configuration of the first cluster in the chain
            //End-networks configuration for the first cluster in the chain. For every testing fold will be trained two end-networks with different structure.
            List <FeedForwardNetworkSettings> netCfgs1 = new List <FeedForwardNetworkSettings>
            {
                //The first FF network will have two hidden layers of 30 TanH activated neurons.
                //Output layer will have the SoftMax activation (it must be SoftMax because we will use the Probabilistic cluster).
                new FeedForwardNetworkSettings(new AFAnalogSoftMaxSettings(),
                                               new HiddenLayersSettings(new HiddenLayerSettings(30, new AFAnalogTanHSettings()),
                                                                        new HiddenLayerSettings(30, new AFAnalogTanHSettings())
                                                                        ),
                                               new RPropTrainerSettings(3, 200)
                                               ),
                //The second FF network will have two hidden layers of 30 LeakyReLU activated neurons.
                //Output layer will have the SoftMax activation (it must be SoftMax because we will use the Probabilistic cluster).
                new FeedForwardNetworkSettings(new AFAnalogSoftMaxSettings(),
                                               new HiddenLayersSettings(new HiddenLayerSettings(30, new AFAnalogLeakyReLUSettings()),
                                                                        new HiddenLayerSettings(30, new AFAnalogLeakyReLUSettings())
                                                                        ),
                                               new RPropTrainerSettings(3, 200)
                                               )
            };
            //The first probabilistic network cluster configuration instance
            TNRNetClusterProbabilisticSettings clusterCfg1 =
                new TNRNetClusterProbabilisticSettings(new TNRNetClusterProbabilisticNetworksSettings(netCfgs1),
                                                       new TNRNetClusterProbabilisticWeightsSettings()
                                                       );
            //Configuration of the second cluster in the chain
            //End-network configuration for the second cluster in the chain. For every testing fold will be trained one end-network.
            List <FeedForwardNetworkSettings> netCfgs2 = new List <FeedForwardNetworkSettings>
            {
                //FF network will have two hidden layers of 30 Elliot activated neurons.
                //Output layer will have the SoftMax activation (it must be SoftMax because we will use the Probabilistic cluster chain).
                new FeedForwardNetworkSettings(new AFAnalogSoftMaxSettings(),
                                               new HiddenLayersSettings(new HiddenLayerSettings(30, new AFAnalogElliotSettings()),
                                                                        new HiddenLayerSettings(30, new AFAnalogElliotSettings())
                                                                        ),
                                               new RPropTrainerSettings(3, 200)
                                               )
            };
            //The second probabilistic network cluster configuration instance
            TNRNetClusterProbabilisticSettings clusterCfg2 =
                new TNRNetClusterProbabilisticSettings(new TNRNetClusterProbabilisticNetworksSettings(netCfgs2),
                                                       new TNRNetClusterProbabilisticWeightsSettings()
                                                       );

            //Probabilistic network cluster chain configuration instance
            ITNRNetClusterChainSettings chainCfg =
                new TNRNetClusterChainProbabilisticSettings(new CrossvalidationSettings(foldDataRatio),
                                                            new TNRNetClustersProbabilisticSettings(clusterCfg1,
                                                                                                    clusterCfg2
                                                                                                    )
                                                            );

            //No interpolation holes in this message, so a plain string literal is used (was a needless $"").
            _log.Write("Cluster configuration xml:");
            _log.Write(chainCfg.GetXml(true).ToString());
            //Training
            _log.Write($"Cluster chain training on {trainDataFile}...");
            //An instance of network cluster chain builder.
            TNRNetClusterChainBuilder builder =
                new TNRNetClusterChainBuilder("Probabilistic Cluster Chain", chainCfg);

            //Register progress event handler
            builder.ChainBuildProgressChanged += OnClusterChainBuildProgressChanged;
            //Build the trained network cluster chain.
            TNRNetClusterChain trainedClusterChain = builder.Build(trainData, outputFeatureFilters);

            //Verification
            _log.Write(string.Empty);
            _log.Write(string.Empty);
            _log.Write($"Cluster chain verification on {verifyDataFile}...");
            _log.Write(string.Empty);
            int numOfErrors = 0;

            for (int i = 0; i < verifyData.InputVectorCollection.Count; i++)
            {
                double[] computed = trainedClusterChain.Compute(verifyData.InputVectorCollection[i], out _);
                //Cluster result: the class with the highest computed activation wins
                int computedWinnerIdx = computed.MaxIdx();
                //Real result: the class flagged in the ideal (one-hot) output vector
                int realWinnerIdx = verifyData.OutputVectorCollection[i].MaxIdx();

                if (computedWinnerIdx != realWinnerIdx)
                {
                    ++numOfErrors;
                }
                _log.Write($"({i + 1}/{verifyData.InputVectorCollection.Count}) Errors: {numOfErrors}", true);
            }
            _log.Write(string.Empty);
            _log.Write($"Accuracy {(1d - (double)numOfErrors / (double)verifyData.InputVectorCollection.Count).ToString(CultureInfo.InvariantCulture)}");
            _log.Write(string.Empty);
            //Redundant trailing 'return;' in a void method removed
        }
示例#3
0
        /// <summary>
        /// Builds trained readout layer.
        /// </summary>
        /// <param name="dataBundle">The data to be used for training.</param>
        /// <param name="predictorsMapper">The mapper of specific predictors to readout units (optional).</param>
        /// <param name="controller">The build process controller (optional).</param>
        /// <param name="randomizerSeek">Specifies the random number generator initial seek (optional). A value greater than or equal to 0 will always ensure the same initialization.</param>
        /// <returns>The results of training.</returns>
        /// <exception cref="InvalidOperationException">Thrown when the layer is already trained, there are no predictors, or the output vector length does not match the configured readout units.</exception>
        public RegressionOverview Build(VectorBundle dataBundle,
                                        PredictorsMapper predictorsMapper = null,
                                        TNRNetBuilder.BuildControllerDelegate controller = null,
                                        int randomizerSeek = 0
                                        )
        {
            //Re-build of an already trained layer is not allowed
            if (Trained)
            {
                throw new InvalidOperationException("Readout layer is already built.");
            }
            //Basic checks - dimensions are taken from the first sample pair
            int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
            int numOfOutputs    = dataBundle.OutputVectorCollection[0].Length;

            if (numOfPredictors == 0)
            {
                throw new InvalidOperationException($"Number of predictors must be greater than 0.");
            }
            if (numOfOutputs != ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count)
            {
                throw new InvalidOperationException($"Incorrect length of output vectors.");
            }
            //Predictors mapper (specified or default)
            _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
            //Allocation and preparation of feature filters
            //Predictors
            //Each filter handles one feature index independently, so parallelization over the feature index is safe.
            _predictorFeatureFilterCollection = new FeatureFilterBase[numOfPredictors];
            Parallel.For(0, _predictorFeatureFilterCollection.Length, nrmIdx =>
            {
                _predictorFeatureFilterCollection[nrmIdx] = new RealFeatureFilter(InternalDataRange, true, true);
                for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
                {
                    //Adjust filter
                    _predictorFeatureFilterCollection[nrmIdx].Update(dataBundle.InputVectorCollection[pairIdx][nrmIdx]);
                }
            });
            //Output values
            //Filter type per output feature comes from the corresponding readout unit's task configuration.
            _outputFeatureFilterCollection = new FeatureFilterBase[numOfOutputs];
            Parallel.For(0, _outputFeatureFilterCollection.Length, nrmIdx =>
            {
                _outputFeatureFilterCollection[nrmIdx] = FeatureFilterFactory.Create(InternalDataRange, ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection[nrmIdx].TaskCfg.FeatureFilterCfg);
                for (int pairIdx = 0; pairIdx < dataBundle.OutputVectorCollection.Count; pairIdx++)
                {
                    //Adjust output normalizer
                    _outputFeatureFilterCollection[nrmIdx].Update(dataBundle.OutputVectorCollection[pairIdx][nrmIdx]);
                }
            });
            //Data normalization
            //Allocation
            double[][] normalizedPredictorsCollection   = new double[dataBundle.InputVectorCollection.Count][];
            double[][] normalizedIdealOutputsCollection = new double[dataBundle.OutputVectorCollection.Count][];
            //Normalization - each sample pair is normalized independently, so parallelization over pairs is safe.
            Parallel.For(0, dataBundle.InputVectorCollection.Count, pairIdx =>
            {
                //Predictors
                double[] predictors = new double[numOfPredictors];
                for (int i = 0; i < numOfPredictors; i++)
                {
                    if (_predictorsMapper.PredictorGeneralSwitchCollection[i])
                    {
                        predictors[i] = _predictorFeatureFilterCollection[i].ApplyFilter(dataBundle.InputVectorCollection[pairIdx][i]);
                    }
                    else
                    {
                        //NaN marks a predictor disabled by the mapper's general switch
                        predictors[i] = double.NaN;
                    }
                }
                normalizedPredictorsCollection[pairIdx] = predictors;
                //Outputs
                double[] outputs = new double[numOfOutputs];
                for (int i = 0; i < numOfOutputs; i++)
                {
                    outputs[i] = _outputFeatureFilterCollection[i].ApplyFilter(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
                normalizedIdealOutputsCollection[pairIdx] = outputs;
            });

            //Random object initialization
            //Negative seek -> time-dependent seed; non-negative seek -> deterministic, reproducible runs.
            Random rand = (randomizerSeek < 0 ? new Random() : new Random(randomizerSeek));
            //Create shuffled copy of the data
            VectorBundle shuffledData = new VectorBundle(normalizedPredictorsCollection, normalizedIdealOutputsCollection);

            shuffledData.Shuffle(rand);

            //"One Takes All" groups input data space initialization
            //Pre-allocates, per sample, one slot for every readout unit's composite result.
            List <CompositeResult[]> allReadoutUnitResults = new List <CompositeResult[]>(shuffledData.InputVectorCollection.Count);

            if (_oneTakesAllGroupCollection != null)
            {
                for (int i = 0; i < shuffledData.InputVectorCollection.Count; i++)
                {
                    allReadoutUnitResults.Add(new CompositeResult[ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count]);
                }
            }

            ResetProgressTracking();
            //Building of readout units
            //_buildReadoutUnitIdx is a field (not a local) so progress event handlers can see which unit is in build.
            for (_buildReadoutUnitIdx = 0; _buildReadoutUnitIdx < ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection.Count; _buildReadoutUnitIdx++)
            {
                List <double[]> idealValueCollection = new List <double[]>(shuffledData.OutputVectorCollection.Count);
                //Transformation of ideal vectors to a single value vectors
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    double[] value = new double[1];
                    value[0] = idealVector[_buildReadoutUnitIdx];
                    idealValueCollection.Add(value);
                }
                //Input vectors are narrowed to the predictors mapped to this particular readout unit
                List <double[]> readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(ReadoutLayerCfg.ReadoutUnitsCfg.ReadoutUnitCfgCollection[_buildReadoutUnitIdx].Name, shuffledData.InputVectorCollection);
                VectorBundle    readoutUnitDataBundle            = new VectorBundle(readoutUnitInputVectorCollection, idealValueCollection);
                _readoutUnitCollection[_buildReadoutUnitIdx].ReadoutUnitBuildProgressChanged += OnReadoutUnitBuildProgressChanged;
                _readoutUnitCollection[_buildReadoutUnitIdx].Build(readoutUnitDataBundle,
                                                                   _outputFeatureFilterCollection[_buildReadoutUnitIdx],
                                                                   rand,
                                                                   controller
                                                                   );
                //Add unit's all computed results into the input data for "One Takes All" groups
                if (_oneTakesAllGroupCollection != null)
                {
                    for (int sampleIdx = 0; sampleIdx < readoutUnitDataBundle.InputVectorCollection.Count; sampleIdx++)
                    {
                        allReadoutUnitResults[sampleIdx][_buildReadoutUnitIdx] = _readoutUnitCollection[_buildReadoutUnitIdx].Compute(readoutUnitDataBundle.InputVectorCollection[sampleIdx]);
                    }
                }
            }//unitIdx

            //One Takes All groups build
            if (_oneTakesAllGroupCollection != null)
            {
                foreach (OneTakesAllGroup group in _oneTakesAllGroupCollection)
                {
                    //Only the group having inner probabilistic cluster has to be built
                    if (group.DecisionMethod == OneTakesAllGroup.OneTakesAllDecisionMethod.ClusterChain)
                    {
                        //Collect the binary output filters of this group's member readout units
                        BinFeatureFilter[] groupFilters = new BinFeatureFilter[group.NumOfMemberClasses];
                        for (int i = 0; i < group.NumOfMemberClasses; i++)
                        {
                            groupFilters[i] = (BinFeatureFilter)_outputFeatureFilterCollection[group.MemberReadoutUnitIndexCollection[i]];
                        }
                        //_buildOTAGroupIdx is a field used by the progress event handler - TODO confirm it is reset in ResetProgressTracking
                        ++_buildOTAGroupIdx;
                        group.OTAGBuildProgressChanged += OnOTAGBuildProgressChanged;
                        group.Build(allReadoutUnitResults, shuffledData.OutputVectorCollection, groupFilters, rand, controller);
                    }
                }
            }

            //Readout layer is trained and ready
            Trained = true;
            return(new RegressionOverview(ReadoutUnitErrStatCollection));
        }