Code Example #1
File: TNRNetCluster.cs Project: okozelsk/NET
 /// <summary>
 /// The deep copy constructor.
 /// </summary>
 /// <param name="source">The source instance.</param>
 public ClusterErrStatistics(ClusterErrStatistics source)
 {
     ClusterName          = source.ClusterName;
     NatPrecissionErrStat = new BasicStat(source.NatPrecissionErrStat);
     NrmPrecissionErrStat = new BasicStat(source.NrmPrecissionErrStat);
     BinaryErrStat        = source.BinaryErrStat?.DeepClone();
     return;
 }
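The deep-copy constructor above is typically paired with a DeepClone method, as suggested by the DeepClone calls applied to the copied members. A minimal sketch of that pairing follows; the exact method on ClusterErrStatistics is an assumption inferred from the usage pattern, not verified against the project.

 /// <summary>
 /// Creates a deep copy of this instance (hypothetical wrapper; assumed from the DeepClone usage pattern).
 /// </summary>
 public ClusterErrStatistics DeepClone()
 {
     //Delegates the field-by-field copying to the deep copy constructor above
     return new ClusterErrStatistics(this);
 }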
Code Example #2
 /// <summary>
 /// A deep copy constructor
 /// </summary>
 /// <param name="source">Source instance</param>
 public ClusterErrStatistics(ClusterErrStatistics source)
 {
     ClusterName          = source.ClusterName;
     BinBorder            = source.BinBorder;
     NumOfMembers         = source.NumOfMembers;
     NatPrecissionErrStat = new BasicStat(source.NatPrecissionErrStat);
     NrmPrecissionErrStat = new BasicStat(source.NrmPrecissionErrStat);
     BinaryErrStat        = null;
     BinaryErrStat        = source.BinaryErrStat?.DeepClone();
     return;
 }
Code Example #3
File: ReadoutLayer.cs Project: krishnanpc/NET
 /// <summary>
 /// A deep copy constructor
 /// </summary>
 /// <param name="source">Source instance</param>
 public ClusterErrStatistics(ClusterErrStatistics source)
 {
     TaskType          = source.TaskType;
     NumOfReadoutUnits = source.NumOfReadoutUnits;
     PrecissionErrStat = new BasicStat(source.PrecissionErrStat);
     BinaryErrStat     = null;
     if (TaskType == CommonEnums.TaskType.Classification)
     {
         BinaryErrStat = new BinErrStat(source.BinaryErrStat);
     }
     return;
 }
Code Example #4
 /// <summary>
 /// Copy constructor
 /// </summary>
 /// <param name="source">Source cluster</param>
 public TrainedNetworkCluster(TrainedNetworkCluster source)
 {
     ClusterName = source.ClusterName;
     BinBorder   = source.BinBorder;
     Members     = new List <TrainedNetwork>(source.Members.Count);
     foreach (TrainedNetwork tn in source.Members)
     {
         Members.Add(tn.DeepClone());
     }
     Weights    = new List <double>(source.Weights);
     ErrorStats = source.ErrorStats.DeepClone();
 }
Code Example #5
 //Constructors
 /// <summary>
 /// Creates an uninitialized instance
 /// </summary>
 /// <param name="clusterName">Name of the cluster</param>
 /// <param name="numOfMembers">Expected number of trained networks in the cluster</param>
 /// <param name="dataRange">Range of input and output data</param>
 /// <param name="binBorder">If specified, it indicates that the whole network output is binary and specifies numeric border where GE network output is decided as a 1 and LT output as a 0.</param>
 public TrainedNetworkCluster(string clusterName,
                              int numOfMembers,
                              Interval dataRange,
                              double binBorder = double.NaN
                              )
 {
     ClusterName = clusterName;
     BinBorder   = binBorder;
     DataRange   = dataRange.DeepClone();
     Members     = new List <TrainedNetwork>(numOfMembers);
     Weights     = new List <double>(numOfMembers);
     ErrorStats  = new ClusterErrStatistics(ClusterName, numOfMembers, BinBorder);
     return;
 }
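A short usage sketch of the constructor above. The cluster name, the number of members, the Interval bounds, and the binary border of 0.5 are illustrative assumptions; Interval is assumed to expose a (min, max) constructor.

 //Creates an empty cluster expecting 4 trained networks whose binary output is decided at the 0.5 border
 Interval dataRange = new Interval(-1d, 1d);
 TrainedNetworkCluster cluster = new TrainedNetworkCluster("Demo cluster", 4, dataRange, 0.5d);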
Code Example #6
File: TNRNetCluster.cs Project: okozelsk/NET
 //Constructor
 /// <summary>
 /// Creates an uninitialized instance.
 /// </summary>
 /// <param name="clusterName">The name of the cluster.</param>
 /// <param name="outputType">The type of output.</param>
 /// <param name="trainingGroupWeight">The macro-weight of the group of metrics related to training.</param>
 /// <param name="testingGroupWeight">The macro-weight of the group of metrics related to testing.</param>
 /// <param name="samplesWeight">The weight of the number of samples metric.</param>
 /// <param name="precisionWeight">The weight of the numerical precision metric.</param>
 /// <param name="misrecognizedFalseWeight">The weight of the "misrecognized falses" metric.</param>
 /// <param name="unrecognizedTrueWeight">The weight of the "unrecognized trues" metric.</param>
 public TNRNetCluster(string clusterName,
                      TNRNet.OutputType outputType,
                      double trainingGroupWeight      = 1d,
                      double testingGroupWeight       = 1d,
                      double samplesWeight            = 1d,
                      double precisionWeight          = 1d,
                      double misrecognizedFalseWeight = 1d,
                      double unrecognizedTrueWeight   = 0d
                      )
 {
     ClusterName                 = clusterName;
     Output                      = outputType;
     _trainingGroupWeight        = trainingGroupWeight;
     _testingGroupWeight         = testingGroupWeight;
     _samplesWeight              = samplesWeight;
     _precisionWeight            = precisionWeight;
     _misrecognizedFalseWeight   = misrecognizedFalseWeight;
     _unrecognizedTrueWeight     = unrecognizedTrueWeight;
     ErrorStats                  = new ClusterErrStatistics(ClusterName, outputType);
     _memberNetCollection        = new List <TNRNet>();
     _memberNetScopeIDCollection = new List <int>();
     _memberNetWeights           = null;
     return;
 }
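A comparable sketch for the newer TNRNetCluster constructor. The cluster name and the chosen TNRNet.OutputType value are illustrative assumptions, and the metric weights are left at the defaults declared above.

 //Creates an empty cluster with default metric weights (the output type value is assumed for illustration)
 TNRNetCluster cluster = new TNRNetCluster("Demo cluster", TNRNet.OutputType.Probabilistic);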
Code Example #7
File: ReadoutLayer.cs Project: krishnanpc/NET
        /// <summary>
        /// Builds readout layer.
        /// Prepares prediction clusters containing trained readout units.
        /// </summary>
        /// <param name="predictorsCollection">Collection of predictors</param>
        /// <param name="idealOutputsCollection">Collection of desired outputs related to predictors</param>
        /// <param name="regressionController">Regression controller delegate</param>
        /// <param name="regressionControllerData">An user object</param>
        /// <returns>Returned ValidationBundle is something like a protocol.
        /// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values.
        /// This is the pesimistic approach. Real results on unseen data could be better due to the clustering synergy.
        /// </returns>
        public ValidationBundle Build(List <double[]> predictorsCollection,
                                      List <double[]> idealOutputsCollection,
                                      ReadoutUnit.RegressionCallbackDelegate regressionController,
                                      Object regressionControllerData
                                      )
        {
            //Random object
            Random rand = new Random(0);
            //Allocation of computed and ideal vectors for validation bundle
            List <double[]> validationComputedVectorCollection = new List <double[]>(idealOutputsCollection.Count);
            List <double[]> validationIdealVectorCollection    = new List <double[]>(idealOutputsCollection.Count);

            for (int i = 0; i < idealOutputsCollection.Count; i++)
            {
                validationComputedVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
                validationIdealVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
            }
            //Test dataset size
            if (_settings.TestDataRatio > MaxRatioOfTestData)
            {
                throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);

            if (testDataSetLength < MinLengthOfTestDataset)
            {
                throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            //Number of folds
            int numOfFolds = _settings.NumOfFolds;

            if (numOfFolds <= 0)
            {
                //Auto setup
                numOfFolds = idealOutputsCollection.Count / testDataSetLength;
                if (numOfFolds > MaxNumOfFolds)
                {
                    numOfFolds = MaxNumOfFolds;
                }
            }
            //Create shuffled copy of the data
            TimeSeriesBundle shuffledData = new TimeSeriesBundle(predictorsCollection, idealOutputsCollection);

            shuffledData.Shuffle(rand);
            //Data inspection, preparation of datasets and training of ReadoutUnits
            //Clusters of readout units (one cluster for each output field)
            for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
            {
                _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
                List <double[]> idealValueCollection = new List <double[]>(idealOutputsCollection.Count);
                BinDistribution refBinDistr          = null;
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Reference binary distribution is relevant only for classification task
                    refBinDistr = new BinDistribution(_dataRange.Mid);
                }
                //Transformation to a single value vectors and data analysis
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    double[] value = new double[1];
                    value[0] = idealVector[clusterIdx];
                    idealValueCollection.Add(value);
                    if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                    {
                        //Reference binary distribution is relevant only for classification task
                        refBinDistr.Update(value);
                    }
                }
                List <TimeSeriesBundle> subBundleCollection = null;
                //Dataset preparation depends on the task type
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Classification task
                    subBundleCollection = DivideSamplesForClassificationTask(shuffledData.InputVectorCollection,
                                                                             idealValueCollection,
                                                                             refBinDistr,
                                                                             testDataSetLength
                                                                             );
                }
                else
                {
                    //Forecast task
                    subBundleCollection = DivideSamplesForForecastTask(shuffledData.InputVectorCollection,
                                                                       idealValueCollection,
                                                                       testDataSetLength
                                                                       );
                }
                //Best predicting unit for each fold in the cluster.
                ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
                int arrayPos             = 0;
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    //Build training samples
                    List <double[]> trainingPredictorsCollection = new List <double[]>();
                    List <double[]> trainingIdealValueCollection = new List <double[]>();
                    for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
                    {
                        if (bundleIdx != foldIdx)
                        {
                            trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                            trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                        }
                    }
                    //Call training regression to get the fold's best readout unit.
                    //The best unit becomes the predicting cluster member.
                    _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                        clusterIdx,
                                                                                        foldIdx + 1,
                                                                                        numOfFolds,
                                                                                        refBinDistr,
                                                                                        trainingPredictorsCollection,
                                                                                        trainingIdealValueCollection,
                                                                                        subBundleCollection[foldIdx].InputVectorCollection,
                                                                                        subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                        rand,
                                                                                        _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                        regressionController,
                                                                                        regressionControllerData
                                                                                        );
                    //Cluster error statistics & data for validation bundle (pessimistic approach)
                    for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
                    {
                        double value = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                        ces.Update(value, subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                        validationIdealVectorCollection[arrayPos][clusterIdx]    = subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0];
                        validationComputedVectorCollection[arrayPos][clusterIdx] = value;
                        ++arrayPos;
                    }
                } //foldIdx
                _clusterErrStatisticsCollection.Add(ces);
            }     //clusterIdx
            //Validation bundle is returned.
            return(new ValidationBundle(validationComputedVectorCollection, validationIdealVectorCollection));
        }
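A hedged sketch of invoking this Build overload. The readoutLayer instance, the two data collections, and myRegressionController (assumed to match ReadoutUnit.RegressionCallbackDelegate) are placeholders for illustration; none of these names come from the project, and the user data object is simply passed as null.

 //Builds the readout layer and keeps the fold-by-fold validation protocol for inspection
 ValidationBundle validationBundle = readoutLayer.Build(predictorsCollection,
                                                        idealOutputsCollection,
                                                        myRegressionController,
                                                        null
                                                        );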
Code Example #8
File: ReadoutLayer.cs Project: lulzzz/NET
        /// <summary>
        /// Builds readout layer.
        /// Prepares prediction clusters containing trained readout units.
        /// </summary>
        /// <param name="dataBundle">Collection of input predictors and associated desired output values</param>
        /// <param name="regressionController">Regression controller delegate</param>
        /// <param name="regressionControllerData">An user object</param>
        /// <param name="predictorsMapper">Optional specific mapping of predictors to readout units</param>
        /// <returns>The returned ResultComparativeBundle serves as a protocol:
        /// it records, fold by fold (unit by unit), the predicted values and the corresponding ideal values.
        /// This is the pessimistic approach; real results on unseen data could be better due to the clustering synergy.
        /// </returns>
        public ResultComparativeBundle Build(VectorBundle dataBundle,
                                             ReadoutUnit.RegressionCallbackDelegate regressionController,
                                             Object regressionControllerData,
                                             PredictorsMapper predictorsMapper = null
                                             )
        {
            //Basic checks
            int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
            int numOfOutputs    = dataBundle.OutputVectorCollection[0].Length;

            if (numOfPredictors == 0)
            {
                throw new Exception("Number of predictors must be greater tham 0.");
            }
            if (numOfOutputs != _settings.ReadoutUnitCfgCollection.Count)
            {
                throw new Exception("Incorrect number of ideal output values in the vector.");
            }

            //Normalization of predictors and output data collections
            //Allocation of normalizers
            _predictorNormalizerCollection = new Normalizer[numOfPredictors];
            for (int i = 0; i < numOfPredictors; i++)
            {
                _predictorNormalizerCollection[i] = new Normalizer(DataRange, NormalizerDefaultReserve, true, false);
            }
            _outputNormalizerCollection = new Normalizer[numOfOutputs];
            for (int i = 0; i < numOfOutputs; i++)
            {
                bool classificationTask = (_settings.ReadoutUnitCfgCollection[i].TaskType == CommonEnums.TaskType.Classification);
                _outputNormalizerCollection[i] = new Normalizer(DataRange,
                                                                classificationTask ? 0 : NormalizerDefaultReserve,
                                                                classificationTask ? false : true,
                                                                false
                                                                );
            }
            //Normalizers adjustment
            for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
            {
                //Checks
                if (dataBundle.InputVectorCollection[pairIdx].Length != numOfPredictors)
                {
                    throw new Exception("Inconsistent number of predictors in the predictors collection.");
                }
                if (dataBundle.OutputVectorCollection[pairIdx].Length != numOfOutputs)
                {
                    throw new Exception("Inconsistent number of values in the ideal values collection.");
                }
                //Adjust predictors normalizers
                for (int i = 0; i < numOfPredictors; i++)
                {
                    _predictorNormalizerCollection[i].Adjust(dataBundle.InputVectorCollection[pairIdx][i]);
                }
                //Adjust outputs normalizers
                for (int i = 0; i < numOfOutputs; i++)
                {
                    _outputNormalizerCollection[i].Adjust(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
            }
            //Data normalization
            //Allocation
            List <double[]> predictorsCollection   = new List <double[]>(dataBundle.InputVectorCollection.Count);
            List <double[]> idealOutputsCollection = new List <double[]>(dataBundle.OutputVectorCollection.Count);

            //Normalization
            for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
            {
                //Predictors
                double[] predictors = new double[numOfPredictors];
                for (int i = 0; i < numOfPredictors; i++)
                {
                    predictors[i] = _predictorNormalizerCollection[i].Normalize(dataBundle.InputVectorCollection[pairIdx][i]);
                }
                predictorsCollection.Add(predictors);
                //Outputs
                double[] outputs = new double[numOfOutputs];
                for (int i = 0; i < numOfOutputs; i++)
                {
                    outputs[i] = _outputNormalizerCollection[i].Normalize(dataBundle.OutputVectorCollection[pairIdx][i]);
                }
                idealOutputsCollection.Add(outputs);
            }
            //Data processing
            //Random object initialization
            Random rand = new Random(0);

            //Predictors mapper (specified or default)
            _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
            //Allocation of computed and ideal vectors for result comparative bundle
            List <double[]> validationComputedVectorCollection = new List <double[]>(idealOutputsCollection.Count);
            List <double[]> validationIdealVectorCollection    = new List <double[]>(idealOutputsCollection.Count);

            for (int i = 0; i < idealOutputsCollection.Count; i++)
            {
                validationComputedVectorCollection.Add(new double[numOfOutputs]);
                validationIdealVectorCollection.Add(new double[numOfOutputs]);
            }
            //Test dataset size
            if (_settings.TestDataRatio > MaxRatioOfTestData)
            {
                throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);

            if (testDataSetLength < MinLengthOfTestDataset)
            {
                throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
            }
            //Number of folds
            int numOfFolds = _settings.NumOfFolds;

            if (numOfFolds <= 0)
            {
                //Auto setup
                numOfFolds = idealOutputsCollection.Count / testDataSetLength;
                if (numOfFolds > MaxNumOfFolds)
                {
                    numOfFolds = MaxNumOfFolds;
                }
            }
            //Create shuffled copy of the data
            VectorBundle shuffledData = new VectorBundle(predictorsCollection, idealOutputsCollection);

            shuffledData.Shuffle(rand);
            //Data inspection, preparation of datasets and training of ReadoutUnits
            //Clusters of readout units (one cluster for each output field)
            for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
            {
                _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
                List <double[]> idealValueCollection = new List <double[]>(idealOutputsCollection.Count);
                BinDistribution refBinDistr          = null;
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Reference binary distribution is relevant only for classification task
                    refBinDistr = new BinDistribution(DataRange.Mid);
                }
                //Transformation to a single value vectors and data analysis
                foreach (double[] idealVector in shuffledData.OutputVectorCollection)
                {
                    double[] value = new double[1];
                    value[0] = idealVector[clusterIdx];
                    idealValueCollection.Add(value);
                    if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                    {
                        //Reference binary distribution is relevant only for classification task
                        refBinDistr.Update(value);
                    }
                }
                List <VectorBundle> subBundleCollection = null;
                List <double[]>     readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(_settings.ReadoutUnitCfgCollection[clusterIdx].Name, shuffledData.InputVectorCollection);
                //Dataset preparation depends on the task type
                if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
                {
                    //Classification task
                    subBundleCollection = DivideSamplesForClassificationTask(readoutUnitInputVectorCollection,
                                                                             idealValueCollection,
                                                                             refBinDistr,
                                                                             testDataSetLength
                                                                             );
                }
                else
                {
                    //Forecast task
                    subBundleCollection = DivideSamplesForForecastTask(readoutUnitInputVectorCollection,
                                                                       idealValueCollection,
                                                                       testDataSetLength
                                                                       );
                }
                //Find the best unit for each fold in the cluster.
                ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
                int arrayPos             = 0;
                for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
                {
                    //Build training samples
                    List <double[]> trainingPredictorsCollection = new List <double[]>();
                    List <double[]> trainingIdealValueCollection = new List <double[]>();
                    for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
                    {
                        if (bundleIdx != foldIdx)
                        {
                            trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                            trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                        }
                    }
                    //Call training regression to get the fold's best readout unit.
                    //The best unit becomes the predicting cluster member.
                    _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                        clusterIdx,
                                                                                        foldIdx + 1,
                                                                                        numOfFolds,
                                                                                        refBinDistr,
                                                                                        trainingPredictorsCollection,
                                                                                        trainingIdealValueCollection,
                                                                                        subBundleCollection[foldIdx].InputVectorCollection,
                                                                                        subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                        rand,
                                                                                        _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                        regressionController,
                                                                                        regressionControllerData
                                                                                        );
                    //Cluster error statistics & data for validation bundle (pessimistic approach)
                    for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
                    {
                        double nrmComputedValue = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                        double natComputedValue = _outputNormalizerCollection[clusterIdx].Naturalize(nrmComputedValue);
                        double natIdealValue    = _outputNormalizerCollection[clusterIdx].Naturalize(subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                        ces.Update(nrmComputedValue,
                                   subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0],
                                   natComputedValue,
                                   natIdealValue);
                        validationIdealVectorCollection[arrayPos][clusterIdx]    = natIdealValue;
                        validationComputedVectorCollection[arrayPos][clusterIdx] = natComputedValue;
                        ++arrayPos;
                    }
                } //foldIdx
                _clusterErrStatisticsCollection.Add(ces);
            }     //clusterIdx
            //Validation bundle is returned.
            return(new ResultComparativeBundle(validationComputedVectorCollection, validationIdealVectorCollection));
        }
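And a matching sketch for this newer overload, which takes the data as a single VectorBundle plus an optional predictors mapper. The VectorBundle construction mirrors the one used inside the method itself; omitting the mapper (so the default 1:1 mapping is created) and the placeholder names are assumptions for illustration.

 //Builds the readout layer from a VectorBundle; the optional mapper is omitted, so the default mapping is used
 VectorBundle dataBundle = new VectorBundle(predictorsCollection, idealOutputsCollection);
 ResultComparativeBundle resultBundle = readoutLayer.Build(dataBundle,
                                                           myRegressionController,
                                                           null
                                                           );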