/// <summary>
/// The deep copy constructor.
/// </summary>
/// <param name="source">The source instance to be deeply copied.</param>
public ClusterErrStatistics(ClusterErrStatistics source)
{
    ClusterName = source.ClusterName;
    //Deep copies of the precision statistics (normalized and natural ranges)
    NrmPrecissionErrStat = new BasicStat(source.NrmPrecissionErrStat);
    NatPrecissionErrStat = new BasicStat(source.NatPrecissionErrStat);
    //Binary error statistics may be null on the source; clone only when present
    BinaryErrStat = source.BinaryErrStat?.DeepClone();
}
/// <summary>
/// A deep copy constructor.
/// </summary>
/// <param name="source">The source instance to be deeply copied.</param>
public ClusterErrStatistics(ClusterErrStatistics source)
{
    ClusterName = source.ClusterName;
    BinBorder = source.BinBorder;
    NumOfMembers = source.NumOfMembers;
    //Deep copies of the precision statistics (natural and normalized ranges)
    NatPrecissionErrStat = new BasicStat(source.NatPrecissionErrStat);
    NrmPrecissionErrStat = new BasicStat(source.NrmPrecissionErrStat);
    //Fix: removed the dead "BinaryErrStat = null;" assignment that was
    //immediately overwritten by the statement below. Binary error statistics
    //may be null on the source; clone only when present.
    BinaryErrStat = source.BinaryErrStat?.DeepClone();
}
/// <summary>
/// A deep copy constructor.
/// </summary>
/// <param name="source">The source instance to be deeply copied.</param>
public ClusterErrStatistics(ClusterErrStatistics source)
{
    TaskType = source.TaskType;
    NumOfReadoutUnits = source.NumOfReadoutUnits;
    //Deep copy of the precision statistics
    PrecissionErrStat = new BasicStat(source.PrecissionErrStat);
    //Binary error statistics are maintained for the classification task only;
    //for any other task type the member stays null.
    BinaryErrStat = (TaskType == CommonEnums.TaskType.Classification)
                    ? new BinErrStat(source.BinaryErrStat)
                    : null;
}
/// <summary>
/// Copy constructor.
/// </summary>
/// <param name="source">Source cluster to be deeply copied.</param>
public TrainedNetworkCluster(TrainedNetworkCluster source)
{
    ClusterName = source.ClusterName;
    BinBorder = source.BinBorder;
    //Fix: DataRange was not copied at all, so a copy lost its data range
    //(the sibling constructor initializes it via dataRange.DeepClone()).
    DataRange = source.DataRange?.DeepClone();
    //Deep copies of the member networks
    Members = new List<TrainedNetwork>(source.Members.Count);
    foreach (TrainedNetwork tn in source.Members)
    {
        Members.Add(tn.DeepClone());
    }
    //Weights are plain doubles so a shallow list copy is a deep copy
    Weights = new List<double>(source.Weights);
    //Deep copy of the cluster error statistics
    ErrorStats = source.ErrorStats.DeepClone();
}
//Constructors
/// <summary>
/// Creates an uninitialized instance.
/// </summary>
/// <param name="clusterName">Name of the cluster.</param>
/// <param name="numOfMembers">Expected number of trained networks in the cluster.</param>
/// <param name="dataRange">Range of input and output data.</param>
/// <param name="binBorder">If specified, it indicates that the whole network output is binary and specifies the numeric border where a GE network output is decided as a 1 and a LT output as a 0.</param>
public TrainedNetworkCluster(string clusterName,
                             int numOfMembers,
                             Interval dataRange,
                             double binBorder = double.NaN
                             )
{
    ClusterName = clusterName;
    BinBorder = binBorder;
    //Keep an independent copy of the data range
    DataRange = dataRange.DeepClone();
    //Pre-size member and weight collections to the expected count
    Members = new List<TrainedNetwork>(numOfMembers);
    Weights = new List<double>(numOfMembers);
    //Error statistics holder (depends on ClusterName and BinBorder set above)
    ErrorStats = new ClusterErrStatistics(ClusterName, numOfMembers, BinBorder);
}
//Constructor
/// <summary>
/// Creates an uninitialized instance.
/// </summary>
/// <param name="clusterName">The name of the cluster.</param>
/// <param name="outputType">The type of output.</param>
/// <param name="trainingGroupWeight">The macro-weight of the group of metrics related to training.</param>
/// <param name="testingGroupWeight">The macro-weight of the group of metrics related to testing.</param>
/// <param name="samplesWeight">The weight of the number of samples metric.</param>
/// <param name="precisionWeight">The weight of the numerical precision metric.</param>
/// <param name="misrecognizedFalseWeight">The weight of the "misrecognized falses" metric.</param>
/// <param name="unrecognizedTrueWeight">The weight of the "unrecognized trues" metric.</param>
public TNRNetCluster(string clusterName,
                     TNRNet.OutputType outputType,
                     double trainingGroupWeight = 1d,
                     double testingGroupWeight = 1d,
                     double samplesWeight = 1d,
                     double precisionWeight = 1d,
                     double misrecognizedFalseWeight = 1d,
                     double unrecognizedTrueWeight = 0d
                     )
{
    ClusterName = clusterName;
    Output = outputType;
    //Weights controlling the member-network combination
    _trainingGroupWeight = trainingGroupWeight;
    _testingGroupWeight = testingGroupWeight;
    _samplesWeight = samplesWeight;
    _precisionWeight = precisionWeight;
    _misrecognizedFalseWeight = misrecognizedFalseWeight;
    _unrecognizedTrueWeight = unrecognizedTrueWeight;
    //Error statistics holder (depends on ClusterName set above)
    ErrorStats = new ClusterErrStatistics(ClusterName, outputType);
    //Member networks and their scope identifiers start empty;
    //combination weights stay unset until computed.
    _memberNetCollection = new List<TNRNet>();
    _memberNetScopeIDCollection = new List<int>();
    _memberNetWeights = null;
}
/// <summary>
/// Builds the readout layer.
/// Prepares prediction clusters containing trained readout units (one cluster per output field,
/// one trained unit per cross-validation fold).
/// </summary>
/// <param name="predictorsCollection">Collection of predictors.</param>
/// <param name="idealOutputsCollection">Collection of desired outputs related to predictors.</param>
/// <param name="regressionController">Regression controller delegate.</param>
/// <param name="regressionControllerData">A user object passed through to the regression controller.</param>
/// <returns>Returned ValidationBundle is something like a protocol.
/// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values.
/// This is the pessimistic approach. Real results on unseen data could be better due to the clustering synergy.
/// </returns>
public ValidationBundle Build(List<double[]> predictorsCollection,
                              List<double[]> idealOutputsCollection,
                              ReadoutUnit.RegressionCallbackDelegate regressionController,
                              Object regressionControllerData
                              )
{
    //Random object - fixed seed 0 makes the shuffle (and so the whole build) reproducible
    Random rand = new Random(0);
    //Allocation of computed and ideal vectors for validation bundle
    //NOTE(review): vector width is taken from idealOutputsCollection[0] - assumes a non-empty,
    //rectangular collection; an empty collection would throw here. TODO confirm callers guarantee this.
    List<double[]> validationComputedVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    List<double[]> validationIdealVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    for (int i = 0; i < idealOutputsCollection.Count; i++)
    {
        validationComputedVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
        validationIdealVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
    }
    //Test dataset size checks (ratio upper bound, absolute sample count lower bound)
    if (_settings.TestDataRatio > MaxRatioOfTestData)
    {
        throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);
    if (testDataSetLength < MinLengthOfTestDataset)
    {
        throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    //Number of folds
    int numOfFolds = _settings.NumOfFolds;
    if (numOfFolds <= 0)
    {
        //Auto setup: as many folds as whole test-sized slices fit into the data, capped at MaxNumOfFolds
        numOfFolds = idealOutputsCollection.Count / testDataSetLength;
        if (numOfFolds > MaxNumOfFolds)
        {
            numOfFolds = MaxNumOfFolds;
        }
    }
    //Create shuffled copy of the data
    TimeSeriesBundle shuffledData = new TimeSeriesBundle(predictorsCollection, idealOutputsCollection);
    shuffledData.Shuffle(rand);
    //Data inspection, preparation of datasets and training of ReadoutUnits
    //Clusters of readout units (one cluster for each output field)
    for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
    {
        _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
        List<double[]> idealValueCollection = new List<double[]>(idealOutputsCollection.Count);
        BinDistribution refBinDistr = null;
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Reference binary distribution is relevant only for classification task
            refBinDistr = new BinDistribution(_dataRange.Mid);
        }
        //Transformation to a single value vectors and data analysis
        foreach (double[] idealVector in shuffledData.OutputVectorCollection)
        {
            double[] value = new double[1];
            value[0] = idealVector[clusterIdx];
            idealValueCollection.Add(value);
            if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
            {
                //Reference binary distribution is relevant only for classification task
                refBinDistr.Update(value);
            }
        }
        List<TimeSeriesBundle> subBundleCollection = null;
        //Datasets preparation is depending on the task type
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Classification task
            subBundleCollection = DivideSamplesForClassificationTask(shuffledData.InputVectorCollection,
                                                                     idealValueCollection,
                                                                     refBinDistr,
                                                                     testDataSetLength
                                                                     );
        }
        else
        {
            //Forecast task
            subBundleCollection = DivideSamplesForForecastTask(shuffledData.InputVectorCollection,
                                                               idealValueCollection,
                                                               testDataSetLength
                                                               );
        }
        //Best predicting unit per each fold in the cluster.
        ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
        int arrayPos = 0;
        for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
        {
            //Build training samples: every sub-bundle except the current fold (the fold itself is the test set)
            List<double[]> trainingPredictorsCollection = new List<double[]>();
            List<double[]> trainingIdealValueCollection = new List<double[]>();
            for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
            {
                if (bundleIdx != foldIdx)
                {
                    trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                    trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                }
            }
            //Call training regression to get the best fold's readout unit.
            //The best unit becomes to be the predicting cluster member.
            _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                clusterIdx,
                                                                                foldIdx + 1,
                                                                                numOfFolds,
                                                                                refBinDistr,
                                                                                trainingPredictorsCollection,
                                                                                trainingIdealValueCollection,
                                                                                subBundleCollection[foldIdx].InputVectorCollection,
                                                                                subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                rand,
                                                                                _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                regressionController,
                                                                                regressionControllerData
                                                                                );
            //Cluster error statistics & data for validation bundle (pessimistic approach)
            //arrayPos advances across folds, so each shuffled sample receives exactly one
            //computed/ideal pair per cluster (column clusterIdx of the validation vectors).
            for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
            {
                double value = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                ces.Update(value, subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                validationIdealVectorCollection[arrayPos][clusterIdx] = subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0];
                validationComputedVectorCollection[arrayPos][clusterIdx] = value;
                ++arrayPos;
            }
        } //foldIdx
        _clusterErrStatisticsCollection.Add(ces);
    } //clusterIdx
    //Validation bundle is returned.
    return (new ValidationBundle(validationComputedVectorCollection, validationIdealVectorCollection));
}
/// <summary>
/// Builds the readout layer.
/// Prepares prediction clusters containing trained readout units (one cluster per output field,
/// one trained unit per cross-validation fold).
/// </summary>
/// <param name="dataBundle">Collection of input predictors and associated desired output values.</param>
/// <param name="regressionController">Regression controller delegate.</param>
/// <param name="regressionControllerData">A user object passed through to the regression controller.</param>
/// <param name="predictorsMapper">Optional specific mapping of predictors to readout units.</param>
/// <returns>Returned ResultComparativeBundle is something like a protocol.
/// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values.
/// This is the pessimistic approach. Real results on unseen data could be better due to the clustering synergy.
/// </returns>
public ResultComparativeBundle Build(VectorBundle dataBundle,
                                     ReadoutUnit.RegressionCallbackDelegate regressionController,
                                     Object regressionControllerData,
                                     PredictorsMapper predictorsMapper = null
                                     )
{
    //Basic checks: dimensions are taken from the first pair, so a non-empty bundle is assumed
    int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
    int numOfOutputs = dataBundle.OutputVectorCollection[0].Length;
    if (numOfPredictors == 0)
    {
        throw new Exception("Number of predictors must be greater tham 0.");
    }
    if (numOfOutputs != _settings.ReadoutUnitCfgCollection.Count)
    {
        throw new Exception("Incorrect number of ideal output values in the vector.");
    }
    //Normalization of predictors and output data collections
    //Allocation of normalizers (one per predictor and one per output field)
    _predictorNormalizerCollection = new Normalizer[numOfPredictors];
    for (int i = 0; i < numOfPredictors; i++)
    {
        _predictorNormalizerCollection[i] = new Normalizer(DataRange, NormalizerDefaultReserve, true, false);
    }
    _outputNormalizerCollection = new Normalizer[numOfOutputs];
    for (int i = 0; i < numOfOutputs; i++)
    {
        //Classification outputs get no reserve and no centering; forecast outputs get both
        bool classificationTask = (_settings.ReadoutUnitCfgCollection[i].TaskType == CommonEnums.TaskType.Classification);
        _outputNormalizerCollection[i] = new Normalizer(DataRange,
                                                        classificationTask ? 0 : NormalizerDefaultReserve,
                                                        classificationTask ? false : true,
                                                        false
                                                        );
    }
    //Normalizers adjustment (first pass over the data collects the value ranges)
    for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
    {
        //Checks: every pair must match the dimensions of the first pair
        if (dataBundle.InputVectorCollection[pairIdx].Length != numOfPredictors)
        {
            throw new Exception("Inconsistent number of predictors in the predictors collection.");
        }
        if (dataBundle.OutputVectorCollection[pairIdx].Length != numOfOutputs)
        {
            throw new Exception("Inconsistent number of values in the ideal values collection.");
        }
        //Adjust predictors normalizers
        for (int i = 0; i < numOfPredictors; i++)
        {
            _predictorNormalizerCollection[i].Adjust(dataBundle.InputVectorCollection[pairIdx][i]);
        }
        //Adjust outputs normalizers
        for (int i = 0; i < numOfOutputs; i++)
        {
            _outputNormalizerCollection[i].Adjust(dataBundle.OutputVectorCollection[pairIdx][i]);
        }
    }
    //Data normalization
    //Allocation
    List<double[]> predictorsCollection = new List<double[]>(dataBundle.InputVectorCollection.Count);
    List<double[]> idealOutputsCollection = new List<double[]>(dataBundle.OutputVectorCollection.Count);
    //Normalization (second pass builds normalized copies; the original bundle is left untouched)
    for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
    {
        //Predictors
        double[] predictors = new double[numOfPredictors];
        for (int i = 0; i < numOfPredictors; i++)
        {
            predictors[i] = _predictorNormalizerCollection[i].Normalize(dataBundle.InputVectorCollection[pairIdx][i]);
        }
        predictorsCollection.Add(predictors);
        //Outputs
        double[] outputs = new double[numOfOutputs];
        for (int i = 0; i < numOfOutputs; i++)
        {
            outputs[i] = _outputNormalizerCollection[i].Normalize(dataBundle.OutputVectorCollection[pairIdx][i]);
        }
        idealOutputsCollection.Add(outputs);
    }
    //Data processing
    //Random object initialization - fixed seed 0 makes the shuffle (and so the whole build) reproducible
    Random rand = new Random(0);
    //Predictors mapper (specified or default pass-through)
    _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
    //Allocation of computed and ideal vectors for result comparative bundle
    List<double[]> validationComputedVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    List<double[]> validationIdealVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    for (int i = 0; i < idealOutputsCollection.Count; i++)
    {
        validationComputedVectorCollection.Add(new double[numOfOutputs]);
        validationIdealVectorCollection.Add(new double[numOfOutputs]);
    }
    //Test dataset size checks (ratio upper bound, absolute sample count lower bound)
    if (_settings.TestDataRatio > MaxRatioOfTestData)
    {
        throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);
    if (testDataSetLength < MinLengthOfTestDataset)
    {
        throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    //Number of folds
    int numOfFolds = _settings.NumOfFolds;
    if (numOfFolds <= 0)
    {
        //Auto setup: as many folds as whole test-sized slices fit into the data, capped at MaxNumOfFolds
        numOfFolds = idealOutputsCollection.Count / testDataSetLength;
        if (numOfFolds > MaxNumOfFolds)
        {
            numOfFolds = MaxNumOfFolds;
        }
    }
    //Create shuffled copy of the data
    VectorBundle shuffledData = new VectorBundle(predictorsCollection, idealOutputsCollection);
    shuffledData.Shuffle(rand);
    //Data inspection, preparation of datasets and training of ReadoutUnits
    //Clusters of readout units (one cluster for each output field)
    for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
    {
        _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
        List<double[]> idealValueCollection = new List<double[]>(idealOutputsCollection.Count);
        BinDistribution refBinDistr = null;
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Reference binary distribution is relevant only for classification task
            refBinDistr = new BinDistribution(DataRange.Mid);
        }
        //Transformation to a single value vectors and data analysis
        foreach (double[] idealVector in shuffledData.OutputVectorCollection)
        {
            double[] value = new double[1];
            value[0] = idealVector[clusterIdx];
            idealValueCollection.Add(value);
            if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
            {
                //Reference binary distribution is relevant only for classification task
                refBinDistr.Update(value);
            }
        }
        List<VectorBundle> subBundleCollection = null;
        //Select only the predictors mapped to this readout unit
        List<double[]> readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(_settings.ReadoutUnitCfgCollection[clusterIdx].Name, shuffledData.InputVectorCollection);
        //Datasets preparation is depending on the task type
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Classification task
            subBundleCollection = DivideSamplesForClassificationTask(readoutUnitInputVectorCollection,
                                                                     idealValueCollection,
                                                                     refBinDistr,
                                                                     testDataSetLength
                                                                     );
        }
        else
        {
            //Forecast task
            subBundleCollection = DivideSamplesForForecastTask(readoutUnitInputVectorCollection,
                                                               idealValueCollection,
                                                               testDataSetLength
                                                               );
        }
        //Find best unit per each fold in the cluster.
        ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
        int arrayPos = 0;
        for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
        {
            //Build training samples: every sub-bundle except the current fold (the fold itself is the test set)
            List<double[]> trainingPredictorsCollection = new List<double[]>();
            List<double[]> trainingIdealValueCollection = new List<double[]>();
            for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
            {
                if (bundleIdx != foldIdx)
                {
                    trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                    trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                }
            }
            //Call training regression to get the best fold's readout unit.
            //The best unit becomes to be the predicting cluster member.
            _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                clusterIdx,
                                                                                foldIdx + 1,
                                                                                numOfFolds,
                                                                                refBinDistr,
                                                                                trainingPredictorsCollection,
                                                                                trainingIdealValueCollection,
                                                                                subBundleCollection[foldIdx].InputVectorCollection,
                                                                                subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                rand,
                                                                                _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                regressionController,
                                                                                regressionControllerData
                                                                                );
            //Cluster error statistics & data for validation bundle (pessimistic approach)
            //Statistics take both the normalized and the naturalized (de-normalized) values;
            //the comparative bundle stores the naturalized ones. arrayPos advances across folds,
            //so each shuffled sample receives exactly one computed/ideal pair per cluster.
            for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
            {
                double nrmComputedValue = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                double natComputedValue = _outputNormalizerCollection[clusterIdx].Naturalize(nrmComputedValue);
                double natIdealValue = _outputNormalizerCollection[clusterIdx].Naturalize(subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                ces.Update(nrmComputedValue, subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0], natComputedValue, natIdealValue);
                validationIdealVectorCollection[arrayPos][clusterIdx] = natIdealValue;
                validationComputedVectorCollection[arrayPos][clusterIdx] = natComputedValue;
                ++arrayPos;
            }
        } //foldIdx
        _clusterErrStatisticsCollection.Add(ces);
    } //clusterIdx
    //Validation bundle is returned.
    return (new ResultComparativeBundle(validationComputedVectorCollection, validationIdealVectorCollection));
}