/// <summary>
/// Creates and trains the State Machine readout layer.
/// When a mapper configuration is present, availability of predictors is restricted
/// per readout unit according to the pool of origin of each predictor.
/// </summary>
/// <param name="regressionInput">
/// RegressionInput object prepared by PrepareRegressionData function
/// </param>
/// <param name="regressionController">
/// Optional. see Regression.RegressionCallbackDelegate
/// </param>
/// <param name="regressionControllerData">
/// Optional custom object to be passed to regressionController together with other standard information
/// </param>
public ResultComparativeBundle BuildReadoutLayer(RegressionInput regressionInput,
                                                 ReadoutUnit.RegressionCallbackDelegate regressionController = null,
                                                 Object regressionControllerData = null
                                                 )
{
    //New readout layer instance
    RL = new ReadoutLayer(_settings.ReadoutLayerConfig);
    //Predictors mapper is built only when a specific mapping is configured
    ReadoutLayer.PredictorsMapper mapper = null;
    if (_settings.MapperConfig != null)
    {
        //Empty mapper instance
        mapper = new ReadoutLayer.PredictorsMapper(NP.NumOfPredictors);
        //For every predictor record the pool (origin) of its source neuron.
        //A neuron providing a secondary predictor occupies two consecutive slots
        //sharing the same origin reference.
        StateMachineSettings.MapperSettings.PoolRef[] predictorOrigins = new StateMachineSettings.MapperSettings.PoolRef[NP.NumOfPredictors];
        int originIdx = 0;
        foreach (Reservoir.PredictorNeuron predictorNeuron in NP.PredictorNeuronCollection)
        {
            StateMachineSettings.MapperSettings.PoolRef origin = new StateMachineSettings.MapperSettings.PoolRef
            {
                _reservoirInstanceIdx = predictorNeuron.Neuron.Placement.ReservoirID,
                _poolIdx = predictorNeuron.Neuron.Placement.PoolID
            };
            predictorOrigins[originIdx] = origin;
            ++originIdx;
            if (predictorNeuron.UseSecondaryPredictor)
            {
                //Secondary predictor has the same origin as the primary one
                predictorOrigins[originIdx] = origin;
                ++originIdx;
            }
        }
        //Build the predictor switch vector for every readout unit having a specific mapping
        foreach (string readoutUnitName in _settings.MapperConfig.Map.Keys)
        {
            bool[] predictorSwitches = new bool[NP.NumOfPredictors];
            predictorSwitches.Populate(false);
            foreach (StateMachineSettings.MapperSettings.PoolRef allowedPool in _settings.MapperConfig.Map[readoutUnitName])
            {
                //Enable every predictor originating in the allowed pool
                for (int predictorIdx = 0; predictorIdx < predictorOrigins.Length; predictorIdx++)
                {
                    if (predictorOrigins[predictorIdx]._reservoirInstanceIdx == allowedPool._reservoirInstanceIdx &&
                        predictorOrigins[predictorIdx]._poolIdx == allowedPool._poolIdx)
                    {
                        predictorSwitches[predictorIdx] = true;
                    }
                }
            }
            //Register the mapping
            mapper.Add(readoutUnitName, predictorSwitches);
        }
    }
    //Training of the readout layer
    return RL.Build(regressionInput.PreprocessedData,
                    regressionController,
                    regressionControllerData,
                    mapper
                    );
}
/// <summary>
/// Trains the State Machine readout layer.
/// </summary>
/// <param name="rsi">
/// RegressionStageInput object prepared by PrepareRegressionStageInput function
/// </param>
/// <param name="regressionController">
/// Optional. see Regression.RegressionCallbackDelegate
/// </param>
/// <param name="regressionControllerData">
/// Optional custom object to be passed to regressionController together with other standard information
/// </param>
public ValidationBundle RegressionStage(RegressionStageInput rsi,
                                        ReadoutUnit.RegressionCallbackDelegate regressionController = null,
                                        Object regressionControllerData = null
                                        )
{
    //Whole training is delegated to the readout layer
    var predictors = rsi.PredictorsCollection;
    var idealOutputs = rsi.IdealOutputsCollection;
    return _readoutLayer.Build(predictors, idealOutputs, regressionController, regressionControllerData);
}
/// <summary>
/// Trains the State Machine readout layer.
/// </summary>
/// <param name="rsi">
/// RegressionStageInput object prepared by PrepareRegressionStageInput function
/// </param>
/// <param name="regressionController">
/// Optional. see Regression.RegressionCallbackDelegate
/// </param>
/// <param name="regressionControllerData">
/// Optional custom object to be passed to regressionController together with other standard information
/// </param>
public ValidationBundle RegressionStage(RegressionStageInput rsi,
                                        ReadoutUnit.RegressionCallbackDelegate regressionController = null,
                                        Object regressionControllerData = null
                                        )
{
    //Fresh readout layer instance for this training run
    _readoutLayer = new ReadoutLayer(_settings.ReadoutLayerConfig, DataRange);
    //Training is delegated to the readout layer
    var predictors = rsi.PredictorsCollection;
    var idealOutputs = rsi.IdealOutputsCollection;
    return _readoutLayer.Build(predictors, idealOutputs, regressionController, regressionControllerData);
}
/// <summary>
/// Builds readout layer.
/// Prepares prediction clusters containing trained readout units.
/// Uses x-fold cross-validation: for each output field (cluster) the data is divided
/// into folds and one readout unit is trained per fold, validated on the held-out fold.
/// </summary>
/// <param name="predictorsCollection">Collection of predictors</param>
/// <param name="idealOutputsCollection">Collection of desired outputs related to predictors</param>
/// <param name="regressionController">Regression controller delegate</param>
/// <param name="regressionControllerData">An user object passed to the regression controller</param>
/// <returns>Returned ValidationBundle is something like a protocol.
/// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values.
/// This is the pessimistic approach. Real results on unseen data could be better due to the clustering synergy.
/// </returns>
public ValidationBundle Build(List<double[]> predictorsCollection,
                              List<double[]> idealOutputsCollection,
                              ReadoutUnit.RegressionCallbackDelegate regressionController,
                              Object regressionControllerData
                              )
{
    //Random object with a fixed seed (0) so the build is deterministic/repeatable
    Random rand = new Random(0);
    //Allocation of computed and ideal vectors for validation bundle
    List<double[]> validationComputedVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    List<double[]> validationIdealVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    for (int i = 0; i < idealOutputsCollection.Count; i++)
    {
        validationComputedVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
        validationIdealVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
    }
    //Test dataset size checks
    if (_settings.TestDataRatio > MaxRatioOfTestData)
    {
        throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);
    if (testDataSetLength < MinLengthOfTestDataset)
    {
        throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    //Number of folds
    int numOfFolds = _settings.NumOfFolds;
    if (numOfFolds <= 0)
    {
        //Auto setup: as many folds as test-sized partitions fit into the data, capped at MaxNumOfFolds
        numOfFolds = idealOutputsCollection.Count / testDataSetLength;
        if (numOfFolds > MaxNumOfFolds)
        {
            numOfFolds = MaxNumOfFolds;
        }
    }
    //Create shuffled copy of the data (input collections themselves are not modified)
    TimeSeriesBundle shuffledData = new TimeSeriesBundle(predictorsCollection, idealOutputsCollection);
    shuffledData.Shuffle(rand);
    //Data inspection, preparation of datasets and training of ReadoutUnits
    //Clusters of readout units (one cluster for each output field)
    for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
    {
        //One trained readout unit per fold
        _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
        List<double[]> idealValueCollection = new List<double[]>(idealOutputsCollection.Count);
        BinDistribution refBinDistr = null;
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Reference binary distribution is relevant only for classification task
            refBinDistr = new BinDistribution(_dataRange.Mid);
        }
        //Transformation to a single value vectors and data analysis
        foreach (double[] idealVector in shuffledData.OutputVectorCollection)
        {
            //Pick only this cluster's output component
            double[] value = new double[1];
            value[0] = idealVector[clusterIdx];
            idealValueCollection.Add(value);
            if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
            {
                //Reference binary distribution is relevant only for classification task
                refBinDistr.Update(value);
            }
        }
        List<TimeSeriesBundle> subBundleCollection = null;
        //Datasets preparation is depending on the task type
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Classification task - folds keep the binary class distribution
            subBundleCollection = DivideSamplesForClassificationTask(shuffledData.InputVectorCollection,
                                                                     idealValueCollection,
                                                                     refBinDistr,
                                                                     testDataSetLength
                                                                     );
        }
        else
        {
            //Forecast task
            subBundleCollection = DivideSamplesForForecastTask(shuffledData.InputVectorCollection,
                                                               idealValueCollection,
                                                               testDataSetLength
                                                               );
        }
        //Best predicting unit per each fold in the cluster.
        ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
        //Running write position into the validation vectors (advances across all folds)
        int arrayPos = 0;
        for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
        {
            //Build training samples from all folds except the current (held-out) one
            List<double[]> trainingPredictorsCollection = new List<double[]>();
            List<double[]> trainingIdealValueCollection = new List<double[]>();
            for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
            {
                if (bundleIdx != foldIdx)
                {
                    trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                    trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                }
            }
            //Call training regression to get the best fold's readout unit.
            //The best unit becomes to be the predicting cluster member.
            _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                clusterIdx,
                                                                                foldIdx + 1,
                                                                                numOfFolds,
                                                                                refBinDistr,
                                                                                trainingPredictorsCollection,
                                                                                trainingIdealValueCollection,
                                                                                subBundleCollection[foldIdx].InputVectorCollection,
                                                                                subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                rand,
                                                                                _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                regressionController,
                                                                                regressionControllerData
                                                                                );
            //Cluster error statistics & data for validation bundle (pessimistic approach:
            //each sample is scored by the unit that did NOT see it during training)
            for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
            {
                double value = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                ces.Update(value, subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                validationIdealVectorCollection[arrayPos][clusterIdx] = subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0];
                validationComputedVectorCollection[arrayPos][clusterIdx] = value;
                ++arrayPos;
            }
        } //foldIdx
        _clusterErrStatisticsCollection.Add(ces);
    } //clusterIdx
    //Validation bundle is returned.
    return (new ValidationBundle(validationComputedVectorCollection, validationIdealVectorCollection));
}
/// <summary>
/// Builds readout layer.
/// Normalizes the given data, prepares prediction clusters containing trained readout units
/// (x-fold cross-validation; one readout unit per fold and output field) and collects
/// validation results in natural (denormalized) values.
/// </summary>
/// <param name="dataBundle">Collection of input predictors and associated desired output values</param>
/// <param name="regressionController">Regression controller delegate</param>
/// <param name="regressionControllerData">An user object passed to the regression controller</param>
/// <param name="predictorsMapper">Optional specific mapping of predictors to readout units</param>
/// <returns>Returned ResultComparativeBundle is something like a protocol.
/// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values.
/// This is the pessimistic approach. Real results on unseen data could be better due to the clustering synergy.
/// </returns>
public ResultComparativeBundle Build(VectorBundle dataBundle,
                                     ReadoutUnit.RegressionCallbackDelegate regressionController,
                                     Object regressionControllerData,
                                     PredictorsMapper predictorsMapper = null
                                     )
{
    //Basic checks
    int numOfPredictors = dataBundle.InputVectorCollection[0].Length;
    int numOfOutputs = dataBundle.OutputVectorCollection[0].Length;
    if (numOfPredictors == 0)
    {
        //Message typo fixed ("tham" -> "than")
        throw new Exception("Number of predictors must be greater than 0.");
    }
    if (numOfOutputs != _settings.ReadoutUnitCfgCollection.Count)
    {
        throw new Exception("Incorrect number of ideal output values in the vector.");
    }
    //Normalization of predictors and output data collections
    //Allocation of normalizers
    _predictorNormalizerCollection = new Normalizer[numOfPredictors];
    for (int i = 0; i < numOfPredictors; i++)
    {
        _predictorNormalizerCollection[i] = new Normalizer(DataRange, NormalizerDefaultReserve, true, false);
    }
    _outputNormalizerCollection = new Normalizer[numOfOutputs];
    for (int i = 0; i < numOfOutputs; i++)
    {
        //Classification outputs get no reserve and no centering; forecast outputs
        //use the default reserve and are centered.
        bool classificationTask = (_settings.ReadoutUnitCfgCollection[i].TaskType == CommonEnums.TaskType.Classification);
        _outputNormalizerCollection[i] = new Normalizer(DataRange,
                                                        classificationTask ? 0 : NormalizerDefaultReserve,
                                                        !classificationTask,
                                                        false
                                                        );
    }
    //Normalizers adjustment
    for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
    {
        //Consistency checks of every sample pair
        if (dataBundle.InputVectorCollection[pairIdx].Length != numOfPredictors)
        {
            throw new Exception("Inconsistent number of predictors in the predictors collection.");
        }
        if (dataBundle.OutputVectorCollection[pairIdx].Length != numOfOutputs)
        {
            throw new Exception("Inconsistent number of values in the ideal values collection.");
        }
        //Adjust predictors normalizers
        for (int i = 0; i < numOfPredictors; i++)
        {
            _predictorNormalizerCollection[i].Adjust(dataBundle.InputVectorCollection[pairIdx][i]);
        }
        //Adjust outputs normalizers
        for (int i = 0; i < numOfOutputs; i++)
        {
            _outputNormalizerCollection[i].Adjust(dataBundle.OutputVectorCollection[pairIdx][i]);
        }
    }
    //Data normalization
    //Allocation
    List<double[]> predictorsCollection = new List<double[]>(dataBundle.InputVectorCollection.Count);
    List<double[]> idealOutputsCollection = new List<double[]>(dataBundle.OutputVectorCollection.Count);
    //Normalization
    for (int pairIdx = 0; pairIdx < dataBundle.InputVectorCollection.Count; pairIdx++)
    {
        //Predictors
        double[] predictors = new double[numOfPredictors];
        for (int i = 0; i < numOfPredictors; i++)
        {
            predictors[i] = _predictorNormalizerCollection[i].Normalize(dataBundle.InputVectorCollection[pairIdx][i]);
        }
        predictorsCollection.Add(predictors);
        //Outputs
        double[] outputs = new double[numOfOutputs];
        for (int i = 0; i < numOfOutputs; i++)
        {
            outputs[i] = _outputNormalizerCollection[i].Normalize(dataBundle.OutputVectorCollection[pairIdx][i]);
        }
        idealOutputsCollection.Add(outputs);
    }
    //Data processing
    //Random object with a fixed seed (0) so the build is deterministic/repeatable
    Random rand = new Random(0);
    //Predictors mapper (specified or default all-predictors mapping)
    _predictorsMapper = predictorsMapper ?? new PredictorsMapper(numOfPredictors);
    //Allocation of computed and ideal vectors for result comparative bundle
    List<double[]> validationComputedVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    List<double[]> validationIdealVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    for (int i = 0; i < idealOutputsCollection.Count; i++)
    {
        validationComputedVectorCollection.Add(new double[numOfOutputs]);
        validationIdealVectorCollection.Add(new double[numOfOutputs]);
    }
    //Test dataset size checks
    if (_settings.TestDataRatio > MaxRatioOfTestData)
    {
        throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);
    if (testDataSetLength < MinLengthOfTestDataset)
    {
        throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    //Number of folds
    int numOfFolds = _settings.NumOfFolds;
    if (numOfFolds <= 0)
    {
        //Auto setup: as many folds as test-sized partitions fit into the data, capped at MaxNumOfFolds
        numOfFolds = idealOutputsCollection.Count / testDataSetLength;
        if (numOfFolds > MaxNumOfFolds)
        {
            numOfFolds = MaxNumOfFolds;
        }
    }
    //Create shuffled copy of the data (normalized source collections are not modified)
    VectorBundle shuffledData = new VectorBundle(predictorsCollection, idealOutputsCollection);
    shuffledData.Shuffle(rand);
    //Data inspection, preparation of datasets and training of ReadoutUnits
    //Clusters of readout units (one cluster for each output field)
    for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
    {
        //One trained readout unit per fold
        _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
        List<double[]> idealValueCollection = new List<double[]>(idealOutputsCollection.Count);
        BinDistribution refBinDistr = null;
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Reference binary distribution is relevant only for classification task
            refBinDistr = new BinDistribution(DataRange.Mid);
        }
        //Transformation to a single value vectors and data analysis
        foreach (double[] idealVector in shuffledData.OutputVectorCollection)
        {
            //Pick only this cluster's output component
            double[] value = new double[1];
            value[0] = idealVector[clusterIdx];
            idealValueCollection.Add(value);
            if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
            {
                //Reference binary distribution is relevant only for classification task
                refBinDistr.Update(value);
            }
        }
        List<VectorBundle> subBundleCollection = null;
        //Apply the predictors mapping specific to this readout unit
        List<double[]> readoutUnitInputVectorCollection = _predictorsMapper.CreateVectorCollection(_settings.ReadoutUnitCfgCollection[clusterIdx].Name, shuffledData.InputVectorCollection);
        //Datasets preparation is depending on the task type
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Classification task - folds keep the binary class distribution
            subBundleCollection = DivideSamplesForClassificationTask(readoutUnitInputVectorCollection,
                                                                     idealValueCollection,
                                                                     refBinDistr,
                                                                     testDataSetLength
                                                                     );
        }
        else
        {
            //Forecast task
            subBundleCollection = DivideSamplesForForecastTask(readoutUnitInputVectorCollection,
                                                               idealValueCollection,
                                                               testDataSetLength
                                                               );
        }
        //Find best unit per each fold in the cluster.
        ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
        //Running write position into the validation vectors (advances across all folds)
        int arrayPos = 0;
        for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
        {
            //Build training samples from all folds except the current (held-out) one
            List<double[]> trainingPredictorsCollection = new List<double[]>();
            List<double[]> trainingIdealValueCollection = new List<double[]>();
            for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
            {
                if (bundleIdx != foldIdx)
                {
                    trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                    trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                }
            }
            //Call training regression to get the best fold's readout unit.
            //The best unit becomes to be the predicting cluster member.
            _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                clusterIdx,
                                                                                foldIdx + 1,
                                                                                numOfFolds,
                                                                                refBinDistr,
                                                                                trainingPredictorsCollection,
                                                                                trainingIdealValueCollection,
                                                                                subBundleCollection[foldIdx].InputVectorCollection,
                                                                                subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                rand,
                                                                                _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                regressionController,
                                                                                regressionControllerData
                                                                                );
            //Cluster error statistics & data for validation bundle (pessimistic approach:
            //each sample is scored by the unit that did NOT see it during training)
            for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
            {
                double nrmComputedValue = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                //Validation results are recorded in natural (denormalized) values
                double natComputedValue = _outputNormalizerCollection[clusterIdx].Naturalize(nrmComputedValue);
                double natIdealValue = _outputNormalizerCollection[clusterIdx].Naturalize(subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                ces.Update(nrmComputedValue,
                           subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0],
                           natComputedValue,
                           natIdealValue);
                validationIdealVectorCollection[arrayPos][clusterIdx] = natIdealValue;
                validationComputedVectorCollection[arrayPos][clusterIdx] = natComputedValue;
                ++arrayPos;
            }
        } //foldIdx
        _clusterErrStatisticsCollection.Add(ces);
    } //clusterIdx
    //Validation bundle is returned.
    return (new ResultComparativeBundle(validationComputedVectorCollection, validationIdealVectorCollection));
}
/// <summary>
/// Creates and trains the State Machine readout layer.
/// Function uses specific mapping of predictors to readout units, if available.
/// Function also rejects unusable predictors having no reasonable fluctuation of values
/// (those are already disabled in PredictorGeneralSwitchCollection).
/// </summary>
/// <param name="regressionInput">
/// RegressionInput object prepared by PrepareRegressionData function
/// </param>
/// <param name="regressionController">
/// Optional. see Regression.RegressionCallbackDelegate
/// </param>
/// <param name="regressionControllerData">
/// Optional custom object to be passed to regressionController together with other standard information
/// </param>
public ResultBundle BuildReadoutLayer(RegressionInput regressionInput,
                                      ReadoutUnit.RegressionCallbackDelegate regressionController = null,
                                      Object regressionControllerData = null
                                      )
{
    //Readout layer instance
    RL = new ReadoutLayer(_settings.ReadoutLayerConfig);
    //Create empty instance of the mapper seeded with the general validity switches
    ReadoutLayer.PredictorsMapper mapper = new ReadoutLayer.PredictorsMapper(PredictorGeneralSwitchCollection);
    if (_settings.MapperCfg != null)
    {
        //Expand list of predicting neurons to array of predictor origin.
        //Each neuron contributes one slot per enabled predictor, all sharing the same pool reference.
        StateMachineSettings.MapperSettings.AllowedPool[] neuronPoolRefCollection = new StateMachineSettings.MapperSettings.AllowedPool[NP.NumOfPredictors];
        int idx = 0;
        foreach (HiddenNeuron neuron in NP.PredictorNeuronCollection)
        {
            StateMachineSettings.MapperSettings.AllowedPool poolRef = new StateMachineSettings.MapperSettings.AllowedPool
            {
                _reservoirInstanceIdx = neuron.Placement.ReservoirID,
                _poolIdx = neuron.Placement.PoolID
            };
            for (int i = 0; i < neuron.PredictorsCfg.NumOfEnabledPredictors; i++)
            {
                neuronPoolRefCollection[idx] = poolRef;
                ++idx;
            }
        }
        //Iterate all readout units
        //Predictor layout assumed here: neuron predictors first (indexes 0 .. PredictorNeuronCollection.Count-1),
        //then routed input fields (indexes PredictorNeuronCollection.Count .. NumOfPredictors-1)
        foreach (string readoutUnitName in _settings.ReadoutLayerConfig.OutputFieldNameCollection)
        {
            bool[] switches = new bool[NP.NumOfPredictors];
            //Initially allow all valid predictors
            PredictorGeneralSwitchCollection.CopyTo(switches, 0);
            //Exists specific mapping for this readout unit?
            if (_settings.MapperCfg != null && (_settings.MapperCfg.PoolsMap.ContainsKey(readoutUnitName) || _settings.MapperCfg.RoutedInputFieldsMap.ContainsKey(readoutUnitName)))
            {
                //Routed input fields
                if (_settings.MapperCfg.RoutedInputFieldsMap.ContainsKey(readoutUnitName))
                {
                    //Initially disable all routed input fields
                    for (int i = NP.PredictorNeuronCollection.Count; i < NP.NumOfPredictors; i++)
                    {
                        switches[i] = false;
                    }
                    //Enable enabled routed input fields (only those that are also generally valid)
                    List<int> enabledRoutedFieldsIdxs = _settings.MapperCfg.RoutedInputFieldsMap[readoutUnitName];
                    for (int i = 0; i < enabledRoutedFieldsIdxs.Count; i++)
                    {
                        switches[NP.PredictorNeuronCollection.Count + enabledRoutedFieldsIdxs[i]] = PredictorGeneralSwitchCollection[NP.PredictorNeuronCollection.Count + enabledRoutedFieldsIdxs[i]];
                    }
                }
                //Neuron predictors
                if (_settings.MapperCfg.PoolsMap.ContainsKey(readoutUnitName))
                {
                    //Initially disable all neuron predictors
                    for (int i = 0; i < NP.PredictorNeuronCollection.Count; i++)
                    {
                        switches[i] = false;
                    }
                    //Enable allowed neuron predictors
                    foreach (StateMachineSettings.MapperSettings.AllowedPool allowedPool in _settings.MapperCfg.PoolsMap[readoutUnitName])
                    {
                        //Enable specific predictors from allowed pool (origin)
                        for (int i = 0; i < NP.PredictorNeuronCollection.Count; i++)
                        {
                            if (neuronPoolRefCollection[i]._reservoirInstanceIdx == allowedPool._reservoirInstanceIdx && neuronPoolRefCollection[i]._poolIdx == allowedPool._poolIdx)
                            {
                                //Enable predictor if it is valid
                                switches[i] = PredictorGeneralSwitchCollection[i];
                            }
                        }
                    }
                }
            }
            //Add mapping to mapper
            mapper.Add(readoutUnitName, switches);
        }
    }
    //Training
    return (RL.Build(regressionInput.PreprocessedData,
                     regressionController,
                     regressionControllerData,
                     mapper
                     ));
}