/// <summary>
/// Divides samples into bundles (folds) for the forecast task.
/// Samples are taken in their original order; each bundle receives bundleSize consecutive
/// samples and any remaining samples are then distributed among the already created bundles
/// in round-robin fashion.
/// </summary>
/// <param name="predictorsCollection">Collection of predictor vectors</param>
/// <param name="idealValueCollection">Collection of ideal (desired) vectors corresponding to predictors</param>
/// <param name="bundleSize">Desired number of samples per bundle</param>
/// <returns>Collection of the created bundles</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when bundleSize is not positive.</exception>
/// <exception cref="ArgumentException">Thrown when there are not enough samples to create a single bundle.</exception>
private List<TimeSeriesBundle> DivideSamplesForForecastTask(List<double[]> predictorsCollection,
                                                            List<double[]> idealValueCollection,
                                                            int bundleSize
                                                            )
{
    if (bundleSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(bundleSize), "Bundle size must be GT 0.");
    }
    int numOfBundles = idealValueCollection.Count / bundleSize;
    if (numOfBundles < 1)
    {
        //Fail fast with a clear message; without this guard the round-robin modulo below
        //would raise a DivideByZeroException (bundleCollection.Count == 0).
        throw new ArgumentException("Insufficient number of samples to create at least one bundle of the requested size.", nameof(bundleSize));
    }
    List<TimeSeriesBundle> bundleCollection = new List<TimeSeriesBundle>(numOfBundles);
    //Bundles creation
    int samplesPos = 0;
    for (int bundleNum = 0; bundleNum < numOfBundles; bundleNum++)
    {
        TimeSeriesBundle bundle = new TimeSeriesBundle();
        for (int i = 0; i < bundleSize && samplesPos < idealValueCollection.Count; i++)
        {
            bundle.InputVectorCollection.Add(predictorsCollection[samplesPos]);
            bundle.OutputVectorCollection.Add(idealValueCollection[samplesPos]);
            ++samplesPos;
        }
        bundleCollection.Add(bundle);
    }
    //Remaining samples are spread over the existing bundles (round-robin)
    for (int i = 0; i < idealValueCollection.Count - samplesPos; i++)
    {
        int bundleIdx = i % bundleCollection.Count;
        bundleCollection[bundleIdx].InputVectorCollection.Add(predictorsCollection[samplesPos + i]);
        bundleCollection[bundleIdx].OutputVectorCollection.Add(idealValueCollection[samplesPos + i]);
    }
    return bundleCollection;
}
/// <summary>
/// Prepares input for regression stage of State Machine training for the time series prediction task.
/// All input vectors are processed by internal reservoirs and the corresponding network predictors are recorded.
/// </summary>
/// <param name="dataSet">
/// The bundle containing known sample input and desired output vectors (in time order)
/// </param>
/// <param name="numOfBootSamples">
/// Number of boot samples from the beginning of all samples.
/// The purpose of the boot samples is to ensure that the states of the neurons in the reservoir
/// depend only on the time series data and not on the initial state of the neurons in the reservoir.
/// The number of boot samples depends on the size and configuration of the reservoirs.
/// It is usually sufficient to set the number of boot samples equal to the number of neurons in the largest reservoir.
/// </param>
/// <param name="informativeCallback">
/// Function to be called after each processed input.
/// </param>
/// <param name="userObject">
/// The user object to be passed to informativeCallback.
/// </param>
/// <returns>Prepared regression stage input (predictors, ideal outputs and reservoir statistics)</returns>
/// <exception cref="InvalidOperationException">Thrown when the configured task type is not Prediction.</exception>
public RegressionStageInput PrepareRegressionStageInput(TimeSeriesBundle dataSet,
                                                        int numOfBootSamples,
                                                        PredictorsCollectionCallbackDelegate informativeCallback = null,
                                                        Object userObject = null
                                                        )
{
    if (_settings.TaskType != CommonEnums.TaskType.Prediction)
    {
        //InvalidOperationException is more appropriate than the generic Exception:
        //the object's current configuration does not permit this call.
        throw new InvalidOperationException("This version of PrepareRegressionStageInput function is useable only for the prediction task type.");
    }
    int dataSetLength = dataSet.InputVectorCollection.Count;
    //RegressionStageInput allocation.
    //Capacity is clamped to 0 to avoid ArgumentOutOfRangeException when
    //numOfBootSamples exceeds the data set length.
    RegressionStageInput rsi = new RegressionStageInput
    {
        PredictorsCollection = new List<double[]>(Math.Max(0, dataSetLength - numOfBootSamples)),
        IdealOutputsCollection = new List<double[]>(Math.Max(0, dataSetLength - numOfBootSamples))
    };
    //Reset the internal states and statistics
    Reset(true);
    //Collection
    for (int dataSetIdx = 0; dataSetIdx < dataSetLength; dataSetIdx++)
    {
        bool afterBoot = (dataSetIdx >= numOfBootSamples);
        //Push input data into the network
        double[] predictors = PushInput(dataSet.InputVectorCollection[dataSetIdx], afterBoot);
        //Is boot sequence passed? Collect predictors?
        if (afterBoot)
        {
            //YES - collect predictors and corresponding desired outputs
            rsi.PredictorsCollection.Add(predictors);
            rsi.IdealOutputsCollection.Add(dataSet.OutputVectorCollection[dataSetIdx]);
        }
        //An informative callback (consistent with the sibling overload)
        informativeCallback?.Invoke(dataSetLength, dataSetIdx + 1, userObject);
    }
    //Collect reservoirs statistics
    rsi.ReservoirStatCollection = CollectReservoirInstancesStatatistics();
    return rsi;
}
/// <summary>
/// Prepares input for regression stage of State Machine training.
/// All input vectors are processed by internal reservoirs and the corresponding network predictors are recorded.
/// </summary>
/// <param name="dataSet">
/// The bundle containing known sample input and desired output vectors (in time order)
/// </param>
/// <param name="informativeCallback">
/// Function to be called after each processed input.
/// </param>
/// <param name="userObject">
/// The user object to be passed to informativeCallback.
/// </param>
/// <returns>Prepared regression stage input (predictors, ideal outputs and reservoir statistics)</returns>
/// <exception cref="InvalidOperationException">Thrown when the configured input feeding is patterned.</exception>
public RegressionStageInput PrepareRegressionStageInput(TimeSeriesBundle dataSet,
                                                        PredictorsCollectionCallbackDelegate informativeCallback = null,
                                                        Object userObject = null
                                                        )
{
    if (_settings.InputConfig.FeedingType == CommonEnums.InputFeedingType.Patterned)
    {
        //InvalidOperationException is more appropriate than the generic Exception:
        //the object's current configuration does not permit this call.
        throw new InvalidOperationException("This version of PrepareRegressionStageInput function is not useable for patterned input feeding.");
    }
    int dataSetLength = dataSet.InputVectorCollection.Count;
    //Hoist the repeated property access out of the loop
    int bootCycles = _settings.InputConfig.BootCycles;
    //RegressionStageInput allocation.
    //Capacity is clamped to 0 to avoid ArgumentOutOfRangeException when
    //the number of boot cycles exceeds the data set length.
    RegressionStageInput rsi = new RegressionStageInput
    {
        PredictorsCollection = new List<double[]>(Math.Max(0, dataSetLength - bootCycles)),
        IdealOutputsCollection = new List<double[]>(Math.Max(0, dataSetLength - bootCycles))
    };
    //Reset the internal states and statistics
    Reset(true);
    //Collection
    for (int dataSetIdx = 0; dataSetIdx < dataSetLength; dataSetIdx++)
    {
        bool afterBoot = (dataSetIdx >= bootCycles);
        //Push input data into the network
        double[] predictors = PushInput(dataSet.InputVectorCollection[dataSetIdx], afterBoot);
        //Is boot sequence passed? Collect predictors?
        if (afterBoot)
        {
            //YES - collect predictors and corresponding desired outputs
            rsi.PredictorsCollection.Add(predictors);
            rsi.IdealOutputsCollection.Add(dataSet.OutputVectorCollection[dataSetIdx]);
        }
        //An informative callback
        informativeCallback?.Invoke(dataSetLength, dataSetIdx + 1, userObject);
    }
    //Collect reservoirs statistics
    rsi.ReservoirStatCollection = CollectReservoirInstancesStatatistics();
    return rsi;
}
/// <summary>
/// Performs the specified demo case.
/// Loads and prepares sample data, trains the State Machine and displays the results.
/// </summary>
/// <param name="log">Into this interface are written output messages</param>
/// <param name="demoCaseParams">An instance of DemoSettings.CaseSettings to be performed</param>
public static void PerformDemoCase(IOutputLog log, DemoSettings.CaseSettings demoCaseParams)
{
    //For demo purposes is allowed only the normalization range (-1, 1)
    Interval normRange = new Interval(-1, 1);
    log.Write(" Performing demo case " + demoCaseParams.Name, false);
    //Bundle normalizer object (created by the data loader; needed later to denormalize prediction output)
    BundleNormalizer bundleNormalizer = null;
    //Prediction input vector (relevant only for time series prediction task)
    double[] predictionInputVector = null;
    //Instantiate a State Machine
    StateMachine stateMachine = new StateMachine(demoCaseParams.stateMachineCfg, normRange);
    //Prepare regression stage input object - the loader branch depends on the task type
    log.Write(" ", false);
    StateMachine.RegressionStageInput rsi = null;
    if (demoCaseParams.stateMachineCfg.TaskType == CommonEnums.TaskType.Prediction)
    {
        //Time series prediction task
        //Load data bundle from csv file
        TimeSeriesBundle data = TimeSeriesDataLoader.Load(demoCaseParams.FileName,
                                                          demoCaseParams.stateMachineCfg.InputFieldNameCollection,
                                                          demoCaseParams.stateMachineCfg.ReadoutLayerConfig.OutputFieldNameCollection,
                                                          normRange,
                                                          demoCaseParams.NormalizerReserveRatio,
                                                          true,
                                                          demoCaseParams.SingleNormalizer,
                                                          out bundleNormalizer,
                                                          out predictionInputVector
                                                          );
        rsi = stateMachine.PrepareRegressionStageInput(data, demoCaseParams.NumOfBootSamples, PredictorsCollectionCallback, log);
    }
    else
    {
        //Classification or hybrid task
        //Load data bundle from csv file
        PatternBundle data = PatternDataLoader.Load(demoCaseParams.stateMachineCfg.TaskType == CommonEnums.TaskType.Classification,
                                                    demoCaseParams.FileName,
                                                    demoCaseParams.stateMachineCfg.InputFieldNameCollection,
                                                    demoCaseParams.stateMachineCfg.ReadoutLayerConfig.OutputFieldNameCollection,
                                                    normRange,
                                                    demoCaseParams.NormalizerReserveRatio,
                                                    true,
                                                    out bundleNormalizer
                                                    );
        rsi = stateMachine.PrepareRegressionStageInput(data, PredictorsCollectionCallback, log);
    }
    //Report reservoirs statistics
    ReportReservoirsStatistics(rsi.ReservoirStatCollection, log);
    //Regression stage
    log.Write(" Regression stage", false);
    //Training - State Machine regression stage
    ValidationBundle vb = stateMachine.RegressionStage(rsi, RegressionControl, log);
    //Perform prediction if the task type is Prediction
    double[] predictionOutputVector = null;
    if (demoCaseParams.stateMachineCfg.TaskType == CommonEnums.TaskType.Prediction)
    {
        predictionOutputVector = stateMachine.Compute(predictionInputVector);
        //Values are normalized so they have to be denormalized
        bundleNormalizer.NaturalizeOutputVector(predictionOutputVector);
    }
    //Display results
    //Report training (regression) results and prediction
    log.Write(" Results", false);
    List<ReadoutLayer.ClusterErrStatistics> clusterErrStatisticsCollection = stateMachine.ClusterErrStatisticsCollection;
    //Classification results - one report per output field
    for (int outputIdx = 0; outputIdx < demoCaseParams.stateMachineCfg.ReadoutLayerConfig.OutputFieldNameCollection.Count; outputIdx++)
    {
        ReadoutLayer.ClusterErrStatistics ces = clusterErrStatisticsCollection[outputIdx];
        if (demoCaseParams.stateMachineCfg.TaskType == CommonEnums.TaskType.Classification)
        {
            //Classification task report (per-bin and total error statistics)
            log.Write(" OutputField: " + demoCaseParams.stateMachineCfg.ReadoutLayerConfig.OutputFieldNameCollection[outputIdx], false);
            log.Write(" Num of bin 0 samples: " + ces.BinaryErrStat.BinValErrStat[0].NumOfSamples.ToString(), false);
            log.Write(" Bad bin 0 classif.: " + ces.BinaryErrStat.BinValErrStat[0].Sum.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Bin 0 error rate: " + ces.BinaryErrStat.BinValErrStat[0].ArithAvg.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Bin 0 accuracy: " + (1 - ces.BinaryErrStat.BinValErrStat[0].ArithAvg).ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Num of bin 1 samples: " + ces.BinaryErrStat.BinValErrStat[1].NumOfSamples.ToString(), false);
            log.Write(" Bad bin 1 classif.: " + ces.BinaryErrStat.BinValErrStat[1].Sum.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Bin 1 error rate: " + ces.BinaryErrStat.BinValErrStat[1].ArithAvg.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Bin 1 accuracy: " + (1 - ces.BinaryErrStat.BinValErrStat[1].ArithAvg).ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total num of samples: " + ces.BinaryErrStat.TotalErrStat.NumOfSamples.ToString(), false);
            log.Write(" Total bad classif.: " + ces.BinaryErrStat.TotalErrStat.Sum.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total error rate: " + ces.BinaryErrStat.TotalErrStat.ArithAvg.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total accuracy: " + (1 - ces.BinaryErrStat.TotalErrStat.ArithAvg).ToString(CultureInfo.InvariantCulture), false);
        }
        else
        {
            //Prediction task report (errors are reported in natural, denormalized scale)
            log.Write(" OutputField: " + demoCaseParams.stateMachineCfg.ReadoutLayerConfig.OutputFieldNameCollection[outputIdx], false);
            log.Write(" Predicted next value: " + predictionOutputVector[outputIdx].ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total num of samples: " + ces.PrecissionErrStat.NumOfSamples.ToString(), false);
            log.Write(" Total Max Real Err: " + (bundleNormalizer.OutputFieldNormalizerRefCollection[outputIdx].ComputeNaturalSpan(ces.PrecissionErrStat.Max)).ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total Avg Real Err: " + (bundleNormalizer.OutputFieldNormalizerRefCollection[outputIdx].ComputeNaturalSpan(ces.PrecissionErrStat.ArithAvg)).ToString(CultureInfo.InvariantCulture), false);
        }
        log.Write(" ", false);
    }
    log.Write(" ", false);
    return;
}
/// <summary>
/// Performs one demo case.
/// Loads and prepares sample data, trains the State Machine and displays the results.
/// </summary>
/// <param name="log">Into this interface are written output messages</param>
/// <param name="demoCaseParams">An instance of DemoSettings.CaseSettings to be performed</param>
public static void PerformDemoCase(IOutputLog log, DemoSettings.CaseSettings demoCaseParams)
{
    log.Write(" Performing demo case " + demoCaseParams.Name, false);
    //Bundle normalizer object (created by the data loader; needed later to denormalize prediction output)
    BundleNormalizer bundleNormalizer = null;
    //Prediction input vector (relevant only for input continuous feeding)
    double[] predictionInputVector = null;
    //Instantiate the State Machine
    StateMachine stateMachine = new StateMachine(demoCaseParams.StateMachineCfg);
    //Prepare input object for regression stage
    log.Write(" ", false);
    StateMachine.RegressionStageInput rsi = null;
    //Output field names and task types are projected from the readout unit configurations
    List<string> outputFieldNameCollection = (from rus in demoCaseParams.StateMachineCfg.ReadoutLayerConfig.ReadoutUnitCfgCollection select rus.Name).ToList();
    List<CommonEnums.TaskType> outputFieldTaskCollection = (from rus in demoCaseParams.StateMachineCfg.ReadoutLayerConfig.ReadoutUnitCfgCollection select rus.TaskType).ToList();
    if (demoCaseParams.StateMachineCfg.InputConfig.FeedingType == CommonEnums.InputFeedingType.Continuous)
    {
        //Continuous input feeding
        //Load data bundle from csv file
        TimeSeriesBundle data = TimeSeriesBundle.LoadFromCsv(demoCaseParams.FileName,
                                                             demoCaseParams.StateMachineCfg.InputConfig.ExternalFieldNameCollection(),
                                                             outputFieldNameCollection,
                                                             outputFieldTaskCollection,
                                                             StateMachine.DataRange,
                                                             demoCaseParams.NormalizerReserveRatio,
                                                             true,
                                                             out bundleNormalizer,
                                                             out predictionInputVector
                                                             );
        rsi = stateMachine.PrepareRegressionStageInput(data, PredictorsCollectionCallback, log);
    }
    else
    {
        //Patterned input feeding
        //Load data bundle from csv file
        PatternBundle data = PatternBundle.LoadFromCsv(demoCaseParams.FileName,
                                                       demoCaseParams.StateMachineCfg.InputConfig.ExternalFieldNameCollection(),
                                                       outputFieldNameCollection,
                                                       outputFieldTaskCollection,
                                                       StateMachine.DataRange,
                                                       demoCaseParams.NormalizerReserveRatio,
                                                       true,
                                                       out bundleNormalizer
                                                       );
        rsi = stateMachine.PrepareRegressionStageInput(data, PredictorsCollectionCallback, log);
    }
    //Report statistics of the State Machine's reservoirs
    ReportReservoirsStatistics(rsi.ReservoirStatCollection, log);
    //Regression stage
    log.Write(" Regression stage", false);
    //Perform the regression
    ValidationBundle vb = stateMachine.RegressionStage(rsi, RegressionControl, log);
    //Perform prediction if the input feeding is continuous (we know the input but we don't know the ideal output)
    double[] predictionOutputVector = null;
    if (demoCaseParams.StateMachineCfg.InputConfig.FeedingType == CommonEnums.InputFeedingType.Continuous)
    {
        predictionOutputVector = stateMachine.Compute(predictionInputVector);
        //Values are normalized so they have to be denormalized
        bundleNormalizer.NaturalizeOutputVector(predictionOutputVector);
    }
    //Display results
    //Report training (regression) results and prediction
    log.Write(" Results", false);
    List<ReadoutLayer.ClusterErrStatistics> clusterErrStatisticsCollection = stateMachine.ClusterErrStatisticsCollection;
    //Results - one report per readout unit, branch depends on the unit's task type
    for (int outputIdx = 0; outputIdx < demoCaseParams.StateMachineCfg.ReadoutLayerConfig.ReadoutUnitCfgCollection.Count; outputIdx++)
    {
        ReadoutLayer.ClusterErrStatistics ces = clusterErrStatisticsCollection[outputIdx];
        if (demoCaseParams.StateMachineCfg.ReadoutLayerConfig.ReadoutUnitCfgCollection[outputIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Classification task report (per-bin and total error statistics)
            log.Write(" OutputField: " + demoCaseParams.StateMachineCfg.ReadoutLayerConfig.ReadoutUnitCfgCollection[outputIdx].Name, false);
            log.Write(" Num of bin 0 samples: " + ces.BinaryErrStat.BinValErrStat[0].NumOfSamples.ToString(), false);
            log.Write(" Bad bin 0 classif.: " + ces.BinaryErrStat.BinValErrStat[0].Sum.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Bin 0 error rate: " + ces.BinaryErrStat.BinValErrStat[0].ArithAvg.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Bin 0 accuracy: " + (1 - ces.BinaryErrStat.BinValErrStat[0].ArithAvg).ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Num of bin 1 samples: " + ces.BinaryErrStat.BinValErrStat[1].NumOfSamples.ToString(), false);
            log.Write(" Bad bin 1 classif.: " + ces.BinaryErrStat.BinValErrStat[1].Sum.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Bin 1 error rate: " + ces.BinaryErrStat.BinValErrStat[1].ArithAvg.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Bin 1 accuracy: " + (1 - ces.BinaryErrStat.BinValErrStat[1].ArithAvg).ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total num of samples: " + ces.BinaryErrStat.TotalErrStat.NumOfSamples.ToString(), false);
            log.Write(" Total bad classif.: " + ces.BinaryErrStat.TotalErrStat.Sum.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total error rate: " + ces.BinaryErrStat.TotalErrStat.ArithAvg.ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total accuracy: " + (1 - ces.BinaryErrStat.TotalErrStat.ArithAvg).ToString(CultureInfo.InvariantCulture), false);
        }
        else
        {
            //Forecast task report (errors are reported in natural, denormalized scale)
            log.Write(" OutputField: " + demoCaseParams.StateMachineCfg.ReadoutLayerConfig.ReadoutUnitCfgCollection[outputIdx].Name, false);
            log.Write(" Predicted next value: " + predictionOutputVector[outputIdx].ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total num of samples: " + ces.PrecissionErrStat.NumOfSamples.ToString(), false);
            log.Write(" Total Max Real Err: " + (bundleNormalizer.OutputFieldNormalizerRefCollection[outputIdx].ComputeNaturalSpan(ces.PrecissionErrStat.Max)).ToString(CultureInfo.InvariantCulture), false);
            log.Write(" Total Avg Real Err: " + (bundleNormalizer.OutputFieldNormalizerRefCollection[outputIdx].ComputeNaturalSpan(ces.PrecissionErrStat.ArithAvg)).ToString(CultureInfo.InvariantCulture), false);
        }
        log.Write(" ", false);
    }
    log.Write(" ", false);
    return;
}
/// <summary>
/// Builds readout layer.
/// Prepares prediction clusters containing trained readout units.
/// </summary>
/// <param name="predictorsCollection">Collection of predictors</param>
/// <param name="idealOutputsCollection">Collection of desired outputs related to predictors</param>
/// <param name="regressionController">Regression controller delegate</param>
/// <param name="regressionControllerData">An user object</param>
/// <returns>Returned ValidationBundle is something like a protocol.
/// There is recorded fold by fold (unit by unit) predicted and corresponding ideal values.
/// This is the pessimistic approach. Real results on unseen data could be better due to the clustering synergy.
/// </returns>
public ValidationBundle Build(List<double[]> predictorsCollection,
                              List<double[]> idealOutputsCollection,
                              ReadoutUnit.RegressionCallbackDelegate regressionController,
                              Object regressionControllerData
                              )
{
    //Random object with fixed seed - the build is deterministic and repeatable
    Random rand = new Random(0);
    //Allocation of computed and ideal vectors for validation bundle
    List<double[]> validationComputedVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    List<double[]> validationIdealVectorCollection = new List<double[]>(idealOutputsCollection.Count);
    for (int i = 0; i < idealOutputsCollection.Count; i++)
    {
        validationComputedVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
        validationIdealVectorCollection.Add(new double[idealOutputsCollection[0].Length]);
    }
    //Test dataset size - must stay within the allowed ratio bounds
    if (_settings.TestDataRatio > MaxRatioOfTestData)
    {
        throw new ArgumentException($"Test dataset size is greater than {MaxRatioOfTestData.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    int testDataSetLength = (int)Math.Round(idealOutputsCollection.Count * _settings.TestDataRatio, 0);
    if (testDataSetLength < MinLengthOfTestDataset)
    {
        throw new ArgumentException($"Num of test samples is less than {MinLengthOfTestDataset.ToString(CultureInfo.InvariantCulture)}", "TestDataSetSize");
    }
    //Number of folds (cross-validation)
    int numOfFolds = _settings.NumOfFolds;
    if (numOfFolds <= 0)
    {
        //Auto setup - derive folds count from available samples, capped by MaxNumOfFolds
        numOfFolds = idealOutputsCollection.Count / testDataSetLength;
        if (numOfFolds > MaxNumOfFolds)
        {
            numOfFolds = MaxNumOfFolds;
        }
    }
    //Create shuffled copy of the data
    TimeSeriesBundle shuffledData = new TimeSeriesBundle(predictorsCollection, idealOutputsCollection);
    shuffledData.Shuffle(rand);
    //Data inspection, preparation of datasets and training of ReadoutUnits
    //Clusters of readout units (one cluster for each output field)
    for (int clusterIdx = 0; clusterIdx < _settings.ReadoutUnitCfgCollection.Count; clusterIdx++)
    {
        _clusterCollection[clusterIdx] = new ReadoutUnit[numOfFolds];
        List<double[]> idealValueCollection = new List<double[]>(idealOutputsCollection.Count);
        BinDistribution refBinDistr = null;
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Reference binary distribution is relevant only for classification task
            refBinDistr = new BinDistribution(_dataRange.Mid);
        }
        //Transformation to a single value vectors and data analysis
        foreach (double[] idealVector in shuffledData.OutputVectorCollection)
        {
            double[] value = new double[1];
            value[0] = idealVector[clusterIdx];
            idealValueCollection.Add(value);
            if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
            {
                //Reference binary distribution is relevant only for classification task
                refBinDistr.Update(value);
            }
        }
        List<TimeSeriesBundle> subBundleCollection = null;
        //Datasets preparation is depending on the task type
        if (_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType == CommonEnums.TaskType.Classification)
        {
            //Classification task - folds keep the reference binary distribution
            subBundleCollection = DivideSamplesForClassificationTask(shuffledData.InputVectorCollection,
                                                                     idealValueCollection,
                                                                     refBinDistr,
                                                                     testDataSetLength
                                                                     );
        }
        else
        {
            //Forecast task - folds are taken sequentially
            subBundleCollection = DivideSamplesForForecastTask(shuffledData.InputVectorCollection,
                                                               idealValueCollection,
                                                               testDataSetLength
                                                               );
        }
        //Best predicting unit per each fold in the cluster.
        ClusterErrStatistics ces = new ClusterErrStatistics(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType, numOfFolds, refBinDistr);
        //arrayPos tracks the position within the validation vectors across all folds
        int arrayPos = 0;
        for (int foldIdx = 0; foldIdx < numOfFolds; foldIdx++)
        {
            //Build training samples - every fold except the current one is training data
            List<double[]> trainingPredictorsCollection = new List<double[]>();
            List<double[]> trainingIdealValueCollection = new List<double[]>();
            for (int bundleIdx = 0; bundleIdx < subBundleCollection.Count; bundleIdx++)
            {
                if (bundleIdx != foldIdx)
                {
                    trainingPredictorsCollection.AddRange(subBundleCollection[bundleIdx].InputVectorCollection);
                    trainingIdealValueCollection.AddRange(subBundleCollection[bundleIdx].OutputVectorCollection);
                }
            }
            //Call training regression to get the best fold's readout unit.
            //The best unit becomes to be the predicting cluster member.
            _clusterCollection[clusterIdx][foldIdx] = ReadoutUnit.CreateTrained(_settings.ReadoutUnitCfgCollection[clusterIdx].TaskType,
                                                                                clusterIdx,
                                                                                foldIdx + 1,
                                                                                numOfFolds,
                                                                                refBinDistr,
                                                                                trainingPredictorsCollection,
                                                                                trainingIdealValueCollection,
                                                                                subBundleCollection[foldIdx].InputVectorCollection,
                                                                                subBundleCollection[foldIdx].OutputVectorCollection,
                                                                                rand,
                                                                                _settings.ReadoutUnitCfgCollection[clusterIdx],
                                                                                regressionController,
                                                                                regressionControllerData
                                                                                );
            //Cluster error statistics & data for validation bundle (pessimistic approach)
            for (int sampleIdx = 0; sampleIdx < subBundleCollection[foldIdx].OutputVectorCollection.Count; sampleIdx++)
            {
                double value = _clusterCollection[clusterIdx][foldIdx].Network.Compute(subBundleCollection[foldIdx].InputVectorCollection[sampleIdx])[0];
                ces.Update(value, subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0]);
                validationIdealVectorCollection[arrayPos][clusterIdx] = subBundleCollection[foldIdx].OutputVectorCollection[sampleIdx][0];
                validationComputedVectorCollection[arrayPos][clusterIdx] = value;
                ++arrayPos;
            }
        } //foldIdx
        _clusterErrStatisticsCollection.Add(ces);
    } //clusterIdx
    //Validation bundle is returned.
    return(new ValidationBundle(validationComputedVectorCollection, validationIdealVectorCollection));
}
/// <summary>
/// Divides samples into bundles (folds) for the classification task.
/// Each bundle receives a proportional number of bin 0 and bin 1 samples so the binary
/// distribution within every bundle approximately follows the reference distribution.
/// Remaining samples of each bin are then distributed among the created bundles in round-robin fashion.
/// </summary>
/// <param name="predictorsCollection">Collection of predictor vectors</param>
/// <param name="idealValueCollection">Collection of ideal single-value vectors corresponding to predictors</param>
/// <param name="refBinDistr">Reference binary distribution of the ideal values</param>
/// <param name="bundleSize">Desired number of samples per bundle</param>
/// <returns>Collection of the created bundles</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when bundleSize is not positive.</exception>
/// <exception cref="ArgumentException">Thrown when there are not enough samples to create a single bundle.</exception>
/// <exception cref="InvalidOperationException">Thrown when a bin has not enough samples to populate all bundles.</exception>
private List<TimeSeriesBundle> DivideSamplesForClassificationTask(List<double[]> predictorsCollection,
                                                                  List<double[]> idealValueCollection,
                                                                  BinDistribution refBinDistr,
                                                                  int bundleSize
                                                                  )
{
    if (bundleSize <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(bundleSize), "Bundle size must be GT 0.");
    }
    int numOfBundles = idealValueCollection.Count / bundleSize;
    if (numOfBundles < 1)
    {
        //Fail fast with a clear message; without this guard the divisions below
        //would raise a DivideByZeroException (numOfBundles == 0).
        throw new ArgumentException("Insufficient number of samples to create at least one bundle of the requested size.", nameof(bundleSize));
    }
    List<TimeSeriesBundle> bundleCollection = new List<TimeSeriesBundle>(numOfBundles);
    //Scan: collect sample indexes belonging to bin 0 and bin 1
    int[] bin0SampleIdxs = new int[refBinDistr.NumOf[0]];
    int bin0SamplesPos = 0;
    int[] bin1SampleIdxs = new int[refBinDistr.NumOf[1]];
    int bin1SamplesPos = 0;
    for (int i = 0; i < idealValueCollection.Count; i++)
    {
        if (idealValueCollection[i][0] >= refBinDistr.BinBorder)
        {
            bin1SampleIdxs[bin1SamplesPos++] = i;
        }
        else
        {
            bin0SampleIdxs[bin0SamplesPos++] = i;
        }
    }
    //Division - per-bundle counts per bin (at least one of each)
    int bundleBin0Count = Math.Max(1, refBinDistr.NumOf[0] / numOfBundles);
    int bundleBin1Count = Math.Max(1, refBinDistr.NumOf[1] / numOfBundles);
    if (bundleBin0Count * numOfBundles > bin0SampleIdxs.Length)
    {
        //InvalidOperationException is more specific than the generic Exception
        throw new InvalidOperationException("Insufficient bin 0 samples");
    }
    if (bundleBin1Count * numOfBundles > bin1SampleIdxs.Length)
    {
        throw new InvalidOperationException("Insufficient bin 1 samples");
    }
    //Bundles creation
    bin0SamplesPos = 0;
    bin1SamplesPos = 0;
    for (int bundleNum = 0; bundleNum < numOfBundles; bundleNum++)
    {
        TimeSeriesBundle bundle = new TimeSeriesBundle();
        //Bin 0
        for (int i = 0; i < bundleBin0Count; i++)
        {
            bundle.InputVectorCollection.Add(predictorsCollection[bin0SampleIdxs[bin0SamplesPos]]);
            bundle.OutputVectorCollection.Add(idealValueCollection[bin0SampleIdxs[bin0SamplesPos]]);
            ++bin0SamplesPos;
        }
        //Bin 1
        for (int i = 0; i < bundleBin1Count; i++)
        {
            bundle.InputVectorCollection.Add(predictorsCollection[bin1SampleIdxs[bin1SamplesPos]]);
            bundle.OutputVectorCollection.Add(idealValueCollection[bin1SampleIdxs[bin1SamplesPos]]);
            ++bin1SamplesPos;
        }
        bundleCollection.Add(bundle);
    }
    //Remaining samples of each bin are spread over the existing bundles (round-robin)
    for (int i = 0; i < bin0SampleIdxs.Length - bin0SamplesPos; i++)
    {
        int bundleIdx = i % bundleCollection.Count;
        bundleCollection[bundleIdx].InputVectorCollection.Add(predictorsCollection[bin0SampleIdxs[bin0SamplesPos + i]]);
        bundleCollection[bundleIdx].OutputVectorCollection.Add(idealValueCollection[bin0SampleIdxs[bin0SamplesPos + i]]);
    }
    for (int i = 0; i < bin1SampleIdxs.Length - bin1SamplesPos; i++)
    {
        int bundleIdx = i % bundleCollection.Count;
        bundleCollection[bundleIdx].InputVectorCollection.Add(predictorsCollection[bin1SampleIdxs[bin1SamplesPos + i]]);
        bundleCollection[bundleIdx].OutputVectorCollection.Add(idealValueCollection[bin1SampleIdxs[bin1SamplesPos + i]]);
    }
    return bundleCollection;
}