/// <summary>
/// Loads the specified file and executes the StateMachine training.
/// </summary>
/// <param name="stateMachine">An instance of StateMachine to be trained.</param>
/// <param name="trainingDataFileName">The name of the csv file containing the training data.</param>
/// <param name="predictionInputVector">The vector to be used for next prediction (relevant only in case of continuous feeding of the input).</param>
protected void TrainStateMachine(StateMachine stateMachine, string trainingDataFileName, out double[] predictionInputVector)
{
    //Register to build-progress event
    stateMachine.RL.RLBuildProgressChanged += OnRLBuildProgressChanged;
    //Load csv data
    CsvDataHolder trainingCsvData = new CsvDataHolder(trainingDataFileName);
    //Convert csv data to VectorBundle useable for StateMachine training.
    //Prediction input vector is relevant only for continuous feeding; default it to null up front.
    predictionInputVector = null;
    VectorBundle trainingData;
    //Continuous feeding requires enabled neural preprocessing
    bool continuousFeeding = stateMachine.Config.NeuralPreprocessorCfg != null
                             && stateMachine.Config.NeuralPreprocessorCfg.InputEncoderCfg.FeedingCfg.FeedingType == InputEncoder.InputFeedingType.Continuous;
    if (continuousFeeding)
    {
        //Continuous feeding data format
        trainingData = VectorBundle.Load(trainingCsvData,
                                         stateMachine.Config.NeuralPreprocessorCfg.InputEncoderCfg.VaryingFieldsCfg.ExternalFieldsCfg.GetFieldNames(),
                                         stateMachine.Config.ReadoutLayerCfg.OutputFieldNameCollection,
                                         out predictionInputVector
                                         );
    }
    else
    {
        //Patterned feeding data format (the same load is used when neural preprocessing is bypassed)
        trainingData = VectorBundle.Load(trainingCsvData,
                                         stateMachine.Config.ReadoutLayerCfg.OutputFieldNameCollection.Count
                                         );
    }
    if (stateMachine.Config.NeuralPreprocessorCfg != null)
    {
        //Neural preprocessing is enabled - register to PreprocessingProgressChanged event
        stateMachine.NP.PreprocessingProgressChanged += OnPreprocessingProgressChanged;
    }
    //StateMachine training
    StateMachine.TrainingResults trainingResults = stateMachine.Train(trainingData);
    _log.Write(string.Empty);
    //Report training results
    _log.Write(" Training results", false);
    string trainingReport = trainingResults.RegressionResults.GetTrainingResultsReport(6);
    _log.Write(trainingReport);
    _log.Write(string.Empty);
    //Finished
    return;
}
/// <summary>
/// Loads the specified file and executes the StateMachine verification.
/// </summary>
/// <param name="stateMachine">An instance of StateMachine to be verified.</param>
/// <param name="verificationDataFileName">The name of the csv file containing the verification data.</param>
/// <param name="omittedInputVector">Remaining input vector from training phase (relevant only in case of continuous feeding of the input).</param>
/// <param name="predictionInputVector">The vector to be used for next prediction (relevant only in case of continuous feeding of the input).</param>
protected void VerifyStateMachine(StateMachine stateMachine, string verificationDataFileName, double[] omittedInputVector, out double[] predictionInputVector)
{
    //Load csv data
    CsvDataHolder verificationCsvData = new CsvDataHolder(verificationDataFileName);
    //Convert csv data to VectorBundle useable for StateMachine verification.
    //Prediction input vector is relevant only for continuous feeding; default it to null up front.
    predictionInputVector = null;
    VectorBundle verificationData;
    //Continuous feeding requires enabled neural preprocessing
    bool continuousFeeding = stateMachine.Config.NeuralPreprocessorCfg != null
                             && stateMachine.Config.NeuralPreprocessorCfg.InputEncoderCfg.FeedingCfg.FeedingType == InputEncoder.InputFeedingType.Continuous;
    if (continuousFeeding)
    {
        //Continuous input feeding.
        //Last known input values from training (omittedInputVector) must be pushed into the reservoirs to keep time series continuity
        //(first input data in verification.csv is output of the last data in training.csv).
        //Both the returned output and the readout data are irrelevant here, so they are discarded.
        stateMachine.Compute(omittedInputVector, out _);
        //Load verification data and get new predictionInputVector for final prediction
        verificationData = VectorBundle.Load(verificationCsvData,
                                             stateMachine.Config.NeuralPreprocessorCfg.InputEncoderCfg.VaryingFieldsCfg.ExternalFieldsCfg.GetFieldNames(),
                                             stateMachine.Config.ReadoutLayerCfg.OutputFieldNameCollection,
                                             out predictionInputVector
                                             );
    }
    else
    {
        //Patterned feeding data format (the same load is used when neural preprocessing is bypassed)
        verificationData = VectorBundle.Load(verificationCsvData, stateMachine.Config.ReadoutLayerCfg.OutputFieldNameCollection.Count);
    }
    //StateMachine verification
    //Register to VerificationProgressChanged event
    stateMachine.VerificationProgressChanged += OnVerificationProgressChanged;
    StateMachine.VerificationResults verificationResults = stateMachine.Verify(verificationData);
    _log.Write(string.Empty);
    //Report verification results
    _log.Write(" Verification results", false);
    _log.Write(verificationResults.GetReport(6));
    _log.Write(string.Empty);
    //Finished
    return;
}
/// <summary>
/// Exercises VectorBundle.Folderize on the given classification datafile and prints per-fold statistics.
/// </summary>
/// <param name="dataFile">The name of the csv file containing the data to be folderized.</param>
/// <param name="numOfClasses">The number of output classes in the data.</param>
private void TestDataBundleFolderization(string dataFile, int numOfClasses)
{
    //Load the csv file and convert it to a VectorBundle
    CsvDataHolder sourceCsv = new CsvDataHolder(dataFile);
    VectorBundle bundle = VectorBundle.Load(sourceCsv, numOfClasses);
    //Binary decision border for class membership
    double binBorder = 0.5d;
    //Set of fold data ratios to be tried (includes invalid/edge values on purpose)
    double[] ratiosToTest = { -1d, 0d, 0.1d, 0.5d, 0.75d, 1d, 2d };
    Console.WriteLine($"Folderization test of {dataFile}. NumOfSamples={bundle.InputVectorCollection.Count.ToString(CultureInfo.InvariantCulture)}, NumOfFoldDataRatios={ratiosToTest.Length.ToString(CultureInfo.InvariantCulture)}");
    foreach (double ratio in ratiosToTest)
    {
        Console.WriteLine($" Testing fold data ratio = {ratio.ToString(CultureInfo.InvariantCulture)}");
        //Perform the folderization
        List<VectorBundle> foldList = bundle.Folderize(ratio, binBorder);
        Console.WriteLine($" Number of resulting folds = {foldList.Count.ToString(CultureInfo.InvariantCulture)}");
        for (int f = 0; f < foldList.Count; f++)
        {
            int foldSize = foldList[f].InputVectorCollection.Count;
            Console.WriteLine($" FoldIdx={f.ToString(CultureInfo.InvariantCulture),-4} FoldSize={foldSize.ToString(CultureInfo.InvariantCulture),-4}");
            //Count samples above the binary border for each class within this fold
            int[] positiveCounts = new int[numOfClasses];
            positiveCounts.Populate(0);
            for (int s = 0; s < foldSize; s++)
            {
                double[] outputVector = foldList[f].OutputVectorCollection[s];
                for (int c = 0; c < numOfClasses; c++)
                {
                    if (outputVector[c] >= binBorder)
                    {
                        ++positiveCounts[c];
                    }
                }
            }
            Console.WriteLine($" Number of positive samples per class");
            for (int c = 0; c < numOfClasses; c++)
            {
                Console.WriteLine($" ClassID={c.ToString(CultureInfo.InvariantCulture),-3}, Bin1Samples={positiveCounts[c].ToString(CultureInfo.InvariantCulture)}");
            }
        }
        //Wait for user before moving to the next ratio
        Console.ReadLine();
    }
    return;
}
//Methods
/// <summary>
/// Performs specified demo case.
/// </summary>
/// <param name="demoCaseParams">An instance of DemoSettings.CaseSettings to be performed</param>
public void PerformDemoCase(SMDemoSettings.CaseSettings demoCaseParams)
{
    bool continuousFeedingDataFormat = false;
    //Prediction input vector (relevant only for input continuous feeding)
    double[] predictionInputVector = null;
    //Log start
    _log.Write(" Performing demo case " + demoCaseParams.Name, false);
    _log.Write(" ", false);
    //Instantiate the StateMachine
    StateMachine stateMachine = new StateMachine(demoCaseParams.StateMachineCfg);
    //////////////////////////////////////////////////////////////////////////////////////
    //Train StateMachine
    //Register to RegressionEpochDone event
    stateMachine.RL.RegressionEpochDone += OnRegressionEpochDone;
    CsvDataHolder trainingCsvData = new CsvDataHolder(demoCaseParams.TrainingDataFileName);
    VectorBundle trainingData;
    //Presence of string values in the csv header signals the continuous feeding data format
    if (trainingCsvData.ColNameCollection.NumOfStringValues > 0)
    {
        //Continuous feeding data format
        continuousFeedingDataFormat = true;
        //Check NeuralPreprocessor is not bypassed
        if (stateMachine.NP == null)
        {
            throw new InvalidOperationException("Incorrect file format. When NeuralPreprocessor is bypassed, only patterned data are allowed.");
        }
        trainingData = VectorBundle.Load(trainingCsvData,
                                         demoCaseParams.StateMachineCfg.NeuralPreprocessorCfg.InputEncoderCfg.VaryingFieldsCfg.ExternalFieldsCfg.GetFieldNames(),
                                         demoCaseParams.StateMachineCfg.ReadoutLayerCfg.OutputFieldNameCollection,
                                         out predictionInputVector
                                         );
    }
    else
    {
        //Patterned feeding data format
        trainingData = VectorBundle.Load(trainingCsvData, demoCaseParams.StateMachineCfg.ReadoutLayerCfg.OutputFieldNameCollection.Count);
    }
    if (stateMachine.NP != null)
    {
        //Register to PreprocessingProgressChanged event
        stateMachine.NP.PreprocessingProgressChanged += OnPreprocessingProgressChanged;
    }
    //Training
    StateMachine.TrainingResults trainingResults = stateMachine.Train(trainingData);
    _log.Write(string.Empty);
    //Report training (regression) results
    _log.Write(" Training results", false);
    string trainingReport = trainingResults.RegressionResults.GetTrainingResultsReport(6);
    _log.Write(trainingReport);
    _log.Write(string.Empty);
    //////////////////////////////////////////////////////////////////////////////////////
    //Verification of training quality on verification data
    if (demoCaseParams.VerificationDataFileName.Length > 0)
    {
        stateMachine.VerificationProgressChanged += OnVerificationProgressChanged;
        CsvDataHolder verificationCsvData = new CsvDataHolder(demoCaseParams.VerificationDataFileName);
        VectorBundle verificationData;
        if (continuousFeedingDataFormat)
        {
            //Continuous input feeding.
            //Last known input values from training (predictionInputVector) must be pushed into the reservoirs to keep time series continuity
            //(first input data in verification.csv is output of the last data in training.csv).
            //The computed output itself is irrelevant here and is deliberately discarded.
            stateMachine.Compute(predictionInputVector);
            //Load verification data and get new predictionInputVector for final prediction
            verificationData = VectorBundle.Load(verificationCsvData,
                                                 demoCaseParams.StateMachineCfg.NeuralPreprocessorCfg.InputEncoderCfg.VaryingFieldsCfg.ExternalFieldsCfg.GetFieldNames(),
                                                 demoCaseParams.StateMachineCfg.ReadoutLayerCfg.OutputFieldNameCollection,
                                                 out predictionInputVector
                                                 );
        }
        else
        {
            //Patterned feeding data format
            verificationData = VectorBundle.Load(verificationCsvData, demoCaseParams.StateMachineCfg.ReadoutLayerCfg.OutputFieldNameCollection.Count);
        }
        StateMachine.VerificationResults verificationResults = stateMachine.Verify(verificationData);
        _log.Write(string.Empty);
        //Report verification results
        _log.Write(" Verification results", false);
        _log.Write(verificationResults.GetReport(6));
        _log.Write(string.Empty);
    }
    //Perform prediction in case the input feeding is continuous (we know the input but we don't know the ideal output)
    if (continuousFeedingDataFormat)
    {
        double[] predictionOutputVector = stateMachine.Compute(predictionInputVector);
        string predictionReport = stateMachine.RL.GetForecastReport(predictionOutputVector, 6);
        _log.Write(" Forecasts", false);
        _log.Write(predictionReport);
        _log.Write(string.Empty);
    }
    return;
}
/// <summary>
/// Trains the network cluster to perform classification task and then verifies its performance.
/// </summary>
/// <param name="name">The name of a classification task.</param>
/// <param name="trainDataFile">The name of a csv datafile containing the training data.</param>
/// <param name="verifyDataFile">The name of a csv datafile containing the verification data.</param>
/// <param name="numOfClasses">The number of classes.</param>
/// <param name="foldDataRatio">Specifies what part of training data is reserved for testing. It determines the size of data fold and also number of networks within the cluster.</param>
private void PerformClassification(string name, string trainDataFile, string verifyDataFile, int numOfClasses, double foldDataRatio)
{
    _log.Write($"{name} classification performed by the Probabilistic cluster chain ({numOfClasses.ToString(CultureInfo.InvariantCulture)} classes).");
    //Load csv data and create vector bundles
    _log.Write($"Loading {trainDataFile}...");
    CsvDataHolder trainCsvData = new CsvDataHolder(trainDataFile);
    VectorBundle trainData = VectorBundle.Load(trainCsvData, numOfClasses);
    _log.Write($"Loading {verifyDataFile}...");
    CsvDataHolder verifyCsvData = new CsvDataHolder(verifyDataFile);
    VectorBundle verifyData = VectorBundle.Load(verifyCsvData, numOfClasses);
    //Input data standardization
    //Allocation and preparation of the input feature filters (fitted on training data only,
    //then reused for verification data so both use identical scaling)
    FeatureFilterBase[] inputFeatureFilters = PrepareInputFeatureFilters(trainData);
    //Standardize training input data
    StandardizeInputVectors(trainData, inputFeatureFilters);
    //Standardize verification input data
    StandardizeInputVectors(verifyData, inputFeatureFilters);
    //Output data
    //Output data is already in the 0/1 form requested by the SoftMax activation so we don't
    //need to modify it. We only allocate the binary feature filters requested by the cluster chain builder.
    FeatureFilterBase[] outputFeatureFilters = new BinFeatureFilter[numOfClasses];
    for (int i = 0; i < numOfClasses; i++)
    {
        outputFeatureFilters[i] = new BinFeatureFilter(Interval.IntZP1);
    }
    //Cluster chain configuration (we will have two chained clusters)
    //Configuration of the first cluster in the chain
    //End-networks configuration for the first cluster in the chain. For every testing fold will be trained two end-networks with different structure.
    List<FeedForwardNetworkSettings> netCfgs1 = new List<FeedForwardNetworkSettings>
    {
        //The first FF network will have two hidden layers of 30 TanH activated neurons.
        //Output layer will have the SoftMax activation (it must be SoftMax because we will use the Probabilistic cluster).
        new FeedForwardNetworkSettings(new AFAnalogSoftMaxSettings(),
                                       new HiddenLayersSettings(new HiddenLayerSettings(30, new AFAnalogTanHSettings()),
                                                                new HiddenLayerSettings(30, new AFAnalogTanHSettings())
                                                                ),
                                       new RPropTrainerSettings(3, 200)
                                       ),
        //The second FF network will have two hidden layers of 30 LeakyReLU activated neurons.
        //Output layer will have the SoftMax activation (it must be SoftMax because we will use the Probabilistic cluster).
        new FeedForwardNetworkSettings(new AFAnalogSoftMaxSettings(),
                                       new HiddenLayersSettings(new HiddenLayerSettings(30, new AFAnalogLeakyReLUSettings()),
                                                                new HiddenLayerSettings(30, new AFAnalogLeakyReLUSettings())
                                                                ),
                                       new RPropTrainerSettings(3, 200)
                                       )
    };
    //The first probabilistic network cluster configuration instance
    TNRNetClusterProbabilisticSettings clusterCfg1 = new TNRNetClusterProbabilisticSettings(new TNRNetClusterProbabilisticNetworksSettings(netCfgs1),
                                                                                            new TNRNetClusterProbabilisticWeightsSettings()
                                                                                            );
    //Configuration of the second cluster in the chain
    //End-network configuration for the second cluster in the chain. For every testing fold will be trained one end-network.
    List<FeedForwardNetworkSettings> netCfgs2 = new List<FeedForwardNetworkSettings>
    {
        //FF network will have two hidden layers of 30 Elliot activated neurons.
        //Output layer will have the SoftMax activation (it must be SoftMax because we will use the Probabilistic cluster chain).
        new FeedForwardNetworkSettings(new AFAnalogSoftMaxSettings(),
                                       new HiddenLayersSettings(new HiddenLayerSettings(30, new AFAnalogElliotSettings()),
                                                                new HiddenLayerSettings(30, new AFAnalogElliotSettings())
                                                                ),
                                       new RPropTrainerSettings(3, 200)
                                       )
    };
    //The second probabilistic network cluster configuration instance
    TNRNetClusterProbabilisticSettings clusterCfg2 = new TNRNetClusterProbabilisticSettings(new TNRNetClusterProbabilisticNetworksSettings(netCfgs2),
                                                                                            new TNRNetClusterProbabilisticWeightsSettings()
                                                                                            );
    //Probabilistic network cluster chain configuration instance
    //(the crossvalidation fold ratio also determines the number of trained networks per cluster)
    ITNRNetClusterChainSettings chainCfg = new TNRNetClusterChainProbabilisticSettings(new CrossvalidationSettings(foldDataRatio),
                                                                                       new TNRNetClustersProbabilisticSettings(clusterCfg1,
                                                                                                                               clusterCfg2
                                                                                                                               )
                                                                                       );
    _log.Write($"Cluster configuration xml:");
    _log.Write(chainCfg.GetXml(true).ToString());
    //Training
    _log.Write($"Cluster chain training on {trainDataFile}...");
    //An instance of network cluster chain builder.
    TNRNetClusterChainBuilder builder = new TNRNetClusterChainBuilder("Probabilistic Cluster Chain", chainCfg);
    //Register progress event handler
    builder.ChainBuildProgressChanged += OnClusterChainBuildProgressChanged;
    //Build the trained network cluster chain.
    TNRNetClusterChain trainedClusterChain = builder.Build(trainData, outputFeatureFilters);
    //Verification
    _log.Write(string.Empty);
    _log.Write(string.Empty);
    _log.Write($"Cluster chain verification on {verifyDataFile}...");
    _log.Write(string.Empty);
    int numOfErrors = 0;
    for (int i = 0; i < verifyData.InputVectorCollection.Count; i++)
    {
        double[] computed = trainedClusterChain.Compute(verifyData.InputVectorCollection[i], out _);
        //Cluster result: the class with the highest computed value wins
        int computedWinnerIdx = computed.MaxIdx();
        //Real result
        int realWinnerIdx = verifyData.OutputVectorCollection[i].MaxIdx();
        if (computedWinnerIdx != realWinnerIdx)
        {
            ++numOfErrors;
        }
        _log.Write($"({i + 1}/{verifyData.InputVectorCollection.Count}) Errors: {numOfErrors}", true);
    }
    _log.Write(string.Empty);
    _log.Write($"Accuracy {(1d - (double)numOfErrors / (double)verifyData.InputVectorCollection.Count).ToString(CultureInfo.InvariantCulture)}");
    _log.Write(string.Empty);
    return;
}