static VersatileMLDataSet CreateDataset()
{
    var dataset = new VersatileMLDataSet(new CSVDataSource("survey_processed_results.csv", true, CSVFormat.English));
    ColumnDefinition outputColumnDefinition = null;

    // Read the header row of the pre-processed CSV to discover the columns.
    using (TextReader textReader = File.OpenText("survey_processed_results.csv"))
    {
        var headers = textReader.ReadLine();
        var splitHeaders = headers.Replace("\"", "").Split(',');

        for (var i = 0; i < splitHeaders.Length; i++)
        {
            var header = splitHeaders[i];

            if (header.Contains("Id"))
            {
                dataset.DefineSourceColumn(header, i, ColumnType.Ignore);
                continue;
            }

            if (header.Contains("Salary"))
            {
                outputColumnDefinition = dataset.DefineSourceColumn(header, i, ColumnType.Continuous);
                continue;
            }

            if (header.Contains("Years"))
            {
                var yearsColumn = dataset.DefineSourceColumn(header, i, ColumnType.Ordinal);
                yearsColumn.DefineClass(new[]
                {
                    "ZeroToTwo", "ThreeToFive", "SixToEight", "NineToEleven",
                    "TwelveToFourteen", "FifteenToSeventeen", "EighteenToTwenty",
                    "TwentyOneToTwentyThree", "TwentyFourToTwentySix",
                    "TwentySevenToTwentyNine", "ThirtyPlus"
                });
                continue;
            }

            // All other columns are booleans - most of which are a result
            // of one-hot encoding.
            var booleanColumn = dataset.DefineSourceColumn(header, i, ColumnType.Ordinal);
            booleanColumn.DefineClass(new[] { "False", "True" });
        }
    }

    dataset.DefineSingleOutputOthersInput(outputColumnDefinition);
    dataset.Analyze();
    return dataset;
}
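// A minimal sketch of how the dataset returned by CreateDataset() might be fed into
// Encog's model layer, following the same pipeline the other examples in this section
// use. The hold-back percentage, seed, and fold count are illustrative choices, not
// values taken from the original code.
static IMLRegression TrainSalaryModel()
{
    VersatileMLDataSet dataset = CreateDataset();

    var model = new EncogModel(dataset);
    model.SelectMethod(dataset, MLMethodFactory.TypeFeedforward);
    model.Report = new ConsoleStatusReportable();

    // Normalize after the model type has been selected, so Encog can pick a
    // normalization strategy that suits it.
    dataset.Normalize();

    // Hold back 30% for validation; shuffling is fine here because this is not
    // time-series data.
    model.HoldBackValidation(0.3, true, 1001);
    model.SelectTrainingType(dataset);

    // 5-fold cross-validated training; returns the best method found.
    return (IMLRegression)model.Crossvalidate(5, true);
}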
public void Train(string trainingExamplesPath)
{
    IVersatileDataSource source = new CSVDataSource(trainingExamplesPath, false, CSVFormat.DecimalPoint);
    var data = new VersatileMLDataSet(source);

    // Define the twelve continuous input columns.
    data.DefineSourceColumn("num1", 0, ColumnType.Continuous);
    data.DefineSourceColumn("num2", 1, ColumnType.Continuous);
    data.DefineSourceColumn("num3", 2, ColumnType.Continuous);
    data.DefineSourceColumn("num4", 3, ColumnType.Continuous);
    data.DefineSourceColumn("num5", 4, ColumnType.Continuous);
    data.DefineSourceColumn("num6", 5, ColumnType.Continuous);
    data.DefineSourceColumn("num7", 6, ColumnType.Continuous);
    data.DefineSourceColumn("num8", 7, ColumnType.Continuous);
    data.DefineSourceColumn("num9", 8, ColumnType.Continuous);
    data.DefineSourceColumn("num10", 9, ColumnType.Continuous);
    data.DefineSourceColumn("num11", 10, ColumnType.Continuous);
    data.DefineSourceColumn("num12", 11, ColumnType.Continuous);

    // Define the column that we are trying to predict.
    ColumnDefinition outputColumn = data.DefineSourceColumn("kind", 12, ColumnType.Nominal);

    // Analyze the data, then map the prediction column to the output and all
    // other columns to the input.
    data.Analyze();
    data.DefineSingleOutputOthersInput(outputColumn);

    var model = new EncogModel(data);
    model.SelectMethod(data, MLMethodFactory.TypeFeedforward);

    // Send any output to the console.
    model.Report = new ConsoleStatusReportable();

    // Now normalize the data. Encog will automatically determine the correct
    // normalization type based on the model you chose in the last step.
    data.Normalize();

    // Hold back some data for a final validation, shuffled with a fixed seed
    // so that results are repeatable.
    model.HoldBackValidation(0.3, true, 1001);

    // Choose whatever is the default training type for this model.
    model.SelectTrainingType(data);

    // Use a 5-fold cross-validated train. Return the best method found.
    bestMethod = (IMLRegression)model.Crossvalidate(5, true);

    //Console.WriteLine(@"Training error: " + model.CalculateError(bestMethod, model.TrainingDataset));
    //Console.WriteLine(@"Validation error: " + model.CalculateError(bestMethod, model.ValidationDataset));

    // Keep the normalization parameters so new data can be normalized later.
    helper = data.NormHelper;
    //Console.WriteLine(helper.ToString());

    // Display the final model.
    //Console.WriteLine(@"Final model: " + bestMethod);

    source.Close();
    saveNetwork("save.eg");
    savehelper("helper.hp");
    //EncogFramework.Instance.Shutdown();
}
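// A minimal sketch of how the trained model and normalization helper stored above
// could classify a single new example, following the same pattern as the iris example
// at the end of this section. The method name is hypothetical; "values" is expected to
// hold the twelve input columns as strings.
public string Classify(string[] values)
{
    // Allocate an input vector sized for the model, normalize the raw strings into it,
    // run the network, and denormalize the winning class back to its label.
    IMLData input = helper.AllocateInputVector();
    helper.NormalizeInputVector(values, ((BasicMLData)input).Data, false);
    IMLData output = bestMethod.Compute(input);
    return helper.DenormalizeOutputVectorToString(output)[0];
}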
public PredictionMachine CreateAndTrainMachine()
{
    VersatileMLDataSet trainingDataSet = CsvFileHelper.LoadDataSetFromCsv(GlobalConfig.CsvTickersFilename);

    #region Configure Dataset

    trainingDataSet.ConfigureDatacolumnsToUse(
        inputColumns: _predictionConfig.InputColumnNames,
        outputColumnName: _predictionConfig.PredictedColumnName);

    // Number of input nodes.
    trainingDataSet.LagWindowSize = _predictionConfig.NumberOfPreviousTickers;

    // Number of output nodes.
    trainingDataSet.LeadWindowSize = 1;

    // Analyze the data: Encog determines the min/max/mean/sd of every column here.
    trainingDataSet.Analyze();

    #endregion

    #region Train new neural net

    var trainer = new EncogModel(trainingDataSet);

    // Choose the network structure.
    trainer.SelectMethod(trainingDataSet, MLMethodFactory.TypeFeedforward);

    trainingDataSet.Normalize();

    // Hold back 30% for validation; never shuffle time-series data.
    trainer.HoldBackValidation(
        validationPercent: 0.3,
        shuffle: false,
        seed: TOTALLY_RANDOM_NUMBER);

    // Choose whatever is the default training type for this model.
    trainer.SelectTrainingType(trainingDataSet);
    trainer.Report = new ConsoleStatusReportable();

    // Create and train the network with a 5-fold cross-validated train
    // (this is where the real work happens).
    var trainedNeuralNet = (IMLRegression)trainer
        .Crossvalidate(k: 5, shuffle: false);

    #endregion

    // Package into a prediction machine.
    var predictionMachine = new PredictionMachine(
        neuralNet: trainedNeuralNet,
        normalizationHelper: trainingDataSet.NormHelper,
        config: _predictionConfig);

    return predictionMachine;
}
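// PredictionMachine itself is not shown here. A minimal sketch of how such a class
// could turn recent rows into a single forecast, reusing the VectorWindow pattern from
// the time-series example later in this section. The method name, the _neuralNet and
// _normalizationHelper fields, and the window sizing are hypothetical, not taken from
// the original code.
public string PredictNext(IEnumerable<string[]> recentRows)
{
    int lag = _predictionConfig.NumberOfPreviousTickers;
    var window = new VectorWindow(lag + 1);
    IMLData input = _normalizationHelper.AllocateInputVector(lag + 1);

    foreach (var row in recentRows)
    {
        // Normalize one row (one time slice) and push it into the window.
        var slice = new double[row.Length];
        _normalizationHelper.NormalizeInputVector(row, slice, false);
        window.Add(slice);
    }

    if (!window.IsReady())
    {
        throw new InvalidOperationException("Not enough rows to fill the lag window.");
    }

    // Copy the accumulated window into the input vector and run the trained network.
    window.CopyWindow(((BasicMLData)input).Data, 0);
    IMLData output = _neuralNet.Compute(input);
    return _normalizationHelper.DenormalizeOutputVectorToString(output)[0];
}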
static void Main(string[] args)
{
    var network = new BasicNetwork();
    network.AddLayer(new BasicLayer(2));
    network.AddLayer(new BasicLayer(3));
    network.AddLayer(new BasicLayer(1));
    network.Structure.FinalizeStructure();
    network.Reset();

    var trainingDataSource = new CSVDataSource(@"Data\training.csv", true, ',');
    //var validationDataSource = new CSVDataSource(@"Data\validation.csv", true, ',');

    var trainingSet = new VersatileMLDataSet(trainingDataSource);
    //var validationSet = new VersatileMLDataSet(validationDataSource);

    // Note: the source columns (inputs and output) need to be defined before
    // Analyze/Normalize, as the other examples in this section do; see the
    // sketch after this method.
    trainingSet.Analyze();
    trainingSet.Normalize();

    var training = new ResilientPropagation(network, trainingSet);

    int epoch = 1;
    do
    {
        training.Iteration();
        Console.WriteLine($"Epoch #{epoch}. Error: {training.Error}");
        epoch++;
    } while (training.Error > 0.01);
    training.FinishTraining();

    Console.WriteLine("Neural Network Results:");
    foreach (var pair in trainingSet)
    {
        var output = network.Compute(pair.Input);
        Console.WriteLine($"{pair.Input[0]},{pair.Input[1]}, actual={output[0]}, ideal={pair.Ideal[0]}");
    }

    EncogFramework.Instance.Shutdown();
}
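// A minimal sketch of the column setup referenced above, assuming a hypothetical
// Data\training.csv with two continuous inputs ("x1", "x2") and one continuous
// output ("y") to match the 2-3-1 network. The column names are illustrative and
// should be adjusted to the actual CSV headers.
static VersatileMLDataSet LoadTrainingSet()
{
    var source = new CSVDataSource(@"Data\training.csv", true, CSVFormat.English);
    var trainingSet = new VersatileMLDataSet(source);

    // Hypothetical column names and positions.
    trainingSet.DefineSourceColumn("x1", 0, ColumnType.Continuous);
    trainingSet.DefineSourceColumn("x2", 1, ColumnType.Continuous);
    ColumnDefinition output = trainingSet.DefineSourceColumn("y", 2, ColumnType.Continuous);

    // Map the prediction column to the output and the rest to the input,
    // then analyze and normalize as in the other examples.
    trainingSet.DefineSingleOutputOthersInput(output);
    trainingSet.Analyze();
    trainingSet.Normalize();
    return trainingSet;
}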
/// <summary>
/// Program entry point.
/// </summary>
/// <param name="app">Holds arguments and other info.</param>
public void Execute(IExampleInterface app)
{
    ErrorCalculation.Mode = ErrorCalculationMode.RMS;

    // Download the data that we will attempt to model.
    string filename = DownloadData(app.Args);

    // Define the format of the data file.
    // This area will change, depending on the columns and
    // format of the file that you are trying to model.
    var format = new CSVFormat('.', ' '); // decimal point and space separated

    IVersatileDataSource source = new CSVDataSource(filename, true, format);
    var data = new VersatileMLDataSet(source);
    data.NormHelper.Format = format;

    ColumnDefinition columnSSN = data.DefineSourceColumn("SSN", ColumnType.Continuous);
    ColumnDefinition columnDEV = data.DefineSourceColumn("DEV", ColumnType.Continuous);

    // Analyze the data, determine the min/max/mean/sd of every column.
    data.Analyze();

    // Use SSN & DEV to predict SSN. For time-series it is okay to have
    // SSN both as an input and an output.
    data.DefineInput(columnSSN);
    data.DefineInput(columnDEV);
    data.DefineOutput(columnSSN);

    // Create a feedforward neural network as the model type.
    // MLMethodFactory.TYPE_FEEDFORWARD.
    // You could also use other model types, such as:
    // MLMethodFactory.SVM: Support Vector Machine (SVM)
    // MLMethodFactory.TYPE_RBFNETWORK: RBF Neural Network
    // MLMethodFactory.TYPE_NEAT: NEAT Neural Network
    // MLMethodFactory.TYPE_PNN: Probabilistic Neural Network
    var model = new EncogModel(data);
    model.SelectMethod(data, MLMethodFactory.TypeFeedforward);

    // Send any output to the console.
    model.Report = new ConsoleStatusReportable();

    // Now normalize the data. Encog will automatically determine the
    // correct normalization type based on the model you chose in the last step.
    data.Normalize();

    // Set time series.
    data.LeadWindowSize = 1;
    data.LagWindowSize = WindowSize;

    // Hold back some data for a final validation.
    // Do not shuffle the data into a random ordering. (Never shuffle time series.)
    // Use a seed of 1001 so that we always use the same holdback and
    // will get more consistent results.
    model.HoldBackValidation(0.3, false, 1001);

    // Choose whatever is the default training type for this model.
    model.SelectTrainingType(data);

    // Use a 5-fold cross-validated train. Return the best method found.
    // (Never shuffle time series.)
    var bestMethod = (IMLRegression)model.Crossvalidate(5, false);

    // Display the training and validation errors.
    Console.WriteLine(@"Training error: "
                      + model.CalculateError(bestMethod, model.TrainingDataset));
    Console.WriteLine(@"Validation error: "
                      + model.CalculateError(bestMethod, model.ValidationDataset));

    // Display our normalization parameters.
    NormalizationHelper helper = data.NormHelper;
    Console.WriteLine(helper.ToString());

    // Display the final model.
    Console.WriteLine(@"Final model: " + bestMethod);

    // Loop over the entire, original, dataset and feed it through the
    // model. This also shows how you would process new data, that was
    // not part of your training set. You do not need to retrain, simply
    // use the NormalizationHelper class. After you train, you can save
    // the NormalizationHelper to later normalize and denormalize your
    // data.
    source.Close();
    var csv = new ReadCSV(filename, true, format);
    var line = new String[2];

    // Create a vector to hold each time-slice, as we build them.
    // These will be grouped together into windows.
    var slice = new double[2];
    var window = new VectorWindow(WindowSize + 1);

    IMLData input = helper.AllocateInputVector(WindowSize + 1);

    // Only display the first 100 rows.
    int stopAfter = 100;

    while (csv.Next() && stopAfter > 0)
    {
        var result = new StringBuilder();

        line[0] = csv.Get(2); // ssn
        line[1] = csv.Get(3); // dev
        helper.NormalizeInputVector(line, slice, false);

        // Enough data to build a full window?
        if (window.IsReady())
        {
            window.CopyWindow(((BasicMLData)input).Data, 0);
            String correct = csv.Get(2); // trying to predict SSN.
            IMLData output = bestMethod.Compute(input);
            String predicted = helper.DenormalizeOutputVectorToString(output)[0];

            result.Append(string.Join(",", line));
            result.Append(" -> predicted: ");
            result.Append(predicted);
            result.Append(" (correct: ");
            result.Append(correct);
            result.Append(")");
            Console.WriteLine(result.ToString());
        }

        // Add the normalized slice to the window. We do this just after
        // checking to see if the window is ready so that the window is
        // always one behind the current row. This is because we are
        // trying to predict the next row.
        window.Add(slice);

        stopAfter--;
    }

    csv.Close();

    // Delete the data file and shut down.
    File.Delete(filename);
    EncogFramework.Instance.Shutdown();
}
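// Because the window above is always kept one row behind the current line, it ends up
// holding the most recently processed slices. A minimal sketch of how the same pieces
// could be reused to forecast one step past the last row the model has seen; the
// method name and parameters are hypothetical, not part of the original example.
private static void ForecastNextValue(VectorWindow window, NormalizationHelper helper,
    IMLRegression bestMethod, int windowSize)
{
    if (!window.IsReady())
    {
        return; // not enough history to fill the lag window
    }

    // Copy the latest window into an input vector and run the trained model once more.
    IMLData input = helper.AllocateInputVector(windowSize + 1);
    window.CopyWindow(((BasicMLData)input).Data, 0);

    IMLData output = bestMethod.Compute(input);
    string predicted = helper.DenormalizeOutputVectorToString(output)[0];
    Console.WriteLine("Forecast for the next (unseen) row: " + predicted);
}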
/// <summary>
/// Program entry point.
/// </summary>
/// <param name="app">Holds arguments and other info.</param>
public void Execute(IExampleInterface app)
{
    // Download the data that we will attempt to model.
    string irisFile = DownloadData(app.Args);

    // Define the format of the data file.
    // This area will change, depending on the columns and
    // format of the file that you are trying to model.
    IVersatileDataSource source = new CSVDataSource(irisFile, false, CSVFormat.DecimalPoint);

    var data = new VersatileMLDataSet(source);
    data.DefineSourceColumn("sepal-length", 0, ColumnType.Continuous);
    data.DefineSourceColumn("sepal-width", 1, ColumnType.Continuous);
    data.DefineSourceColumn("petal-length", 2, ColumnType.Continuous);
    data.DefineSourceColumn("petal-width", 3, ColumnType.Continuous);

    // Define the column that we are trying to predict.
    ColumnDefinition outputColumn = data.DefineSourceColumn("species", 4, ColumnType.Nominal);

    // Analyze the data, determine the min/max/mean/sd of every column.
    data.Analyze();

    // Map the prediction column to the output of the model, and all
    // other columns to the input.
    data.DefineSingleOutputOthersInput(outputColumn);

    // Create a feedforward neural network as the model type. MLMethodFactory.TYPE_FEEDFORWARD.
    // You could also use other model types, such as:
    // MLMethodFactory.SVM: Support Vector Machine (SVM)
    // MLMethodFactory.TYPE_RBFNETWORK: RBF Neural Network
    // MLMethodFactory.TYPE_NEAT: NEAT Neural Network
    // MLMethodFactory.TYPE_PNN: Probabilistic Neural Network
    var model = new EncogModel(data);
    model.SelectMethod(data, MLMethodFactory.TypeFeedforward);

    // Send any output to the console.
    model.Report = new ConsoleStatusReportable();

    // Now normalize the data. Encog will automatically determine the correct normalization
    // type based on the model you chose in the last step.
    data.Normalize();

    // Hold back some data for a final validation.
    // Shuffle the data into a random ordering.
    // Use a seed of 1001 so that we always use the same holdback and will get more consistent results.
    model.HoldBackValidation(0.3, true, 1001);

    // Choose whatever is the default training type for this model.
    model.SelectTrainingType(data);

    // Use a 5-fold cross-validated train. Return the best method found.
    var bestMethod = (IMLRegression)model.Crossvalidate(5, true);

    // Display the training and validation errors.
    Console.WriteLine(@"Training error: "
                      + model.CalculateError(bestMethod, model.TrainingDataset));
    Console.WriteLine(@"Validation error: "
                      + model.CalculateError(bestMethod, model.ValidationDataset));

    // Display our normalization parameters.
    NormalizationHelper helper = data.NormHelper;
    Console.WriteLine(helper.ToString());

    // Display the final model.
    Console.WriteLine(@"Final model: " + bestMethod);

    // Loop over the entire, original, dataset and feed it through the model.
    // This also shows how you would process new data, that was not part of your
    // training set. You do not need to retrain, simply use the NormalizationHelper
    // class. After you train, you can save the NormalizationHelper to later
    // normalize and denormalize your data.
    source.Close();
    var csv = new ReadCSV(irisFile, false, CSVFormat.DecimalPoint);
    var line = new String[4];
    IMLData input = helper.AllocateInputVector();

    while (csv.Next())
    {
        var result = new StringBuilder();
        line[0] = csv.Get(0);
        line[1] = csv.Get(1);
        line[2] = csv.Get(2);
        line[3] = csv.Get(3);
        String correct = csv.Get(4);

        helper.NormalizeInputVector(line, ((BasicMLData)input).Data, false);
        IMLData output = bestMethod.Compute(input);
        String irisChosen = helper.DenormalizeOutputVectorToString(output)[0];

        result.Append(string.Join(",", line));
        result.Append(" -> predicted: ");
        result.Append(irisChosen);
        result.Append(" (correct: ");
        result.Append(correct);
        result.Append(")");
        Console.WriteLine(result.ToString());
    }

    csv.Close();

    // Delete the data file and shut down.
    File.Delete(irisFile);
    EncogFramework.Instance.Shutdown();
}
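// The comment above notes that the NormalizationHelper can be saved so new data can be
// normalized and denormalized later without retraining. A minimal sketch of persisting
// the results, assuming the best method found is a BasicNetwork and that
// NormalizationHelper can be binary-serialized; the method and file names are
// illustrative.
private static void SaveModel(IMLRegression bestMethod, NormalizationHelper helper)
{
    // Encog's own persistence handles the trained network.
    EncogDirectoryPersistence.SaveObject(new FileInfo("iris_network.eg"), (BasicNetwork)bestMethod);

    // The normalization parameters are plain object state; standard .NET serialization
    // is one option (assumption: NormalizationHelper is marked serializable).
    using (var stream = File.Create("iris_helper.bin"))
    {
        new BinaryFormatter().Serialize(stream, helper);
    }
}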