static VersatileMLDataSet CreateDataset() { var dataset = new VersatileMLDataSet(new CSVDataSource("survey_processed_results.csv", true, CSVFormat.English)); ColumnDefinition outputColumnDefinition = null; // reading in the pre-processed CSV using (TextReader textReader = File.OpenText("survey_processed_results.csv")) { var headers = textReader.ReadLine(); var splitHeaders = headers.Replace("\"", "").Split(','); for (var i = 0; i < splitHeaders.Length; i++) { var header = splitHeaders[i]; if (header.Contains("Id")) { dataset.DefineSourceColumn(header, i, ColumnType.Ignore); continue; } if (header.Contains("Salary")) { outputColumnDefinition = dataset.DefineSourceColumn(header, i, ColumnType.Continuous); continue; } if (header.Contains("Years")) { var yearsColumn = dataset.DefineSourceColumn(header, i, ColumnType.Ordinal); yearsColumn.DefineClass(new[] { "ZeroToTwo", "ThreeToFive", "SixToEight", "NineToEleven", "TwelveToFourteen", "FifteenToSeventeen", "EighteenToTwenty", "TwentyOneToTwentyThree", "TwentyFourToTwentySix", "TwentySevenToTwentyNine", "ThirtyPlus" }); continue; } // all other columns are booleans - most of which are a result // of one-hot encoding. var booleanColumn = dataset.DefineSourceColumn(header, i, ColumnType.Ordinal); booleanColumn.DefineClass(new[] { "False", "True" }); } } dataset.DefineSingleOutputOthersInput(outputColumnDefinition); dataset.Analyze(); return(dataset); }
public void traning(string traningexamplespath) { IVersatileDataSource source = new CSVDataSource(traningexamplespath, false, CSVFormat.DecimalPoint); var data = new VersatileMLDataSet(source); data.DefineSourceColumn("num1", 0, ColumnType.Continuous); data.DefineSourceColumn("num2", 1, ColumnType.Continuous); data.DefineSourceColumn("num1", 2, ColumnType.Continuous); data.DefineSourceColumn("num1", 3, ColumnType.Continuous); data.DefineSourceColumn("num1", 4, ColumnType.Continuous); data.DefineSourceColumn("num1", 5, ColumnType.Continuous); data.DefineSourceColumn("num1", 6, ColumnType.Continuous); data.DefineSourceColumn("num1", 7, ColumnType.Continuous); data.DefineSourceColumn("num1", 8, ColumnType.Continuous); data.DefineSourceColumn("num1", 9, ColumnType.Continuous); data.DefineSourceColumn("num1", 10, ColumnType.Continuous); data.DefineSourceColumn("num1", 11, ColumnType.Continuous); ColumnDefinition outputColumn = data.DefineSourceColumn("kind", 12, ColumnType.Nominal); data.Analyze(); data.DefineSingleOutputOthersInput(outputColumn); var model = new EncogModel(data); model.SelectMethod(data, MLMethodFactory.TypeFeedforward); // Send any output to the console. model.Report = new ConsoleStatusReportable(); // Now normalize the data. Encog will automatically determine the correct normalization // type based on the model you chose in the last step. data.Normalize(); model.HoldBackValidation(0.3, true, 1001); // Choose whatever is the default training type for this model. model.SelectTrainingType(data); // Use a 5-fold cross-validated train. Return the best method found. bestMethod = (IMLRegression)model.Crossvalidate(5, true); //Console.WriteLine(@"Training error: " + model.CalculateError(bestMethod, model.TrainingDataset)); //Console.WriteLine(@"Validation error: " + model.CalculateError(bestMethod, model.ValidationDataset)); // Display our normalization parameters. helper = data.NormHelper; // Console.WriteLine(helper.ToString()); // Display the final model. //Console.WriteLine(@"Final model: " + bestMethod); source.Close(); saveNetwork("save.eg"); savehelper("helper.hp"); //EncogFramework.Instance.Shutdown(); }
/// <summary> /// Program entry point. /// </summary> /// <param name="app">Holds arguments and other info.</param> public void Execute(IExampleInterface app) { // Download the data that we will attempt to model. string irisFile = DownloadData(app.Args); // Define the format of the data file. // This area will change, depending on the columns and // format of the file that you are trying to model. IVersatileDataSource source = new CSVDataSource(irisFile, false, CSVFormat.DecimalPoint); var data = new VersatileMLDataSet(source); data.DefineSourceColumn("sepal-length", 0, ColumnType.Continuous); data.DefineSourceColumn("sepal-width", 1, ColumnType.Continuous); data.DefineSourceColumn("petal-length", 2, ColumnType.Continuous); data.DefineSourceColumn("petal-width", 3, ColumnType.Continuous); // Define the column that we are trying to predict. ColumnDefinition outputColumn = data.DefineSourceColumn("species", 4, ColumnType.Nominal); // Analyze the data, determine the min/max/mean/sd of every column. data.Analyze(); // Map the prediction column to the output of the model, and all // other columns to the input. data.DefineSingleOutputOthersInput(outputColumn); // Create feedforward neural network as the model type. MLMethodFactory.TYPE_FEEDFORWARD. // You could also other model types, such as: // MLMethodFactory.SVM: Support Vector Machine (SVM) // MLMethodFactory.TYPE_RBFNETWORK: RBF Neural Network // MLMethodFactor.TYPE_NEAT: NEAT Neural Network // MLMethodFactor.TYPE_PNN: Probabilistic Neural Network var model = new EncogModel(data); model.SelectMethod(data, MLMethodFactory.TypeFeedforward); // Send any output to the console. model.Report = new ConsoleStatusReportable(); // Now normalize the data. Encog will automatically determine the correct normalization // type based on the model you chose in the last step. data.Normalize(); // Hold back some data for a final validation. // Shuffle the data into a random ordering. // Use a seed of 1001 so that we always use the same holdback and will get more consistent results. model.HoldBackValidation(0.3, true, 1001); // Choose whatever is the default training type for this model. model.SelectTrainingType(data); // Use a 5-fold cross-validated train. Return the best method found. var bestMethod = (IMLRegression)model.Crossvalidate(5, true); // Display the training and validation errors. Console.WriteLine(@"Training error: " + model.CalculateError(bestMethod, model.TrainingDataset)); Console.WriteLine(@"Validation error: " + model.CalculateError(bestMethod, model.ValidationDataset)); // Display our normalization parameters. NormalizationHelper helper = data.NormHelper; Console.WriteLine(helper.ToString()); // Display the final model. Console.WriteLine(@"Final model: " + bestMethod); // Loop over the entire, original, dataset and feed it through the model. // This also shows how you would process new data, that was not part of your // training set. You do not need to retrain, simply use the NormalizationHelper // class. After you train, you can save the NormalizationHelper to later // normalize and denormalize your data. source.Close(); var csv = new ReadCSV(irisFile, false, CSVFormat.DecimalPoint); var line = new String[4]; IMLData input = helper.AllocateInputVector(); while (csv.Next()) { var result = new StringBuilder(); line[0] = csv.Get(0); line[1] = csv.Get(1); line[2] = csv.Get(2); line[3] = csv.Get(3); String correct = csv.Get(4); helper.NormalizeInputVector(line, ((BasicMLData)input).Data, false); IMLData output = bestMethod.Compute(input); String irisChosen = helper.DenormalizeOutputVectorToString(output)[0]; result.Append(line); result.Append(" -> predicted: "); result.Append(irisChosen); result.Append("(correct: "); result.Append(correct); result.Append(")"); Console.WriteLine(result.ToString()); } csv.Close(); // Delete data file ande shut down. File.Delete(irisFile); EncogFramework.Instance.Shutdown(); }