Example #1
        static void Main(string[] args)
        {
            // Define the format of the data file: decimal point, comma separated.
            var format = new CSVFormat('.', ',');
            IVersatileDataSource source = new CSVDataSource("input.csv", true, format);

            var data = new VersatileMLDataSet(source);
            data.NormHelper.Format = format;

            // Define the 228 continuous input columns a1..a228.
            for (int i = 0; i < 228; i++)
            {
                data.DefineSourceColumn("a" + (i + 1), i, ColumnType.Continuous);
            }

            // Define the column that will be predicted.
            ColumnDefinition columnOutput = data.DefineSourceColumn("close", 228, ColumnType.Continuous);

            // Analyze the data, determine the min/max/mean/sd of every column.
            data.Analyze();

            // Map the prediction column to the output of the model, and all
            // other columns to the input.
            data.DefineSingleOutputOthersInput(columnOutput);

            // Create a feedforward neural network as the model type and send
            // any status output to the console.
            var model = new EncogModel(data);
            model.SelectMethod(data, MLMethodFactory.TypeFeedforward);
            model.Report = new ConsoleStatusReportable();

            // Set the time-series windows.
            data.LeadWindowSize = 1;
            data.LagWindowSize = 2;

            // Normalize the data. Encog determines the correct normalization
            // type based on the model selected above.
            data.Normalize();

            // Hold back some data for a final validation, shuffled with a fixed
            // seed of 1001 for consistent results.
            model.HoldBackValidation(0.3, true, 1001);

            // Choose whatever is the default training type for this model.
            model.SelectTrainingType(data);

            // Use a 5-fold cross-validated train.  Return the best method found.
            var bestMethod = (IMLRegression)model.Crossvalidate(5, true);

            // Display the training and validation errors.
            Console.WriteLine(@"Training error: " + model.CalculateError(bestMethod, model.TrainingDataset));
            Console.WriteLine(@"Validation error: " + model.CalculateError(bestMethod, model.ValidationDataset));

            // Display our normalization parameters.
            NormalizationHelper helper = data.NormHelper;
            Console.WriteLine(helper.ToString());

            // Display the final model.
            Console.WriteLine(@"Final model: " + bestMethod);
        }
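This first example trains the model but never feeds new data through it. As a rough sketch only (none of the code below appears in the original example), new rows could be scored at the end of Main with the same NormalizationHelper, following the windowed-prediction pattern of the time-series example further down; newRows, slice, and the window size of 3 (lag window of 2 plus the current row) are assumptions here.

            // Illustrative only: score new rows with the windowed model trained above.
            // Mirrors the VectorWindow pattern from the time-series example below.
            var newRows = new string[0][];            // hypothetical raw rows, 228 input values each
            var slice = new double[228];
            var window = new VectorWindow(3);         // lag window of 2 plus the current row
            IMLData input = helper.AllocateInputVector(3);

            foreach (string[] row in newRows)
            {
                helper.NormalizeInputVector(row, slice, false);
                window.Add(slice);
                if (window.IsReady())
                {
                    window.CopyWindow(((BasicMLData) input).Data, 0);
                    IMLData output = bestMethod.Compute(input);
                    Console.WriteLine(@"Predicted close: "
                                      + helper.DenormalizeOutputVectorToString(output)[0]);
                }
            }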
Example #2
        /// <summary>
        ///     Program entry point.
        /// </summary>
        /// <param name="app">Holds arguments and other info.</param>
        public void Execute(IExampleInterface app)
        {
            // Download the data that we will attempt to model.
            string filename = DownloadData(app.Args);

            // Define the format of the data file.
            // This area will change, depending on the columns and 
            // format of the file that you are trying to model.
            var format = new CSVFormat('.', ' '); // decimal point and space separated
            IVersatileDataSource source = new CSVDataSource(filename, false, format);

            var data = new VersatileMLDataSet(source);
            data.NormHelper.Format = format;

            ColumnDefinition columnMPG = data.DefineSourceColumn("mpg", 0, ColumnType.Continuous);
            ColumnDefinition columnCylinders = data.DefineSourceColumn("cylinders", 1, ColumnType.Ordinal);
            // It is very important to predefine ordinals, so that the order is known.
            columnCylinders.DefineClass(new[] {"3", "4", "5", "6", "8"});
            data.DefineSourceColumn("displacement", 2, ColumnType.Continuous);
            ColumnDefinition columnHorsePower = data.DefineSourceColumn("horsepower", 3, ColumnType.Continuous);
            data.DefineSourceColumn("weight", 4, ColumnType.Continuous);
            data.DefineSourceColumn("acceleration", 5, ColumnType.Continuous);
            ColumnDefinition columnModelYear = data.DefineSourceColumn("model_year", 6, ColumnType.Ordinal);
            columnModelYear.DefineClass(new[]
            {"70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82"});
            data.DefineSourceColumn("origin", 7, ColumnType.Nominal);

            // Define how missing values are represented.
            data.NormHelper.DefineUnknownValue("?");
            data.NormHelper.DefineMissingHandler(columnHorsePower, new MeanMissingHandler());

            // Analyze the data, determine the min/max/mean/sd of every column.
            data.Analyze();

            // Map the prediction column to the output of the model, and all
            // other columns to the input.
            data.DefineSingleOutputOthersInput(columnMPG);

            // Create a feedforward neural network as the model type: MLMethodFactory.TYPE_FEEDFORWARD.
            // You could also use other model types, such as:
            // MLMethodFactory.TYPE_SVM:  Support Vector Machine (SVM)
            // MLMethodFactory.TYPE_RBFNETWORK: RBF Neural Network
            // MLMethodFactory.TYPE_NEAT: NEAT Neural Network
            // MLMethodFactory.TYPE_PNN: Probabilistic Neural Network
            var model = new EncogModel(data);
            model.SelectMethod(data, MLMethodFactory.TypeFeedforward);

            // Send any output to the console.
            model.Report = new ConsoleStatusReportable();

            // Now normalize the data.  Encog will automatically determine the correct normalization
            // type based on the model you chose in the last step.
            data.Normalize();

            // Hold back some data for a final validation.
            // Shuffle the data into a random ordering.
            // Use a seed of 1001 so that we always use the same holdback and will get more consistent results.
            model.HoldBackValidation(0.3, true, 1001);

            // Choose whatever is the default training type for this model.
            model.SelectTrainingType(data);

            // Use a 5-fold cross-validated train.  Return the best method found.
            var bestMethod = (IMLRegression) model.Crossvalidate(5, true);

            // Display the training and validation errors.
            Console.WriteLine(@"Training error: " + model.CalculateError(bestMethod, model.TrainingDataset));
            Console.WriteLine(@"Validation error: " + model.CalculateError(bestMethod, model.ValidationDataset));

            // Display our normalization parameters.
            NormalizationHelper helper = data.NormHelper;
            Console.WriteLine(helper.ToString());

            // Display the final model.
            Console.WriteLine("Final model: " + bestMethod);

            // Loop over the entire original dataset and feed it through the model.
            // This also shows how you would process new data that was not part of your
            // training set.  You do not need to retrain; simply use the NormalizationHelper
            // class.  After you train, you can save the NormalizationHelper to later
            // normalize and denormalize your data (see the persistence sketch after this method).
            source.Close();
            var csv = new ReadCSV(filename, false, format);
            var line = new String[7];
            IMLData input = helper.AllocateInputVector();

            while (csv.Next())
            {
                var result = new StringBuilder();

                line[0] = csv.Get(1);
                line[1] = csv.Get(2);
                line[2] = csv.Get(3);
                line[3] = csv.Get(4);
                line[4] = csv.Get(5);
                line[5] = csv.Get(6);
                line[6] = csv.Get(7);

                String correct = csv.Get(0);
                helper.NormalizeInputVector(line, ((BasicMLData) input).Data, false);
                IMLData output = bestMethod.Compute(input);
                String predictedMpg = helper.DenormalizeOutputVectorToString(output)[0];

                result.Append(String.Join(",", line));
                result.Append(" -> predicted: ");
                result.Append(predictedMpg);
                result.Append(" (correct: ");
                result.Append(correct);
                result.Append(")");

                Console.WriteLine(result.ToString());
            }
            csv.Close();

            // Delete data file and shut down.
            File.Delete(filename);
            EncogFramework.Instance.Shutdown();
        }
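The comment in the loop above mentions that the NormalizationHelper can be saved and reused later without retraining, but the example stops short of showing it. A minimal persistence sketch, assuming this Encog build exposes EncogDirectoryPersistence (the .eg network format) and SerializeObject (binary serialization); the file names are arbitrary:

            // Illustrative persistence sketch (not part of the original example).
            // Save the trained network and the normalization helper, then reload
            // them later to score new data without retraining.
            EncogDirectoryPersistence.SaveObject(new FileInfo("auto-mpg.eg"), (BasicNetwork) bestMethod);
            SerializeObject.Save("normhelper.ser", helper);

            var restoredNetwork = (BasicNetwork) EncogDirectoryPersistence.LoadObject(new FileInfo("auto-mpg.eg"));
            var restoredHelper = (NormalizationHelper) SerializeObject.Load("normhelper.ser");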
Example #3
        /// <summary>
        ///     Program entry point.
        /// </summary>
        /// <param name="app">Holds arguments and other info.</param>
        public void Execute(IExampleInterface app)
        {
            ErrorCalculation.Mode = ErrorCalculationMode.RMS;
            // Download the data that we will attempt to model.
            string filename = DownloadData(app.Args);

            // Define the format of the data file.
            // This area will change, depending on the columns and
            // format of the file that you are trying to model.
            var format = new CSVFormat('.', ' '); // decimal point and space separated
            IVersatileDataSource source = new CSVDataSource(filename, true,
                format);

            var data = new VersatileMLDataSet(source);
            data.NormHelper.Format = format;

            ColumnDefinition columnSSN = data.DefineSourceColumn("SSN",
                ColumnType.Continuous);
            ColumnDefinition columnDEV = data.DefineSourceColumn("DEV",
                ColumnType.Continuous);

            // Analyze the data, determine the min/max/mean/sd of every column.
            data.Analyze();

            // Use SSN & DEV to predict SSN. For time series it is okay to have
            // SSN both as an input and an output.
            data.DefineInput(columnSSN);
            data.DefineInput(columnDEV);
            data.DefineOutput(columnSSN);

            // Create a feedforward neural network as the model type:
            // MLMethodFactory.TYPE_FEEDFORWARD.
            // You could also use other model types, such as:
            // MLMethodFactory.TYPE_SVM: Support Vector Machine (SVM)
            // MLMethodFactory.TYPE_RBFNETWORK: RBF Neural Network
            // MLMethodFactory.TYPE_NEAT: NEAT Neural Network
            // MLMethodFactory.TYPE_PNN: Probabilistic Neural Network
            var model = new EncogModel(data);
            model.SelectMethod(data, MLMethodFactory.TypeFeedforward);

            // Send any output to the console.
            model.Report = new ConsoleStatusReportable();

            // Now normalize the data. Encog will automatically determine the
            // correct normalization type based on the model you chose in the
            // last step.
            data.Normalize();

            // Set time series.
            data.LeadWindowSize = 1;
            data.LagWindowSize = WindowSize;

            // Hold back some data for a final validation.
            // Do not shuffle the data into a random ordering. (never shuffle
            // time series)
            // Use a seed of 1001 so that we always use the same holdback and
            // will get more consistent results.
            model.HoldBackValidation(0.3, false, 1001);

            // Choose whatever is the default training type for this model.
            model.SelectTrainingType(data);

            // Use a 5-fold cross-validated train. Return the best method found.
            // (never shuffle time series)
            var bestMethod = (IMLRegression) model.Crossvalidate(5,
                false);

            // Display the training and validation errors.
            Console.WriteLine(@"Training error: "
                              + model.CalculateError(bestMethod,
                                  model.TrainingDataset));
            Console.WriteLine(@"Validation error: "
                              + model.CalculateError(bestMethod,
                                  model.ValidationDataset));

            // Display our normalization parameters.
            NormalizationHelper helper = data.NormHelper;
            Console.WriteLine(helper.ToString());

            // Display the final model.
            Console.WriteLine(@"Final model: " + bestMethod);

            // Loop over the entire original dataset and feed it through the
            // model. This also shows how you would process new data that was
            // not part of your training set. You do not need to retrain; simply
            // use the NormalizationHelper class. After you train, you can save
            // the NormalizationHelper to later normalize and denormalize your
            // data.
            source.Close();
            var csv = new ReadCSV(filename, true, format);
            var line = new String[2];

            // Create a vector to hold each time-slice as we build them.
            // These will be grouped together into windows (see the VectorWindow
            // sketch after this method).
            var slice = new double[2];
            var window = new VectorWindow(WindowSize + 1);
            IMLData input = helper.AllocateInputVector(WindowSize + 1);

            // Only display the first 100
            int stopAfter = 100;

            while (csv.Next() && stopAfter > 0)
            {
                var result = new StringBuilder();

                line[0] = csv.Get(2); // ssn
                line[1] = csv.Get(3); // dev
                helper.NormalizeInputVector(line, slice, false);

                // enough data to build a full window?
                if (window.IsReady())
                {
                    window.CopyWindow(((BasicMLData) input).Data, 0);
                    String correct = csv.Get(2); // trying to predict SSN.
                    IMLData output = bestMethod.Compute(input);
                    String predicted = helper
                        .DenormalizeOutputVectorToString(output)[0];

                    result.Append(String.Join(",", line));
                    result.Append(" -> predicted: ");
                    result.Append(predicted);
                    result.Append(" (correct: ");
                    result.Append(correct);
                    result.Append(")");

                    Console.WriteLine(result.ToString());
                }

                // Add the normalized slice to the window. We do this just after
                // checking whether the window is ready, so that the window is
                // always one row behind the current row. This is because we are
                // trying to predict the next row.
                window.Add(slice);

                stopAfter--;
            }
            csv.Close();

            // Delete data file and shut down.
            File.Delete(filename);
            EncogFramework.Instance.Shutdown();
        }
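As the comments inside the loop above explain, VectorWindow collects normalized time-slices and only yields a model input once a full window is available, staying one row behind so the next row can be predicted. A small standalone sketch of that mechanism, using a window of three two-value slices purely for illustration (the numbers are made up):

            // Illustrative only: how VectorWindow groups slices into one input vector.
            var demoWindow = new VectorWindow(3);
            var joined = new double[6];                    // 3 slices x 2 values each

            demoWindow.Add(new double[] {0.10, 0.20});
            demoWindow.Add(new double[] {0.30, 0.40});
            Console.WriteLine(demoWindow.IsReady());       // False: only 2 of 3 slices seen

            demoWindow.Add(new double[] {0.50, 0.60});
            if (demoWindow.IsReady())
            {
                demoWindow.CopyWindow(joined, 0);          // joined now holds all 6 values
                Console.WriteLine(string.Join(",", joined));
            }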