Ejemplo n.º 1
0
        /// <summary>
        /// The main program entry point. Loads the training and test CSV files,
        /// trains an SDCA maximum-entropy multiclass classifier on the pixel
        /// columns, prints evaluation metrics for the test data, and then shows
        /// the per-class scores for a handful of test digits in a table.
        /// </summary>
        /// <param name="args">The command line arguments (not used).</param>
        static void Main(string[] args)
        {
            // number of score columns shown per prediction (P0..P9)
            const int classCount = 10;

            // create a machine learning context
            var context = new MLContext();

            // load data: column 0 holds the number, columns 1..784 hold the pixel values
            Console.WriteLine("Loading data....");
            var columnDef = new TextLoader.Column[]
            {
                new TextLoader.Column(nameof(Digit.PixelValues), DataKind.Single, 1, 784),
                new TextLoader.Column("Number", DataKind.Single, 0)
            };
            var trainDataView = context.Data.LoadFromTextFile(
                path: trainDataPath,
                columns: columnDef,
                hasHeader: true,
                separatorChar: ',');
            var testDataView = context.Data.LoadFromTextFile(
                path: testDataPath,
                columns: columnDef,
                hasHeader: true,
                separatorChar: ',');

            // build a training pipeline
            // step 1: map the number column to a key value and store in the label column
            //         (ByValue ordering keeps the key order aligned with the numeric order)
            var pipeline = context.Transforms.Conversion.MapValueToKey(
                outputColumnName: "Label",
                inputColumnName: "Number",
                keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)

                           // step 2: concatenate all feature columns
                           .Append(context.Transforms.Concatenate(
                                       "Features",
                                       nameof(Digit.PixelValues)))

                           // step 3: cache data to speed up training
                           .AppendCacheCheckpoint(context)

                           // step 4: train the model with SDCA
                           .Append(context.MulticlassClassification.Trainers.SdcaMaximumEntropy(
                                       labelColumnName: "Label",
                                       featureColumnName: "Features"))

                           // step 5: map the label key value back to a number
                           .Append(context.Transforms.Conversion.MapKeyToValue(
                                       outputColumnName: "Number",
                                       inputColumnName: "Label"));

            // train the model
            Console.WriteLine("Training model....");
            var model = pipeline.Fit(trainDataView);

            // use the model to make predictions on the test data
            Console.WriteLine("Evaluating model....");
            var predictions = model.Transform(testDataView);

            // evaluate the predictions
            var metrics = context.MulticlassClassification.Evaluate(
                data: predictions,
                labelColumnName: "Number",
                scoreColumnName: "Score");

            // show evaluation metrics
            // every metric uses the "0.###" format: the "0" placeholder always emits the
            // leading digit, so values below 1 print as 0.xxx and zero prints as 0
            // instead of an empty string
            Console.WriteLine("Evaluation metrics");
            Console.WriteLine($"    MicroAccuracy:    {metrics.MicroAccuracy:0.###}");
            Console.WriteLine($"    MacroAccuracy:    {metrics.MacroAccuracy:0.###}");
            Console.WriteLine($"    LogLoss:          {metrics.LogLoss:0.###}");
            Console.WriteLine($"    LogLossReduction: {metrics.LogLossReduction:0.###}");
            Console.WriteLine();

            // grab five digits from the test data
            var digits     = context.Data.CreateEnumerable <Digit>(testDataView, reuseRowObject: false).ToArray();
            var testDigits = new Digit[] { digits[5], digits[16], digits[28], digits[63], digits[129] };

            // create a prediction engine
            var engine = context.Model.CreatePredictionEngine <Digit, DigitPrediction>(model);

            // set up a table to show the predictions: one label column plus one column per score
            var table = new Table(TableConfiguration.Unicode());

            table.AddColumn("Digit");
            for (var i = 0; i < classCount; i++)
            {
                table.AddColumn($"P{i}");
            }

            // predict each test digit
            foreach (var digit in testDigits)
            {
                var prediction = engine.Predict(digit);

                // first cell is the digit's Number value, followed by one
                // percentage-formatted score per column
                var row = new object[classCount + 1];
                row[0] = digit.Number;
                for (var i = 0; i < classCount; i++)
                {
                    row[i + 1] = prediction.Score[i].ToString("P2");
                }

                table.AddRow(row);
            }

            // show results and wait for a key press before exiting
            Console.WriteLine(table.ToString());
            Console.ReadKey();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// The main program entry point. Loads a headerless CSV file, splits it
        /// into a training and a test partition, trains an SDCA multiclass
        /// classifier on the pixel columns, prints evaluation metrics for the
        /// test partition, and then prints the per-class scores for three digits.
        /// </summary>
        /// <param name="args">The command line arguments (not used).</param>
        static void Main(string[] args)
        {
            // number of class scores printed per prediction (0..9)
            const int classCount = 10;

            // create a machine learning context
            var context = new MLContext();

            // load data: column 0 holds the number, columns 1..784 hold the pixel values
            Console.WriteLine("Loading data....");
            var dataView = context.Data.LoadFromTextFile(
                path: dataPath,
                columns: new[]
                {
                    new TextLoader.Column(nameof(Digit.PixelValues), DataKind.Single, 1, 784),
                    new TextLoader.Column("Number", DataKind.Single, 0)
                },
                hasHeader: false,
                separatorChar: ',');

            // split data into a training and test set (test fraction 0.2)
            var partitions = context.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.2);

            // build a training pipeline
            // step 1: concatenate all feature columns
            var pipeline = context.Transforms.Concatenate(
                DefaultColumnNames.Features,
                nameof(Digit.PixelValues))

                           // step 2: cache data to speed up training
                           .AppendCacheCheckpoint(context)

                           // step 3: train the model with SDCA
                           .Append(context.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
                                       labelColumnName: "Number",
                                       featureColumnName: DefaultColumnNames.Features));

            // train the model
            Console.WriteLine("Training model....");
            var model = pipeline.Fit(partitions.TrainSet);

            // use the model to make predictions on the test data
            Console.WriteLine("Evaluating model....");
            var predictions = model.Transform(partitions.TestSet);

            // evaluate the predictions
            var metrics = context.MulticlassClassification.Evaluate(
                data: predictions,
                label: "Number",
                score: DefaultColumnNames.Score);

            // show evaluation metrics
            // every metric uses the "0.###" format: the "0" placeholder always emits the
            // leading digit, so values below 1 print as 0.xxx and zero prints as 0
            // instead of an empty string
            Console.WriteLine("Evaluation metrics");
            Console.WriteLine($"    MicroAccuracy:    {metrics.AccuracyMicro:0.###}");
            Console.WriteLine($"    MacroAccuracy:    {metrics.AccuracyMacro:0.###}");
            Console.WriteLine($"    LogLoss:          {metrics.LogLoss:0.###}");
            Console.WriteLine($"    LogLossReduction: {metrics.LogLossReduction:0.###}");
            Console.WriteLine();

            // grab three digits from the data (rows 5, 12 and 20)
            // presumably these are a 2, a 7 and a 9 - depends on the data file, TODO confirm
            // NOTE(review): the samples come from the full data view, not from
            // partitions.TestSet, so some of them may have been used for training
            var digits     = context.Data.CreateEnumerable <Digit>(dataView, reuseRowObject: false).ToArray();
            var testDigits = new Digit[] { digits[5], digits[12], digits[20] };

            // create a prediction engine
            var engine = model.CreatePredictionEngine <Digit, DigitPrediction>(context);

            // predict each test digit
            for (var i = 0; i < testDigits.Length; i++)
            {
                var prediction = engine.Predict(testDigits[i]);

                // show results: one percentage-formatted score per class
                Console.WriteLine($"Predicting test digit {i}...");
                for (var j = 0; j < classCount; j++)
                {
                    Console.WriteLine($"  {j}: {prediction.Score[j]:P2}");
                }
                Console.WriteLine();
            }

            // wait for a key press before exiting
            Console.ReadKey();
        }