/// <summary>
/// The main program entry point. Loads the training and test data, trains a
/// multiclass SDCA maximum entropy classifier, prints the evaluation metrics,
/// and shows a table with the per-class scores for a few test digits.
/// </summary>
/// <param name="args">The command line arguments (not used).</param>
static void Main(string[] args)
{
    // number of score columns (P0..P9) shown per predicted digit
    const int ClassCount = 10;

    // create a machine learning context
    var context = new MLContext();

    // load data: column 0 is the label, columns 1-784 are the pixel values
    Console.WriteLine("Loading data....");
    var columnDef = new TextLoader.Column[]
    {
        new TextLoader.Column(nameof(Digit.PixelValues), DataKind.Single, 1, 784),
        new TextLoader.Column("Number", DataKind.Single, 0)
    };
    var trainDataView = context.Data.LoadFromTextFile(
        path: trainDataPath,
        columns: columnDef,
        hasHeader: true,
        separatorChar: ',');
    var testDataView = context.Data.LoadFromTextFile(
        path: testDataPath,
        columns: columnDef,
        hasHeader: true,
        separatorChar: ',');

    // build a training pipeline
    // step 1: map the number column to a key value and store in the label column
    var pipeline = context.Transforms.Conversion.MapValueToKey(
            outputColumnName: "Label",
            inputColumnName: "Number",
            keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)

        // step 2: concatenate all feature columns
        .Append(context.Transforms.Concatenate(
            "Features",
            nameof(Digit.PixelValues)))

        // step 3: cache data to speed up training
        .AppendCacheCheckpoint(context)

        // step 4: train the model with SDCA
        .Append(context.MulticlassClassification.Trainers.SdcaMaximumEntropy(
            labelColumnName: "Label",
            featureColumnName: "Features"))

        // step 5: map the label key value back to a number
        .Append(context.Transforms.Conversion.MapKeyToValue(
            outputColumnName: "Number",
            inputColumnName: "Label"));

    // train the model
    Console.WriteLine("Training model....");
    var model = pipeline.Fit(trainDataView);

    // use the model to make predictions on the test data
    Console.WriteLine("Evaluating model....");
    var predictions = model.Transform(testDataView);

    // evaluate the predictions
    var metrics = context.MulticlassClassification.Evaluate(
        data: predictions,
        labelColumnName: "Number",
        scoreColumnName: "Score");

    // show evaluation metrics; the "0.###" format keeps the leading zero so
    // values below one print as "0.35" rather than ".35"
    Console.WriteLine("Evaluation metrics");
    Console.WriteLine($" MicroAccuracy: {metrics.MicroAccuracy:0.###}");
    Console.WriteLine($" MacroAccuracy: {metrics.MacroAccuracy:0.###}");
    Console.WriteLine($" LogLoss: {metrics.LogLoss:0.###}");
    Console.WriteLine($" LogLossReduction: {metrics.LogLossReduction:0.###}");
    Console.WriteLine();

    // grab five digits from the test data
    var digits = context.Data.CreateEnumerable<Digit>(testDataView, reuseRowObject: false).ToArray();
    var testDigits = new Digit[] { digits[5], digits[16], digits[28], digits[63], digits[129] };

    // create a prediction engine
    var engine = context.Model.CreatePredictionEngine<Digit, DigitPrediction>(model);

    // set up a table to show the predictions: the digit plus one column per class
    var table = new Table(TableConfiguration.Unicode());
    table.AddColumn("Digit");
    for (var i = 0; i < ClassCount; i++)
    {
        table.AddColumn($"P{i}");
    }

    // predict each test digit
    for (var i = 0; i < testDigits.Length; i++)
    {
        var prediction = engine.Predict(testDigits[i]);

        // first cell is the actual digit, followed by the score of each class
        var row = new object[ClassCount + 1];
        row[0] = testDigits[i].Number;
        for (var j = 0; j < ClassCount; j++)
        {
            row[j + 1] = prediction.Score[j].ToString("P2");
        }
        table.AddRow(row);
    }

    // show results
    Console.WriteLine(table.ToString());
    Console.ReadKey();
}
/// <summary>
/// The main program entry point. Loads the digit data, splits it into a
/// training and a test set, trains a multiclass SDCA classifier, prints the
/// evaluation metrics, and shows the per-class scores for three sample digits.
/// </summary>
/// <param name="args">The command line arguments (not used).</param>
static void Main(string[] args)
{
    // number of class scores printed per predicted digit
    const int ClassCount = 10;

    // create a machine learning context
    var context = new MLContext();

    // load data: column 0 is the label, columns 1-784 are the pixel values
    Console.WriteLine("Loading data....");
    var dataView = context.Data.LoadFromTextFile(
        path: dataPath,
        columns: new[]
        {
            new TextLoader.Column(nameof(Digit.PixelValues), DataKind.Single, 1, 784),
            new TextLoader.Column("Number", DataKind.Single, 0)
        },
        hasHeader: false,
        separatorChar: ',');

    // split data into a training and test set
    var partitions = context.MulticlassClassification.TrainTestSplit(dataView, testFraction: 0.2);

    // build a training pipeline
    // step 1: concatenate all feature columns
    var pipeline = context.Transforms.Concatenate(
            DefaultColumnNames.Features,
            nameof(Digit.PixelValues))

        // step 2: cache data to speed up training
        .AppendCacheCheckpoint(context)

        // step 3: train the model with SDCA
        .Append(context.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
            labelColumnName: "Number",
            featureColumnName: DefaultColumnNames.Features));

    // train the model
    Console.WriteLine("Training model....");
    var model = pipeline.Fit(partitions.TrainSet);

    // use the model to make predictions on the test data
    Console.WriteLine("Evaluating model....");
    var predictions = model.Transform(partitions.TestSet);

    // evaluate the predictions
    var metrics = context.MulticlassClassification.Evaluate(
        data: predictions,
        label: "Number",
        score: DefaultColumnNames.Score);

    // show evaluation metrics; the "0.###" format keeps the leading zero so
    // values below one print as "0.35" rather than ".35"
    Console.WriteLine("Evaluation metrics");
    Console.WriteLine($" MicroAccuracy: {metrics.AccuracyMicro:0.###}");
    Console.WriteLine($" MacroAccuracy: {metrics.AccuracyMacro:0.###}");
    Console.WriteLine($" LogLoss: {metrics.LogLoss:0.###}");
    Console.WriteLine($" LogLossReduction: {metrics.LogLossReduction:0.###}");
    Console.WriteLine();

    // grab three digits from the data (per the original note: a 2, a 7, and a 9)
    // NOTE(review): these rows come from the full data view, not from the test
    // partition, so a sample may be a row the model was trained on
    var digits = context.Data.CreateEnumerable<Digit>(dataView, reuseRowObject: false).ToArray();
    var testDigits = new Digit[] { digits[5], digits[12], digits[20] };

    // create a prediction engine
    var engine = model.CreatePredictionEngine<Digit, DigitPrediction>(context);

    // predict each test digit
    for (var i = 0; i < testDigits.Length; i++)
    {
        var prediction = engine.Predict(testDigits[i]);

        // show results: one line per class with its score as a percentage
        Console.WriteLine($"Predicting test digit {i}...");
        for (var j = 0; j < ClassCount; j++)
        {
            Console.WriteLine($" {j}: {prediction.Score[j]:P2}");
        }
        Console.WriteLine();
    }

    Console.ReadKey();
}