Ejemplo n.º 1
0
        /// <summary>
        /// Runs an AutoFit sweep for a multiclass classification task and packages the outcome.
        /// </summary>
        /// <param name="context">The multiclass classification context this extension method is attached to; it is not read here.</param>
        /// <param name="trainData">Training data, forwarded to <c>AutoFitApi.Fit</c>.</param>
        /// <param name="label">Label column name, forwarded to <c>AutoFitApi.Fit</c>.</param>
        /// <param name="validationData">Optional validation data, forwarded to <c>AutoFitApi.Fit</c>.</param>
        /// <param name="inferredColumns">Optional column information, forwarded to <c>AutoFitApi.Fit</c>.</param>
        /// <param name="settings">Optional sweep settings, forwarded to <c>AutoFitApi.Fit</c>.</param>
        /// <param name="cancellationToken">
        /// Checked once before the sweep starts. The token is not passed to <c>AutoFitApi.Fit</c>,
        /// so a sweep that is already running is not interrupted by it.
        /// </param>
        /// <param name="iterationCallback">
        /// If supplied, receives every iteration result, in pipeline order, after the sweep has returned.
        /// </param>
        /// <param name="debugLogger">Optional logger, forwarded to <c>AutoFitApi.Fit</c>.</param>
        /// <returns>The best iteration result together with the results of all iterations.</returns>
        /// <exception cref="OperationCanceledException">Cancellation had already been requested on entry.</exception>
        internal static MulticlassClassificationResult AutoFit(this MulticlassClassificationContext context,
                                                               IDataView trainData,
                                                               string label,
                                                               IDataView validationData            = null,
                                                               InferredColumn[] inferredColumns    = null,
                                                               AutoFitSettings settings            = null,
                                                               CancellationToken cancellationToken = default,
                                                               IProgress<MulticlassClassificationIterationResult> iterationCallback = null,
                                                               IDebugLogger debugLogger = null)
        {
            // Previously the token was accepted but never looked at; at minimum do not start
            // an expensive sweep for a caller that has already cancelled.
            cancellationToken.ThrowIfCancellationRequested();

            // Run autofit and get all pipelines that were run in that process.
            var (allPipelines, bestPipeline) = AutoFitApi.Fit(trainData, validationData, label, inferredColumns,
                                                              settings, TaskKind.MulticlassClassification, OptimizingMetric.Accuracy, debugLogger);

            var results = new MulticlassClassificationIterationResult[allPipelines.Length];

            for (var i = 0; i < results.Length; i++)
            {
                var iterationResult = allPipelines[i];
                results[i] = new MulticlassClassificationIterationResult(
                    iterationResult.Model,
                    (MultiClassClassifierMetrics)iterationResult.EvaluatedMetrics,
                    iterationResult.ScoredValidationData);

                // Previously the callback was silently dropped. AutoFitApi.Fit is not given the
                // callback, so the results can only be reported once the sweep is finished.
                iterationCallback?.Report(results[i]);
            }

            var bestResult = new MulticlassClassificationIterationResult(
                bestPipeline.Model,
                (MultiClassClassifierMetrics)bestPipeline.EvaluatedMetrics,
                bestPipeline.ScoredValidationData);

            return new MulticlassClassificationResult(bestResult, results);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Sample entry point: reads "AgeRangeData.csv", fits a multiclass SDCA pipeline on the
        /// Age column, predicts the label for one hand-built example and prints it.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string trainingFile = "AgeRangeData.csv";

            var env = new LocalEnvironment();

            // Column 1 is loaded as the numeric Age, column 3 as the text Label.
            var reader = TextLoader.CreateReader(
                env,
                ctx => (Age: ctx.LoadFloat(1), Label: ctx.LoadText(3)),
                separator: ',',
                hasHeader: true);

            var source    = new MultiFileSource(trainingFile);
            var trainData = reader.Read(source);

            var classification = new MulticlassClassificationContext(env);

            // The label is converted to a key for the trainer; the predicted key is mapped back to its value.
            var learningPipeline = reader.MakeNewEstimator()
                .Append(r => (
                    r.Label,
                    Predictions: classification.Trainers.Sdca(
                        label: r.Label.ToKey(),
                        features: r.Age.AsVector())))
                .Append(r => r.Predictions.predictedLabel.ToValue());

            var trainedModel = learningPipeline.Fit(trainData);
            var predictor    = trainedModel.AsDynamic.MakePredictionFunction<AgeRangeNewApi, AgeRangePredictionNewApi>(env);

            var sample = new AgeRangeNewApi()
            {
                Age    = 6,
                Name   = "John",
                Gender = "M"
            };

            var outcome = predictor.Predict(sample);

            Console.WriteLine($"prediction: {outcome.PredictedLabel}");

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
        /// <summary>
        /// Builds the image-classification pipeline (image loading, resizing, pixel extraction,
        /// TensorFlow scoring, SDCA multiclass trainer), fits it on the training file, prints the
        /// predictions and metrics computed on that same data, and saves the model to
        /// <c>outputModelLocation</c>.
        /// </summary>
        public void BuildAndTrain()
        {
            var featurizerModelLocation = inputModelLocation;

            ConsoleWriteHeader("Read model");
            Console.WriteLine($"Model location: {featurizerModelLocation}");
            Console.WriteLine($"Images folder: {imagesFolder}");
            Console.WriteLine($"Training file: {dataLocation}");
            Console.WriteLine($"Default parameters: image size=({ImageNetSettings.imageWidth},{ImageNetSettings.imageHeight}), image mean: {ImageNetSettings.mean}");

            // Two text columns are read from the training file: image path (0) and label (1).
            var loaderArguments = new TextLoader.Arguments
            {
                Column = new[]
                {
                    new TextLoader.Column("ImagePath", DataKind.Text, 0),
                    new TextLoader.Column("Label", DataKind.Text, 1)
                }
            };
            var loader = new TextLoader(env, loaderArguments);

            var pixelColumn = new ImagePixelExtractorTransform.ColumnInfo(
                "ImageReal",
                "input",
                interleave: ImageNetSettings.channelsLast,
                offset: ImageNetSettings.mean);

            var pipeline = new ValueToKeyMappingEstimator(env, "Label", "LabelTokey")
                .Append(new ImageLoadingEstimator(env, imagesFolder, ("ImagePath", "ImageReal")))
                .Append(new ImageResizingEstimator(env, "ImageReal", "ImageReal", ImageNetSettings.imageHeight, ImageNetSettings.imageWidth))
                .Append(new ImagePixelExtractingEstimator(env, new[] { pixelColumn }))
                .Append(new TensorFlowEstimator(env, featurizerModelLocation, new[] { "input" }, new[] { "softmax2_pre_activation" }))
                .Append(new SdcaMultiClassTrainer(env, "softmax2_pre_activation", "LabelTokey"))
                .Append(new KeyToValueEstimator(env, ("PredictedLabel", "PredictedLabelValue")));

            // Train the pipeline.
            ConsoleWriteHeader("Training classification model");
            var data  = loader.Read(new MultiFileSource(dataLocation));
            var model = pipeline.Fit(data);

            // Run the training data back through the fitted model. Optional, but handy when debugging.
            var trainData = model.Transform(data);
            var loadedModelOutputColumnNames = trainData.Schema.GetColumnNames();
            var scoredRows = trainData.AsEnumerable<ImageNetPipeline>(env, false, true).ToList();

            foreach (var row in scoredRows)
            {
                ConsoleWriteImagePrediction(row.ImagePath, row.PredictedLabelValue, row.Score.Max());
            }

            // Metrics are computed on the training data, not on a held-out set.
            var sdcaContext = new MulticlassClassificationContext(env);

            ConsoleWriteHeader("Classification metrics");
            var metrics = sdcaContext.Evaluate(trainData, label: "LabelTokey", predictedLabel: "PredictedLabel");

            Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
            var perClassLogLoss = String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()));
            Console.WriteLine($"PerClassLogLoss is: {perClassLogLoss}");

            // Save the model to assets/outputs.
            ConsoleWriteHeader("Save model to local file");
            ModelHelpers.DeleteAssets(outputModelLocation);
            using (var modelStream = new FileStream(outputModelLocation, FileMode.Create))
            {
                model.SaveTo(env, modelStream);
            }
            Console.WriteLine($"Model saved: {outputModelLocation}");
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Fits a statically-typed multiclass SDCA pipeline with a hinge loss on the iris training
        /// file, then checks the captured predictor's weights and biases and the evaluation metrics.
        /// </summary>
        public void SdcaMulticlass()
        {
            var env        = new ConsoleEnvironment(seed: 0);
            var irisPath   = GetDataPath(TestDatasets.iris.trainFilename);
            var dataSource = new MultiFileSource(irisPath);

            var ctx    = new MulticlassClassificationContext(env);
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadText(0), features: c.LoadFloat(1, 4)));

            // Filled in by the onFit callback once training has run.
            MulticlassLogisticRegressionPredictor trained = null;

            var hingeArguments = new HingeLoss.Arguments() { Margin = 1 };
            var loss           = new HingeLoss(hingeArguments);

            // With a custom loss function we no longer get calibrated predictions.
            var est = reader.MakeNewEstimator()
                .Append(r => (label: r.label.ToKey(), r.features))
                .Append(r => (
                    r.label,
                    preds: ctx.Trainers.Sdca(
                        r.label,
                        r.features,
                        maxIterations: 2,
                        loss: loss,
                        onFit: p => trained = p)));

            var pipe = reader.Append(est);

            // Nothing is trained until Fit is called.
            Assert.Null(trained);
            var model = pipe.Fit(dataSource);
            Assert.NotNull(trained);

            VBuffer<float>[] weights = default;
            trained.GetWeights(ref weights, out int classCount);
            Assert.True(classCount == 3 && classCount == weights.Length);
            for (int i = 0; i < weights.Length; i++)
            {
                Assert.True(weights[i].Length == 4);
            }

            var biases = trained.GetBiases();
            Assert.True(biases.Count() == 3);

            var data = model.Read(dataSource);

            // Just output some data on the schema for fun.
            var schema = data.AsDynamic.Schema;
            for (int col = 0; col < schema.ColumnCount; ++col)
            {
                var columnName = schema.GetColumnName(col);
                var columnType = schema.GetColumnType(col);
                Console.WriteLine($"{columnName}, {columnType}");
            }

            var metrics = ctx.Evaluate(data, r => r.label, r => r.preds, 2);

            Assert.True(metrics.LogLoss > 0);
            Assert.True(metrics.TopKAccuracy > 0);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Trains a multiclass SDCA model on a 90:10 train/test split of the file at
        /// <paramref name="dataPath"/>, prints the micro-accuracy on the test part, then runs
        /// 5-fold cross-validation with the same pipeline and prints the average micro-accuracy.
        /// </summary>
        /// <param name="dataPath">Path to a comma-separated file with four float columns followed by a text label.</param>
        private void CrossValidationOn(string dataPath)
        {
            // The environment is handed to both the reader and the classification context.
            var env = new LocalEnvironment();

            // The multiclass context supplies the trainer, the split, the evaluation and the cross-validation.
            var classification = new MulticlassClassificationContext(env);

            // Describe the columns of the text file. The separator is overridden to comma.
            var reader = TextLoader.CreateReader(
                env,
                ctx => (
                    SepalLength: ctx.LoadFloat(0),
                    SepalWidth: ctx.LoadFloat(1),
                    PetalLength: ctx.LoadFloat(2),
                    PetalWidth: ctx.LoadFloat(3),
                    Label: ctx.LoadText(4)),
                separator: ',');

            var source = new MultiFileSource(dataPath);
            var data   = reader.Read(source);

            // Pipeline: label text -> key, four features -> one vector, then the SDCA trainer.
            var learningPipeline = reader.MakeNewEstimator()
                .Append(r => (
                    Label: r.Label.ToKey(),
                    Features: r.SepalLength.ConcatWith(r.SepalWidth, r.PetalLength, r.PetalWidth)))
                .Append(r => (
                    r.Label,
                    Predictions: classification.Trainers.Sdca(r.Label, r.Features)));

            // Single 90:10 split: train on one part, evaluate on the other.
            var (trainData, testData) = classification.TrainTestSplit(data, testFraction: 0.1);

            var model      = learningPipeline.Fit(trainData);
            var scoredTest = model.Transform(testData);
            var metrics    = classification.Evaluate(scoredTest, r => r.Label, r => r.Predictions);

            Console.WriteLine(metrics.AccuracyMicro);

            // 5-fold cross-validation with the same pipeline; each fold result exposes its metrics.
            var cvResults = classification.CrossValidate(data, learningPipeline, r => r.Label, numFolds: 5);

            var averageMicroAccuracy = cvResults.Average(r => r.metrics.AccuracyMicro);

            Console.WriteLine(averageMicroAccuracy);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Trains a multiclass SDCA model on the data at <c>TrainDataPath</c>, evaluates it on the
        /// data at <c>ValidationDataPath</c>, prints the metrics and saves the model to <c>ModelPath</c>.
        /// </summary>
        /// <param name="env">The ML context used for loading, transforms, training, evaluation and saving.</param>
        /// <remarks>
        /// Any exception is caught and only its message is written to the console; nothing is
        /// rethrown, so callers cannot tell from this method whether training succeeded.
        /// </remarks>
        public static void Train(MLContext env)
        {
            try
            {
                // STEP 1: Common data loading configuration.
                // Columns 0-63 hold the pixel values, column 64 the number used as the label.
                var reader = env.Data.CreateTextReader(
                    new TextLoader.Arguments()
                {
                    Column = new[]
                    {
                        new TextLoader.Column("PixelValues", DataKind.R4, 0, 63),
                        new TextLoader.Column("Number", DataKind.R4, 64)
                    },
                    Separator = ",",
                    HasHeader = false
                });

                var data     = reader.Read(TrainDataPath);
                var testData = reader.Read(ValidationDataPath);

                // STEP 2: Common data process configuration with pipeline data transformations.
                var dataProcessPipeline = env.Transforms.Concatenate("Features", "PixelValues").AppendCacheCheckpoint(env);

                // STEP 3: Set the training algorithm, then create and configure the training pipeline.
                // The trainer comes from env.MulticlassClassification; the separate, never-used
                // MulticlassClassificationContext instance the method used to create has been removed.
                var trainer          = env.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(labelColumn: "Number", featureColumn: "Features");
                var trainingPipeline = dataProcessPipeline.Append(trainer);

                // STEP 4: Train the model fitting to the DataSet.
                var watch = System.Diagnostics.Stopwatch.StartNew();
                Console.WriteLine("=============== Training the model ===============");

                ITransformer trainedModel = trainingPipeline.Fit(data);
                long         elapsedMs    = watch.ElapsedMilliseconds;

                // Integer division: the reported time is truncated to whole seconds.
                Console.WriteLine($"***** Training time: {elapsedMs / 1000} seconds *****");

                Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
                var predictions = trainedModel.Transform(testData);
                var metrics     = env.MulticlassClassification.Evaluate(predictions, "Number", "Score");

                Common.ConsoleHelper.PrintMultiClassClassificationMetrics(trainer.ToString(), metrics);

                using (var fs = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
                {
                    env.Model.Save(trainedModel, fs);
                }

                Console.WriteLine("The model is saved to {0}", ModelPath);
            }
            catch (Exception ex)
            {
                // Best-effort sample behaviour: report the failure and return normally.
                Console.WriteLine(ex.Message);
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Public AutoFit entry point for multiclass classification. Forwards every argument to the
 /// nine-argument <c>AutoFit</c> overload, passing <c>null</c> as the extra final argument.
 /// </summary>
 /// <param name="context">The multiclass classification context being extended.</param>
 /// <param name="trainData">Training data.</param>
 /// <param name="label">Label column name.</param>
 /// <param name="validationData">Optional validation data.</param>
 /// <param name="inferredColumns">Optional column information.</param>
 /// <param name="settings">Optional AutoFit settings.</param>
 /// <param name="cancellationToken">Cancellation token, forwarded unchanged.</param>
 /// <param name="iterationCallback">Optional per-iteration progress callback, forwarded unchanged.</param>
 /// <returns>Whatever the nine-argument overload returns.</returns>
 public static MulticlassClassificationResult AutoFit(this MulticlassClassificationContext context,
                                                      IDataView trainData,
                                                      string label,
                                                      IDataView validationData            = null,
                                                      InferredColumn[] inferredColumns    = null,
                                                      AutoFitSettings settings            = null,
                                                      CancellationToken cancellationToken = default,
                                                      IProgress<MulticlassClassificationIterationResult> iterationCallback = null)
 {
     MulticlassClassificationResult result = AutoFit(
         context,
         trainData,
         label,
         validationData,
         inferredColumns,
         settings,
         cancellationToken,
         iterationCallback,
         null);

     return result;
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds the image-classification pipeline (image loading, resizing, pixel extraction,
        /// TensorFlow scoring, multiclass logistic regression), fits it on the training file,
        /// prints the predictions and metrics computed on that same data, and saves the model to
        /// <c>outputModelLocation</c>.
        /// </summary>
        public void BuildAndTrain()
        {
            var featurizerModelLocation = inputModelLocation;

            ConsoleWriteHeader("Read model");
            Console.WriteLine($"Model location: {featurizerModelLocation}");
            Console.WriteLine($"Images folder: {imagesFolder}");
            Console.WriteLine($"Training file: {dataLocation}");
            Console.WriteLine($"Default parameters: image size=({ImageNetSettings.imageWidth},{ImageNetSettings.imageHeight}), image mean: {ImageNetSettings.mean}");

            var data = mlContext.Data.ReadFromTextFile<ImageNetData>(dataLocation, hasHeader: true);

            var pipeline = mlContext.Transforms.Conversion.MapValueToKey("Label", "LabelTokey")
                           .Append(mlContext.Transforms.LoadImages(imagesFolder, ("ImagePath", "ImageReal")))
                           .Append(mlContext.Transforms.Resize("ImageReal", "ImageReal", ImageNetSettings.imageHeight, ImageNetSettings.imageWidth))
                           .Append(mlContext.Transforms.ExtractPixels(new ImagePixelExtractorTransform.ColumnInfo("ImageReal", "input", interleave: ImageNetSettings.channelsLast, offset: ImageNetSettings.mean)))
                           .Append(mlContext.Transforms.ScoreTensorFlowModel(featurizerModelLocation, new[] { "input" }, new[] { "softmax2_pre_activation" }))
                           .Append(mlContext.MulticlassClassification.Trainers.LogisticRegression("LabelTokey", "softmax2_pre_activation"))
                           .Append(mlContext.Transforms.Conversion.MapKeyToValue(("PredictedLabel", "PredictedLabelValue")));

            // Train the pipeline.
            ConsoleWriteHeader("Training classification model");
            var model = pipeline.Fit(data);

            // Process the training data through the model.
            // This is an optional step, but it's useful for debugging issues.
            var trainData = model.Transform(data);

            // Materialized with ToList so the names actually exist for inspection in a debugger;
            // the bare Where/Select query was deferred and never enumerated.
            var loadedModelOutputColumnNames = trainData.Schema
                                               .Where(col => !col.IsHidden)
                                               .Select(col => col.Name)
                                               .ToList();
            var trainData2 = trainData.AsEnumerable<ImageNetPipeline>(mlContext, false, true).ToList();

            trainData2.ForEach(pr => ConsoleWriteImagePrediction(pr.ImagePath, pr.PredictedLabelValue, pr.Score.Max()));

            // Get some performance metrics on the model using the training data.
            // Reuse the multiclass catalog already exposed by mlContext (it is the one that
            // supplied the trainer above) instead of constructing a second context.
            ConsoleWriteHeader("Classification metrics");
            var metrics = mlContext.MulticlassClassification.Evaluate(trainData, label: "LabelTokey", predictedLabel: "PredictedLabel");

            Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
            Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()))}");

            // Save the model to assets/outputs.
            ConsoleWriteHeader("Save model to local file");
            ModelHelpers.DeleteAssets(outputModelLocation);
            using (var f = new FileStream(outputModelLocation, FileMode.Create))
            {
                mlContext.Model.Save(model, f);
            }

            Console.WriteLine($"Model saved: {outputModelLocation}");
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Fits a statically-typed multiclass naive Bayes pipeline on the iris training file, then
        /// checks the captured predictor's label and feature histograms and the evaluation metrics.
        /// </summary>
        public void MultiClassNaiveBayesTrainer()
        {
            var env        = new MLContext(seed: 0);
            var irisPath   = GetDataPath(TestDatasets.iris.trainFilename);
            var dataSource = new MultiFileSource(irisPath);

            var ctx    = new MulticlassClassificationContext(env);
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadText(0), features: c.LoadFloat(1, 4)));

            // Filled in by the onFit callback once training has run.
            MultiClassNaiveBayesPredictor trained = null;

            // Convert the text label to a key, then train on the label and features.
            var est = reader.MakeNewEstimator()
                .Append(r => (label: r.label.ToKey(), r.features))
                .Append(r => (
                    r.label,
                    preds: ctx.Trainers.MultiClassNaiveBayesTrainer(
                        r.label,
                        r.features,
                        onFit: p => trained = p)));

            var pipe = reader.Append(est);

            // Nothing is trained until Fit is called.
            Assert.Null(trained);
            var model = pipe.Fit(dataSource);
            Assert.NotNull(trained);

            int[]   labelHistogram   = default;
            int[][] featureHistogram = default;
            trained.GetLabelHistogram(ref labelHistogram, out int labelCount1);
            trained.GetFeatureHistogram(ref featureHistogram, out int labelCount2, out int featureCount);

            Assert.True(labelCount1 == 3 && labelCount1 == labelCount2 && labelCount1 <= labelHistogram.Length);
            for (int labelIndex = 0; labelIndex < labelCount1; labelIndex++)
            {
                Assert.True(featureCount == 4 && (featureCount <= featureHistogram[labelIndex].Length));
            }

            var data = model.Read(dataSource);

            // Just output some data on the schema for fun.
            var schema = data.AsDynamic.Schema;
            for (int col = 0; col < schema.Count; ++col)
            {
                Console.WriteLine($"{schema[col].Name}, {schema[col].Type}");
            }

            var metrics = ctx.Evaluate(data, r => r.label, r => r.preds, 2);

            Assert.True(metrics.LogLoss > 0);
            Assert.True(metrics.TopKAccuracy > 0);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Creates the ML context: builds the underlying local environment, attaches the message
        /// listener, and creates one context or catalog object per task area on top of it.
        /// </summary>
        /// <param name="seed">Random seed. Set to <c>null</c> for a non-deterministic environment.</param>
        /// <param name="conc">Concurrency level. Set to 1 to run single-threaded. Set to 0 to pick automatically.</param>
        public MLContext(int? seed = null, int conc = 0)
        {
            // Everything below shares this single environment instance.
            _env = new LocalEnvironment(seed, conc, MakeCompositionContainer);
            _env.AddListener(ProcessMessage);

            // Task-specific contexts.
            BinaryClassification = new BinaryClassificationContext(_env);
            MulticlassClassification = new MulticlassClassificationContext(_env);
            Regression = new RegressionContext(_env);
            Clustering = new ClusteringContext(_env);
            Ranking = new RankingContext(_env);

            // Catalogs for transforms, model operations and data operations.
            Transforms = new TransformsCatalog(_env);
            Model = new ModelOperationsCatalog(_env);
            Data = new DataOperations(_env);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Fits a statically-typed multiclass logistic regression pipeline (restricted to one
        /// thread) on the iris training file, then checks the captured predictor's weights and
        /// the evaluation metrics.
        /// </summary>
        public void MulticlassLogisticRegression()
        {
            var env        = new MLContext(seed: 0);
            var irisPath   = GetDataPath(TestDatasets.iris.trainFilename);
            var dataSource = new MultiFileSource(irisPath);

            var ctx    = new MulticlassClassificationContext(env);
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadText(0), features: c.LoadFloat(1, 4)));

            // Filled in by the onFit callback once training has run.
            MulticlassLogisticRegressionPredictor trained = null;

            // Convert the text label to a key, then train on the label and features.
            var est = reader.MakeNewEstimator()
                .Append(r => (label: r.label.ToKey(), r.features))
                .Append(r => (
                    r.label,
                    preds: ctx.Trainers.MultiClassLogisticRegression(
                        r.label,
                        r.features,
                        onFit: p => trained = p,
                        advancedSettings: s => s.NumThreads = 1)));

            var pipe = reader.Append(est);

            // Nothing is trained until Fit is called.
            Assert.Null(trained);
            var model = pipe.Fit(dataSource);
            Assert.NotNull(trained);

            VBuffer<float>[] weights = default;
            trained.GetWeights(ref weights, out int classCount);
            Assert.True(classCount == 3 && classCount == weights.Length);
            for (int i = 0; i < weights.Length; i++)
            {
                Assert.True(weights[i].Length == 4);
            }

            var data = model.Read(dataSource);

            // Just output some data on the schema for fun.
            var schema = data.AsDynamic.Schema;
            for (int col = 0; col < schema.Count; ++col)
            {
                Console.WriteLine($"{schema[col].Name}, {schema[col].Type}");
            }

            var metrics = ctx.Evaluate(data, r => r.label, r => r.preds, 2);

            Assert.True(metrics.LogLoss > 0);
            Assert.True(metrics.TopKAccuracy > 0);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Trains a multiclass SDCA model on the comma-separated file at
        /// <paramref name="irisDataPath"/> and returns it as a dynamically-typed transformer.
        /// </summary>
        /// <param name="irisDataPath">Path to a file with four float columns followed by a text label.</param>
        /// <returns>The fitted pipeline, exposed through its <c>AsDynamic</c> view.</returns>
        private ITransformer TrainOnIris(string irisDataPath)
        {
            // The environment is handed to both the reader and the classification context.
            var env = new LocalEnvironment();

            // The multiclass context supplies the SDCA trainer used below.
            var classification = new MulticlassClassificationContext(env);

            // Describe the columns of the text file. The separator is overridden to comma.
            var reader = TextLoader.CreateReader(
                env,
                ctx => (
                    SepalLength: ctx.LoadFloat(0),
                    SepalWidth: ctx.LoadFloat(1),
                    PetalLength: ctx.LoadFloat(2),
                    PetalWidth: ctx.LoadFloat(3),
                    Label: ctx.LoadText(4)),
                separator: ',');

            var source    = new MultiFileSource(irisDataPath);
            var trainData = reader.Read(source);

            // Pipeline:
            //  1. keep the label and concatenate the four features into one 'Features' column;
            //  2. train SDCA on the label (converted from text to key with ToKey) and the features;
            //  3. map the predicted key back to its original value. This last step produces a
            //     single unnamed column, which ML.NET names 'Data'.
            var learningPipeline = reader.MakeNewEstimator()
                .Append(r => (
                    r.Label,
                    Features: r.SepalLength.ConcatWith(r.SepalWidth, r.PetalLength, r.PetalWidth)))
                .Append(r => (
                    r.Label,
                    Predictions: classification.Trainers.Sdca(r.Label.ToKey(), r.Features)))
                .Append(r => r.Predictions.predictedLabel.ToValue());

            var fitted = learningPipeline.Fit(trainData);

            return fitted.AsDynamic;
        }
Ejemplo n.º 13
0
 PermutationFeatureImportance(
     this MulticlassClassificationContext ctx,
     IPredictionTransformer <IPredictor> model,
     IDataView data,
     string label                = DefaultColumnNames.Label,
     string features             = DefaultColumnNames.Features,
     bool useFeatureWeightFilter = false,
     int?topExamples             = null)
 {
     // NOTE(review): this snippet starts mid-signature; the return type and modifiers are not visible here.
     //
     // Extension method on the multiclass classification context. It forwards to
     // PermutationFeatureImportance<MultiClassClassifierMetrics>.GetImportanceMetricsMatrix, passing:
     //   - the environment obtained from the context via CatalogUtils.GetEnvironment,
     //   - the model and data unchanged,
     //   - an evaluation callback that runs ctx.Evaluate on a data view using the given label column,
     //   - MulticlassClassificationDelta (presumably the function that compares two metric sets;
     //     it is defined outside this snippet, so confirm there),
     //   - the features column name, the weight-filter flag and the optional example limit unchanged.
     return(PermutationFeatureImportance <MultiClassClassifierMetrics> .GetImportanceMetricsMatrix(
                CatalogUtils.GetEnvironment(ctx),
                model,
                data,
                idv => ctx.Evaluate(idv, label),
                MulticlassClassificationDelta,
                features,
                useFeatureWeightFilter,
                topExamples));
 }
Ejemplo n.º 14
0
        /// <summary>
        /// Fits a statically-typed multiclass LightGBM pipeline on the iris training file, checks
        /// that the onFit callback delivered a predictor, and checks the evaluation metrics.
        /// </summary>
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // LightGBM is 64-bit only
        public void MultiClassLightGBM()
        {
            var env        = new MLContext(seed: 0);
            var irisPath   = GetDataPath(TestDatasets.iris.trainFilename);
            var dataSource = new MultiFileSource(irisPath);

            var ctx    = new MulticlassClassificationContext(env);
            var reader = TextLoader.CreateReader(
                env,
                c => (label: c.LoadText(0), features: c.LoadFloat(1, 4)));

            // Filled in by the onFit callback once training has run.
            OvaPredictor trained = null;

            // Convert the text label to a key, then train on the label and features.
            var est = reader.MakeNewEstimator()
                .Append(r => (label: r.label.ToKey(), r.features))
                .Append(r => (
                    r.label,
                    preds: ctx.Trainers.LightGbm(
                        r.label,
                        r.features,
                        onFit: p => trained = p)));

            var pipe = reader.Append(est);

            // Nothing is trained until Fit is called.
            Assert.Null(trained);
            var model = pipe.Fit(dataSource);
            Assert.NotNull(trained);

            var data = model.Read(dataSource);

            // Just output some data on the schema for fun.
            var schema = data.AsDynamic.Schema;
            for (int col = 0; col < schema.Count; ++col)
            {
                Console.WriteLine($"{schema[col].Name}, {schema[col].Type}");
            }

            var metrics = ctx.Evaluate(data, r => r.label, r => r.preds, 2);

            Assert.True(metrics.LogLoss > 0);
            Assert.True(metrics.TopKAccuracy > 0);
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Trains a multiclass SDCA text classifier on the file at <c>TrainDataPath</c> and saves
        /// the fitted model to <c>ModelPath</c>.
        /// </summary>
        public static void Train()
        {
            using (var env = new LocalEnvironment(1974))
            {
                // Route environment messages to the shared console logger.
                env.AddListener(ConsoleLogger);

                var classification = new MulticlassClassificationContext(env);

                // Column 1 holds the sentence, column 0 the label; the file is comma-separated.
                var reader = TextLoader.CreateReader(
                    env,
                    ctx => (Sentence: ctx.LoadText(1), Label: ctx.LoadText(0)),
                    separator: ',');

                var source    = new MultiFileSource(TrainDataPath);
                var trainData = reader.Read(source);

                // Label -> key, sentence -> text features, SDCA trainer, predicted key -> value.
                var pipeline = reader.MakeNewEstimator()
                    .Append(r => (
                        Label: r.Label.ToKey(),
                        Features: r.Sentence.FeaturizeText()))
                    .Append(r => (
                        r.Label,
                        Predictions: classification.Trainers.Sdca(r.Label, r.Features)))
                    .Append(r => r.Predictions.predictedLabel.ToValue());

                Console.WriteLine("=============== Training model ===============");

                var fitted = pipeline.Fit(trainData);
                var model  = fitted.AsDynamic;

                using (var fs = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
                {
                    model.SaveTo(env, fs);
                }

                Console.WriteLine("=============== End training ===============");
                Console.WriteLine("The model is saved to {0}", ModelPath);
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Cross-validates a two-iteration multiclass SDCA pipeline on the iris training file and
        /// checks that five fold results come back, each with a positive log-loss.
        /// </summary>
        public void CrossValidate()
        {
            var mlContext = new MLContext(seed: 0);
            var multiclass = new MulticlassClassificationContext(mlContext);

            var irisSource = new MultiFileSource(GetDataPath(TestDatasets.iris.trainFilename));

            // Column 0 is the text label; columns 1..4 are loaded together as the feature vector.
            var loader = TextLoader.CreateReader(
                mlContext,
                col => (label: col.LoadText(0), features: col.LoadFloat(1, 4)));

            var estimator = loader.MakeNewEstimator()
                .Append(row => (label: row.label.ToKey(), row.features))
                .Append(row => (row.label, preds: multiclass.Trainers.Sdca(row.label, row.features, maxIterations: 2)));

            var irisData = loader.Read(irisSource);
            var foldResults = multiclass.CrossValidate(irisData, estimator, row => row.label);
            var results = foldResults.Select(fold => fold.metrics).ToArray();

            Assert.Equal(5, results.Length);
            Assert.True(results.All(m => m.LogLoss > 0));
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Runs several independent train/evaluate rounds of a multiclass SDCA text classifier over
        /// the prepared test data file and prints the micro and macro accuracy of each round.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Number of independent train/evaluate rounds.
            const int rounds = 5;

            var testDataFile = TestData.PrepareTestDataAndReturnPath(23, 61, 72);

            for (int i = 0; i < rounds; i++)
            {
                // Each round gets its own environment; dispose it when the round ends instead of
                // leaving one undisposed environment behind per iteration.
                using (var env = new LocalEnvironment())
                {
                    var classification = new MulticlassClassificationContext(env);

                    // Comma-separated file without header: column 0 is the label, column 1 the text.
                    var reader = TextLoader.CreateReader(
                        env,
                        ctx => (Label: ctx.LoadText(0), Text: ctx.LoadText(1)),
                        separator: ',',
                        hasHeader: false);
                    var data = reader.Read(new MultiFileSource(testDataFile));

                    var learningPipeline = reader.MakeNewEstimator()
                        .Append(r => (
                            Label: r.Label.ToKey(),
                            Features: r.Text.FeaturizeText(advancedSettings: s =>
                            {
                                // KeepNumbers is deliberately left at its default here.
                                s.KeepDiacritics = false;
                                s.KeepPunctuations = false;
                                s.TextCase = TextNormalizerTransform.CaseNormalizationMode.Lower;
                                s.TextLanguage = TextTransform.Language.Dutch;
                                s.VectorNormalizer = TextTransform.TextNormKind.LInf;
                            })))
                        .Append(r => (Label: r.Label, Predications: classification.Trainers.Sdca(r.Label, r.Features)));

                    // Hold out 20% of the rows for evaluation.
                    var (trainData, testData) = classification.TrainTestSplit(data, testFraction: 0.2);
                    var model = learningPipeline.Fit(trainData);
                    var metrics = classification.Evaluate(model.Transform(testData), r => r.Label, r => r.Predications);

                    Console.WriteLine(metrics.AccuracyMicro);
                    Console.WriteLine(metrics.AccuracyMacro);
                    Console.WriteLine("-----------");
                }
            }
        }
Ejemplo n.º 18
0
 /// <summary>
 /// Asks <c>PipelineSuggesterApi</c> for a multiclass-classification pipeline for the given data.
 /// </summary>
 /// <param name="context">Extension receiver; its value is not read by this method.</param>
 /// <param name="dataView">Data forwarded to the pipeline suggester.</param>
 /// <param name="label">Label column name forwarded to the pipeline suggester.</param>
 /// <returns>The pipeline returned by <c>PipelineSuggesterApi.GetPipeline</c>.</returns>
 public static Pipeline GetPipeline(this MulticlassClassificationContext context, IDataView dataView, string label)
     => PipelineSuggesterApi.GetPipeline(TaskKind.MulticlassClassification, dataView, label);
Ejemplo n.º 19
0
        /// <summary>
        /// Trains a multiclass SDCA model on a comma-separated file of four float features plus a
        /// text label, capturing the trained predictor and the normalizer scales through the
        /// <c>onFit</c> callbacks, then reads the predictor's weights and biases and prints the scales.
        /// </summary>
        /// <param name="dataPath">Path of the comma-separated training file.</param>
        private void TrainAndInspectWeights(string dataPath)
        {
            // Create a new environment for ML.NET operations and dispose it once we are done.
            using (var env = new LocalEnvironment())
            {
                // The multiclass classification context provides the trainer used below.
                var classification = new MulticlassClassificationContext(env);

                // Step one: read the data as an IDataView.
                // First, we define the reader: specify the data columns and where to find them in the text file.
                var reader = TextLoader.CreateReader(
                    env,
                    ctx => (
                        // The four feature columns.
                        SepalLength: ctx.LoadFloat(0),
                        SepalWidth: ctx.LoadFloat(1),
                        PetalLength: ctx.LoadFloat(2),
                        PetalWidth: ctx.LoadFloat(3),
                        // The label column, loaded as text.
                        Label: ctx.LoadText(4)),
                    // The file is comma-separated.
                    separator: ',');

                // Retrieve the training data.
                var trainData = reader.Read(new MultiFileSource(dataPath));

                // This is the predictor ('weights collection') that we will train.
                MulticlassLogisticRegressionPredictor predictor = null;

                // And these are the normalizer scales that we will learn. Initialized explicitly
                // because the only assignment happens inside the onFit lambda below.
                ImmutableArray<float> normScales = default(ImmutableArray<float>);

                // Build the training pipeline.
                var learningPipeline = reader.MakeNewEstimator()
                    .Append(r => (
                        r.Label,
                        // Concatenate all the features together into one column 'Features'.
                        Features: r.SepalLength.ConcatWith(r.SepalWidth, r.PetalLength, r.PetalWidth)))
                    .Append(r => (
                        r.Label,
                        // Normalize the features; the delegate is used to memorize the learned scales.
                        Features: r.Features.Normalize(
                            onFit: (scales, offsets) => normScales = scales)))
                    .Append(r => (
                        r.Label,
                        // Train the multiclass SDCA model. The label is text, so it is converted to a
                        // key with 'ToKey'; the delegate is used to memorize the predictor object.
                        Predictions: classification.Trainers.Sdca(
                            r.Label.ToKey(),
                            r.Features,
                            onFit: p => predictor = p)));

                // Train the model. The 'onFit' delegates above are expected to run during this call,
                // which sets 'predictor' and 'normScales'.
                var model = learningPipeline.Fit(trainData);

                // Now we can use 'predictor' to look at the weights: 'weights' receives an array of
                // weight vectors and 'numClasses' the class count reported by the predictor.
                VBuffer<float>[] weights = null;
                predictor.GetWeights(ref weights, out int numClasses);

                // Similarly we can also inspect the biases.
                var biases = predictor.GetBiases();

                // Inspect the normalizer scales.
                Console.WriteLine(string.Join(" ", normScales));
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// End-to-end sample: loads the training file, trains a multiclass SDCA model on four
        /// concatenated feature columns, evaluates it on the test file, prints the metrics and then
        /// prints the scores predicted for three sample inputs.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        private static void Main(string[] args)
        {
            // 1. Create the ML.NET environment.
            using (var env = new LocalEnvironment())
            {
                // 2. Describe the file layout: label in column 0, the four features in columns 1-4.
                var columns = new[]
                {
                    new TextLoader.Column("Label", DataKind.R4, 0),
                    new TextLoader.Column("SepalLength", DataKind.R4, 1),
                    new TextLoader.Column("SepalWidth", DataKind.R4, 2),
                    new TextLoader.Column("PetalLength", DataKind.R4, 3),
                    new TextLoader.Column("PetalWidth", DataKind.R4, 4),
                };
                var loaderArguments = new TextLoader.Arguments()
                {
                    Separator = "tab",
                    HasHeader = true,
                    Column    = columns
                };
                var reader = new TextLoader(env, loaderArguments);

                // Load the training data.
                IDataView trainingDataView = reader.Read(new MultiFileSource(TrainDataPath));

                // 3. Build the pipeline: concatenate the feature columns, then append the trainer.
                var featureColumns = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
                var concatenation  = new ConcatEstimator(env, "Features", featureColumns);
                var trainer        = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments(), "Features", "Label");
                var pipeline       = concatenation.Append(trainer);

                // 4. Train the model.
                Console.WriteLine("=============== Create and Train the Model ===============");
                var model = pipeline.Fit(trainingDataView);
                Console.WriteLine("=============== End of training ===============");
                Console.WriteLine();

                // 5. Evaluate the model on the test file and show the accuracy stats.
                IDataView testDataView = reader.Read(new MultiFileSource(TestDataPath));

                Console.WriteLine("=============== Evaluating Model's accuracy with Test data===============");
                var predictions = model.Transform(testDataView);

                var multiClassificationCtx = new MulticlassClassificationContext(env);
                var metrics = multiClassificationCtx.Evaluate(predictions, "Label");

                Console.WriteLine("Metrics:");
                Console.WriteLine($"    AccuracyMacro = {metrics.AccuracyMacro:0.####}, a value between 0 and 1, the closer to 1, the better");
                Console.WriteLine($"    AccuracyMicro = {metrics.AccuracyMicro:0.####}, a value between 0 and 1, the closer to 1, the better");
                Console.WriteLine($"    LogLoss = {metrics.LogLoss:0.####}, the closer to 0, the better");
                Console.WriteLine($"    LogLoss for class 1 = {metrics.PerClassLogLoss[0]:0.####}, the closer to 0, the better");
                Console.WriteLine($"    LogLoss for class 2 = {metrics.PerClassLogLoss[1]:0.####}, the closer to 0, the better");
                Console.WriteLine($"    LogLoss for class 3 = {metrics.PerClassLogLoss[2]:0.####}, the closer to 0, the better");
                Console.WriteLine();

                // 6. Predict three sample inputs and print the score of each class.
                var predictionFunct = model.MakePredictionFunction <IrisData, IrisPrediction>(env);

                // Prints the versicolor and virginica scores followed by a blank line; these lines
                // are identical for every sample, only the first line differs.
                void PrintRemainingScores(IrisPrediction scored)
                {
                    Console.WriteLine($"                                           versicolor:  {scored.Score[1]:0.####}");
                    Console.WriteLine($"                                           virginica:   {scored.Score[2]:0.####}");
                    Console.WriteLine();
                }

                var prediction = predictionFunct.Predict(TestIrisData.Iris1);
                Console.WriteLine($"Actual: setosa.     Predicted probability: setosa:      {prediction.Score[0]:0.####}");
                PrintRemainingScores(prediction);

                prediction = predictionFunct.Predict(TestIrisData.Iris2);
                Console.WriteLine($"Actual: virginica.  Predicted probability: setosa:      {prediction.Score[0]:0.####}");
                PrintRemainingScores(prediction);

                prediction = predictionFunct.Predict(TestIrisData.Iris3);
                Console.WriteLine($"Actual: versicolor. Predicted probability: setosa:      {prediction.Score[0]:0.####}");
                PrintRemainingScores(prediction);
            }
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Loads <c>data.csv</c>, turns its categorical and text columns into a single
        /// <c>Features</c> vector, trains an SDCA learner on the result, scores the data with the
        /// trained model and prints the evaluation metrics.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            var mlContext = new MLContext(0);

            // Step 1: describe the comma-separated input file (it has a header row).
            TextLoader textLoader = mlContext.Data.TextReader(new TextLoader.Arguments()
            {
                Separator = ",",
                HasHeader = true,
                Column    = new[]
                {
                    new TextLoader.Column("InsuranceCode", DataKind.Text, 0),
                    new TextLoader.Column("CarrierName", DataKind.Text, 1),
                    new TextLoader.Column("Address1", DataKind.Text, 2),
                    new TextLoader.Column("Address2", DataKind.Text, 3),
                    new TextLoader.Column("Zip", DataKind.Text, 4),
                    new TextLoader.Column("DefaultProfileType", DataKind.Text, 5),
                    new TextLoader.Column("CarrierId", DataKind.Text, 6),
                    new TextLoader.Column("State", DataKind.Text, 7),
                    new TextLoader.Column("Label", DataKind.R8, 8),
                }
            });

            var data = textLoader.Read(@"data.csv");

            // Step 2: feature pipeline. One-hot encode the categorical columns, featurize the
            // carrier name and the concatenated address as text, then concatenate everything
            // into the 'Features' column.
            var transformPipeline = mlContext.Transforms.Categorical.OneHotEncoding("State")
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("DefaultProfileType"))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("InsuranceCode"))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("Zip"))
                .Append(mlContext.Transforms.Text.FeaturizeText(
                    "CarrierName",
                    "CarrierName",
                    a =>
                    {
                        a.KeepDiacritics   = false;
                        a.KeepPunctuations = false;
                        a.TextCase         = TextNormalizingEstimator.CaseNormalizationMode.Lower;
                        a.OutputTokens     = true;
                        a.VectorNormalizer = TextFeaturizingEstimator.TextNormKind.L2;
                    }))
                .Append(mlContext.Transforms.Concatenate("Address", "Address1", "Address2"))
                .Append(mlContext.Transforms.Text.FeaturizeText(
                    "Address",
                    "Address",
                    a =>
                    {
                        a.KeepDiacritics   = false;
                        a.KeepPunctuations = false;
                        a.TextCase         = TextNormalizingEstimator.CaseNormalizationMode.Lower;
                        a.OutputTokens     = true;
                        a.VectorNormalizer = TextFeaturizingEstimator.TextNormKind.L2;
                    }))
                .Append(mlContext.Transforms.Concatenate(
                    "Features",
                    "CarrierName",
                    "Address",
                    "Zip",
                    "State",
                    "DefaultProfileType",
                    "InsuranceCode"));

            var learner = mlContext.Regression.Trainers.StochasticDualCoordinateAscent(
                labelColumn: DefaultColumnNames.Label, featureColumn: DefaultColumnNames.Features);

            // Step 3: featurize the data and train on it.
            var transformedData = transformPipeline.Fit(data).Transform(data);
            var model = learner.Fit(transformedData);

            // Step 4: apply the trained model before evaluating. Previously the model was fitted
            // but never used, and the evaluator was handed the unscored, featurized data.
            var scoredData = model.Transform(transformedData);

            // NOTE(review): the learner comes from the Regression catalog while the metrics are
            // computed by a multiclass evaluator and printed by PrintClassificationMetrics. These
            // look mismatched; confirm the intended task and align trainer and evaluator.
            // NOTE(review): the evaluation runs on the same rows the model was trained on.
            var multiClassificationCtx = new MulticlassClassificationContext(mlContext);
            var metrics = multiClassificationCtx.Evaluate(scoredData, "Label");

            PrintClassificationMetrics("XRef", metrics);
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Trains a multiclass SDCA classifier that predicts the <c>Area</c> of an issue from its
        /// title and description, prints the micro-accuracy measured on a 20% hold-out, saves the
        /// model to disk, reloads it and prints the prediction for one hand-written issue.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            var dataPath  = Path.Combine("Assets", "Data", "corefx_issues.tsv");
            var modelPath = Path.Combine("..", "..", "..", "model.zip");

            using (var environment = new ConsoleEnvironment())
            {
                // 1. Trainer context.
                var multiclass = new MulticlassClassificationContext(environment);

                // 2. Reader for the tab-separated file with a header row.
                var loader = TextLoader.CreateReader(
                    environment,
                    c => (
                        Id: c.LoadText(0),
                        Area: c.LoadText(1),
                        Title: c.LoadText(2),
                        Description: c.LoadText(3)),
                    separator: '\t',
                    hasHeader: true);

                var issues = loader.Read(new MultiFileSource(dataPath));

                // 3. Training pipeline: Area becomes the key-typed label, title and description are
                //    featurized, concatenated and normalized, SDCA produces the score, and the
                //    predicted key is mapped back to its value.
                var estimator = loader.MakeNewEstimator()
                    .Append(r => (
                        Label: r.Area.ToKey(),
                        Title: r.Title.FeaturizeText(),
                        Description: r.Description.FeaturizeText()))
                    .Append(r => (
                        Label: r.Label,
                        Features: r.Title.ConcatWith(r.Description).Normalize()))
                    .Append(r => (
                        Label: r.Label,
                        Score: multiclass.Trainers.Sdca(r.Label, r.Features)))
                    .Append(r => (
                        Label: r.Label,
                        Score: r.Score,
                        PredictedLabel: r.Score.predictedLabel.ToValue()));

                // 4. Train on 80% of the rows.
                var (trainingSet, holdOutSet) = multiclass.TrainTestSplit(issues, testFraction: 0.2);
                var trainedModel = estimator.Fit(trainingSet);

                // 5. Evaluate on the remaining 20%.
                var scoredHoldOut = trainedModel.Transform(holdOutSet);
                var quality = multiclass.Evaluate(scoredHoldOut, r => r.Label, r => r.Score);
                Console.WriteLine("Micro-accuracy is: " + quality.AccuracyMicro);

                // 6. Save the model, then load it back from the same file.
                using (var output = new FileStream(modelPath, FileMode.Create))
                {
                    trainedModel.AsDynamic.SaveTo(environment, output);
                }

                ITransformer reloaded;
                using (var input = new FileStream(modelPath, FileMode.Open))
                {
                    reloaded = TransformerChain.LoadFrom(environment, input);
                }

                // 7. Use the reloaded model for a single prediction.
                var predictionFunction = reloaded.MakePredictionFunction <GitHubIssue, GitHubIssuePrediction>(environment);

                var sampleIssue = new GitHubIssue()
                {
                    Title       = "Title",
                    Description = "Description"
                };
                var predicted = predictionFunction.Predict(sampleIssue);

                Console.WriteLine("Predicted label is: " + predicted.Area);
                Console.ReadLine();
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Trains and evaluates a multiclass SDCA classifier on <c>Data/creditcard.csv</c>: prints
        /// the first four rows, reports the metrics of an 80/20 train/test split, then runs a
        /// cross-validation with the same pipeline and reports the metrics of every fold.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            // Number of cross-validation folds; used both for the run and for the printed headers.
            const int numFolds = 5;

            var dataPath = @"Data/creditcard.csv";

            // Create a new environment for ML.NET operations (created with a fixed seed) and
            // dispose it when the run is over.
            using (var env = new LocalEnvironment(seed: 1))
            {
                // Step one: read the data as an IDataView.

                // Create the reader: define the data columns
                // and where to find them in the text file.
                var reader = TextLoader.CreateReader(env,
                                                     ctx => (
                                                         Features: ctx.LoadFloat(1, 29), // V1...V28 + Amount
                                                         Label: ctx.LoadText(30)),       // Class
                                                     separator: ',', hasHeader: true);

                // Now read the file
                // (remember though, readers are lazy, so the actual
                //  reading will happen when the data is accessed).
                var data = reader.Read(new MultiFileSource(dataPath));

                // 'data' is a 'promise' of data. Let's actually read the first few rows.
                var someRows = data.AsDynamic
                               // Convert to an enumerable of user-defined type.
                               .AsEnumerable <TransactionData>(env, reuseRowObject: false)
                               // Take a couple values as an array.
                               .Take(4).ToArray();

                ConsoleHelpers.ConsoleWriteHeader("Show 4");
                foreach (var viewRow in someRows)
                {
                    Console.WriteLine($"Label: {viewRow.Label}");
                    Console.WriteLine($"Features: [0] {viewRow.Features[0]} [1] {viewRow.Features[1]} [2] {viewRow.Features[2]} ... [28] {viewRow.Features[28]}");
                }
                Console.WriteLine("");

                // Step two: define the learning pipeline.

                // This is a classification task, so we create a multiclass classification context:
                // it gives us the trainer we need, as well as the evaluation procedure.
                var classification = new MulticlassClassificationContext(env);

                // Start creating our processing pipeline.
                var learningPipeline = reader.MakeNewEstimator()
                                       .Append(row => (
                                                   FeaturesNormalizedByMeanVar: row.Features.NormalizeByMeanVar(), // normalize values
                                                   Label: row.Label.ToKey()))
                                       .Append(row => (
                                                   row.Label,
                                                   Predictions:  classification.Trainers.Sdca(row.Label, features: row.FeaturesNormalizedByMeanVar)));

                // Split the data 80:20 into train and test sets, train and evaluate.
                var (trainData, testData) = classification.TrainTestSplit(data, testFraction: 0.2);

                // Step three: Train the model.
                var model = learningPipeline.Fit(trainData);
                // Compute quality metrics on the test set.
                var metrics = classification.Evaluate(model.Transform(testData), row => row.Label, row => row.Predictions);

                ConsoleHelpers.ConsoleWriteHeader("Train Metrics (80/20) :");
                Console.WriteLine($"Acuracy Macro: {metrics.AccuracyMacro}");
                Console.WriteLine($"Acuracy Micro: {metrics.AccuracyMicro}");
                Console.WriteLine($"Log Loss: {metrics.LogLoss}");
                Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction}");

                // Now run the cross-validation experiment, using the same pipeline.
                var cvResults = classification.CrossValidate(data, learningPipeline, r => r.Label, numFolds: numFolds);

                // The results hold one entry per fold; report the metrics of each fold in turn.
                var cvmetrics = cvResults.Select(r => r.metrics);
                int count     = 1;

                foreach (var metric in cvmetrics)
                {
                    ConsoleHelpers.ConsoleWriteHeader($"Train Metrics Cross Validate [{count}/{numFolds}]:");
                    // Print this fold's own metrics ('metric'), not the 80/20 split's 'metrics'.
                    Console.WriteLine($"Acuracy Macro: {metric.AccuracyMacro}");
                    Console.WriteLine($"Acuracy Micro: {metric.AccuracyMicro}");
                    Console.WriteLine($"Log Loss: {metric.LogLoss}");
                    Console.WriteLine($"Log Loss Reduction: {metric.LogLossReduction}");
                    Console.WriteLine("");
                    count++;
                }
            }
        }