示例#1
0
        /// <summary>
        /// Fits a 10-tree FastTree binary classifier on the breast-cancer training file through the
        /// statically-typed pipeline, then checks that the onFit callback handed back a predictor
        /// with nine feature weights and that a few evaluation metrics lie within [0, 1].
        /// </summary>
        public void FastTreeBinaryClassification()
        {
            var mlContext = new MLContext(seed: 0);
            var trainFile = GetDataPath(TestDatasets.breastCancer.trainFilename);
            var source = new MultiFileSource(trainFile);
            var classification = new BinaryClassificationContext(mlContext);

            // Schema: a boolean label plus the float feature vector the assertions below rely on.
            var reader = TextLoader.CreateReader(
                mlContext,
                c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9)));

            // Filled in by the trainer's onFit callback once the pipeline has been fitted.
            IPredictorWithFeatureWeights<float> fittedPredictor = null;

            var trainerStep = reader.MakeNewEstimator().Append(row => (
                row.label,
                preds: classification.Trainers.FastTree(
                    row.label,
                    row.features,
                    numTrees: 10,
                    numLeaves: 5,
                    onFit: fitted => fittedPredictor = fitted)));

            var fullPipeline = reader.Append(trainerStep);

            // Nothing has been trained yet, so the callback must not have fired.
            Assert.Null(fittedPredictor);

            var trainedModel = fullPipeline.Fit(source);
            Assert.NotNull(fittedPredictor);

            // Nine input features should yield nine weights.
            var featureWeights = new VBuffer<float>();
            fittedPredictor.GetFeatureWeights(ref featureWeights);
            Assert.Equal(9, featureWeights.Length);

            var scored = trainedModel.Read(source);
            var metrics = classification.Evaluate(scored, row => row.label, row => row.preds);

            // Sanity-check a handful of the metrics.
            Assert.InRange(metrics.Accuracy, 0, 1);
            Assert.InRange(metrics.Auc, 0, 1);
            Assert.InRange(metrics.Auprc, 0, 1);
        }
示例#2
0
        /// <summary>
        /// Fits an averaged-perceptron binary classifier (hinge loss, two iterations) on the
        /// breast-cancer training file, then checks that the captured linear predictor has nine
        /// weights and that a few evaluation metrics lie within [0, 1].
        /// </summary>
        public void AveragePerceptronNoCalibration()
        {
            var environment = new ConsoleEnvironment(seed: 0);
            var trainFile = GetDataPath(TestDatasets.breastCancer.trainFilename);
            var source = new MultiFileSource(trainFile);
            var classification = new BinaryClassificationContext(environment);

            // Schema: a boolean label plus the float feature vector the assertions below rely on.
            var reader = TextLoader.CreateReader(
                environment,
                c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9)));

            // Filled in by the trainer's onFit callback once the pipeline has been fitted.
            LinearBinaryPredictor fittedPredictor = null;

            var hingeArguments = new HingeLoss.Arguments()
            {
                Margin = 1
            };
            var hingeLoss = new HingeLoss(hingeArguments);

            var trainerStep = reader.MakeNewEstimator().Append(row => (
                row.label,
                preds: classification.Trainers.AveragedPerceptron(
                    row.label,
                    row.features,
                    lossFunction: hingeLoss,
                    numIterations: 2,
                    onFit: fitted => fittedPredictor = fitted)));

            var fullPipeline = reader.Append(trainerStep);

            // Nothing has been trained yet, so the callback must not have fired.
            Assert.Null(fittedPredictor);

            var trainedModel = fullPipeline.Fit(source);
            Assert.NotNull(fittedPredictor);

            // Nine input features should yield nine weights.
            Assert.Equal(9, fittedPredictor.Weights2.Count);

            var scored = trainedModel.Read(source);
            var metrics = classification.Evaluate(scored, row => row.label, row => row.preds);

            // Sanity-check a handful of the metrics.
            Assert.InRange(metrics.Accuracy, 0, 1);
            Assert.InRange(metrics.Auc, 0, 1);
            Assert.InRange(metrics.Auprc, 0, 1);
        }
        /// <summary>
        /// Console entry point: loads the training file, fits a text-featurization plus linear
        /// classification pipeline, prints evaluation metrics for the test file, scores one sample
        /// sentence, saves the model and scores the sample again from the saved model.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // 1. Create the ML.NET environment; it is disposed when the block exits.
            using (var env = new LocalEnvironment())
            {
                // 2. Describe the file layout: tab-separated, header row, Label then Text.
                var loaderArguments = new TextLoader.Arguments()
                {
                    Separator = "tab",
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoader.Column("Label", DataKind.Bool, 0),
                        new TextLoader.Column("Text", DataKind.Text, 1)
                    }
                };
                var textLoader = new TextLoader(env, loaderArguments);

                // Load the training data.
                IDataView trainingView = textLoader.Read(new MultiFileSource(TrainDataPath));

                // 3. Build the estimator chain: text -> numeric "Features" vector -> linear trainer.
                //    (The original author noted that ML.NET v0.7 offers a shorter trainer constructor
                //    taking only env, "Features" and "Label".)
                var featurizer = new TextTransform(env, "Text", "Features");
                var trainer = new LinearClassificationTrainer(
                    env,
                    new LinearClassificationTrainer.Arguments(),
                    "Features",
                    "Label");
                var pipeline = featurizer.Append(trainer);

                // 4. Train the model.
                Console.WriteLine("=============== Create and Train the Model ===============");

                var trainedModel = pipeline.Fit(trainingView);

                Console.WriteLine("=============== End of training ===============");
                Console.WriteLine();

                // 5. Evaluate the model against the test data and print the statistics.
                IDataView testView = textLoader.Read(new MultiFileSource(TestDataPath));

                Console.WriteLine("=============== Evaluating Model's accuracy with Test data===============");
                var scoredTestData = trainedModel.Transform(testView);

                var classification = new BinaryClassificationContext(env);
                var metrics = classification.Evaluate(scoredTestData, "Label");

                Console.WriteLine();
                Console.WriteLine("Model quality metrics evaluation");
                Console.WriteLine("------------------------------------------");
                Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
                Console.WriteLine($"Auc: {metrics.Auc:P2}");
                Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
                Console.WriteLine("=============== End of Model's evaluation ===============");
                Console.WriteLine();

                // 6. Score a single sample sentence with the in-memory model.
                var predictionFunction = trainedModel.MakePredictionFunction<SentimentIssue, SentimentPrediction>(env);

                var sample = new SentimentIssue
                {
                    Text = "This is a very rude movie"
                };

                var sampleResult = predictionFunction.Predict(sample);
                var verdict = Convert.ToBoolean(sampleResult.Prediction) ? "Toxic" : "Nice";

                Console.WriteLine();
                Console.WriteLine("=============== Test of model with a sample ===============");

                Console.WriteLine($"Text: {sample.Text} | Prediction: {verdict} sentiment | Probability: {sampleResult.Probability} ");

                // Persist the model to a .ZIP file.
                SaveModelAsFile(env, trainedModel);

                // Score the same sample again, this time via the model loaded back from the file.
                PredictWithModelLoadedFromFile(sample);

                Console.WriteLine("=============== End of process, hit any key to finish ===============");
                Console.ReadKey();
            }
        }
示例#4
0
        /// <summary>
        /// Reads the traffic observations file, trains a FastTree binary classifier that predicts
        /// the "NextHourAlert" column on 80% of the rows, prints evaluation metrics computed on
        /// the remaining 20%, and returns a prediction function built from the trained model.
        /// </summary>
        /// <returns>
        /// A prediction function mapping a <see cref="TrafficObservation"/> to an
        /// <see cref="AlertPrediction"/>.
        /// </returns>
        public static PredictionFunction <TrafficObservation, AlertPrediction> ModelAndTrain()
        {
            Console.WriteLine("Starting Machine Learning Binary Classification");
            MLContext mlContext = new MLContext(seed: 1);

            // Step one: read the data as an IDataView.
            // Create the reader: define the data columns
            // and where to find them in the text file.
            var reader = new TextLoader(mlContext, new TextLoader.Arguments
            {
                Column = new[] {
                    // A boolean column depicting the 'label'.
                    new TextLoader.Column("NextHourAlert", DataKind.BL, 20),
                    // 18 Features
                    new TextLoader.Column("AvgTotalBytes", DataKind.R4, 2),
                    new TextLoader.Column("AvgTotalPackets", DataKind.R4, 3),
                    new TextLoader.Column("AvgAveragebps", DataKind.R4, 4),
                    new TextLoader.Column("AvgOutPercentUtil", DataKind.R4, 5),
                    new TextLoader.Column("AvgInPercentUtil", DataKind.R4, 6),
                    new TextLoader.Column("AvgPercentUtil", DataKind.R4, 7),
                    new TextLoader.Column("MinTotalBytes", DataKind.R4, 8),
                    new TextLoader.Column("MinTotalPackets", DataKind.R4, 9),
                    new TextLoader.Column("MinAveragebps", DataKind.R4, 10),
                    new TextLoader.Column("MinOutPercentUtil", DataKind.R4, 11),
                    new TextLoader.Column("MinInPercentUtil", DataKind.R4, 12),
                    new TextLoader.Column("MinPercentUtil", DataKind.R4, 13),
                    new TextLoader.Column("MaxTotalBytes", DataKind.R4, 14),
                    new TextLoader.Column("MaxTotalPackets", DataKind.R4, 15),
                    new TextLoader.Column("MaxAveragebps", DataKind.R4, 16),
                    new TextLoader.Column("MaxOutPercentUtil", DataKind.R4, 17),
                    new TextLoader.Column("MaxInPercentUtil", DataKind.R4, 18),
                    new TextLoader.Column("MaxPercentUtil", DataKind.R4, 19)
                },
                // First line of the file is a header, not a data row.
                HasHeader = true,
                Separator = ","
            });

            // We know that this is a Binary Classification task,
            // so we create a Binary Classification context:
            // it will give us the algorithms we need,
            // as well as the evaluation procedure.
            var classification = new BinaryClassificationContext(mlContext);

            IDataView data = reader.Read(new MultiFileSource(_datapath));

            // Hold back 20% of the rows for evaluation.
            var (trainData, testData) = classification.TrainTestSplit(data, testFraction: 0.2);

            // Create a flexible pipeline (composed of a chain of estimators) for building/training the model.
            // NOTE(review): the Normalize step writes "FeaturesNormalizedByMeanVar", but the trainer
            // below reads the un-normalized "Features" column, so the normalized column appears to be
            // unused. Left as-is to keep the trained model unchanged - confirm the intent.
            var pipeline = mlContext.Transforms.Concatenate("Features", new[] { "AvgTotalBytes", "AvgTotalPackets", "AvgAveragebps", "AvgOutPercentUtil", "AvgInPercentUtil", "AvgPercentUtil",
                                                                                "MinTotalBytes", "MinTotalPackets", "MinAveragebps", "MinOutPercentUtil", "MinInPercentUtil", "MinPercentUtil",
                                                                                "MaxTotalBytes", "MaxTotalPackets", "MaxAveragebps", "MaxOutPercentUtil", "MaxInPercentUtil", "MaxPercentUtil" })
                           .Append(mlContext.Transforms.Normalize(inputName: "Features", outputName: "FeaturesNormalizedByMeanVar", mode: NormalizerMode.MeanVariance))
                           .Append(mlContext.BinaryClassification.Trainers.FastTree(label: "NextHourAlert",
                                                                                    features: "Features",
                                                                                    numLeaves: 20,
                                                                                    numTrees: 100,
                                                                                    minDatapointsInLeafs: 10,
                                                                                    learningRate: 0.2));
            var model = pipeline.Fit(trainData);

            var metrics = classification.Evaluate(model.Transform(testData), "NextHourAlert");

            Console.WriteLine($"Accuracy: {metrics.Accuracy}");
            Console.WriteLine($"Area under ROC curve: {metrics.Auc}");
            Console.WriteLine($"Area under the precision/recall curve: {metrics.Auprc}");
            Console.WriteLine($"Entropy: {metrics.Entropy}");
            Console.WriteLine($"F1 Score: {metrics.F1Score}");
            Console.WriteLine($"Log loss: {metrics.LogLoss}");
            Console.WriteLine($"Log loss reduction: {metrics.LogLossReduction}");
            Console.WriteLine($"Negative precision: {metrics.NegativePrecision}");
            Console.WriteLine($"Positive precision: {metrics.PositivePrecision}");
            Console.WriteLine($"Positive recall: {metrics.PositiveRecall}");

            var predictor = model.MakePredictionFunction <TrafficObservation, AlertPrediction>(mlContext);

            return predictor;
        }
示例#5
0
        /// <summary>
        /// Console entry point: loads the Titanic CSV files, one-hot encodes the text columns,
        /// trains a FastTree binary classifier on the "Label" column, predicts the outcome for one
        /// hard-coded passenger and prints evaluation metrics for the second data file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Create the MLContext shared across the model creation workflow objects.
            // A fixed random seed gives repeatable/deterministic results across multiple trainings.
            var mlContext = new MLContext(seed: 0);

            // STEP 1: Common data loading configuration
            // PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
            var textLoader = mlContext.Data.TextReader(new TextLoader.Arguments()
            {
                Separator = ",",
                HasHeader = true,
                Column    = new[]
                {
                    new TextLoader.Column("PassengerId", DataKind.Num, 0),
                    new TextLoader.Column("Label", DataKind.Bool, 1),
                    new TextLoader.Column("Pclass", DataKind.Num, 2),
                    new TextLoader.Column("Name", DataKind.Text, 3),
                    new TextLoader.Column("Sex", DataKind.Text, 4),
                    new TextLoader.Column("Age", DataKind.Num, 5),
                    new TextLoader.Column("SibSp", DataKind.Num, 6),
                    new TextLoader.Column("Parch", DataKind.Num, 7),
                    new TextLoader.Column("Ticket", DataKind.Text, 8),
                    new TextLoader.Column("Fare", DataKind.Text, 9),
                    new TextLoader.Column("Cabin", DataKind.Text, 10),
                    new TextLoader.Column("Embarked", DataKind.Text, 11)
                }
            });

            // Load the training data and the data used for evaluation.
            string trainingDataPath = @".\data\titanic.training.csv";
            string dataPath         = @".\data\titanic.csv";
            var    trainingData     = textLoader.Read(trainingDataPath);
            var    testData         = textLoader.Read(dataPath);

            // Every loaded column except the label is used as a feature.
            // NOTE(review): this includes "PassengerId", which looks like a row identifier rather
            // than a predictive signal - confirm whether it should be excluded.
            var features = trainingData.Schema.GetColumns()
                           .Where(c => c.column.Name != "Label")
                           .Select(c => c.column.Name)
                           .ToArray();

            // Build the featurization and training pipeline: one-hot encode each text column
            // in place, concatenate all feature columns, then train FastTree.
            var pipeline =
                mlContext.Transforms.Categorical.OneHotEncoding("Sex")
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("Name"))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("Ticket"))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("Fare"))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("Cabin"))
                .Append(mlContext.Transforms.Categorical.OneHotEncoding("Embarked"))
                .Append(mlContext.Transforms.Concatenate("Features", features))
                .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumn: "Label", featureColumn: "Features"));

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create a PredictionFunction from our model
            var predictor  = model.MakePredictionFunction <TitanicData, TitanicPrediction>(mlContext);
            var prediction = predictor.Predict(new TitanicData()
            {
                Pclass   = 3f,
                Name     = "Braund, Mr. Owen Harris",
                Sex      = "male",
                Age      = 31,
                SibSp    = 0,
                Parch    = 0,
                Ticket   = "335097",
                Fare     = "7.75",
                Cabin    = "",
                Embarked = "Q"
            });

            Console.WriteLine($"Did this passenger survive? {(prediction.Survived ? "Yes" : "No")}");

            // Evaluate the trained model on the second data file.
            var context = new BinaryClassificationContext(mlContext);
            var metrics = context.Evaluate(model.Transform(testData), "Label");

            Console.WriteLine($"Accuracy: {metrics.Accuracy}");
            Console.WriteLine($"Auc: {metrics.Auc:P2}");
            Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
        }
示例#6
0
        /// <summary>
        /// Console entry point: reads the train/test TSV files with the statically-typed loader,
        /// featurizes the comment text, trains a FastTree binary classifier, evaluates it on the
        /// test file, scores one placeholder comment and prints the result and the metrics.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            var trainFile = Path.Combine("Assets", "Data", "train.tsv");
            var testFile  = Path.Combine("Assets", "Data", "test.tsv");

            using (var env = new ConsoleEnvironment())
            {
                // 1. Trainer context: supplies the trainers and the evaluation routine.
                var classification = new BinaryClassificationContext(env);

                // 2. Reader with the initial schema (nothing is read until data is requested).
                var loader = TextLoader.CreateReader(
                    env,
                    ctx => (
                        Comment: ctx.LoadText(1),
                        Attack: ctx.LoadBool(2)));

                var trainingSet = loader.Read(new MultiFileSource(trainFile));
                var testSet     = loader.Read(new MultiFileSource(testFile));

                // 3. Training pipeline: featurize text, train FastTree, then expose the outputs.
                var featurized = loader.MakeNewEstimator()
                    .Append(r => (
                        Label: r.Attack,
                        Text: r.Comment.FeaturizeText()));

                var trained = featurized
                    .Append(r => (
                        Label: r.Label,
                        Attack: classification.Trainers.FastTree(r.Label, r.Text, numLeaves: 50, numTrees: 50, minDatapointsInLeafs: 20)));

                var pipeline = trained
                    .Append(r => (
                        Label: r.Label,
                        Prediction: r.Attack,
                        PredictedLabel: r.Attack.predictedLabel));

                // 4. Train the model.
                var fittedModel = pipeline.Fit(trainingSet);

                // 5. Evaluate the model on the test set.
                var scoredTestSet = fittedModel.Transform(testSet);
                var metrics       = classification.Evaluate(scoredTestSet, r => r.Label, r => r.Prediction);

                // Score one sample through the dynamically-typed view of the model.
                var scorer = fittedModel.AsDynamic.MakePredictionFunction<SentimentData, SentimentPrediction>(env);
                var sample = new SentimentData()
                {
                    Comment = "Insert comment"
                };
                var sampleResult = scorer.Predict(sample);

                Console.WriteLine($"Predicted sentiment is: {sampleResult.PredictedLabel}");
                Console.WriteLine();
                Console.WriteLine("PredictionModel quality metrics evaluation");
                Console.WriteLine("------------------------------------------");
                Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
                Console.WriteLine($"AUC: {metrics.Auc:P2}");
                Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:P2}");
                Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:P2}");
                Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:P2}");
                Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:P2}");
                Console.WriteLine($"F1Score: {metrics.F1Score:P2}");

                // Keep the console window open until the user presses Enter.
                Console.ReadLine();
            }
        }
        /// <summary>
        /// Queue-triggered function that trains a customer-churn binary classifier (SDCA) from the
        /// training CSV blob, evaluates it against the validation CSV blob, logs accuracy and F1,
        /// and writes the trained model to the output blob stream.
        /// </summary>
        /// <param name="myQueueItem">Queue message that triggered the run; not read by this method.</param>
        /// <param name="modelStream">Writable stream the trained model is saved to.</param>
        /// <param name="trainingData">Readable stream with the training CSV.</param>
        /// <param name="validationData">Readable stream with the validation CSV.</param>
        /// <param name="log">Logger for progress and result messages.</param>
        public static void Run(
            [QueueTrigger("training-jobs", Connection = "AzureWebJobsStorage")] TrainingJobTriggerData myQueueItem,
            [Blob("models/customer-churn.zip", FileAccess.Write, Connection = "AzureWebJobsStorage")] Stream modelStream,
            [Blob("data/train.csv", FileAccess.Read, Connection = "AzureWebJobsStorage")] Stream trainingData,
            [Blob("data/validate.csv", FileAccess.Read, Connection = "AzureWebJobsStorage")] Stream validationData,
            ILogger log)
        {
            // typeof(...) never evaluates to null, so this branch cannot actually run.
            // NOTE(review): presumably the type references exist only to force the ML.NET
            // assemblies to be referenced/loaded by the host - confirm before removing.
            if (typeof(Microsoft.ML.Runtime.Data.LoadTransform) == null ||
                typeof(Microsoft.ML.Runtime.Learners.LinearClassificationTrainer) == null ||
                typeof(Microsoft.ML.Runtime.Internal.CpuMath.SseUtils) == null ||
                typeof(Microsoft.ML.Runtime.FastTree.FastTree) == null)
            {
                log.LogError("Error loading ML.NET");
            }

            log.LogInformation("Training customer churn model.");

            // Dispose the environment when the run finishes instead of leaking it per invocation.
            using (var env = new LocalEnvironment())
            {
                var classificationContext = new BinaryClassificationContext(env);

                // Column positions are looked up by name in FieldNames; the file has a header row
                // and is comma-separated.
                var loader = TextLoader.CreateReader(env, ctx => (
                                                         SeniorCitizen: ctx.LoadText(FieldNames.IndexOf("SeniorCitizen")),
                                                         Partner: ctx.LoadText(FieldNames.IndexOf("Partner")),
                                                         Dependents: ctx.LoadText(FieldNames.IndexOf("Dependents")),
                                                         InternetService: ctx.LoadText(FieldNames.IndexOf("InternetService")),
                                                         OnlineSecurity: ctx.LoadText(FieldNames.IndexOf("OnlineSecurity")),
                                                         OnlineBackup: ctx.LoadText(FieldNames.IndexOf("OnlineBackup")),
                                                         DeviceProtection: ctx.LoadText(FieldNames.IndexOf("DeviceProtection")),
                                                         TechSupport: ctx.LoadText(FieldNames.IndexOf("TechSupport")),
                                                         Contract: ctx.LoadText(FieldNames.IndexOf("Contract")),
                                                         PaperlessBilling: ctx.LoadText(FieldNames.IndexOf("PaperlessBilling")),
                                                         PaymentMethod: ctx.LoadText(FieldNames.IndexOf("PaymentMethod")),
                                                         Tenure: ctx.LoadFloat(FieldNames.IndexOf("Tenure")),
                                                         MonthlyCharges: ctx.LoadFloat(FieldNames.IndexOf("MonthlyCharges")),
                                                         Churn: ctx.LoadBool(FieldNames.IndexOf("Churn"))
                                                         ), new StreamDataSource(trainingData), hasHeader: true, separator: ',');

                // One-hot encode the text columns, concatenate everything into a single feature
                // vector, then train SDCA on it.
                // NOTE(review): PaymentMethod is one-hot encoded but is not passed to ConcatWith
                // below, so it does not reach the trainer. Left unchanged to keep the trained
                // model's feature set as-is - confirm whether the omission is intentional.
                var estimator = loader.MakeNewEstimator()
                                .Append(row => (
                                            SeniorCitizen: row.SeniorCitizen.OneHotEncoding(),
                                            Partner: row.Partner.OneHotEncoding(),
                                            Dependents: row.Dependents.OneHotEncoding(),
                                            InternetService: row.InternetService.OneHotEncoding(),
                                            OnlineSecurity: row.OnlineSecurity.OneHotEncoding(),
                                            OnlineBackup: row.OnlineBackup.OneHotEncoding(),
                                            DeviceProtection: row.DeviceProtection.OneHotEncoding(),
                                            TechSupport: row.TechSupport.OneHotEncoding(),
                                            Contract: row.Contract.OneHotEncoding(),
                                            PaperlessBilling: row.PaperlessBilling.OneHotEncoding(),
                                            PaymentMethod: row.PaymentMethod.OneHotEncoding(),
                                            Tenure: row.Tenure,
                                            MonthlyCharges: row.MonthlyCharges,
                                            Churn: row.Churn
                                            ))
                                .Append(row => (
                                            Churn: row.Churn,
                                            Features: row.SeniorCitizen.ConcatWith(
                                                row.Partner, row.Dependents, row.InternetService,
                                                row.OnlineSecurity, row.OnlineBackup, row.DeviceProtection,
                                                row.TechSupport, row.Contract, row.PaperlessBilling,
                                                row.Tenure, row.MonthlyCharges
                                                )))
                                .Append(row =>
                {
                    var prediction = classificationContext.Trainers.Sdca(row.Churn, row.Features);
                    return(
                        PredictedLabel: prediction.predictedLabel,
                        Score: prediction.score,
                        Label: row.Churn,
                        Probability: prediction.probability);
                });

                var trainingSet   = loader.Read(new StreamDataSource(trainingData));
                var validationSet = loader.Read(new StreamDataSource(validationData));

                // Row counts are computed only for the log messages below.
                var trainingSampleCount = trainingSet
                                          .AsDynamic
                                          .AsEnumerable <CustomerChurnPredictionData>(env, false)
                                          .Count();

                var validationSampleCount = validationSet
                                            .AsDynamic
                                            .AsEnumerable <CustomerChurnPredictionData>(env, false)
                                            .Count();

                log.LogInformation("Training on {Rows} samples", trainingSampleCount);

                var model = estimator.Fit(trainingSet);

                // Report the size of the validation set (the original logged the training count here).
                log.LogInformation("Validating on {Rows} samples", validationSampleCount);

                var predictions = model.Transform(validationSet).AsDynamic;
                var score       = classificationContext.Evaluate(predictions, "Label");

                log.LogInformation("Model accuracy: {Accuracy}, F1 score: {F1Score}", score.Accuracy, score.F1Score);

                model.AsDynamic.SaveTo(env, modelStream);
            }
        }