/// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
        /// </summary>
        /// <param name="catalog">The binary classification catalog trainer object.</param>
        /// <param name="label">The label, or dependent variable. Must not be null.</param>
        /// <param name="features">The features, or independent variables. Must not be null.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Advanced arguments to the algorithm. Must not be null. Note that the label, feature and
        /// example-weight column names on this object are overwritten with the names resolved by the static pipeline.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            Scalar <bool> label, Vector <float> features, Scalar <float> weights,
            SdcaLogisticRegressionBinaryTrainer.Options options,
            Action <CalibratedModelParametersBase <LinearBinaryModelParameters, PlattCalibrator> > onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            // options is dereferenced unconditionally in the callback below, so reject null up front
            // instead of letting it surface later as a null dereference when the estimator is built.
            Contracts.CheckValue(options, nameof(options));
            Contracts.CheckValueOrNull(onFit);

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
            {
                // The reconciler supplies the actual column names; push all of them into the options.
                options.LabelColumnName   = labelName;
                options.FeatureColumnName = featuresName;
                // Forward the weight column as well, mirroring the overload that passes weightsName
                // to the trainer constructor; otherwise the weights argument would be silently ignored.
                options.ExampleWeightColumnName = weightsName;

                var trainer = new SdcaLogisticRegressionBinaryTrainer(env, options);
                if (onFit != null)
                {
                    // Wrap the trainer so the caller is handed the fitted model after each Fit.
                    return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
                }
                return trainer;
            }, label, features, weights);

            return rec.Output;
        }
        /// <summary>
        /// Trains a linear binary classifier with the SDCA trainer and log-loss, configured through a few optional
        /// hyperparameters instead of a full options object.
        /// </summary>
        /// <param name="catalog">The binary classification catalog trainer object.</param>
        /// <param name="label">The label (dependent variable) column. Must not be null.</param>
        /// <param name="features">The features (independent variables) column. Must not be null.</param>
        /// <param name="weights">Optional per-example weights; may be null.</param>
        /// <param name="l2Regularization">The L2 regularization hyperparameter. Must not be negative, if specified.</param>
        /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to a more sparse model.
        /// Must not be negative, if specified.</param>
        /// <param name="numberOfIterations">The maximum number of passes to perform over the data. Must be positive, if specified.</param>
        /// <param name="onFit">Optional callback invoked every time
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> produced from this call. It receives the trained
        /// linear model together with its calibrator. The callback is purely informational and cannot alter the result.</param>
        /// <returns>The output columns, in order: the raw binary classification score (ranging from negative to positive
        /// infinity), the calibrated probability (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            Scalar <bool> label, Vector <float> features, Scalar <float> weights = null,
            float?l2Regularization = null,
            float?l1Threshold      = null,
            int?numberOfIterations = null,
            Action <CalibratedModelParametersBase <LinearBinaryModelParameters, PlattCalibrator> > onFit = null)
        {
            // Argument validation. The "!(x < bound)" form lets unspecified (null) hyperparameters through.
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckParam(!(l2Regularization < 0), nameof(l2Regularization), "Must not be negative, if specified.");
            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
            Contracts.CheckParam(!(numberOfIterations < 1), nameof(numberOfIterations), "Must be positive if specified");
            Contracts.CheckValueOrNull(onFit);

            var reconciler = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
                {
                    var sdca = new SdcaLogisticRegressionBinaryTrainer(
                        env, labelName, featuresName, weightsName, l2Regularization, l1Threshold, numberOfIterations);

                    // Without a callback the bare trainer is all that is needed.
                    if (onFit == null)
                    {
                        return sdca;
                    }

                    // Otherwise hand the fitted model to the caller after every Fit.
                    return sdca.WithOnFitDelegate(fitted => onFit(fitted.Model));
                },
                label, features, weights);

            return reconciler.Output;
        }
Exemple #3
0
        /// <summary>
        /// Trains an SDCA logistic regression model on the "wikipedia-detox-250-line-data.tsv" test file
        /// (boolean "Label" in column 0, "SentimentText" in column 1) and returns a scorer built from it.
        /// </summary>
        /// <returns>A scorer transform wrapping the trained model, bound to the featurized (uncached) data.</returns>
        private static IDataScorerTransform _TrainSentiment()
        {
            bool normalize = true;

            // Tab-separated input with a header row.
            var loaderOptions = new TextLoader.Options()
            {
                Separators = new[] { '\t' },
                HasHeader  = true,
                Columns    = new[]
                {
                    new TextLoader.Column("Label", DataKind.Boolean, 0),
                    new TextLoader.Column("SentimentText", DataKind.String, 1)
                }
            };

            // L2-normalize the feature vector only when normalization is switched on.
            var normFunction = normalize
                ? TextFeaturizingEstimator.NormFunction.L2
                : TextFeaturizingEstimator.NormFunction.None;

            var featurizerOptions = new TextFeaturizingEstimator.Options()
            {
                KeepDiacritics         = false,
                KeepPunctuations       = false,
                CaseMode               = TextNormalizingEstimator.CaseMode.Lower,
                OutputTokensColumnName = "tokens",
                Norm                   = normFunction,
                CharFeatureExtractor   = new WordBagEstimator.Options()
                {
                    NgramLength   = 3,
                    UseAllLengths = false
                },
                WordFeatureExtractor   = new WordBagEstimator.Options()
                {
                    NgramLength   = 2,
                    UseAllLengths = true
                },
            };

            var trainFilename = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv");

            // The environment is deliberately not wrapped in a using block here.
            var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1);

            // Pipeline: load the text file, then featurize it.
            var rawData    = new TextLoader(env, loaderOptions).Load(new MultiFileSource(trainFilename));
            var featurized = TextFeaturizingEstimator.Create(env, featurizerOptions, rawData);

            // Train on a cached view of the featurized data.
            var sdcaOptions = new SdcaLogisticRegressionBinaryTrainer.Options
            {
                LabelColumnName   = "Label",
                FeatureColumnName = "Features"
            };
            var sdcaTrainer = new SdcaLogisticRegressionBinaryTrainer(env, sdcaOptions);

            var cachedView = new Microsoft.ML.Data.CacheDataView(env, featurized, prefetch: null);
            var fitted     = sdcaTrainer.Fit(cachedView);

            // The scorer is bound to the uncached data but uses the schema of the training (cached) roles.
            var trainingRoles = new RoleMappedData(cachedView, label: "Label", feature: "Features");
            var scoringRoles  = new RoleMappedData(featurized, label: "Label", feature: "Features");
            return ScoreUtils.GetScorer(fitted.Model, scoringRoles, env, trainingRoles.Schema);
        }
Exemple #4
0
        /// <summary>
        /// Loads the sentiment data set from DataPath, trains an SDCA logistic regression model on 80% of it,
        /// evaluates the model on the remaining 20%, prints the metrics and saves the model to ModelPath.
        /// </summary>
        /// <param name="mlContext">The ML.NET context used for loading, training, evaluation and persistence.</param>
        /// <returns>The trained model (featurization followed by the binary classifier).</returns>
        private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // STEP 1: Load the data and split it into training and test sets.
            var dataView = mlContext.Data.LoadFromTextFile <SentimentIssue>(DataPath, hasHeader: true);

            var split        = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
            var trainingData = split.TrainSet;
            var testData     = split.TestSet;

            // STEP 2: Featurize the raw text into a "Features" column.
            var dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText(
                outputColumnName: "Features",
                inputColumnName: nameof(SentimentIssue.Text));

            // (OPTIONAL) Peek at 2 records of the data after the featurization has been applied.
            ConsoleHelper.PeekDataViewInConsole(mlContext, dataView, dataProcessPipeline, 2);
            // (OPTIONAL) Peek at the transformed features column:
            //ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", dataView, dataProcessPipeline, 1);

            // STEP 3: Choose the training algorithm and chain it after the featurization.
            var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
                labelColumnName: "Label",
                featureColumnName: "Features");
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            // STEP 4: Fit the pipeline on the training set, timing how long it takes.
            var stopwatch = Stopwatch.StartNew();

            Console.WriteLine("=============== Training the model ===============");
            ITransformer trainedModel = trainingPipeline.Fit(trainingData);

            stopwatch.Stop();

            // Integer division: the elapsed time is reported in whole seconds.
            Console.WriteLine($"***** Training time: {stopwatch.ElapsedMilliseconds / 1000} seconds *****");

            // STEP 5: Score the test set and report the accuracy statistics.
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            var predictions = trainedModel.Transform(testData);
            var metrics     = mlContext.BinaryClassification.Evaluate(
                data: predictions,
                labelColumnName: "Label",
                scoreColumnName: "Score");

            ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // STEP 6: Persist the trained model together with the training input schema.
            mlContext.Model.Save(trainedModel, trainingData.Schema, ModelPath);

            Console.WriteLine("The model is saved to {0}", ModelPath);

            return trainedModel;
        }
        /// <summary>
        /// Loads the training file, builds a text-featurization + SDCA logistic regression pipeline,
        /// fits it on the full data set and stores the result in <c>_model</c> (the trainer is kept in <c>_trainer</c>).
        /// Progress and the elapsed fitting time in milliseconds are written to the console.
        /// </summary>
        void IAiTest.Train()
        {
            Console.WriteLine("=============== Binary Classification - TextSentiment Prediction ===============");

            // Load the training data (the file has a header row).
            IDataView trainingData = _context.Data.LoadFromTextFile <SentimentData>($"{RootFolder}/{TrainDataFile}", hasHeader: true);

            // Turn the raw text into a numeric "Features" column.
            var featurizer = _context.Transforms.Text.FeaturizeText(
                outputColumnName: "Features",
                inputColumnName: nameof(SentimentData.Text));

            // Keep the trainer in a field, then chain it after the featurizer.
            _trainer = _context.BinaryClassification.Trainers.SdcaLogisticRegression(
                labelColumnName: "Label",
                featureColumnName: "Features");
            var pipeline = featurizer.Append(_trainer);

            Console.WriteLine("=============== Create and Train the Model ===============");

            // Time only the Fit call.
            var timer = Stopwatch.StartNew();
            _model = pipeline.Fit(trainingData);
            timer.Stop();

            Console.WriteLine($" Total {timer.ElapsedMilliseconds} ms");
            Console.WriteLine("=============== End of training ===============");
            Console.WriteLine();
        }
Exemple #6
0
        /// <summary>
        /// Trains a model on <c>this.data</c> according to <c>this.type</c>, saves it under <c>this.modelName</c>
        /// and marks this instance as taught.
        /// </summary>
        /// <remarks>
        /// The data is split 80/20 into training and test sets. For <c>MLType.TextFeaturizingEstimator</c> a
        /// text-featurization + SDCA logistic regression pipeline is trained and evaluated into <c>this.modelMetrics</c>.
        /// For <c>MLType.LightGbm</c> a LightGBM regression trainer is used (optionally preceded by a column
        /// concatenation); in that branch <c>this.modelMetrics</c> is not assigned. Any other type trains nothing,
        /// but the instance is still flagged as taught.
        /// </remarks>
        /// <returns>This instance, to allow call chaining.</returns>
        public ML <T, T2> Run()
        {
            IDataView allData = mlContext.Data.LoadFromEnumerable(this.data);

            // Hold out 20% of the rows for testing.
            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(allData, testFraction: 0.2);

            switch (this.type)
            {
                case MLType.TextFeaturizingEstimator:
                {
                    // Text -> "Features" vector, then binary classification with SDCA logistic regression.
                    var featurizer = mlContext.Transforms.Text.FeaturizeText(
                        outputColumnName: "Features",
                        inputColumnName: this.inputName);

                    var sdcaTrainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
                        labelColumnName: this.labelName,
                        featureColumnName: "Features");

                    var textPipeline = featurizer.Append(sdcaTrainer);

                    trainedModel = textPipeline.Fit(dataSplit.TrainSet);
                    mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, this.modelName);

                    // Score the held-out rows and keep the resulting metrics.
                    IDataView scoredTestSet = trainedModel.Transform(dataSplit.TestSet);

                    this.modelMetrics = mlContext.BinaryClassification.Evaluate(
                        data: scoredTestSet,
                        labelColumnName: this.labelName,
                        scoreColumnName: this.scoreName);
                    break;
                }

                case MLType.LightGbm:
                {
                    // Names of the public instance properties found via reflection (evaluated lazily).
                    // NOTE(review): this reflects over thisType.GetType(); if thisType is itself a System.Type
                    // the properties listed are those of the runtime type object - confirm this is intended.
                    var propertyNames = this.thisType
                        .GetType()
                        .GetProperties(BindingFlags.Public | BindingFlags.Instance)
                        .Select(p => p.Name);

                    // When the data already carries a "Features" column no concatenation step is built.
                    var concatenation = this.isFeaturesIncluded
                        ? null
                        : mlContext.Transforms.Concatenate("Features", propertyNames.ToArray());

                    var lightGbmOptions = new LightGbmRegressionTrainer.Options()
                    {
                        NumberOfIterations                = 100,
                        LearningRate                      = 0.3227682f,
                        NumberOfLeaves                    = 55,
                        MinimumExampleCountPerLeaf        = 10,
                        UseCategoricalSplit               = false,
                        HandleMissingValue                = true,
                        UseZeroAsMissingValue             = false,
                        MinimumExampleCountPerGroup       = 50,
                        MaximumCategoricalSplitPointCount = 32,
                        CategoricalSmoothing              = 20,
                        L2CategoricalRegularization       = 5,
                        Booster = new GradientBooster.Options()
                        {
                            L2Regularization = 0,
                            L1Regularization = 0.5
                        },
                        LabelColumnName   = this.labelName,
                        FeatureColumnName = "Features"
                    };
                    var lightGbm = mlContext.Regression.Trainers.LightGbm(lightGbmOptions);

                    // Null when there is no concatenation step to prepend.
                    var fullPipeline = concatenation?.Append(lightGbm);

                    if (fullPipeline != null)
                    {
                        trainedModel = fullPipeline.Fit(dataSplit.TrainSet);
                        mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, this.modelName);

                        // NOTE(review): neither the scored test set nor the cross-validation results are used afterwards.
                        IDataView scoredTestSet = trainedModel.Transform(dataSplit.TestSet);

                        var foldResults = mlContext.Regression.CrossValidate(
                            allData,
                            fullPipeline,
                            numberOfFolds: 5,
                            labelColumnName: this.labelName);
                    }
                    else
                    {
                        trainedModel = lightGbm.Fit(dataSplit.TrainSet);
                        mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, this.modelName);

                        // NOTE(review): neither the scored test set nor the cross-validation results are used afterwards.
                        IDataView scoredTestSet = trainedModel.Transform(dataSplit.TestSet);

                        var foldResults = mlContext.Regression.CrossValidate(
                            allData,
                            lightGbm,
                            numberOfFolds: 5,
                            labelColumnName: this.labelName);
                    }

                    // Regression metrics are intentionally not stored in this.modelMetrics for this branch.
                    break;
                }
            }

            this.isTaught = true;
            return this;
        }