Beispiel #1
0
        /// <summary>
        /// Builds a regression pipeline from the enabled date features and fits it on <c>_trainData</c>.
        /// </summary>
        /// <remarks>
        /// When <c>_includeDay</c> / <c>_includeMonth</c> are set, the "Day" / "Month" columns are
        /// text-featurized into "DayString" / "MonthString". When <c>_includeWeek</c> is set, the "Week"
        /// column is used as-is. The selected columns are concatenated into "Features", the column named
        /// after <paramref name="column"/> is copied to "Label", and a FastTreeTweedie regression trainer
        /// is appended as the final stage.
        /// </remarks>
        /// <param name="column">Enum member whose name identifies the column copied to "Label".</param>
        /// <returns>The transformer chain produced by fitting the pipeline on <c>_trainData</c>.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when none of the feature flags is enabled, because there is nothing to train on.
        /// </exception>
        private ITransformer BuildAndTrainUsingParams(ColumnEnum column)
        {
            List<string> features = new List<string>();

            // Start from an empty chain so every stage can be appended uniformly; this keeps the
            // resulting chain flat regardless of which optional featurizers are enabled.
            EstimatorChain<ITransformer> pipeline = new EstimatorChain<ITransformer>();

            if (_includeDay)
            {
                pipeline = pipeline.Append(_mlContext.Transforms.Text.FeaturizeText("DayString", "Day"));
                features.Add("DayString");
            }

            if (_includeMonth)
            {
                pipeline = pipeline.Append(_mlContext.Transforms.Text.FeaturizeText("MonthString", "Month"));
                features.Add("MonthString");
            }

            if (_includeWeek)
            {
                // "Week" is concatenated directly, without text featurization.
                features.Add("Week");
            }

            if (features.Count == 0)
            {
                // Fail early with a clear message instead of handing Concatenate an empty column list.
                throw new System.InvalidOperationException(
                    "At least one of the day, month or week features must be enabled to train a model.");
            }

            // The label source column carries the same name as the enum member.
            string labelSourceColumn = System.Enum.GetName(typeof(ColumnEnum), column);

            var trainingPipeline = pipeline
                                   .Append(_mlContext.Transforms.Concatenate("Features", features.ToArray()))
                                   .Append(_mlContext.Transforms.CopyColumns("Label", labelSourceColumn))
                                   .Append(_mlContext.Regression.Trainers.FastTreeTweedie());

            return trainingPipeline.Fit(_trainData);
        }
Beispiel #2
0
        /// <summary>
        /// Trains a model of the configured <c>type</c> on <c>data</c>, saves it under <c>modelName</c>
        /// and marks this instance as taught.
        /// </summary>
        /// <remarks>
        /// The data is split so that 20% is held out as a test set. The text-featurizing branch trains an
        /// SDCA logistic-regression binary classifier and stores its test-set metrics in
        /// <c>modelMetrics</c>. The LightGbm branch trains a LightGBM regressor; it does not update
        /// <c>modelMetrics</c>.
        /// </remarks>
        /// <returns>This instance, to allow call chaining.</returns>
        /// <exception cref="System.NotSupportedException">
        /// Thrown when <c>type</c> is not one of the handled <c>MLType</c> values, so the instance is
        /// never flagged as taught without a trained model.
        /// </exception>
        public ML <T, T2> Run()
        {
            IDataView trainingDataView = mlContext.Data.LoadFromEnumerable(this.data);

            // Hold out 20% of the rows for evaluation.
            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data
                                                            .TrainTestSplit(trainingDataView, testFraction: 0.2);

            switch (this.type)
            {
            case MLType.TextFeaturizingEstimator:
            {
                // Featurize the input text column into "Features" and train a binary classifier on it.
                TextFeaturizingEstimator dataProcessPipeline = mlContext.Transforms.Text
                                                               .FeaturizeText(outputColumnName: "Features", inputColumnName: this.inputName);

                SdcaLogisticRegressionBinaryTrainer sdcaRegressionTrainer = mlContext.BinaryClassification.Trainers
                                                                            .SdcaLogisticRegression(labelColumnName: this.labelName, featureColumnName: "Features");

                var trainingPipeline = dataProcessPipeline.Append(sdcaRegressionTrainer);

                trainedModel = trainingPipeline.Fit(dataSplit.TrainSet);
                mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, this.modelName);

                // Score the held-out rows and keep the resulting metrics.
                IDataView testSetTransform = trainedModel.Transform(dataSplit.TestSet);

                this.modelMetrics = mlContext.BinaryClassification
                                    .Evaluate(data: testSetTransform,
                                              labelColumnName: this.labelName,
                                              scoreColumnName: this.scoreName);
                break;
            }

            case MLType.LightGbm:
            {
                var trainer = mlContext.Regression.Trainers
                              .LightGbm(new LightGbmRegressionTrainer.Options()
                    {
                        NumberOfIterations                = 100,
                        LearningRate                      = 0.3227682f,
                        NumberOfLeaves                    = 55,
                        MinimumExampleCountPerLeaf        = 10,
                        UseCategoricalSplit               = false,
                        HandleMissingValue                = true,
                        UseZeroAsMissingValue             = false,
                        MinimumExampleCountPerGroup       = 50,
                        MaximumCategoricalSplitPointCount = 32,
                        CategoricalSmoothing              = 20,
                        L2CategoricalRegularization       = 5,
                        Booster = new GradientBooster.Options()
                        {
                            L2Regularization = 0, L1Regularization = 0.5
                        },
                        LabelColumnName   = this.labelName,
                        FeatureColumnName = "Features"
                    });

                IEstimator<ITransformer> pipeline;
                if (this.isFeaturesIncluded)
                {
                    // The data already carries a "Features" column, so the trainer is the whole pipeline.
                    pipeline = trainer;
                }
                else
                {
                    // Build "Features" from the public instance property names found via reflection.
                    // NOTE(review): this reflects over thisType.GetType(); if thisType is itself a
                    // System.Type this yields the properties of the runtime Type object, not of the
                    // described type — confirm against how thisType is assigned.
                    string[] fields = this.thisType
                                      .GetType()
                                      .GetProperties(BindingFlags.Public | BindingFlags.Instance)
                                      .Select(p => p.Name)
                                      .ToArray();

                    pipeline = mlContext.Transforms.Concatenate("Features", fields).Append(trainer);
                }

                trainedModel = pipeline.Fit(dataSplit.TrainSet);
                mlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, this.modelName);

                // Regression evaluation is not wired up: modelMetrics is only assigned by the
                // binary-classification branch. The former 5-fold cross-validation was removed
                // because its result was discarded while retraining the pipeline five times.
                break;
            }

            default:
                // Never report the instance as taught when no model was trained.
                throw new System.NotSupportedException("Unsupported ML type: " + this.type);
            }

            this.isTaught = true;
            return(this);
        }
Beispiel #3
0
        /// <summary>
        /// Loads the sentiment data set from <c>DataPath</c>, trains an SDCA logistic-regression binary
        /// classifier on featurized text, prints timing and evaluation metrics to the console, and saves
        /// the fitted model to <c>ModelPath</c>.
        /// </summary>
        /// <param name="mlContext">ML.NET context used for loading, training, evaluation and saving.</param>
        /// <returns>The fitted model.</returns>
        private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
        {
            // Load every record (the file has a header row) and keep 20% aside for evaluation.
            var allRows = mlContext.Data.LoadFromTextFile <SentimentIssue>(DataPath, hasHeader: true);
            var split   = mlContext.Data.TrainTestSplit(allRows, testFraction: 0.2);

            // Text featurization: SentimentIssue.Text -> "Features".
            var featurizer = mlContext.Transforms.Text.FeaturizeText(
                outputColumnName: "Features",
                inputColumnName: nameof(SentimentIssue.Text));

            // Optional preview: show 2 records after the featurizer has been applied.
            // (A vector-column peek via ConsoleHelper.PeekVectorColumnDataInConsole is intentionally left out.)
            ConsoleHelper.PeekDataViewInConsole(mlContext, allRows, featurizer, 2);

            // Learner that reads "Features" and predicts "Label"; appended after the featurizer.
            var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
                labelColumnName: "Label",
                featureColumnName: "Features");
            var fullPipeline = featurizer.Append(trainer);

            // Fit on the training portion while timing the run.
            var timer = new Stopwatch();
            timer.Start();

            Console.WriteLine("=============== Training the model ===============");
            ITransformer model = fullPipeline.Fit(split.TrainSet);

            timer.Stop();
            Console.WriteLine($"***** Training time: {timer.ElapsedMilliseconds / 1000} seconds *****");

            // Score the held-out rows and report the binary-classification metrics.
            Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
            var scoredTestRows = model.Transform(split.TestSet);
            var metrics = mlContext.BinaryClassification.Evaluate(
                data: scoredTestRows,
                labelColumnName: "Label",
                scoreColumnName: "Score");

            ConsoleHelper.PrintBinaryClassificationMetrics(trainer.ToString(), metrics);

            // Persist the model together with the training-set schema.
            mlContext.Model.Save(model, split.TrainSet.Schema, ModelPath);

            Console.WriteLine("The model is saved to {0}", ModelPath);

            return model;
        }