LightGbm(
            this SweepableBinaryClassificationTrainers trainer,
            string labelColumnName   = "Label",
            string featureColumnName = "Features",
            SweepableOption <LightGbmBinaryTrainer.Options> optionBuilder = null,
            LightGbmBinaryTrainer.Options defaultOption = null)
        {
            var context = trainer.Context;

            if (optionBuilder == null)
            {
                optionBuilder = LightGbmBinaryTrainerSweepableOptions.Default;
            }

            optionBuilder.SetDefaultOption(defaultOption);
            return(context.AutoML().CreateSweepableEstimator(
                       (context, option) =>
            {
                option.LabelColumnName = labelColumnName;
                option.FeatureColumnName = featureColumnName;

                return context.BinaryClassification.Trainers.LightGbm(option);
            },
                       optionBuilder,
                       new string[] { labelColumnName, featureColumnName },
                       new string[] { PredictedLabel },
                       nameof(LightGbmBinaryTrainer)));
        }
Example #2
        public static IEstimator <ITransformer> BuildTrainingPipeline(MLContext mlContext)
        {
            // Data process configuration with pipeline data transformations
            var dataProcessPipeline = mlContext.Transforms.Concatenate("Features", new[] { "Time", "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10", "V11", "V12", "V13", "V14", "V15", "V16", "V17", "V18", "V19", "V20", "V21", "V22", "V23", "V24", "V25", "V26", "V27", "V28", "Amount" });

            var boosterOptions = new GradientBooster.Options()
            {
                L2Regularization = 1,
                L1Regularization = 0
            };

            var trainerOptions = new LightGbmBinaryTrainer.Options()
            {
                NumberOfIterations                = 150,
                LearningRate                      = 0.2001066f,
                NumberOfLeaves                    = 7,
                MinimumExampleCountPerLeaf        = 10,
                UseCategoricalSplit               = true,
                HandleMissingValue                = false,
                MinimumExampleCountPerGroup       = 100,
                MaximumCategoricalSplitPointCount = 16,
                CategoricalSmoothing              = 10,
                L2CategoricalRegularization       = 5,
                Booster           = boosterOptions,
                LabelColumnName   = "Class",
                FeatureColumnName = "Features"
            };

            // Set the training algorithm
            var trainer          = mlContext.BinaryClassification.Trainers.LightGbm(trainerOptions);
            var trainingPipeline = dataProcessPipeline.Append(trainer);

            return(trainingPipeline);
        }
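The method above only composes the pipeline. A minimal usage sketch, assuming a hypothetical TransactionData input class and a local creditcard.csv file, would load the data, build the pipeline, fit it, and save the model:

        // Usage sketch for BuildTrainingPipeline. TransactionData and the file
        // name are hypothetical; substitute the real input class and data path.
        var mlContext = new MLContext(seed: 1);

        // TransactionData would expose Time, V1..V28, Amount and the boolean "Class" label.
        IDataView data = mlContext.Data.LoadFromTextFile<TransactionData>(
            "creditcard.csv", hasHeader: true, separatorChar: ',');

        var trainingPipeline = BuildTrainingPipeline(mlContext);
        ITransformer model = trainingPipeline.Fit(data);

        // Persist the trained model together with the input schema.
        mlContext.Model.Save(model, data.Schema, "model.zip");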
Example #3
        public void Train()
        {
            var trainingData = GetData(_dataPath);
            var testData     = GetData(_validatePath);

            double? bestScore = null;

            while (true)
            {
                var options = new LightGbmBinaryTrainer.Options
                {
                    ExampleWeightColumnName = nameof(Appointment.Weight),
                    EvaluationMetric        = LightGbmBinaryTrainer.Options.EvaluateMetricType.Logloss,
                    Sigmoid = 1,
                    CategoricalSmoothing              = 10,
                    L2CategoricalRegularization       = 10,
                    MaximumCategoricalSplitPointCount = 8,
                    MinimumExampleCountPerLeaf        = 1,
                    WeightOfPositiveExamples          = 2,
                    MaximumBinCountPerFeature         = 200,
                    Seed = 459933621,
                    HandleMissingValue          = true,
                    UseZeroAsMissingValue       = false,
                    MinimumExampleCountPerGroup = 100,
                    NumberOfIterations          = 200,
                    LearningRate   = 0.01,
                    NumberOfLeaves = 110,
                    Booster        = new GradientBooster.Options
                    {
                        L1Regularization   = 0,
                        L2Regularization   = 0,
                        MaximumTreeDepth   = 0,
                        SubsampleFrequency = 0,
                        SubsampleFraction  = 1,
                        FeatureFraction    = 1,
                        MinimumChildWeight = 0.1,
                        MinimumSplitGain   = 0,
                    }
                };

                var trainer = _context.BinaryClassification.Trainers.LightGbm(options);

                var pipeline = CreatePipeline(trainer);

                var model = pipeline.Fit(trainingData);

                var f1 = Evaluate("Test", model, testData);

                if (!bestScore.HasValue || f1 > bestScore.Value)
                {
                    bestScore = f1;
                    SaveModel(trainingData.Schema, model);
                    Console.WriteLine($"Saved new model at {bestScore.Value:P2}");
                }
                else if (bestScore.HasValue)
                {
                    Console.WriteLine($"Best model is still {bestScore.Value:P2}");
                }
            }
        }
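The Train loop relies on helpers that are not shown (GetData, CreatePipeline, Evaluate, SaveModel). As a rough sketch, an Evaluate helper consistent with the call above might look like this; the printed summary is an assumption:

        // Hypothetical helper matching the call in Train(): scores the data,
        // prints a summary, and returns the F1 score used for model selection.
        private double Evaluate(string name, ITransformer model, IDataView data)
        {
            IDataView predictions = model.Transform(data);
            var metrics = _context.BinaryClassification.Evaluate(predictions);
            Console.WriteLine($"{name}: F1 = {metrics.F1Score:P2}, AUC = {metrics.AreaUnderRocCurve:P2}");
            return metrics.F1Score;
        }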
Example #4
        /// <summary>
        /// Create <see cref="LightGbmBinaryTrainer"/> with advanced options, which predicts a target using a gradient boosting decision tree binary classification.
        /// </summary>
        /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
        /// <param name="options">Trainer options.</param>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[LightGbmBinaryClassification](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
                                                     LightGbmBinaryTrainer.Options options)
        {
            Contracts.CheckValue(catalog, nameof(catalog));
            var env = CatalogUtils.GetEnvironment(catalog);

            return(new LightGbmBinaryTrainer(env, options));
        }
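A short usage sketch of this catalog extension, with a few representative option values (the values themselves are arbitrary):

        // Sketch: obtain the trainer through the catalog extension with a few options set.
        var mlContext = new MLContext();
        var trainer = mlContext.BinaryClassification.Trainers.LightGbm(
            new LightGbmBinaryTrainer.Options
            {
                LabelColumnName = "Label",
                NumberOfLeaves = 31,
                NumberOfIterations = 100,
                LearningRate = 0.1
            });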
Example #5
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, LgbmOption param)
        {
            var option = new LightGbmBinaryTrainer.Options()
            {
                NumberOfLeaves             = param.NumberOfLeaves,
                NumberOfIterations         = param.NumberOfTrees,
                MinimumExampleCountPerLeaf = param.MinimumExampleCountPerLeaf,
                LearningRate            = param.LearningRate,
                NumberOfThreads         = AutoMlUtils.GetNumberOfThreadFromEnvrionment(),
                LabelColumnName         = param.LabelColumnName,
                FeatureColumnName       = param.FeatureColumnName,
                ExampleWeightColumnName = param.ExampleWeightColumnName,
                Booster = new GradientBooster.Options()
                {
                    SubsampleFraction = param.SubsampleFraction,
                    FeatureFraction   = param.FeatureFraction,
                    L1Regularization  = param.L1Regularization,
                    L2Regularization  = param.L2Regularization,
                },
                MaximumBinCountPerFeature = param.MaximumBinCountPerFeature,
            };

            return(context.BinaryClassification.Trainers.LightGbm(option));
        }
Example #6
        // This example requires installation of additional nuget package
        // <a href="https://www.nuget.org/packages/Microsoft.ML.LightGbm/">Microsoft.ML.LightGbm</a>.
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            // Setting the seed to a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // Define trainer options.
            var options = new LightGbmBinaryTrainer.Options
            {
                Booster = new GossBooster.Options
                {
                    TopRate   = 0.3,
                    OtherRate = 0.2
                }
            };

            // Define the trainer.
            var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(options);

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it different from training data.
            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable <Prediction>(transformedTestData, reuseRowObject: false).ToList();

            // Print 5 predictions.
            foreach (var p in predictions.Take(5))
            {
                Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
            }

            // Expected output:
            //   Label: True, Prediction: True
            //   Label: False, Prediction: True
            //   Label: True, Prediction: True
            //   Label: True, Prediction: True
            //   Label: False, Prediction: False

            // Evaluate the overall metrics.
            var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);

            PrintMetrics(metrics);

            // Expected output:
            //   Accuracy: 0.71
            //   AUC: 0.76
            //   F1 Score: 0.70
            //   Negative Precision: 0.73
            //   Negative Recall: 0.71
            //   Positive Precision: 0.69
            //   Positive Recall: 0.71
            //
            //   TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
            //   Confusion table
            //             ||======================
            //   PREDICTED || positive | negative | Recall
            //   TRUTH     ||======================
            //    positive ||      168 |       70 | 0.7059
            //    negative ||       88 |      174 | 0.6641
            //             ||======================
            //   Precision ||   0.6563 |   0.7131 |
        }
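The sample references GenerateRandomDataPoints, Prediction, and PrintMetrics, which are defined elsewhere in the ML.NET samples. The sketch below, modeled on the standard sample helpers, shows compatible definitions; the 50-feature layout and exact noise scheme are assumptions:

        // Sketch of the helper types the sample assumes, modeled on the ML.NET
        // documentation samples. VectorType comes from Microsoft.ML.Data.
        private class DataPoint
        {
            public bool Label { get; set; }

            [VectorType(50)]
            public float[] Features { get; set; }
        }

        private class Prediction
        {
            // Original label, copied through by the pipeline.
            public bool Label { get; set; }

            // Predicted label from the trained model.
            public bool PredictedLabel { get; set; }
        }

        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
        {
            var random = new Random(seed);
            for (int i = 0; i < count; i++)
            {
                bool label = random.NextDouble() > 0.5;
                yield return new DataPoint
                {
                    Label = label,
                    // Features are weakly correlated with the label so there is signal to learn.
                    Features = Enumerable.Repeat(label, 50)
                        .Select(x => x ? (float)random.NextDouble() : (float)random.NextDouble() + 0.1f)
                        .ToArray()
                };
            }
        }

        private static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics)
        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
            Console.WriteLine(metrics.ConfusionMatrix.GetFormattedConfusionTable());
        }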
Example #7
        public static void Sweeper(MLContext mlContext, IDataView file, IEstimator <ITransformer> pipeline, string modelname, IDictionary <string, IEstimator <ITransformer> > estimator, Stopwatch stw)
        {
            // These options apply only to the LightGBM trainer
            string saveDirectory = $"C:\\Users\\ludwi\\source\\repos\\JugendForscht\\LoggingData.json";
            IDictionary <string, dynamic> Result = new Dictionary <string, dynamic>();
            int trainingStage = 0;

            double[] LearningRate       = new double[] { 1, 0.5, 0.25, 0.1, 0.001, 0.0001, 0.00001, 0.000001 };
            int[]    NumberOfIterations = new int[] { 10, 20, 50, 75, 100, 150, 200, 300 };
            double[] Sigmoid            = new double[] { 1.1, 1, 0.75, 0.5, 0.25, 0.1, 0.01, 0.001, 0.0001 };
            bool[]   UnbalancedSets     = new bool[] { true, false };

            var Model = estimator[modelname];
            var data  = mlContext.Data.TrainTestSplit(file, testFraction: 0.2, seed: 42);

            foreach (double learningrate in LearningRate)
            {
                foreach (int numberofiterations in NumberOfIterations)
                {
                    foreach (double sigmoid in Sigmoid)
                    {
                        foreach (bool unbalancedsets in UnbalancedSets)
                        {
                            var options = new LightGbmBinaryTrainer.Options
                            {
                                LearningRate       = learningrate,
                                NumberOfIterations = numberofiterations,
                                Sigmoid            = sigmoid,
                                UnbalancedSets     = unbalancedsets,
                                Verbose            = true,
                                Silent             = false,
                            };
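                            // Note: these options are not applied here; Model comes pre-built
                            // from the estimator dictionary, so for the sweep to take effect the
                            // pipeline would need to append
                            // mlContext.BinaryClassification.Trainers.LightGbm(options) instead.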
                            stw.Restart();
                            var model = Model.Fit(data.TrainSet);
                            stw.Stop();
                            Console.WriteLine($"Finished training {modelname} with parameters:  {stw.ElapsedMilliseconds / 1000}s");
                            Console.WriteLine($"Amounts of runs:        {trainingStage}");
                            Console.WriteLine($"  LearningRate:             {learningrate}");
                            Console.WriteLine($"  NumberOfIterations:       {numberofiterations}");
                            Console.WriteLine($"  Sigmoid:                  {sigmoid}");
                            Console.WriteLine($"  UnbalancedSets:           {unbalancedsets}");
                            Console.WriteLine();
                            #region Dictionary for logging data
                            IDictionary <string, dynamic> param = new Dictionary <string, dynamic>()
                            {
                                { "LearningRate", learningrate },
                                { "NumberOfIterations", numberofiterations },
                                { "Sigmoid", sigmoid },
                                { "UnbalancedSets", unbalancedsets },
                            };

                            IDictionary <string, dynamic> leistung = Evaluate(mlContext: mlContext, model: model, data.TestSet);

                            IDictionary <string, Dictionary <string, dynamic> > leistung_param = new Dictionary <string, Dictionary <string, dynamic> >()
                            {
                                { "Leistung", (Dictionary <string, dynamic>)leistung },
                                { "Parameter", (Dictionary <string, dynamic>)param }
                            };

                            IDictionary <string, Dictionary <string, Dictionary <string, dynamic> > > output = new Dictionary <string, Dictionary <string, Dictionary <string, dynamic> > >()
                            {
                                { modelname, (Dictionary <string, Dictionary <string, dynamic> >)leistung_param }
                            };
                            #endregion
                            string json = JsonSerializer.Serialize(output);
                            File.AppendAllText(path: saveDirectory, contents: json);
                            Console.WriteLine(json);
                            trainingStage++;
                        }
                    }
                }
            }
        }
Example #8
        public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable <SweepableParam> sweepParams,
                                                ColumnInformation columnInfo, IDataView validationSet)
        {
            LightGbmBinaryTrainer.Options options = TrainerExtensionUtil.CreateLightGbmOptions <LightGbmBinaryTrainer.Options, float, BinaryPredictionTransformer <CalibratedModelParametersBase <LightGbmBinaryModelParameters, PlattCalibrator> >, CalibratedModelParametersBase <LightGbmBinaryModelParameters, PlattCalibrator> >(sweepParams, columnInfo);
            return(mlContext.BinaryClassification.Trainers.LightGbm(options));
        }
Example #9
        public void Train()
        {
            var trainingData = GetData(_dataPath);
            var testData     = GetData(_validatePath);

            double? bestScore = null;

            while (true)
            {
                var options = new LightGbmBinaryTrainer.Options
                {
                    //ExampleWeightColumnName = nameof(Appointment.Weight),
                    EvaluationMetric = LightGbmBinaryTrainer.Options.EvaluateMetricType.Logloss,
                    //UnbalancedSets = true,
                    WeightOfPositiveExamples = 1.6,         //new Random().Next(20, 40) / 10,
                    //Sigmoid = 1,
                    CategoricalSmoothing              = 1,  //Random(0, 1, 10, 20),
                    L2CategoricalRegularization       = 1,  //Random(0.1, 0.5, 1, 5, 10),
                    MaximumCategoricalSplitPointCount = 16, //Random(8, 16, 32, 64),
                    MinimumExampleCountPerLeaf        = 20, //Random(1, 10, 20, 50),
                    MaximumBinCountPerFeature         = 200,
                    HandleMissingValue          = true,
                    UseZeroAsMissingValue       = false,
                    MinimumExampleCountPerGroup = 100, //Random(10, 50, 100, 200),
                    NumberOfIterations          = 100,
                    LearningRate   = 0.4f,             //Random(0.025f, 0.08f, 0.2f, 0.4f),
                    NumberOfLeaves = 128,              //Random(2, 16, 64, 128),
                    Booster        = new GradientBooster.Options
                    {
                        L1Regularization   = 1, //Random(0, 0.5, 1),
                        L2Regularization   = 1, //Random(0, 0.5, 1),
                        MaximumTreeDepth   = 0,
                        SubsampleFrequency = 0,
                        SubsampleFraction  = 1,
                        FeatureFraction    = 1,
                        MinimumChildWeight = 0.1,
                        MinimumSplitGain   = 0,
                    }
                };

                var trainer = _context.BinaryClassification.Trainers.LightGbm(options);

                var pipeline = CreatePipeline(trainer);

                var model = pipeline.Fit(trainingData);

                var beta    = 2;
                var metrics = Evaluate("Test", model, testData, beta);
                //var score = metrics.FBeta(beta);
                var score = (metrics.PositiveRecall + metrics.NegativeRecall) / 2;

                if (!bestScore.HasValue || score > bestScore.Value)
                {
                    bestScore = score;
                    SaveModel(trainingData.Schema, model);
                    Console.WriteLine($"Saved new model at {bestScore.Value:P2}");
                }
                else if (bestScore.HasValue)
                {
                    Console.WriteLine($"Best model is still {bestScore.Value:P2}");
                }
            }
        }
Example #10
        /// <summary>
        /// Predict a target using a tree binary classification model trained with the <see cref="LightGbmBinaryTrainer"/>.
        /// </summary>
        /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
        /// <param name="label">The label column.</param>
        /// <param name="features">The features column.</param>
        /// <param name="weights">The weights column.</param>
        /// <param name="options">Algorithm advanced settings.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained. Note that this action cannot change the result in any way;
        /// it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
                                                                                                                Scalar <bool> label, Vector <float> features, Scalar <float> weights,
                                                                                                                LightGbmBinaryTrainer.Options options,
                                                                                                                Action <CalibratedModelParametersBase <LightGbmBinaryModelParameters, PlattCalibrator> > onFit = null)
        {
            Contracts.CheckValue(options, nameof(options));
            CheckUserValues(label, features, weights, onFit);

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
            {
                options.LabelColumnName         = labelName;
                options.FeatureColumnName       = featuresName;
                options.ExampleWeightColumnName = weightsName;

                var trainer = new LightGbmBinaryTrainer(env, options);

                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans => onFit(trans.Model)));
                }
                else
                {
                    return(trainer);
                }
            }, label, features, weights);

            return(rec.Output);
        }
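For reference, the three outputs listed in the returns documentation surface as the Score, Probability, and PredictedLabel columns. A sketch of an output class (the class name is hypothetical) that captures them when consuming predictions with the dynamic API:

        // Sketch: output class mapping the three prediction columns produced by
        // the trainer (raw score, calibrated probability, predicted label).
        public class LightGbmBinaryPrediction
        {
            public bool Label { get; set; }

            // Raw score; unbounded, larger values favor the positive class.
            public float Score { get; set; }

            // Calibrated probability in [0, 1], produced by the Platt calibrator.
            public float Probability { get; set; }

            public bool PredictedLabel { get; set; }
        }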