// NOTE(review): this chunk begins mid-declaration -- the access modifiers and
// return type of this extension method lie outside the visible region, so only
// comments have been added here; all code tokens are unchanged.
// Registers a sweepable LightGBM binary-classification estimator on the AutoML
// context: the sweep space comes from optionBuilder (or the stock default), and
// each sampled option set is stamped with the given label/feature column names.
LightGbm( this SweepableBinaryClassificationTrainers trainer,
    string labelColumnName = "Label",
    string featureColumnName = "Features",
    SweepableOption <LightGbmBinaryTrainer.Options> optionBuilder = null,
    LightGbmBinaryTrainer.Options defaultOption = null)
{
    var context = trainer.Context;

    // Fall back to the stock LightGBM sweep space when the caller did not supply one.
    if (optionBuilder == null)
    {
        optionBuilder = LightGbmBinaryTrainerSweepableOptions.Default;
    }

    // Seed the sweep space with the caller-supplied defaults.
    // NOTE(review): defaultOption may be null here -- presumably SetDefaultOption
    // tolerates null; confirm against its implementation.
    optionBuilder.SetDefaultOption(defaultOption);

    // The factory lambda runs once per sampled option set and builds the
    // concrete trainer after fixing the input column names.
    return(context.AutoML().CreateSweepableEstimator(
        (context, option) =>
        {
            option.LabelColumnName = labelColumnName;
            option.FeatureColumnName = featureColumnName;
            return context.BinaryClassification.Trainers.LightGbm(option);
        },
        optionBuilder,
        new string[] { labelColumnName, featureColumnName },  // estimator input columns
        new string[] { PredictedLabel },  // NOTE(review): PredictedLabel resolves from an outer scope not visible in this chunk
        nameof(LightGbmBinaryTrainer)));
}
/// <summary>
/// Builds the training pipeline: concatenates the raw input columns into a single
/// "Features" vector and appends a LightGBM binary-classification trainer
/// configured with fixed hyper-parameters.
/// </summary>
/// <param name="mlContext">The ML.NET context used to create transforms and trainers.</param>
/// <returns>The composed (featurization + trainer) estimator chain.</returns>
public static IEstimator <ITransformer> BuildTrainingPipeline(MLContext mlContext)
{
    // Raw input columns that form the feature vector (PCA components V1..V28
    // plus Time and Amount -- the usual credit-card fraud schema).
    string[] featureColumns =
    {
        "Time", "V1", "V2", "V3", "V4", "V5", "V6", "V7", "V8", "V9", "V10",
        "V11", "V12", "V13", "V14", "V15", "V16", "V17", "V18", "V19", "V20",
        "V21", "V22", "V23", "V24", "V25", "V26", "V27", "V28", "Amount"
    };
    var featurization = mlContext.Transforms.Concatenate("Features", featureColumns);

    // Per-tree booster regularization settings.
    var booster = new GradientBooster.Options()
    {
        L2Regularization = 1,
        L1Regularization = 0
    };

    // Trainer-level hyper-parameters; the label lives in the "Class" column.
    var lightGbmOptions = new LightGbmBinaryTrainer.Options()
    {
        NumberOfIterations = 150,
        LearningRate = 0.2001066f,
        NumberOfLeaves = 7,
        MinimumExampleCountPerLeaf = 10,
        UseCategoricalSplit = true,
        HandleMissingValue = false,
        MinimumExampleCountPerGroup = 100,
        MaximumCategoricalSplitPointCount = 16,
        CategoricalSmoothing = 10,
        L2CategoricalRegularization = 5,
        Booster = booster,
        LabelColumnName = "Class",
        FeatureColumnName = "Features"
    };

    var lightGbm = mlContext.BinaryClassification.Trainers.LightGbm(lightGbmOptions);
    return featurization.Append(lightGbm);
}
/// <summary>
/// Continuously trains LightGBM binary-classification models with a fixed
/// hyper-parameter set, evaluates each on the validation data, and saves the
/// model whenever its score beats the best seen so far.
/// NOTE: this loop runs until the process is stopped externally.
/// </summary>
public void Train()
{
    var trainingData = GetData(_dataPath);
    var testData = GetData(_validatePath);

    double? bestScore = null;
    while (true)
    {
        // Hyper-parameters are identical on every iteration (fixed Seed included);
        // per-run variation, if any, comes from the trainer/pipeline internals.
        var options = new LightGbmBinaryTrainer.Options
        {
            ExampleWeightColumnName = nameof(Appointment.Weight),
            EvaluationMetric = LightGbmBinaryTrainer.Options.EvaluateMetricType.Logloss,
            Sigmoid = 1,
            CategoricalSmoothing = 10,
            L2CategoricalRegularization = 10,
            MaximumCategoricalSplitPointCount = 8,
            MinimumExampleCountPerLeaf = 1,
            WeightOfPositiveExamples = 2,
            MaximumBinCountPerFeature = 200,
            Seed = 459933621,
            HandleMissingValue = true,
            UseZeroAsMissingValue = false,
            MinimumExampleCountPerGroup = 100,
            NumberOfIterations = 200,
            LearningRate = 0.01,
            NumberOfLeaves = 110,
            Booster = new GradientBooster.Options
            {
                L1Regularization = 0,
                L2Regularization = 0,
                MaximumTreeDepth = 0,
                SubsampleFrequency = 0,
                SubsampleFraction = 1,
                FeatureFraction = 1,
                MinimumChildWeight = 0.1,
                MinimumSplitGain = 0,
            }
        };

        var trainer = _context.BinaryClassification.Trainers.LightGbm(options);
        var pipeline = CreatePipeline(trainer);
        var model = pipeline.Fit(trainingData);
        var f1 = Evaluate("Test", model, testData);

        if (!bestScore.HasValue || f1 > bestScore.Value)
        {
            bestScore = f1;
            SaveModel(trainingData.Schema, model);
            Console.WriteLine($"Saved new model at {bestScore.Value:P2}");
        }
        else
        {
            // FIX: the original guarded this branch with `else if (bestScore.HasValue)`,
            // but bestScore is always non-null here (the if-branch handles the null
            // case), so the extra check was dead code. A plain else is equivalent.
            Console.WriteLine($"Best model is still {bestScore.Value:P2}");
        }
    }
}
/// <summary>
/// Create <see cref="LightGbmBinaryTrainer"/> with advanced options, which predicts a target
/// using a gradient boosting decision tree binary classification.
/// </summary>
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="options">Trainer options.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LightGbmBinaryClassification](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs)]
/// ]]>
/// </format>
/// </example>
public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
    LightGbmBinaryTrainer.Options options)
{
    // Fail fast on a null catalog before resolving the host environment from it.
    Contracts.CheckValue(catalog, nameof(catalog));

    var environment = CatalogUtils.GetEnvironment(catalog);
    return new LightGbmBinaryTrainer(environment, options);
}
/// <summary>
/// Materializes a LightGBM binary-classification estimator from a swept
/// hyper-parameter set produced by the AutoML search.
/// </summary>
/// <param name="context">The ML.NET context used to create the trainer.</param>
/// <param name="param">The sampled hyper-parameter values.</param>
/// <returns>The configured LightGBM estimator.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, LgbmOption param)
{
    // Booster-level (per-tree) regularization and sampling settings.
    var boosterOptions = new GradientBooster.Options()
    {
        L1Regularization = param.L1Regularization,
        L2Regularization = param.L2Regularization,
        SubsampleFraction = param.SubsampleFraction,
        FeatureFraction = param.FeatureFraction,
    };

    // Trainer-level settings; the thread count is taken from the environment so
    // concurrent AutoML trials do not oversubscribe the machine.
    var trainerOptions = new LightGbmBinaryTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        NumberOfLeaves = param.NumberOfLeaves,
        NumberOfIterations = param.NumberOfTrees,
        MinimumExampleCountPerLeaf = param.MinimumExampleCountPerLeaf,
        LearningRate = param.LearningRate,
        MaximumBinCountPerFeature = param.MaximumBinCountPerFeature,
        NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment(),
        Booster = boosterOptions,
    };

    return context.BinaryClassification.Trainers.LightGbm(trainerOptions);
}
// This example requires installation of additional nuget package
// <a href="https://www.nuget.org/packages/Microsoft.ML.LightGbm/">Microsoft.ML.LightGbm</a>.
public static void Example()
{
    // MLContext is the entry point for all ML.NET operations (exception tracking,
    // logging, catalog of operations, source of randomness). A fixed seed keeps
    // the sample's output deterministic.
    var mlContext = new MLContext(seed: 0);

    // Build an IDataView over 1000 synthetic training points.
    var dataPoints = GenerateRandomDataPoints(1000);
    var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

    // Configure LightGBM to use the GOSS (gradient-based one-side sampling) booster.
    var options = new LightGbmBinaryTrainer.Options
    {
        Booster = new GossBooster.Options
        {
            TopRate = 0.3,
            OtherRate = 0.2
        }
    };

    // Define and fit the trainer.
    var model = mlContext.BinaryClassification.Trainers.LightGbm(options).Fit(trainingData);

    // Score a test set generated with a different seed so it differs from the
    // training data.
    var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));
    var transformedTestData = model.Transform(testData);

    // Materialize the predictions and show the first five.
    var predictions = mlContext.Data
        .CreateEnumerable <Prediction>(transformedTestData, reuseRowObject: false)
        .ToList();
    foreach (var p in predictions.Take(5))
    {
        Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
    }

    // Expected output:
    //   Label: True, Prediction: True
    //   Label: False, Prediction: True
    //   Label: True, Prediction: True
    //   Label: True, Prediction: True
    //   Label: False, Prediction: False

    // Evaluate the overall metrics.
    var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);
    PrintMetrics(metrics);

    // Expected output:
    //   Accuracy: 0.71
    //   AUC: 0.76
    //   F1 Score: 0.70
    //   Negative Precision: 0.73
    //   Negative Recall: 0.71
    //   Positive Precision: 0.69
    //   Positive Recall: 0.71
    //
    //   TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
    //   Confusion table
    //             ||======================
    //   PREDICTED || positive | negative | Recall
    //   TRUTH     ||======================
    //    positive ||      168 |       70 | 0.7059
    //    negative ||       88 |      174 | 0.6641
    //             ||======================
    //   Precision ||   0.6563 |   0.7131 |
}
// Grid sweep over a hand-picked set of LightGBM hyper-parameter values: times
// each training run and appends the evaluation results as JSON to a log file.
public static void Sweeper(MLContext mlContext, IDataView file, IEstimator <ITransformer> pipeline, string modelname, IDictionary <string, IEstimator <ITransformer> > estimator, Stopwatch stw)
{
    // These options are only for the LightGBM.
    // NOTE(review): hard-coded, machine-specific log path; each run's JSON object
    // is appended to this single file.
    string saveDirechtory = $"C:\\Users\\ludwi\\source\\repos\\JugendForscht\\LoggingData.json";
    IDictionary <string, dynamic> Result = new Dictionary <string, dynamic>(); // NOTE(review): never read or written below
    int trainingStage = 0; // counts completed sweep runs

    // Candidate values for each swept hyper-parameter.
    double[] LearningRate = new double[] { 1, 0.5, 0.25, 0.1, 0.001, 0.0001, 0.00001, 0.000001 };
    int[] NumberOfIterations = new int[] { 10, 20, 50, 75, 100, 150, 200, 300 };
    double[] Sigmoid = new double[] { 1.1, 1, 0.75, 0.5, 0.25, 0.1, 0.01, 0.001, 0.0001 };
    bool[] UnbalancedSets = new bool[] { true, false };

    var Model = estimator[modelname]; // the pre-built estimator to train; `pipeline` parameter is unused
    var data = mlContext.Data.TrainTestSplit(file, testFraction: 0.2, seed: 42);

    foreach (double learningrate in LearningRate)
    {
        foreach (int numberofiterations in NumberOfIterations)
        {
            foreach (double sigmoid in Sigmoid)
            {
                foreach (bool unbalancedsets in UnbalancedSets)
                {
                    // NOTE(review): these options are constructed but never handed to any
                    // trainer -- Model.Fit below trains the pre-built estimator from the
                    // dictionary, so every sweep iteration trains with identical settings.
                    // Verify whether the options were meant to rebuild the trainer here.
                    var options = new LightGbmBinaryTrainer.Options
                    {
                        LearningRate = learningrate,
                        NumberOfIterations = numberofiterations,
                        Sigmoid = sigmoid,
                        UnbalancedSets = unbalancedsets,
                        Verbose = true,
                        Silent = false,
                    };

                    // Time this training run.
                    stw.Restart();
                    var model = Model.Fit(data.TrainSet);
                    stw.Stop();

                    Console.WriteLine($"Finished training {modelname} with parameters: {stw.ElapsedMilliseconds / 1000}s");
                    Console.WriteLine($"Amounts of runs: {trainingStage}");
                    Console.WriteLine($"    LearningRate: {learningrate}");
                    Console.WriteLine($"    NumberOfIterations: {numberofiterations}");
                    Console.WriteLine($"    Sigmoid: {sigmoid}");
                    Console.WriteLine($"    UnbalancedSets: {unbalancedsets}");
                    Console.WriteLine();

                    #region Dictionary for logging data
                    // Swept parameter values for this run.
                    IDictionary <string, dynamic> param = new Dictionary <string, dynamic>()
                    {
                        { "LearningRate", learningrate },
                        { "NumberOfIterations", numberofiterations },
                        { "Sigmoid", sigmoid },
                        { "UnbalancedSets", unbalancedsets },
                    };
                    // Evaluation metrics for this run ("Leistung" = performance).
                    IDictionary <string, dynamic> leistung = Evaluate(mlContext: mlContext, model: model, data.TestSet);
                    // Nest metrics and parameters under the model name for the JSON log.
                    IDictionary <string, Dictionary <string, dynamic> > leistung_param = new Dictionary <string, Dictionary <string, dynamic> >()
                    {
                        { "Leistung", (Dictionary <string, dynamic>)leistung },
                        { "Parameter", (Dictionary <string, dynamic>)param }
                    };
                    IDictionary <string, Dictionary <string, Dictionary <string, dynamic> > > output = new Dictionary <string, Dictionary <string, Dictionary <string, dynamic> > >()
                    {
                        { modelname, (Dictionary <string, Dictionary <string, dynamic> >)leistung_param }
                    };
                    #endregion

                    // Append this run's record to the log file and echo it.
                    string json = JsonSerializer.Serialize(output);
                    File.AppendAllText(path: saveDirechtory, contents: json);
                    Console.WriteLine(json);
                    trainingStage++;
                }
            }
        }
    }
}
/// <summary>
/// Instantiates a LightGBM binary-classification trainer from the given swept
/// hyper-parameters and column information.
/// </summary>
/// <param name="mlContext">The ML.NET context used to create the trainer.</param>
/// <param name="sweepParams">The swept hyper-parameter values.</param>
/// <param name="columnInfo">Column-role information for the dataset.</param>
/// <param name="validationSet">Validation data (not used by this trainer factory).</param>
/// <returns>The configured LightGBM trainer estimator.</returns>
public ITrainerEstimator CreateInstance(MLContext mlContext, IEnumerable <SweepableParam> sweepParams, ColumnInformation columnInfo, IDataView validationSet)
{
    // Translate the swept parameters into concrete LightGBM trainer options.
    var trainerOptions = TrainerExtensionUtil.CreateLightGbmOptions <
        LightGbmBinaryTrainer.Options,
        float,
        BinaryPredictionTransformer <CalibratedModelParametersBase <LightGbmBinaryModelParameters, PlattCalibrator> >,
        CalibratedModelParametersBase <LightGbmBinaryModelParameters, PlattCalibrator> >(sweepParams, columnInfo);

    return mlContext.BinaryClassification.Trainers.LightGbm(trainerOptions);
}
// Continuously trains LightGBM models with a fixed hyper-parameter set and keeps
// the best-scoring one (balanced recall across both classes) on disk.
// NOTE(review): this chunk ends inside the method -- the method's closing brace
// lies outside the visible region.
public void Train()
{
    var trainingData = GetData(_dataPath);
    var testData = GetData(_validatePath);

    double? bestScore = null;
    // Endless tuning loop; the commented-out values record the alternative
    // hyper-parameters that were tried by hand.
    while (true)
    {
        var options = new LightGbmBinaryTrainer.Options
        {
            //ExampleWeightColumnName = nameof(Appointment.Weight),
            EvaluationMetric = LightGbmBinaryTrainer.Options.EvaluateMetricType.Logloss,
            //UnbalancedSets = true,
            WeightOfPositiveExamples = 1.6, //new Random().Next(20, 40) / 10,
            //Sigmoid = 1,
            CategoricalSmoothing = 1, //Random(0, 1, 10, 20),
            L2CategoricalRegularization = 1, //Random(0.1, 0.5, 1, 5, 10),
            MaximumCategoricalSplitPointCount = 16, //Random(8, 16, 32, 64),
            MinimumExampleCountPerLeaf = 20, //Random(1, 10, 20, 50),
            MaximumBinCountPerFeature = 200,
            HandleMissingValue = true,
            UseZeroAsMissingValue = false,
            MinimumExampleCountPerGroup = 100, //Random(10, 50, 100, 200),
            NumberOfIterations = 100,
            LearningRate = 0.4f, //Random(0.025f, 0.08f, 0.2f, 0.4f),
            NumberOfLeaves = 128, //Random(2, 16, 64, 128),
            Booster = new GradientBooster.Options
            {
                L1Regularization = 1, //Random(0, 0.5, 1),
                L2Regularization = 1, //Random(0, 0.5, 1),
                MaximumTreeDepth = 0,
                SubsampleFrequency = 0,
                SubsampleFraction = 1,
                FeatureFraction = 1,
                MinimumChildWeight = 0.1,
                MinimumSplitGain = 0,
            }
        };

        var trainer = _context.BinaryClassification.Trainers.LightGbm(options);
        var pipeline = CreatePipeline(trainer);
        var model = pipeline.Fit(trainingData);

        var beta = 2;
        var metrics = Evaluate("Test", model, testData, beta);
        //var score = metrics.FBeta(beta);
        // Score = balanced accuracy (mean of positive and negative recall).
        var score = (metrics.PositiveRecall + metrics.NegativeRecall) / 2;

        if (!bestScore.HasValue || score > bestScore.Value)
        {
            bestScore = score;
            SaveModel(trainingData.Schema, model);
            Console.WriteLine($"Saved new model at {bestScore.Value:P2}");
        }
        // NOTE(review): bestScore always has a value when this branch is reached
        // (the if-branch handles the null case), so this guard is redundant --
        // a plain `else` would behave identically.
        else if (bestScore.HasValue)
        {
            Console.WriteLine($"Best model is still {bestScore.Value:P2}");
        }
    }
/// <summary>
/// Predict a target using a tree binary classification model trained with the <see cref="LightGbmBinaryTrainer"/>.
/// </summary>
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="label">The label column.</param>
/// <param name="features">The features column.</param>
/// <param name="weights">The weights column.</param>
/// <param name="options">Algorithm advanced settings.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the linear model that was trained. Note that this action cannot change the result in any way;
/// it is only a way for the caller to be informed about what was learnt.</param>
/// <returns>The set of output columns including in order the predicted binary classification score (which will range
/// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
    Scalar <bool> label, Vector <float> features, Scalar <float> weights,
    LightGbmBinaryTrainer.Options options,
    Action <CalibratedModelParametersBase <LightGbmBinaryModelParameters, PlattCalibrator> > onFit = null)
{
    Contracts.CheckValue(options, nameof(options));
    CheckUserValues(label, features, weights, onFit);

    // The reconciler resolves the actual column names at fit time and hands them
    // to this factory, which stamps them onto the options before constructing
    // the concrete trainer.
    var reconciler = new TrainerEstimatorReconciler.BinaryClassifier(
        (env, labelName, featuresName, weightsName) =>
        {
            options.LabelColumnName = labelName;
            options.FeatureColumnName = featuresName;
            options.ExampleWeightColumnName = weightsName;

            var trainer = new LightGbmBinaryTrainer(env, options);

            // Without an onFit callback the bare trainer suffices; otherwise wrap
            // it so the caller is notified with the trained model parameters.
            if (onFit == null)
            {
                return trainer;
            }
            return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
        },
        label, features, weights);

    return reconciler.Output;
}