// Creates a sweepable (AutoML-tunable) FastTree binary-classification estimator.
// NOTE(review): the method's modifiers and return type are outside this chunk — the visible signature is partial.
FastTree( this SweepableBinaryClassificationTrainers trainer,
    string labelColumnName = "Label",
    string featureColumnName = "Features",
    // Optional hyperparameter search space; the library default is used when null.
    SweepableOption <FastTreeBinaryTrainer.Options> optionBuilder = null,
    // Fixed (non-swept) option values layered under the swept parameters.
    FastTreeBinaryTrainer.Options defaultOption = null)
{
    var context = trainer.Context;
    if (optionBuilder == null)
    {
        optionBuilder = FastTreeBinaryTrainerSweepableOptions.Default;
    }
    // Apply the caller's fixed defaults onto the sweepable option set.
    optionBuilder.SetDefaultOption(defaultOption);
    return(context.AutoML().CreateSweepableEstimator(
        (context, option) =>
        {
            // Column names are fixed per call; only the hyperparameters are swept.
            option.LabelColumnName = labelColumnName;
            option.FeatureColumnName = featureColumnName;
            return context.BinaryClassification.Trainers.FastTree(option);
        },
        optionBuilder,
        // Declared input columns of the resulting estimator.
        new string[] { labelColumnName, featureColumnName },
        // Declared output column.
        new string[] { PredictedLabel },
        nameof(FastTreeBinaryTrainer)));
}
/// <summary>
/// Create <see cref="FastTreeBinaryTrainer"/> with advanced options, which predicts a target using a
/// decision tree binary classification model.
/// </summary>
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="options">Trainer options.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[FastTreeBinaryClassification](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs)]
/// ]]>
/// </format>
/// </example>
public static FastTreeBinaryTrainer FastTree(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, FastTreeBinaryTrainer.Options options)
{
    // Validate inputs before touching the environment.
    Contracts.CheckValue(catalog, nameof(catalog));
    Contracts.CheckValue(options, nameof(options));

    var environment = CatalogUtils.GetEnvironment(catalog);
    return new FastTreeBinaryTrainer(environment, options);
}
public void TestFastTreeBinaryFeaturizationInPipelineWithOptionalOutputs()
{
    // Build and cache a small synthetic binary-classification data set.
    int dataPointCount = 200;
    var samples = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(dataPointCount).ToList();
    var dataView = ML.Data.LoadFromEnumerable(samples);
    dataView = ML.Data.Cache(dataView);

    var treeTrainerOptions = new FastTreeBinaryTrainer.Options
    {
        NumberOfThreads = 1,
        NumberOfTrees = 10,
        NumberOfLeaves = 4,
        MinimumExampleCountPerLeaf = 10,
        FeatureColumnName = "Features",
        LabelColumnName = "Label"
    };

    // Request only the "Leaves" output; "Trees" and "Paths" are deliberately disabled.
    var featurizerOptions = new FastTreeBinaryFeaturizationEstimator.Options()
    {
        InputColumnName = "Features",
        TrainerOptions = treeTrainerOptions,
        TreesColumnName = null,
        PathsColumnName = null,
        LeavesColumnName = "Leaves"
    };

    bool sawExpectedFailure = false;
    try
    {
        // Referencing the disabled columns must make Fit throw.
        var badPipeline = ML.Transforms.FeaturizeByFastTreeBinary(featurizerOptions)
            .Append(ML.Transforms.Concatenate("CombinedFeatures", "Features", "Trees", "Leaves", "Paths"))
            .Append(ML.BinaryClassification.Trainers.SdcaLogisticRegression("Label", "CombinedFeatures"));
        var badModel = badPipeline.Fit(dataView);
    }
    catch
    {
        // Only "Leaves" is produced by the tree featurizer, so accessing "Trees" and "Paths" will lead to an error.
        sawExpectedFailure = true;
    }
    Assert.True(sawExpectedFailure);

    // The valid pipeline only concatenates columns the featurizer actually produces.
    var goodPipeline = ML.Transforms.FeaturizeByFastTreeBinary(featurizerOptions)
        .Append(ML.Transforms.Concatenate("CombinedFeatures", "Features", "Leaves"))
        .Append(ML.BinaryClassification.Trainers.SdcaLogisticRegression("Label", "CombinedFeatures"));
    var goodModel = goodPipeline.Fit(dataView);
    var scored = goodModel.Transform(dataView);
    var metrics = ML.BinaryClassification.Evaluate(scored);

    Assert.True(metrics.Accuracy > 0.98);
    Assert.True(metrics.LogLoss < 0.05);
    Assert.True(metrics.AreaUnderPrecisionRecallCurve > 0.98);
}
public override IEstimator <ITransformer> BuildFromOption(MLContext context, FastTreeOption param)
{
    // Map the AutoML search-space parameters onto the trainer's advanced options.
    var trainerOptions = new FastTreeBinaryTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        NumberOfLeaves = param.NumberOfLeaves,
        NumberOfTrees = param.NumberOfTrees,
        MinimumExampleCountPerLeaf = param.MinimumExampleCountPerLeaf,
        LearningRate = param.LearningRate,
        MaximumBinCountPerFeature = param.MaximumBinCountPerFeature,
        FeatureFraction = param.FeatureFraction,
        // Thread count comes from the environment rather than the swept parameters.
        NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment(),
    };

    return context.BinaryClassification.Trainers.FastTree(trainerOptions);
}
/// <summary>
/// Builds the training pipeline (text featurization followed by a FastTree binary trainer)
/// and fits it on the supplied training split.
/// </summary>
/// <param name="mlContext">The ML.NET context providing transforms and trainers.</param>
/// <param name="splitTrainSet">The training data view.</param>
/// <returns>The fitted model.</returns>
public static ITransformer BuildAndTrainModel(MLContext mlContext, IDataView splitTrainSet)
{
    string defaultColumnName = "Features";

    // Advanced trainer settings (object initializer instead of sequential assignments).
    var trainerOptions = new FastTreeBinaryTrainer.Options
    {
        NumberOfLeaves = 50,
        NumberOfTrees = 50,
        MinimumExampleCountPerLeaf = 20
    };

    // Featurize the raw sentiment text into the "Features" column, then train FastTree on it.
    // (Removed dead commented-out trainer call that duplicated these options inline.)
    var pipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: defaultColumnName, inputColumnName: nameof(SentimentData.SentimentText))
        .Append(mlContext.BinaryClassification.Trainers.FastTree(trainerOptions));

    Console.WriteLine("=============== Create and Train the Model ===============");
    var model = pipeline.Fit(splitTrainSet);
    Console.WriteLine("=============== End of training ===============");
    Console.WriteLine();

    return model;
}
public void TestFastTreeBinaryFeaturizationInPipeline()
{
    // Build and cache a small synthetic binary-classification data set.
    int dataPointCount = 200;
    var samples = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(dataPointCount).ToList();
    var dataView = ML.Data.Cache(ML.Data.LoadFromEnumerable(samples));

    var treeTrainerOptions = new FastTreeBinaryTrainer.Options
    {
        NumberOfThreads = 1,
        NumberOfTrees = 10,
        NumberOfLeaves = 4,
        MinimumExampleCountPerLeaf = 10,
        FeatureColumnName = "Features",
        LabelColumnName = "Label"
    };

    // Enable all three featurizer outputs: tree values, leaf indicators, and path indicators.
    var featurizerOptions = new FastTreeBinaryFeaturizationEstimator.Options()
    {
        InputColumnName = "Features",
        TreesColumnName = "Trees",
        LeavesColumnName = "Leaves",
        PathsColumnName = "Paths",
        TrainerOptions = treeTrainerOptions
    };

    // Combine the raw features with all tree-derived features, then train a linear model on top.
    var pipeline = ML.Transforms.FeaturizeByFastTreeBinary(featurizerOptions)
        .Append(ML.Transforms.Concatenate("CombinedFeatures", "Features", "Trees", "Leaves", "Paths"))
        .Append(ML.BinaryClassification.Trainers.SdcaLogisticRegression("Label", "CombinedFeatures"));

    var model = pipeline.Fit(dataView);
    var scored = model.Transform(dataView);
    var metrics = ML.BinaryClassification.Evaluate(scored);

    Assert.True(metrics.Accuracy > 0.98);
    Assert.True(metrics.LogLoss < 0.05);
    Assert.True(metrics.AreaUnderPrecisionRecallCurve > 0.98);
}
// This example requires installation of additional NuGet package
// <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
public static void Example()
{
    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    // Setting the seed to a fixed number in this example to make outputs deterministic.
    var mlContext = new MLContext(seed: 0);

    // Generate 1000 training points and wrap them in an IDataView, the format ML.NET APIs consume.
    var trainingSamples = GenerateRandomDataPoints(1000);
    var trainingData = mlContext.Data.LoadFromEnumerable(trainingSamples);

    // Define trainer options.
    var trainerOptions = new FastTreeBinaryTrainer.Options
    {
        // Use L2Norm for early stopping.
        EarlyStoppingMetric = EarlyStoppingMetric.L2Norm,
        // Create a simpler model by penalizing usage of new features.
        FeatureFirstUsePenalty = 0.1,
        // Reduce the number of trees to 50.
        NumberOfTrees = 50
    };

    // Define the trainer and train the model.
    var estimator = mlContext.BinaryClassification.Trainers.FastTree(trainerOptions);
    var model = estimator.Fit(trainingData);

    // Create testing data. Use different random seed to make it different from training data.
    var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));

    // Run the model on test data set.
    var transformedTestData = model.Transform(testData);

    // Convert IDataView object to a list.
    var predictions = mlContext.Data.CreateEnumerable <Prediction>(transformedTestData, reuseRowObject: false).ToList();

    // Print 5 predictions.
    foreach (var p in predictions.Take(5))
    {
        Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
    }

    // Expected output:
    //   Label: True, Prediction: True
    //   Label: False, Prediction: False
    //   Label: True, Prediction: True
    //   Label: True, Prediction: True
    //   Label: False, Prediction: False

    // Evaluate the overall metrics.
    var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);
    PrintMetrics(metrics);

    // Expected output:
    //   Accuracy: 0.78
    //   AUC: 0.88
    //   F1 Score: 0.79
    //   Negative Precision: 0.83
    //   Negative Recall: 0.74
    //   Positive Precision: 0.74
    //   Positive Recall: 0.84
    //   Log Loss: 0.62
    //   Log Loss Reduction: 37.77
    //   Entropy: 1.00
}
// This example requires installation of additional NuGet package
// <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
public static void Example()
{
    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    // Setting the seed to a fixed number in this example to make outputs deterministic.
    var mlContext = new MLContext(seed: 0);

    // Create 100 data points and load them as an IDataView, the format ML.NET APIs consume.
    var dataPoints = GenerateRandomDataPoints(100).ToList();
    var dataView = mlContext.Data.LoadFromEnumerable(dataPoints);

    // ML.NET doesn't cache data set by default. Therefore, if one reads a data set from a file and accesses it many times,
    // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory,
    // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms
    // which needs many data passes.
    dataView = mlContext.Data.Cache(dataView);

    // Input and output column names of the tree-based featurizer.
    string labelColumnName = nameof(DataPoint.Label);
    string featureColumnName = nameof(DataPoint.Features);
    string treesColumnName = nameof(TransformedDataPoint.Trees);
    string leavesColumnName = nameof(TransformedDataPoint.Leaves);
    string pathsColumnName = nameof(TransformedDataPoint.Paths);

    // Configuration of the tree-based model trained inside the featurizer.
    var trainerOptions = new FastTreeBinaryTrainer.Options
    {
        // Use L2Norm for early stopping.
        EarlyStoppingMetric = EarlyStoppingMetric.L2Norm,
        // Create a simpler model by penalizing usage of new features.
        FeatureFirstUsePenalty = 0.1,
        // Reduce the number of trees to 3.
        NumberOfTrees = 3,
        // Number of leaves per tree.
        NumberOfLeaves = 6,
        FeatureColumnName = featureColumnName,
        LabelColumnName = labelColumnName
    };

    // Define the tree-based featurizer's configuration.
    var featurizerOptions = new FastTreeBinaryFeaturizationEstimator.Options
    {
        InputColumnName = featureColumnName,
        TreesColumnName = treesColumnName,
        LeavesColumnName = leavesColumnName,
        PathsColumnName = pathsColumnName,
        TrainerOptions = trainerOptions
    };

    // Define the featurizer, train it, and apply it to the considered data set.
    var pipeline = mlContext.Transforms.FeaturizeByFastTreeBinary(featurizerOptions);
    var model = pipeline.Fit(dataView);
    var transformed = model.Transform(dataView);

    // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView.
    var transformedDataPoints = mlContext.Data.CreateEnumerable <TransformedDataPoint>(transformed, false).ToList();

    // Print out the transformation of the first 3 data points.
    for (int i = 0; i < 3; ++i)
    {
        var dataPoint = dataPoints[i];
        var transformedDataPoint = transformedDataPoints[i];
        Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:");
        Console.WriteLine($" Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}].");
        Console.WriteLine($" Leave IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}].");
        Console.WriteLine($" Paths IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}].");
    }

    // Expected output:
    //   The original feature vector [0.8173254,0.7680227,0.5581612] is transformed to three different tree-based feature vectors:
    //     Trees' output values: [0.5714286,0.4636412,0.535588].
    //     Leave IDs' 0-1 representation: [0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1].
    //     Paths IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,1,1,1].
    //   The original feature vector [0.5888848,0.9360271,0.4721779] is transformed to three different tree-based feature vectors:
    //     Trees' output values: [0.2352941,-0.1382389,0.535588].
    //     Leave IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1].
    //     Paths IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,1,1,1].
    //   The original feature vector [0.2737045,0.2919063,0.4673147] is transformed to three different tree-based feature vectors:
    //     Trees' output values: [0.2352941,-0.1382389,-0.2184284].
    //     Leave IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0].
    //     Paths IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,0,0,0].
}
/// <summary>
/// FastTree <see cref="BinaryClassificationCatalog"/> extension method.
/// Predict a target using a decision tree binary classification model trained with the <see cref="FastTreeBinaryTrainer"/>.
/// </summary>
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="label">The label column.</param>
/// <param name="features">The features column.</param>
/// <param name="weights">The optional weights column.</param>
/// <param name="options">Algorithm advanced settings.</param>
/// <param name="onFit">A delegate that is called every time the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
/// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
/// the linear model that was trained. Note that this action cannot change the result in any way;
/// it is only a way for the caller to be informed about what was learnt.</param>
/// <returns>The set of output columns including in order the predicted binary classification score (which will range
/// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[FastTree](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs)]
/// ]]></format>
/// </example>
public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) FastTree(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
    Scalar <bool> label,
    Vector <float> features,
    Scalar <float> weights,
    FastTreeBinaryTrainer.Options options,
    Action <CalibratedModelParametersBase <FastTreeBinaryModelParameters, PlattCalibrator> > onFit = null)
{
    Contracts.CheckValueOrNull(options);
    CheckUserValues(label, features, weights, onFit);

    var reconciler = new TrainerEstimatorReconciler.BinaryClassifier(
        (env, labelName, featuresName, weightsName) =>
        {
            // The reconciler supplies the resolved column names at fit time.
            options.LabelColumnName = labelName;
            options.FeatureColumnName = featuresName;
            options.ExampleWeightColumnName = weightsName;

            var trainer = new FastTreeBinaryTrainer(env, options);
            if (onFit == null)
            {
                return trainer;
            }
            // Surface the trained calibrated model to the caller once fitting completes.
            return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
        }, label, features, weights);

    return reconciler.Output;
}