FastTree(
            this SweepableBinaryClassificationTrainers trainer,
            string labelColumnName   = "Label",
            string featureColumnName = "Features",
            SweepableOption<FastTreeBinaryTrainer.Options> optionBuilder = null,
            FastTreeBinaryTrainer.Options defaultOption = null)
        {
            var context = trainer.Context;

            if (optionBuilder == null)
            {
                optionBuilder = FastTreeBinaryTrainerSweepableOptions.Default;
            }

            optionBuilder.SetDefaultOption(defaultOption);
            // Use a differently named lambda parameter ('ctx') so it doesn't shadow the enclosing 'context' local.
            return context.AutoML().CreateSweepableEstimator(
                       (ctx, option) =>
            {
                option.LabelColumnName = labelColumnName;
                option.FeatureColumnName = featureColumnName;

                return ctx.BinaryClassification.Trainers.FastTree(option);
            },
                       optionBuilder,
                       new string[] { labelColumnName, featureColumnName },
                       new string[] { PredictedLabel },
                       nameof(FastTreeBinaryTrainer));
        }
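A minimal call-site sketch for the extension above, assuming a SweepableBinaryClassificationTrainers instance named `trainers` is already available from the surrounding AutoML API (how it is obtained is not shown in this snippet); the option values are illustrative only.

        // Hypothetical usage: 'trainers' is an assumed SweepableBinaryClassificationTrainers instance.
        var sweepableFastTree = trainers.FastTree(
            labelColumnName: "Label",
            featureColumnName: "Features",
            defaultOption: new FastTreeBinaryTrainer.Options
            {
                NumberOfTrees = 100,   // assumed starting values for the sweep
                NumberOfLeaves = 20
            });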
Example No. 2
        /// <summary>
        /// Create <see cref="FastTreeBinaryTrainer"/> with advanced options, which predicts a target using a decision tree binary classification model.
        /// </summary>
        /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
        /// <param name="options">Trainer options.</param>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[FastTreeBinaryClassification](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static FastTreeBinaryTrainer FastTree(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
                                                     FastTreeBinaryTrainer.Options options)
        {
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckValue(options, nameof(options));

            var env = CatalogUtils.GetEnvironment(catalog);

            return new FastTreeBinaryTrainer(env, options);
        }
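For quick reference, a minimal call site for this overload could look like the sketch below; the option values are placeholders and mirror the properties used elsewhere on this page.

        var mlContext = new MLContext(seed: 0);
        var trainer = mlContext.BinaryClassification.Trainers.FastTree(
            new FastTreeBinaryTrainer.Options
            {
                LabelColumnName = "Label",
                FeatureColumnName = "Features",
                NumberOfTrees = 100,
                NumberOfLeaves = 20,
                MinimumExampleCountPerLeaf = 10
            });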
Example No. 3
        public void TestFastTreeBinaryFeaturizationInPipelineWithOptionalOutputs()
        {
            int dataPointCount = 200;
            var data           = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(dataPointCount).ToList();
            var dataView       = ML.Data.LoadFromEnumerable(data);

            dataView = ML.Data.Cache(dataView);

            var trainerOptions = new FastTreeBinaryTrainer.Options
            {
                NumberOfThreads            = 1,
                NumberOfTrees              = 10,
                NumberOfLeaves             = 4,
                MinimumExampleCountPerLeaf = 10,
                FeatureColumnName          = "Features",
                LabelColumnName            = "Label"
            };

            var options = new FastTreeBinaryFeaturizationEstimator.Options()
            {
                InputColumnName  = "Features",
                TrainerOptions   = trainerOptions,
                TreesColumnName  = null,
                PathsColumnName  = null,
                LeavesColumnName = "Leaves"
            };


            bool isWrong = false;

            try
            {
                var wrongPipeline = ML.Transforms.FeaturizeByFastTreeBinary(options)
                                    .Append(ML.Transforms.Concatenate("CombinedFeatures", "Features", "Trees", "Leaves", "Paths"))
                                    .Append(ML.BinaryClassification.Trainers.SdcaLogisticRegression("Label", "CombinedFeatures"));
                var wrongModel = wrongPipeline.Fit(dataView);
            }
            catch
            {
                isWrong = true; // Only "Leaves" is produced by the tree featurizer, so accessing "Trees" and "Paths" will lead to an error.
            }
            Assert.True(isWrong);

            var pipeline = ML.Transforms.FeaturizeByFastTreeBinary(options)
                           .Append(ML.Transforms.Concatenate("CombinedFeatures", "Features", "Leaves"))
                           .Append(ML.BinaryClassification.Trainers.SdcaLogisticRegression("Label", "CombinedFeatures"));
            var model      = pipeline.Fit(dataView);
            var prediction = model.Transform(dataView);
            var metrics    = ML.BinaryClassification.Evaluate(prediction);

            Assert.True(metrics.Accuracy > 0.98);
            Assert.True(metrics.LogLoss < 0.05);
            Assert.True(metrics.AreaUnderPrecisionRecallCurve > 0.98);
        }
Example No. 4
        public override IEstimator<ITransformer> BuildFromOption(MLContext context, FastTreeOption param)
        {
            var option = new FastTreeBinaryTrainer.Options()
            {
                NumberOfLeaves             = param.NumberOfLeaves,
                NumberOfTrees              = param.NumberOfTrees,
                MinimumExampleCountPerLeaf = param.MinimumExampleCountPerLeaf,
                LearningRate              = param.LearningRate,
                LabelColumnName           = param.LabelColumnName,
                FeatureColumnName         = param.FeatureColumnName,
                ExampleWeightColumnName   = param.ExampleWeightColumnName,
                NumberOfThreads           = AutoMlUtils.GetNumberOfThreadFromEnvrionment(),
                MaximumBinCountPerFeature = param.MaximumBinCountPerFeature,
                FeatureFraction           = param.FeatureFraction,
            };

            return context.BinaryClassification.Trainers.FastTree(option);
        }
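The FastTreeOption search-space type consumed above is not shown on this page; the sketch below is an assumed shape derived purely from the properties read in BuildFromOption (the default values and any attributes or base class are guesses).

        // Assumed parameter object for the sweep; property names come from the usage above.
        public class FastTreeOption
        {
            public int NumberOfLeaves { get; set; } = 20;
            public int NumberOfTrees { get; set; } = 100;
            public int MinimumExampleCountPerLeaf { get; set; } = 10;
            public double LearningRate { get; set; } = 0.2;
            public string LabelColumnName { get; set; } = "Label";
            public string FeatureColumnName { get; set; } = "Features";
            public string ExampleWeightColumnName { get; set; }
            public int MaximumBinCountPerFeature { get; set; } = 255;
            public double FeatureFraction { get; set; } = 1.0;
        }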
Example No. 5
        public static ITransformer BuildAndTrainModel(MLContext mlContext, IDataView splitTrainSet)
        {
            string defaultColumnName = "Features";

            var trainerOptions = new FastTreeBinaryTrainer.Options();

            trainerOptions.NumberOfLeaves             = 50;
            trainerOptions.NumberOfTrees              = 50;
            trainerOptions.MinimumExampleCountPerLeaf = 20;

            var pipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: defaultColumnName, inputColumnName: nameof(SentimentData.SentimentText))
                           //.Append(mlContext.BinaryClassification.Trainers.FastTree(numberOfLeaves: 50, numberOfTrees: 50, minimumExampleCountPerLeaf: 20));
                           .Append(mlContext.BinaryClassification.Trainers.FastTree(trainerOptions));

            Console.WriteLine("=============== Create and Train the Model ===============");
            var model = pipeline.Fit(splitTrainSet);

            Console.WriteLine("=============== End of training ===============");
            Console.WriteLine();

            return model;
        }
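The SentimentData input type referenced through nameof(SentimentData.SentimentText) is not shown here; a plausible definition, following the common ML.NET sentiment-analysis tutorial layout (the column indices and the Boolean label are assumptions), would be:

        // Requires Microsoft.ML.Data for the LoadColumn/ColumnName attributes.
        public class SentimentData
        {
            [LoadColumn(0)]
            public string SentimentText;

            [LoadColumn(1), ColumnName("Label")]
            public bool Sentiment;
        }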
Example No. 6
        public void TestFastTreeBinaryFeaturizationInPipeline()
        {
            int dataPointCount = 200;
            var data           = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(dataPointCount).ToList();
            var dataView       = ML.Data.LoadFromEnumerable(data);

            dataView = ML.Data.Cache(dataView);

            var trainerOptions = new FastTreeBinaryTrainer.Options
            {
                NumberOfThreads            = 1,
                NumberOfTrees              = 10,
                NumberOfLeaves             = 4,
                MinimumExampleCountPerLeaf = 10,
                FeatureColumnName          = "Features",
                LabelColumnName            = "Label"
            };

            var options = new FastTreeBinaryFeaturizationEstimator.Options()
            {
                InputColumnName  = "Features",
                TreesColumnName  = "Trees",
                LeavesColumnName = "Leaves",
                PathsColumnName  = "Paths",
                TrainerOptions   = trainerOptions
            };

            var pipeline = ML.Transforms.FeaturizeByFastTreeBinary(options)
                           .Append(ML.Transforms.Concatenate("CombinedFeatures", "Features", "Trees", "Leaves", "Paths"))
                           .Append(ML.BinaryClassification.Trainers.SdcaLogisticRegression("Label", "CombinedFeatures"));
            var model      = pipeline.Fit(dataView);
            var prediction = model.Transform(dataView);
            var metrics    = ML.BinaryClassification.Evaluate(prediction);

            Assert.True(metrics.Accuracy > 0.98);
            Assert.True(metrics.LogLoss < 0.05);
            Assert.True(metrics.AreaUnderPrecisionRecallCurve > 0.98);
        }
Example No. 7
        // This example requires installation of additional NuGet package
        // <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            // Setting the seed to a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of training data points.
            var dataPoints = GenerateRandomDataPoints(1000);

            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
            var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);

            // Define trainer options.
            var options = new FastTreeBinaryTrainer.Options
            {
                // Use L2Norm for early stopping.
                EarlyStoppingMetric = EarlyStoppingMetric.L2Norm,
                // Create a simpler model by penalizing usage of new features.
                FeatureFirstUsePenalty = 0.1,
                // Reduce the number of trees to 50.
                NumberOfTrees = 50
            };

            // Define the trainer.
            var pipeline = mlContext.BinaryClassification.Trainers.FastTree(options);

            // Train the model.
            var model = pipeline.Fit(trainingData);

            // Create testing data. Use different random seed to make it different from training data.
            var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));

            // Run the model on test data set.
            var transformedTestData = model.Transform(testData);

            // Convert IDataView object to a list.
            var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();

            // Print 5 predictions.
            foreach (var p in predictions.Take(5))
            {
                Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
            }

            // Expected output:
            //   Label: True, Prediction: True
            //   Label: False, Prediction: False
            //   Label: True, Prediction: True
            //   Label: True, Prediction: True
            //   Label: False, Prediction: False

            // Evaluate the overall metrics.
            var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData);

            PrintMetrics(metrics);

            // Expected output:
            //   Accuracy: 0.78
            //   AUC: 0.88
            //   F1 Score: 0.79
            //   Negative Precision: 0.83
            //   Negative Recall: 0.74
            //   Positive Precision: 0.74
            //   Positive Recall: 0.84
            //   Log Loss: 0.62
            //   Log Loss Reduction: 37.77
            //   Entropy: 1.00
        }
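The helper members used by Example() above (DataPoint, Prediction, GenerateRandomDataPoints, PrintMetrics) are not included in this excerpt. The sketch below reconstructs them in the style of the standard ML.NET binary-classification samples, assuming the usual using directives (System, System.Collections.Generic, System.Linq, Microsoft.ML.Data); the feature dimensionality, the noise model, and the printed metric set are assumptions.

        // Assumed input schema: a Boolean label and a 50-dimensional feature vector.
        private class DataPoint
        {
            public bool Label { get; set; }

            [VectorType(50)]
            public float[] Features { get; set; }
        }

        // Assumed prediction schema: the original label plus the predicted label.
        private class Prediction : DataPoint
        {
            public bool PredictedLabel { get; set; }
        }

        private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
        {
            var random = new Random(seed);
            for (int i = 0; i < count; i++)
            {
                var label = random.NextDouble() > 0.5;
                yield return new DataPoint
                {
                    Label = label,
                    // Shift the feature distribution slightly by class so the trainer has signal to learn from.
                    Features = Enumerable.Repeat(label, 50)
                        .Select(x => x ? (float)random.NextDouble() : (float)random.NextDouble() + 0.1f)
                        .ToArray()
                };
            }
        }

        // Writes the metrics listed in the expected output above.
        private static void PrintMetrics(CalibratedBinaryClassificationMetrics metrics)
        {
            Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
            Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
            Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
            Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
            Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
            Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
            Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
            Console.WriteLine($"Log Loss: {metrics.LogLoss:F2}");
            Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction:F2}");
            Console.WriteLine($"Entropy: {metrics.Entropy:F2}");
        }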
Example No. 8
        // This example requires installation of additional NuGet package
        // <a href="https://www.nuget.org/packages/Microsoft.ML.FastTree/">Microsoft.ML.FastTree</a>.
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            // Setting the seed to a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Create a list of data points to be transformed.
            var dataPoints = GenerateRandomDataPoints(100).ToList();

            // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
            var dataView = mlContext.Data.LoadFromEnumerable(dataPoints);

            // ML.NET doesn't cache data sets by default. Therefore, if one reads a data set from a file and accesses it many times,
            // it can be slow due to expensive featurization and disk operations. When the considered data can fit into memory,
            // a solution is to cache the data in memory. Caching is especially helpful when working with iterative algorithms
            // which need many data passes.
            dataView = mlContext.Data.Cache(dataView);

            // Define input and output columns of tree-based featurizer.
            string labelColumnName   = nameof(DataPoint.Label);
            string featureColumnName = nameof(DataPoint.Features);
            string treesColumnName   = nameof(TransformedDataPoint.Trees);
            string leavesColumnName  = nameof(TransformedDataPoint.Leaves);
            string pathsColumnName   = nameof(TransformedDataPoint.Paths);

            // Define the configuration of the trainer used to train a tree-based model.
            var trainerOptions = new FastTreeBinaryTrainer.Options
            {
                // Use L2Norm for early stopping.
                EarlyStoppingMetric = EarlyStoppingMetric.L2Norm,
                // Create a simpler model by penalizing usage of new features.
                FeatureFirstUsePenalty = 0.1,
                // Reduce the number of trees to 3.
                NumberOfTrees = 3,
                // Number of leaves per tree.
                NumberOfLeaves = 6,
                // Feature column name.
                FeatureColumnName = featureColumnName,
                // Label column name.
                LabelColumnName = labelColumnName
            };

            // Define the tree-based featurizer's configuration.
            var options = new FastTreeBinaryFeaturizationEstimator.Options
            {
                InputColumnName  = featureColumnName,
                TreesColumnName  = treesColumnName,
                LeavesColumnName = leavesColumnName,
                PathsColumnName  = pathsColumnName,
                TrainerOptions   = trainerOptions
            };

            // Define the featurizer.
            var pipeline = mlContext.Transforms.FeaturizeByFastTreeBinary(options);

            // Train the model.
            var model = pipeline.Fit(dataView);

            // Apply the trained transformer to the considered data set.
            var transformed = model.Transform(dataView);

            // Convert IDataView object to a list. Each element in the resulted list corresponds to a row in the IDataView.
            var transformedDataPoints = mlContext.Data.CreateEnumerable<TransformedDataPoint>(transformed, false).ToList();

            // Print out the transformation of the first 3 data points.
            for (int i = 0; i < 3; ++i)
            {
                var dataPoint            = dataPoints[i];
                var transformedDataPoint = transformedDataPoints[i];
                Console.WriteLine($"The original feature vector [{String.Join(",", dataPoint.Features)}] is transformed to three different tree-based feature vectors:");
                Console.WriteLine($"  Trees' output values: [{String.Join(",", transformedDataPoint.Trees)}].");
                Console.WriteLine($"  Leaf IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Leaves)}].");
                Console.WriteLine($"  Path IDs' 0-1 representation: [{String.Join(",", transformedDataPoint.Paths)}].");
            }

            // Expected output:
            //   The original feature vector [0.8173254,0.7680227,0.5581612] is transformed to three different tree-based feature vectors:
            //     Trees' output values: [0.5714286,0.4636412,0.535588].
            //     Leaf IDs' 0-1 representation: [0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1].
            //     Path IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,1,1,1].
            //   The original feature vector [0.5888848,0.9360271,0.4721779] is transformed to three different tree-based feature vectors:
            //     Trees' output values: [0.2352941,-0.1382389,0.535588].
            //     Leaf IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1].
            //     Path IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,1,1,1].
            //   The original feature vector [0.2737045,0.2919063,0.4673147] is transformed to three different tree-based feature vectors:
            //     Trees' output values: [0.2352941,-0.1382389,-0.2184284].
            //     Leaf IDs' 0-1 representation: [0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0].
            //     Path IDs' 0-1 representation: [1,0,0,1,1,1,0,1,0,1,1,1,0,0,0].
        }
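The DataPoint and TransformedDataPoint types used above are not shown in this excerpt. Judging from the printed output (3 input features, 3 trees, 6 leaves per tree), a plausible definition is sketched below; the exact attributes in the original sample may differ.

        // Assumed input schema: a Boolean label and a 3-dimensional feature vector.
        private class DataPoint
        {
            public bool Label { get; set; }

            [VectorType(3)]
            public float[] Features { get; set; }
        }

        // Assumed output schema: one value per tree, plus 0-1 encodings of the
        // reached leaves and the traversed paths of every tree.
        private class TransformedDataPoint : DataPoint
        {
            public float[] Trees { get; set; }
            public float[] Leaves { get; set; }
            public float[] Paths { get; set; }
        }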
Example No. 9
        /// <summary>
        /// FastTree <see cref="BinaryClassificationCatalog"/> extension method.
        /// Predict a target using a decision tree binary classification model trained with the <see cref="FastTreeBinaryTrainer"/>.
        /// </summary>
        /// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
        /// <param name="label">The label column.</param>
        /// <param name="features">The features column.</param>
        /// <param name="weights">The optional weights column.</param>
        /// <param name="options">Algorithm advanced settings.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained. Note that this action cannot change the result in any way;
        /// it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[FastTree](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel) FastTree(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
                                                                                                             Scalar<bool> label, Vector<float> features, Scalar<float> weights,
                                                                                                             FastTreeBinaryTrainer.Options options,
                                                                                                             Action<CalibratedModelParametersBase<FastTreeBinaryModelParameters, PlattCalibrator>> onFit = null)
        {
            Contracts.CheckValueOrNull(options);
            CheckUserValues(label, features, weights, onFit);

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
            {
                options.LabelColumnName         = labelName;
                options.FeatureColumnName       = featuresName;
                options.ExampleWeightColumnName = weightsName;

                var trainer = new FastTreeBinaryTrainer(env, options);

                if (onFit != null)
                {
                    return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
                }
                else
                {
                    return trainer;
                }
            }, label, features, weights);

            return rec.Output;
        }