Пример #1
0
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
        /// </summary>
        /// <param name="ctx">The binary classification context trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Advanced arguments to the algorithm.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationContext.BinaryClassificationTrainers ctx,
            Scalar <bool> label, Vector <float> features, Scalar <float> weights,
            SdcaBinaryTrainer.Options options,
            Action <LinearBinaryModelParameters, ParameterMixingCalibratedPredictor> onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckValueOrNull(options);
            Contracts.CheckValueOrNull(onFit);

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
            {
                options.LabelColumn   = labelName;
                options.FeatureColumn = featuresName;

                var trainer = new SdcaBinaryTrainer(env, options);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans =>
                    {
                        // Under the default log-loss we assume a calibrated predictor.
                        var model = trans.Model;
                        var cali = (ParameterMixingCalibratedPredictor)model;
                        var pred = (LinearBinaryModelParameters)cali.SubPredictor;
                        onFit(pred, cali);
                    }));
                }
                return(trainer);
            }, label, features, weights);

            return(rec.Output);
        }
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
        /// </summary>
        /// <param name="catalog">The binary classification catalog trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Advanced arguments to the algorithm.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            Scalar <bool> label, Vector <float> features, Scalar <float> weights,
            SdcaBinaryTrainer.Options options,
            Action <CalibratedModelParametersBase <LinearBinaryModelParameters, PlattCalibrator> > onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckValueOrNull(options);
            Contracts.CheckValueOrNull(onFit);

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
            {
                options.LabelColumnName   = labelName;
                options.FeatureColumnName = featuresName;

                var trainer = new SdcaBinaryTrainer(env, options);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans =>
                    {
                        onFit(trans.Model);
                    }));
                }
                return(trainer);
            }, label, features, weights);

            return(rec.Output);
        }
Пример #3
0
        /// <summary>
        /// Predict a target using a logistic regression model trained with the SDCA trainer.
        /// The trained model can produce probablity via feeding output value of the linear
        /// function to a <see cref="PlattCalibrator"/>. Compared with <see cref="StochasticDualCoordinateAscent(BinaryClassificationCatalog.BinaryClassificationTrainers, string, string, string, float?, float?, int?)"/>,
        /// this function allows more advanced settings by accepting <see cref="SdcaBinaryTrainer.Options"/>.
        /// </summary>
        /// <param name="catalog">The binary classification catalog trainer object.</param>
        /// <param name="options">Advanced arguments to the algorithm.</param>
        public static SdcaBinaryTrainer StochasticDualCoordinateAscent(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            SdcaBinaryTrainer.Options options)
        {
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckValue(options, nameof(options));

            var env = CatalogUtils.GetEnvironment(catalog);

            return(new SdcaBinaryTrainer(env, options));
        }
        // In this examples we will use the adult income dataset. The goal is to predict
        // if a person's income is above $50K or not, based on demographic information about that person.
        // For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
        public static void Example()
        {
            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
            // as a catalog of available operations and as the source of randomness.
            // Setting the seed to a fixed number in this example to make outputs deterministic.
            var mlContext = new MLContext(seed: 0);

            // Download and featurize the dataset.
            var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);

            // Leave out 10% of data for testing.
            var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);

            // Define the trainer options.
            var options = new SdcaBinaryTrainer.Options()
            {
                // Make the convergence tolerance tighter.
                ConvergenceTolerance = 0.05f,
                // Increase the maximum number of passes over training data.
                MaxIterations = 30,
                // Give the instances of the positive class slightly more weight.
                PositiveInstanceWeight = 1.2f,
            };

            // Create data training pipeline.
            var pipeline = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(options);

            // Fit this pipeline to the training data.
            var model = pipeline.Fit(trainTestData.TrainSet);

            // Evaluate how the model is doing on the test data.
            var dataWithPredictions = model.Transform(trainTestData.TestSet);
            var metrics             = mlContext.BinaryClassification.Evaluate(dataWithPredictions);

            SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

            // Expected output:
            //   Accuracy: 0.85
            //   AUC: 0.90
            //   F1 Score: 0.66
            //   Negative Precision: 0.89
            //   Negative Recall: 0.92
            //   Positive Precision: 0.70
            //   Positive Recall: 0.63
            //   LogLoss: 0.47
            //   LogLossReduction: 39.77
            //   Entropy: 0.78
        }
Пример #5
0
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and a custom loss.
        /// Note that because we cannot be sure that all loss functions will produce naturally calibrated outputs, setting
        /// a custom loss function will not produce a calibrated probability column.
        /// </summary>
        /// <param name="ctx">The binary classification context trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="loss">The custom loss.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Advanced arguments to the algorithm.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), and the predicted label.</returns>
        public static (Scalar <float> score, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationContext.BinaryClassificationTrainers ctx,
            Scalar <bool> label, Vector <float> features,
            Scalar <float> weights,
            ISupportSdcaClassificationLoss loss,
            SdcaBinaryTrainer.Options options,
            Action <LinearBinaryModelParameters> onFit = null
            )
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckValueOrNull(options);
            Contracts.CheckValueOrNull(onFit);

            bool hasProbs = loss is LogLoss;

            var rec = new TrainerEstimatorReconciler.BinaryClassifierNoCalibration(
                (env, labelName, featuresName, weightsName) =>
            {
                options.FeatureColumn = featuresName;
                options.LabelColumn   = labelName;

                var trainer = new SdcaBinaryTrainer(env, options);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans =>
                    {
                        var model = trans.Model;
                        if (model is ParameterMixingCalibratedPredictor cali)
                        {
                            onFit((LinearBinaryModelParameters)cali.SubPredictor);
                        }
                        else
                        {
                            onFit((LinearBinaryModelParameters)model);
                        }
                    }));
                }
                return(trainer);
            }, label, features, weights, hasProbs);

            return(rec.Output);
        }