SdcaNonCalibrated(
            this SweepableBinaryClassificationTrainers trainer,
            string labelColumnName   = "Label",
            string featureColumnName = "Features",
            SweepableOption <SdcaNonCalibratedBinaryTrainer.Options> optionBuilder = null,
            SdcaNonCalibratedBinaryTrainer.Options defaultOption = null)
        {
            var context = trainer.Context;

            if (optionBuilder == null)
            {
                optionBuilder = SdcaNonCalibratedBinaryTrainerSweepableOptions.Default;
            }

            optionBuilder.SetDefaultOption(defaultOption);
            return(context.AutoML().CreateSweepableEstimator(
                       (context, option) =>
            {
                option.LabelColumnName = labelColumnName;
                option.FeatureColumnName = featureColumnName;

                return context.BinaryClassification.Trainers.SdcaNonCalibrated(option);
            },
                       optionBuilder,
                       new string[] { labelColumnName, featureColumnName },
                       new string[] { PredictedLabel },
                       nameof(SdcaNonCalibratedBinaryTrainer)));
        }
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and a custom loss.
        /// Note that because we cannot be sure that all loss functions will produce naturally calibrated outputs, setting
        /// a custom loss function will not produce a calibrated probability column.
        /// </summary>
        /// <param name="catalog">The binary classification catalog trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="lossFunction">The custom loss.</param>
        /// <param name="options">Advanced arguments to the algorithm. May be <c>null</c>, in which case a
        /// default-constructed options object is used. The feature and label column names of the options object
        /// are overwritten with the resolved column names when the trainer is created.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), and the predicted label.</returns>
        public static (Scalar<float> score, Scalar<bool> predictedLabel) SdcaNonCalibrated(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            Scalar<bool> label, Vector<float> features, Scalar<float> weights,
            ISupportSdcaClassificationLoss lossFunction,
            SdcaNonCalibratedBinaryTrainer.Options options,
            Action<LinearBinaryModelParameters> onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckValueOrNull(options);
            Contracts.CheckValueOrNull(onFit);

            // A null options argument passes the check above, but the factory below writes to it.
            // Substitute default options so that a null argument does not fail later with a
            // NullReferenceException when the trainer is created.
            if (options == null)
            {
                options = new SdcaNonCalibratedBinaryTrainer.Options();
            }

            // NOTE(review): neither lossFunction nor weightsName is forwarded into options here, so the
            // loss and example weights supplied to this overload do not reach the trainer through this
            // method -- confirm whether they should be applied to the options object.
            var rec = new TrainerEstimatorReconciler.BinaryClassifierNoCalibration(
                (env, labelName, featuresName, weightsName) =>
                {
                    // The resolved column names always win over whatever the caller put in options.
                    options.FeatureColumnName = featuresName;
                    options.LabelColumnName = labelName;

                    var trainer = new SdcaNonCalibratedBinaryTrainer(env, options);
                    if (onFit != null)
                    {
                        // Hand the trained model parameters to the caller's observer.
                        return trainer.WithOnFitDelegate(trans =>
                        {
                            onFit(trans.Model);
                        });
                    }
                    return trainer;
                }, label, features, weights);

            return rec.Output;
        }
Beispiel #3
0
        /// <summary>
        /// Creates an SDCA trainer for linear binary classification that is configured through advanced options.
        /// </summary>
        /// <param name="catalog">The binary classification catalog trainer object; must not be null.</param>
        /// <param name="options">Advanced arguments to the algorithm; must not be null.</param>
        /// <returns>A new <see cref="SdcaNonCalibratedBinaryTrainer"/> built from the catalog's environment
        /// and the supplied <paramref name="options"/>.</returns>
        public static SdcaNonCalibratedBinaryTrainer StochasticDualCoordinateAscentNonCalibrated(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            SdcaNonCalibratedBinaryTrainer.Options options)
        {
            // Validate in declaration order so the first offending argument is the one reported.
            Contracts.CheckValue(catalog, nameof(catalog));
            Contracts.CheckValue(options, nameof(options));

            return new SdcaNonCalibratedBinaryTrainer(CatalogUtils.GetEnvironment(catalog), options);
        }
        /// <summary>
        /// Sample: trains a non-calibrated SDCA binary classifier with custom options on generated data,
        /// prints the first five test predictions and then the evaluation metrics.
        /// </summary>
        public static void Example()
        {
            // The ML.NET context acts as the catalog of available operations, the source of randomness,
            // and the hook for exception tracking and logging. A fixed seed keeps this sample's output
            // deterministic.
            var ml = new MLContext(seed: 0);

            // Generate 1000 training points, expose them as an IDataView, and cache the result in memory.
            // ML.NET does not cache data sets by default, so repeated passes over data that requires
            // expensive featurization or disk access can be slow; caching helps iterative algorithms
            // that need many passes, provided the data fits into memory.
            var trainingSet = ml.Data.Cache(
                ml.Data.LoadFromEnumerable(GenerateRandomDataPoints(1000)));

            // Configure the trainer.
            var trainerOptions = new SdcaNonCalibratedBinaryTrainer.Options();
            // Loss function to optimize.
            trainerOptions.LossFunction = new HingeLoss();
            // Tighter convergence tolerance.
            trainerOptions.ConvergenceTolerance = 0.05f;
            // Allow more passes over the training data.
            trainerOptions.MaximumNumberOfIterations = 30;
            // Weigh positive-class instances slightly more.
            trainerOptions.PositiveInstanceWeight = 1.2f;

            // Build the trainer and fit it to the cached training set.
            var trainedModel = ml.BinaryClassification.Trainers
                .SdcaNonCalibrated(trainerOptions)
                .Fit(trainingSet);

            // Score a test set generated with a different seed so that it differs from the training data.
            var scoredTestSet = trainedModel.Transform(
                ml.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123)));

            // Materialize the scored rows as a list, then show the first five.
            var firstFive = ml.Data
                .CreateEnumerable<Prediction>(scoredTestSet, reuseRowObject: false)
                .ToList()
                .Take(5);

            foreach (var p in firstFive)
            {
                Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
            }

            // Expected output:
            //   Label: True, Prediction: False
            //   Label: False, Prediction: False
            //   Label: True, Prediction: True
            //   Label: True, Prediction: True
            //   Label: False, Prediction: True

            // Compute and print the overall (non-calibrated) metrics.
            PrintMetrics(ml.BinaryClassification.EvaluateNonCalibrated(scoredTestSet));

            // Expected output:
            //   Accuracy: 0.61
            //   AUC: 0.67
            //   F1 Score: 0.65
            //   Negative Precision: 0.69
            //   Negative Recall: 0.45
            //   Positive Precision: 0.56
            //   Positive Recall: 0.77
            //
            //   TEST POSITIVE RATIO:    0.4760 (238.0/(238.0+262.0))
            //   Confusion table
            //             ||======================
            //   PREDICTED || positive | negative | Recall
            //   TRUTH     ||======================
            //    positive ||      178 |       60 | 0.7479
            //    negative ||      134 |      128 | 0.4885
            //             ||======================
            //   Precision ||   0.5705 |   0.6809 |
        }