コード例 #1
0
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
        /// </summary>
        /// <param name="catalog">The binary classification catalog trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Advanced arguments to the algorithm.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            Scalar <bool> label, Vector <float> features, Scalar <float> weights,
            SdcaBinaryTrainer.Options options,
            Action <CalibratedModelParametersBase <LinearBinaryModelParameters, PlattCalibrator> > onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckValueOrNull(options);
            Contracts.CheckValueOrNull(onFit);

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
            {
                options.LabelColumnName   = labelName;
                options.FeatureColumnName = featuresName;

                var trainer = new SdcaBinaryTrainer(env, options);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans =>
                    {
                        onFit(trans.Model);
                    }));
                }
                return(trainer);
            }, label, features, weights);

            return(rec.Output);
        }
コード例 #2
0
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
        /// </summary>
        /// <param name="ctx">The binary classification context trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Advanced arguments to the algorithm.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationContext.BinaryClassificationTrainers ctx,
            Scalar <bool> label, Vector <float> features, Scalar <float> weights,
            SdcaBinaryTrainer.Options options,
            Action <LinearBinaryModelParameters, ParameterMixingCalibratedPredictor> onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckValueOrNull(options);
            Contracts.CheckValueOrNull(onFit);

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
            {
                options.LabelColumn   = labelName;
                options.FeatureColumn = featuresName;

                var trainer = new SdcaBinaryTrainer(env, options);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans =>
                    {
                        // Under the default log-loss we assume a calibrated predictor.
                        var model = trans.Model;
                        var cali = (ParameterMixingCalibratedPredictor)model;
                        var pred = (LinearBinaryModelParameters)cali.SubPredictor;
                        onFit(pred, cali);
                    }));
                }
                return(trainer);
            }, label, features, weights);

            return(rec.Output);
        }
コード例 #3
0
        public void TestEstimatorSymSgdInitPredictor()
        {
            (var pipe, var dataView) = GetBinaryClassificationPipeline();
            var transformedData = pipe.Fit(dataView).Transform(dataView);

            var initPredictor = new SdcaBinaryTrainer(Env, "Label", "Features").Fit(transformedData);
            var data          = initPredictor.Transform(transformedData);

            var withInitPredictor = new SymSgdClassificationTrainer(Env, "Label", "Features").Train(transformedData, initialPredictor: initPredictor.Model);
            var outInitData       = withInitPredictor.Transform(transformedData);

            var notInitPredictor = new SymSgdClassificationTrainer(Env, "Label", "Features").Train(transformedData);
            var outNoInitData    = notInitPredictor.Transform(transformedData);

            int numExamples = 10;
            var col1        = data.GetColumn <float>(Env, "Score").Take(numExamples).ToArray();
            var col2        = outInitData.GetColumn <float>(Env, "Score").Take(numExamples).ToArray();
            var col3        = outNoInitData.GetColumn <float>(Env, "Score").Take(numExamples).ToArray();

            bool col12Diff = default;
            bool col23Diff = default;
            bool col13Diff = default;

            for (int i = 0; i < numExamples; i++)
            {
                col12Diff = col12Diff || (col1[i] != col2[i]);
                col23Diff = col23Diff || (col2[i] != col3[i]);
                col13Diff = col13Diff || (col1[i] != col3[i]);
            }
            Contracts.Assert(col12Diff && col23Diff && col13Diff);
            Done();
        }
コード例 #4
0
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
        /// </summary>
        /// <param name="catalog">The binary classification catalog trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="l2Const">The L2 regularization hyperparameter.</param>
        /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.</param>
        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
            Scalar <bool> label, Vector <float> features, Scalar <float> weights = null,
            float?l2Const     = null,
            float?l1Threshold = null,
            int?maxIterations = null,
            Action <CalibratedModelParametersBase <LinearBinaryModelParameters, PlattCalibrator> > onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
            Contracts.CheckValueOrNull(onFit);

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
            {
                var trainer = new SdcaBinaryTrainer(env, labelName, featuresName, weightsName, l2Const, l1Threshold, maxIterations);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans =>
                    {
                        onFit(trans.Model);
                    }));
                }
                return(trainer);
            }, label, features, weights);

            return(rec.Output);
        }
コード例 #5
0
        public void SetupBreastCancerPipeline()
        {
            _breastCancerExample = new BreastCancerData()
            {
                Features = new[] { 5f, 1f, 1f, 1f, 2f, 1f, 3f, 1f, 1f }
            };

            string _breastCancerDataPath = Program.GetInvariantCultureDataPath("breast-cancer.txt");

            var env    = new MLContext(seed: 1, conc: 1);
            var reader = new TextLoader(env, columns: new[]
            {
                new TextLoader.Column("Label", DataKind.BL, 0),
                new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(1, 9) })
            },
                                        hasHeader: false
                                        );

            IDataView data = reader.Read(_breastCancerDataPath);

            var pipeline = new SdcaBinaryTrainer(env, "Label", "Features", advancedSettings: (s) => { s.NumThreads = 1; s.ConvergenceTolerance = 1e-2f; });

            var model = pipeline.Fit(data);

            _breastCancerModel = model.CreatePredictionEngine <BreastCancerData, BreastCancerPrediction>(env);
        }
コード例 #6
0
        public void SetupBreastCancerPipeline()
        {
            _breastCancerExample = new BreastCancerData()
            {
                Features = new[] { 5f, 1f, 1f, 1f, 2f, 1f, 3f, 1f, 1f }
            };

            string _breastCancerDataPath = Program.GetInvariantCultureDataPath("breast-cancer.txt");

            using (var env = new ConsoleEnvironment(seed: 1, conc: 1, verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
            {
                var reader = new TextLoader(env,
                                            new TextLoader.Arguments()
                {
                    Separator = "\t",
                    HasHeader = false,
                    Column    = new[]
                    {
                        new TextLoader.Column("Label", DataKind.BL, 0),
                        new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(1, 9) })
                    }
                });

                IDataView data = reader.Read(_breastCancerDataPath);

                var pipeline = new SdcaBinaryTrainer(env, "Label", "Features", advancedSettings: (s) => { s.NumThreads = 1; s.ConvergenceTolerance = 1e-2f; });

                var model = pipeline.Fit(data);

                _breastCancerModel = model.MakePredictionFunction <BreastCancerData, BreastCancerPrediction>(env);
            }
        }
コード例 #7
0
        private IDataScorerTransform _TrainSentiment()
        {
            bool normalize = true;

            var args = new TextLoader.Arguments()
            {
                Separator = "tab",
                HasHeader = true,
                Column    = new[] {
                    new TextLoader.Column("Label", DataKind.BL, 0),
                    new TextLoader.Column("SentimentText", DataKind.Text, 1)
                }
            };

            var args2 = new TextFeaturizingEstimator.Arguments()
            {
                Column = new TextFeaturizingEstimator.Column
                {
                    Name   = "Features",
                    Source = new[] { "SentimentText" }
                },
                KeepDiacritics               = false,
                KeepPunctuations             = false,
                TextCase                     = TextNormalizingEstimator.CaseNormalizationMode.Lower,
                OutputTokens                 = true,
                UsePredefinedStopWordRemover = true,
                VectorNormalizer             = normalize ? TextFeaturizingEstimator.TextNormKind.L2 : TextFeaturizingEstimator.TextNormKind.None,
                CharFeatureExtractor         = new NgramExtractorTransform.NgramExtractorArguments()
                {
                    NgramLength = 3, AllLengths = false
                },
                WordFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments()
                {
                    NgramLength = 2, AllLengths = true
                },
            };

            var trainFilename = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv");

            using (var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1))
            {
                // Pipeline
                var loader = new TextLoader(env, args).Read(new MultiFileSource(trainFilename));
                var trans  = TextFeaturizingEstimator.Create(env, args2, loader);

                // Train
                var trainer = new SdcaBinaryTrainer(env, new SdcaBinaryTrainer.Arguments
                {
                    NumThreads = 1
                });

                var cached    = new CacheDataView(env, trans, prefetch: null);
                var predictor = trainer.Fit(cached);

                var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
                var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
                return(ScoreUtils.GetScorer(predictor.Model, scoreRoles, env, trainRoles.Schema));
            }
        }
コード例 #8
0
        public void OVAUncalibrated()
        {
            var(pipeline, data) = GetMultiClassPipeline();
            var sdcaTrainer = new SdcaBinaryTrainer(Env, "Label", "Features", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; s.Calibrator = null; });

            pipeline.Append(new Ova(Env, sdcaTrainer, useProbabilities: false))
            .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

            TestEstimatorCore(pipeline, data);
            Done();
        }
コード例 #9
0
        private IDataScorerTransform _TrainSentiment()
        {
            bool normalize = true;

            var args = new TextLoader.Options()
            {
                Separators = new[] { '\t' },
                HasHeader  = true,
                Columns    = new[] {
                    new TextLoader.Column("Label", DataKind.Boolean, 0),
                    new TextLoader.Column("SentimentText", DataKind.String, 1)
                }
            };

            var args2 = new TextFeaturizingEstimator.Options()
            {
                KeepDiacritics               = false,
                KeepPunctuations             = false,
                CaseMode                     = TextNormalizingEstimator.CaseMode.Lower,
                OutputTokens                 = true,
                UsePredefinedStopWordRemover = true,
                Norm = normalize ? TextFeaturizingEstimator.NormFunction.L2 : TextFeaturizingEstimator.NormFunction.None,
                CharFeatureExtractor = new WordBagEstimator.Options()
                {
                    NgramLength = 3, UseAllLengths = false
                },
                WordFeatureExtractor = new WordBagEstimator.Options()
                {
                    NgramLength = 2, UseAllLengths = true
                },
            };

            var trainFilename = FileHelper.GetTestFile("wikipedia-detox-250-line-data.tsv");

            /*using (*/
            var env = EnvHelper.NewTestEnvironment(seed: 1, conc: 1);
            {
                // Pipeline
                var loader = new TextLoader(env, args).Load(new MultiFileSource(trainFilename));
                var trans  = TextFeaturizingEstimator.Create(env, args2, loader);

                // Train
                var trainer = new SdcaBinaryTrainer(env, new SdcaBinaryTrainer.Options
                {
                });

                var cached    = new CacheDataView(env, trans, prefetch: null);
                var predictor = trainer.Fit(cached);

                var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
                var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
                return(ScoreUtils.GetScorer(predictor.Model, scoreRoles, env, trainRoles.Schema));
            }
        }
コード例 #10
0
        public void Pkpd()
        {
            var(pipeline, data) = GetMultiClassPipeline();

            var sdcaTrainer = new SdcaBinaryTrainer(Env, "Label", "Features", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; });

            pipeline.Append(new Pkpd(Env, sdcaTrainer))
            .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

            TestEstimatorCore(pipeline, data);
            Done();
        }
コード例 #11
0
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and a custom loss.
        /// Note that because we cannot be sure that all loss functions will produce naturally calibrated outputs, setting
        /// a custom loss function will not produce a calibrated probability column.
        /// </summary>
        /// <param name="ctx">The binary classification context trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="loss">The custom loss.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="l2Const">The L2 regularization hyperparameter.</param>
        /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.</param>
        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
        /// <param name="advancedSettings">A delegate to set more settings.
        /// The settings here will override the ones provided in the direct method signature,
        /// if both are present and have different values.
        /// The columns names, however need to be provided directly, not through the <paramref name="advancedSettings"/>.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), and the predicted label.</returns>
        /// <seealso cref="Sdca(BinaryClassificationContext.BinaryClassificationTrainers, Scalar{bool}, Vector{float}, Scalar{float}, float?, float?, int?, Action{SdcaBinaryTrainer.Arguments}, Action{LinearBinaryModelParameters, ParameterMixingCalibratedPredictor})"/>
        public static (Scalar <float> score, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationContext.BinaryClassificationTrainers ctx,
            Scalar <bool> label, Vector <float> features,
            ISupportSdcaClassificationLoss loss,
            Scalar <float> weights = null,
            float?l2Const          = null,
            float?l1Threshold      = null,
            int?maxIterations      = null,
            Action <SdcaBinaryTrainer.Arguments> advancedSettings = null,
            Action <LinearBinaryModelParameters> onFit            = null
            )
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValue(loss, nameof(loss));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
            Contracts.CheckValueOrNull(advancedSettings);
            Contracts.CheckValueOrNull(onFit);

            bool hasProbs = loss is LogLoss;

            var rec = new TrainerEstimatorReconciler.BinaryClassifierNoCalibration(
                (env, labelName, featuresName, weightsName) =>
            {
                var trainer = new SdcaBinaryTrainer(env, labelName, featuresName, weightsName, loss, l2Const, l1Threshold, maxIterations, advancedSettings);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans =>
                    {
                        var model = trans.Model;
                        if (model is ParameterMixingCalibratedPredictor cali)
                        {
                            onFit((LinearBinaryModelParameters)cali.SubPredictor);
                        }
                        else
                        {
                            onFit((LinearBinaryModelParameters)model);
                        }
                    }));
                }
                return(trainer);
            }, label, features, weights, hasProbs);

            return(rec.Output);
        }
コード例 #12
0
        public void MetacomponentsFeaturesRenamed()
        {
            var data = new TextLoader(Env, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',')
                       .Read(GetDataPath(TestDatasets.irisData.trainFilename));

            var sdcaTrainer = new SdcaBinaryTrainer(Env, "Label", "Vars", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; });
            var pipeline    = new ColumnConcatenatingEstimator(Env, "Vars", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                              .Append(new ValueToKeyMappingEstimator(Env, "Label"), TransformerScope.TrainTest)
                              .Append(new Ova(Env, sdcaTrainer))
                              .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

            var model = pipeline.Fit(data);

            TestEstimatorCore(pipeline, data);
            Done();
        }
コード例 #13
0
        public void SdcaWorkout()
        {
            var dataPath = GetDataPath("breast-cancer.txt");

            var data = TextLoader.CreateReader(Env, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10)))
                       .Read(dataPath);
            IEstimator <ITransformer> est = new SdcaBinaryTrainer(Env, "Features", "Label", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f);

            TestEstimatorCore(est, data.AsDynamic);

            est = new SdcaRegressionTrainer(Env, "Features", "Label", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f);
            TestEstimatorCore(est, data.AsDynamic);

            est = new SdcaMultiClassTrainer(Env, "Features", "Label", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f);
            TestEstimatorCore(est, data.AsDynamic);

            Done();
        }
コード例 #14
0
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and a custom loss.
        /// Note that because we cannot be sure that all loss functions will produce naturally calibrated outputs, setting
        /// a custom loss function will not produce a calibrated probability column.
        /// </summary>
        /// <param name="ctx">The binary classification context trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="loss">The custom loss.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="options">Advanced arguments to the algorithm.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), and the predicted label.</returns>
        public static (Scalar <float> score, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationContext.BinaryClassificationTrainers ctx,
            Scalar <bool> label, Vector <float> features,
            Scalar <float> weights,
            ISupportSdcaClassificationLoss loss,
            SdcaBinaryTrainer.Options options,
            Action <LinearBinaryModelParameters> onFit = null
            )
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckValueOrNull(options);
            Contracts.CheckValueOrNull(onFit);

            bool hasProbs = loss is LogLoss;

            var rec = new TrainerEstimatorReconciler.BinaryClassifierNoCalibration(
                (env, labelName, featuresName, weightsName) =>
            {
                options.FeatureColumn = featuresName;
                options.LabelColumn   = labelName;

                var trainer = new SdcaBinaryTrainer(env, options);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans =>
                    {
                        var model = trans.Model;
                        if (model is ParameterMixingCalibratedPredictor cali)
                        {
                            onFit((LinearBinaryModelParameters)cali.SubPredictor);
                        }
                        else
                        {
                            onFit((LinearBinaryModelParameters)model);
                        }
                    }));
                }
                return(trainer);
            }, label, features, weights, hasProbs);

            return(rec.Output);
        }
コード例 #15
0
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and log-loss.
        /// </summary>
        /// <param name="ctx">The binary classification context trainer object.</param>
        /// <param name="label">The label, or dependent variable.</param>
        /// <param name="features">The features, or independent variables.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="l2Const">The L2 regularization hyperparameter.</param>
        /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.</param>
        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
        /// <param name="advancedSettings">A delegate to set more settings.
        /// The settings here will override the ones provided in the direct method signature,
        /// if both are present and have different values.
        /// The columns names, however need to be provided directly, not through the <paramref name="advancedSettings"/>.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
        /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[SDCA](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs)]
        /// ]]></format>
        /// </example>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationContext.BinaryClassificationTrainers ctx,
            Scalar <bool> label, Vector <float> features, Scalar <float> weights = null,
            float?l2Const     = null,
            float?l1Threshold = null,
            int?maxIterations = null,
            Action <SdcaBinaryTrainer.Arguments> advancedSettings = null,
            Action <LinearBinaryModelParameters, ParameterMixingCalibratedPredictor> onFit = null)
        {
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
            Contracts.CheckValueOrNull(advancedSettings);
            Contracts.CheckValueOrNull(onFit);

            var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
            {
                var trainer = new SdcaBinaryTrainer(env, labelName, featuresName, weightsName, loss: new LogLoss(), l2Const, l1Threshold, maxIterations, advancedSettings);
                if (onFit != null)
                {
                    return(trainer.WithOnFitDelegate(trans =>
                    {
                        // Under the default log-loss we assume a calibrated predictor.
                        var model = trans.Model;
                        var cali = (ParameterMixingCalibratedPredictor)model;
                        var pred = (LinearBinaryModelParameters)cali.SubPredictor;
                        onFit(pred, cali);
                    }));
                }
                return(trainer);
            }, label, features, weights);

            return(rec.Output);
        }