Beispiel #1
0
        /// <summary>
        /// Fits the sentiment loader and text-transform pipeline, writes the resulting training
        /// data to the binary file "i.idv", loads it back through a BinaryLoader and trains a
        /// LinearClassificationTrainer on the reloaded data. The file is deleted afterwards.
        /// </summary>
        void New_FileBasedSavingOfData()
        {
            var dataPath = GetDataPath(SentimentDataPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline.
                var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                               .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

                var trainData = pipeline.Fit(new MultiFileSource(dataPath)).Read(new MultiFileSource(dataPath));

                // Own the write stream explicitly so that it is flushed and closed before
                // the same file is opened for reading by the BinaryLoader below.
                using (var file = env.CreateOutputFile("i.idv"))
                using (var stream = file.CreateWriteStream())
                {
                    trainData.SaveAsBinary(env, stream);
                }

                var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments {
                    NumThreads = 1
                }, "Features", "Label");
                var loadedTrainData = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource("i.idv"));

                // Train on the data that was read back from disk.
                var model = trainer.Train(new RoleMappedData(loadedTrainData, DefaultColumnNames.Label, DefaultColumnNames.Features));
                DeleteOutputPath("i.idv");
            }
        }
        /// <summary>
        /// Stores one sample input in <c>_breastCancerExample</c>, then reads "breast-cancer.txt",
        /// fits a LinearClassificationTrainer on it and stores the resulting prediction function
        /// in <c>_breastCancerModel</c>.
        /// </summary>
        public void SetupBreastCancerPipeline()
        {
            var sample = new BreastCancerData();
            sample.Features = new[] { 5f, 1f, 1f, 1f, 2f, 1f, 3f, 1f, 1f };
            _breastCancerExample = sample;

            string dataPath = Program.GetInvariantCultureDataPath("breast-cancer.txt");

            using (var env = new ConsoleEnvironment(seed: 1, conc: 1, verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
            {
                // Tab-separated file without a header: "Label" (DataKind.BL) is read from
                // column 0 and "Features" (DataKind.R4) from the range (1, 9).
                var loaderArgs = new TextLoader.Arguments
                {
                    Separator = "\t",
                    HasHeader = false,
                    Column = new[]
                    {
                        new TextLoader.Column("Label", DataKind.BL, 0),
                        new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(1, 9) })
                    }
                };
                var reader = new TextLoader(env, loaderArgs);

                IDataView data = reader.Read(dataPath);

                // Single-threaded training with a loose convergence tolerance.
                var trainer = new LinearClassificationTrainer(env, "Features", "Label", advancedSettings: s =>
                {
                    s.NumThreads = 1;
                    s.ConvergenceTolerance = 1e-2f;
                });

                var fitted = trainer.Fit(data);

                _breastCancerModel = fitted.MakePredictionFunction <BreastCancerData, BreastCancerPrediction>(env);
            }
        }
        /// <summary>
        /// Loads and featurizes the sentiment training file, saves the transformed view to
        /// "i.idv" with a BinarySaver, reloads it through a BinaryLoader and trains a
        /// LinearClassificationTrainer on the reloaded data before deleting the file.
        /// </summary>
        void FileBasedSavingOfData()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath); // not referenced again in this method

            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Load the text and apply the text transform.
                var loaderArgs = MakeSentimentTextLoaderArgs();
                var textData   = TextLoader.ReadFile(env, loaderArgs, new MultiFileSource(dataPath));

                var featurized  = TextTransform.Create(env, MakeSentimentTextTransformArgs(), textData);
                var binarySaver = new BinarySaver(env, new BinarySaver.Arguments());

                // Write the transformed view to disk.
                using (var channel = env.Start("SaveData"))
                {
                    using (var outputFile = env.CreateOutputFile("i.idv"))
                    {
                        DataSaverUtils.SaveDataView(channel, binarySaver, featurized, outputFile);
                    }
                }

                // Read the file back and train on the reloaded copy.
                var reloaded = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource("i.idv"));
                var roles    = new RoleMappedData(reloaded, label: "Label", feature: "Features");

                var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
                var trainer     = new LinearClassificationTrainer(env, trainerArgs);
                var trained     = trainer.Train(new Runtime.TrainContext(roles));

                DeleteOutputPath("i.idv");
            }
        }
Beispiel #4
0
        /// <summary>
        /// Trains a LinearClassificationTrainer on the featurized sentiment data, wrapping the
        /// data in a CacheDataView when the trainer reports that it wants caching and letting
        /// NormalizeTransform.CreateIfNeeded adjust the role-mapped data before training.
        /// </summary>
        public void AutoNormalizationAndCaching()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath); // not referenced again in this method

            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Load the text and apply the text transform.
                var loaderArgs = MakeSentimentTextLoaderArgs();
                var textData   = TextLoader.ReadFile(env, loaderArgs, new MultiFileSource(dataPath));

                var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(false), textData);

                // Configure the trainer.
                var trainerArgs = new LinearClassificationTrainer.Arguments();
                trainerArgs.NumThreads           = 1;
                trainerArgs.ConvergenceTolerance = 1f;
                var trainer = new LinearClassificationTrainer(env, trainerArgs);

                // Auto-caching: only add the cache when the trainer asks for it.
                IDataView trainView;
                if (trainer.Info.WantCaching)
                {
                    trainView = new CacheDataView(env, featurized, prefetch: null);
                }
                else
                {
                    trainView = featurized;
                }
                var roles = new RoleMappedData(trainView, label: "Label", feature: "Features");

                // Auto-normalization: roles is passed by ref, so the helper may replace it.
                NormalizeTransform.CreateIfNeeded(env, ref roles, trainer);
                var trained = trainer.Train(new Runtime.TrainContext(roles));
            }
        }
Beispiel #5
0
        /// <summary>
        /// Trains a LinearClassificationTrainer on the cached, featurized sentiment data and
        /// then trains an AveragedPerceptronTrainer on the same data, passing the first
        /// predictor as the initial predictor.
        /// </summary>
        public void TrainWithInitialPredictor()
        {
            var dataPath = GetDataPath(SentimentDataPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Load the text and apply the text transform.
                var loaderArgs = MakeSentimentTextLoaderArgs();
                var textData   = new TextLoader(env, loaderArgs, new MultiFileSource(dataPath));

                var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(), textData);

                // Both trainers read from the same cached view.
                var cache = new CacheDataView(env, featurized, prefetch: null);

                // First predictor.
                var firstArgs    = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
                var firstTrainer = new LinearClassificationTrainer(env, firstArgs);
                var roles        = new RoleMappedData(cache, label: "Label", feature: "Features");
                var firstResult  = firstTrainer.Train(new Runtime.TrainContext(roles));

                // Second predictor, started from the first one.
                var perceptron   = new AveragedPerceptronTrainer(env, new AveragedPerceptronTrainer.Arguments());
                var warmStart    = new TrainContext(roles, initialPredictor: firstResult);
                var secondResult = perceptron.Train(warmStart);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Trains a linear classifier on the featurized sentiment data, evaluates it on the test
        /// set, then wraps the same underlying model in a BinaryPredictionTransformer with a
        /// 0.01 threshold on the probability column and evaluates again with matching
        /// evaluator settings.
        /// </summary>
        public void New_ReconfigurablePrediction()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Fit the reader once and use it for both the train and the test file.
                var loader     = new MyTextLoader(env, MakeSentimentTextLoaderArgs());
                var dataReader = loader.Fit(new MultiFileSource(dataPath));

                var trainRaw = dataReader.Read(new MultiFileSource(dataPath));
                var testRaw  = dataReader.Read(new MultiFileSource(testDataPath));

                // Fit the text transform on the training data.
                var featurizer = new MyTextTransform(env, MakeSentimentTextTransformArgs());
                var fittedFeaturizer = featurizer.Fit(trainRaw);

                var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
                var trainer     = new LinearClassificationTrainer(env, trainerArgs, "Features", "Label");
                var trainData   = fittedFeaturizer.Transform(trainRaw);
                var model       = trainer.Fit(trainData);

                // Evaluation with the default evaluator arguments.
                var scoredTest       = model.Transform(fittedFeaturizer.Transform(testRaw));
                var defaultEvaluator = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments());
                var metrics          = defaultEvaluator.Evaluate(scoredTest, "Label", "Probability");

                // Same model, re-wrapped with a different decision threshold.
                var newModel      = new BinaryPredictionTransformer <IPredictorProducing <float> >(env, model.Model, trainData.Schema, model.FeatureColumn, threshold: 0.01f, thresholdColumn: DefaultColumnNames.Probability);
                var newScoredTest = newModel.Transform(fittedFeaturizer.Transform(testRaw));

                var thresholdEvaluator = new MyBinaryClassifierEvaluator(env, new BinaryClassifierEvaluator.Arguments
                {
                    Threshold            = 0.01f,
                    UseRawScoreThreshold = false
                });
                var newMetrics = thresholdEvaluator.Evaluate(newScoredTest, "Label", "Probability");
            }
        }
Beispiel #7
0
        /// <summary>
        /// Passes three trainers (LinearClassificationTrainer, SdcaRegressionTrainer and
        /// SdcaMultiClassTrainer), each configured with a convergence tolerance of 1e-2, to
        /// TestEstimatorCore using the "breast-cancer.txt" data.
        /// </summary>
        public void SdcaWorkout()
        {
            var dataPath = GetDataPath("breast-cancer.txt");

            // Label comes from LoadFloat(0), Features from LoadFloat(1, 10).
            var reader = TextLoader.CreateReader(Env, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10)));
            var data   = reader.Read(new MultiFileSource(dataPath));

            // Binary classification.
            var binaryArgs = new LinearClassificationTrainer.Arguments { ConvergenceTolerance = 1e-2f };
            IEstimator <ITransformer> binaryEst = new LinearClassificationTrainer(Env, binaryArgs, "Features", "Label");
            TestEstimatorCore(binaryEst, data.AsDynamic);

            // Regression.
            var regressionArgs = new SdcaRegressionTrainer.Arguments { ConvergenceTolerance = 1e-2f };
            IEstimator <ITransformer> regressionEst = new SdcaRegressionTrainer(Env, regressionArgs, "Features", "Label");
            TestEstimatorCore(regressionEst, data.AsDynamic);

            // Multi-class classification.
            var multiClassArgs = new SdcaMultiClassTrainer.Arguments { ConvergenceTolerance = 1e-2f };
            IEstimator <ITransformer> multiClassEst = new SdcaMultiClassTrainer(Env, multiClassArgs, "Features", "Label");
            TestEstimatorCore(multiClassEst, data.AsDynamic);

            Done();
        }
Beispiel #8
0
        /// <summary>
        /// Fits a LinearClassificationTrainer on the featurized sentiment data and then trains
        /// an AveragedPerceptronTrainer on the same data, using the first model's predictor as
        /// the initial predictor.
        /// </summary>
        public void New_TrainWithInitialPredictor()
        {
            var dataPath = GetDataPath(SentimentDataPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                var reader  = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var rawData = reader.Read(new MultiFileSource(dataPath));

                // Text transform: "SentimentText" in, "Features" out.
                var featurizer = new TextTransform(env, "SentimentText", "Features");

                // Fit the transform and produce the training set in one step.
                var trainData = featurizer.FitAndTransform(rawData);

                // First predictor.
                var sdcaArgs   = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
                var sdca       = new LinearClassificationTrainer(env, sdcaArgs, "Features", "Label");
                var firstModel = sdca.Fit(trainData);

                // Second predictor, started from the first model's predictor.
                var perceptron = new AveragedPerceptronTrainer(env, new AveragedPerceptronTrainer.Arguments());

                var roles      = new RoleMappedData(trainData, label: "Label", feature: "Features");
                var warmStart  = new TrainContext(roles, initialPredictor: firstModel.Model);
                var finalModel = perceptron.Train(warmStart);
            }
        }
        /// <summary>
        /// Compares the first ten "Score" values produced by three models on the same data:
        /// a LinearClassificationTrainer model, a SymSgdClassificationTrainer model trained with
        /// that model's predictor as the initial predictor, and a SymSgdClassificationTrainer
        /// model trained without one. Asserts that every pair differs in at least one value.
        /// </summary>
        public void TestEstimatorSymSgdInitPredictor()
        {
            var (pipe, dataView) = GetBinaryClassificationPipeline();
            var featurized = pipe.Fit(dataView).Transform(dataView);

            // Baseline linear model, also used as the initial predictor below.
            var baselineModel  = new LinearClassificationTrainer(Env, "Features", "Label").Fit(featurized);
            var baselineScored = baselineModel.Transform(featurized);

            // SymSGD started from the baseline predictor.
            var warmModel  = new SymSgdClassificationTrainer(Env, "Features", "Label").Train(featurized, initialPredictor: baselineModel.Model);
            var warmScored = warmModel.Transform(featurized);

            // SymSGD trained without an initial predictor.
            var coldModel  = new SymSgdClassificationTrainer(Env, "Features", "Label").Train(featurized);
            var coldScored = coldModel.Transform(featurized);

            int sampleCount = 10;
            var baselineScores = baselineScored.GetColumn <float>(Env, "Score").Take(sampleCount).ToArray();
            var warmScores     = warmScored.GetColumn <float>(Env, "Score").Take(sampleCount).ToArray();
            var coldScores     = coldScored.GetColumn <float>(Env, "Score").Take(sampleCount).ToArray();

            bool baselineVsWarm = false;
            bool warmVsCold     = false;
            bool baselineVsCold = false;

            // Once a pair is known to differ it is not compared again.
            for (int row = 0; row < sampleCount; row++)
            {
                if (!baselineVsWarm)
                {
                    baselineVsWarm = baselineScores[row] != warmScores[row];
                }
                if (!warmVsCold)
                {
                    warmVsCold = warmScores[row] != coldScores[row];
                }
                if (!baselineVsCold)
                {
                    baselineVsCold = baselineScores[row] != coldScores[row];
                }
            }

            Contracts.Assert(baselineVsWarm && warmVsCold && baselineVsCold);
            Done();
        }
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer and a
        /// caller-supplied loss. Since a custom loss is not guaranteed to give naturally calibrated outputs,
        /// this overload returns no probability column; only the score and the predicted label are produced.
        /// </summary>
        /// <param name="ctx">The binary classification context trainer object.</param>
        /// <param name="label">The label, or dependent variable. Must not be null.</param>
        /// <param name="features">The features, or independent variables. Must not be null.</param>
        /// <param name="loss">The custom loss. Must not be null.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="l2Const">The L2 regularization hyperparameter. Must not be negative, if specified.</param>
        /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
        /// Must not be negative, if specified.</param>
        /// <param name="maxIterations">The maximum number of passes to perform over the data. Must be positive, if specified.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}.Fit(DataView{TTupleInShape})"/> method is called on the
        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}"/> instance created out of this. This delegate receives
        /// the linear model that was trained; when the trained model is a calibrated predictor, its sub-predictor is passed instead.
        /// Note that this action cannot change the result in any way; it is only a way for the caller to be informed about what
        /// was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), and the predicted label.</returns>
        /// <seealso cref="Sdca(BinaryClassificationContext.BinaryClassificationTrainers, Scalar{bool}, Vector{float}, Scalar{float}, float?, float?, int?, Action{LinearBinaryPredictor, ParameterMixingCalibratedPredictor})"/>
        public static (Scalar <float> score, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationContext.BinaryClassificationTrainers ctx,
            Scalar <bool> label, Vector <float> features,
            ISupportSdcaClassificationLoss loss,
            Scalar <float> weights = null,
            float?l2Const          = null,
            float?l1Threshold      = null,
            int?maxIterations      = null,
            Action <LinearBinaryPredictor> onFit = null
            )
        {
            // Argument validation. The negated comparisons let unspecified (null) values pass.
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValue(loss, nameof(loss));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
            Contracts.CheckValueOrNull(onFit);

            var trainerArgs = new LinearClassificationTrainer.Arguments();
            trainerArgs.L2Const       = l2Const;
            trainerArgs.L1Threshold   = l1Threshold;
            trainerArgs.MaxIterations = maxIterations;
            trainerArgs.LossFunction  = new TrivialSdcaClassificationLossFactory(loss);

            var reconciler = new TrainerEstimatorReconciler.BinaryClassifierNoCalibration(
                (env, labelName, featuresName, weightsName) =>
                {
                    var sdca = new LinearClassificationTrainer(env, trainerArgs, featuresName, labelName, weightsName);
                    if (onFit == null)
                    {
                        return(sdca);
                    }

                    return(sdca.WithOnFitDelegate(fitted =>
                    {
                        // Unwrap a calibrated predictor so the caller always sees the linear model.
                        var trained = fitted.Model;
                        var linear  = trained is ParameterMixingCalibratedPredictor calibrated
                            ? (LinearBinaryPredictor)calibrated.SubPredictor
                            : (LinearBinaryPredictor)trained;
                        onFit(linear);
                    }));
                },
                label, features, weights,
                // The last argument reports whether the supplied loss is LogLoss.
                loss is LogLoss);

            return(reconciler.Output);
        }
        /// <summary>
        /// Trains a linear classifier on the featurized sentiment data, calibrates it with a
        /// PlattCalibratorTrainer, scores and evaluates the data with default settings, and then
        /// scores and evaluates again with a BinaryClassifierScorer whose threshold is 0.01 on
        /// the probability column.
        /// </summary>
        void ReconfigurablePrediction()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath); // not referenced again in this method

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Load the text and apply the text transform.
                var loaderArgs = MakeSentimentTextLoaderArgs();
                var textData   = new TextLoader(env, loaderArgs, new MultiFileSource(dataPath));

                var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(), textData);

                // Train on a cached copy of the featurized data.
                var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
                var trainer     = new LinearClassificationTrainer(env, trainerArgs);

                var cache      = new CacheDataView(env, featurized, prefetch: null);
                var trainRoles = new RoleMappedData(cache, label: "Label", feature: "Features");
                IPredictor uncalibrated = trainer.Train(new Runtime.TrainContext(trainRoles));

                // Put a Platt calibrator on top of the trained predictor.
                IPredictor predictor;
                using (var channel = env.Start("Calibrator training"))
                {
                    predictor = CalibratorUtils.TrainCalibrator(env, channel, new PlattCalibratorTrainer(env), int.MaxValue, uncalibrated, trainRoles);
                }

                // Score and evaluate with the default scorer and evaluator settings.
                var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
                IDataScorerTransform defaultScorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

                var defaultEvalData  = new RoleMappedData(defaultScorer, label: "Label", feature: "Features", opt: true);
                var defaultEvaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments());
                var defaultTables    = defaultEvaluator.Evaluate(defaultEvalData);

                var metrics = BinaryClassificationMetrics.FromMetrics(env, defaultTables["OverallMetrics"], defaultTables["ConfusionMatrix"])[0];

                // Build a second scorer over the same predictor with a 0.01 probability threshold.
                var bindable = ScoreUtils.GetSchemaBindableMapper(env, predictor, null);
                var mapper   = bindable.Bind(env, trainRoles.Schema);
                var thresholdScorerArgs = new BinaryClassifierScorer.Arguments
                {
                    Threshold       = 0.01f,
                    ThresholdColumn = DefaultColumnNames.Probability
                };
                var newScorer = new BinaryClassifierScorer(env, thresholdScorerArgs, scoreRoles.Data, mapper, trainRoles.Schema);

                // Evaluate again with evaluator settings that match the new threshold.
                var thresholdEvalData = new RoleMappedData(newScorer, label: "Label", feature: "Features", opt: true);
                var newEvaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments()
                {
                    Threshold            = 0.01f,
                    UseRawScoreThreshold = false
                });
                var thresholdTables = newEvaluator.Evaluate(thresholdEvalData);
                var newMetrics = BinaryClassificationMetrics.FromMetrics(env, thresholdTables["OverallMetrics"], thresholdTables["ConfusionMatrix"])[0];
            }
        }
Beispiel #12
0
        /// <summary>
        /// Passes to TestEstimatorCore the multi-class pipeline extended with an Ova estimator
        /// (useProbabilities: false) around a LinearClassificationTrainer whose calibrator is
        /// set to null, followed by a KeyToValueEstimator on "PredictedLabel".
        /// </summary>
        public void OVAUncalibrated()
        {
            var (pipeline, data) = GetMultiClassPipeline();
            var sdcaTrainer = new LinearClassificationTrainer(Env, "Features", "Label", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; s.Calibrator = null; });

            // Test the chain returned by Append: passing 'pipeline' itself is not guaranteed
            // to include the Ova and key-to-value stages added here.
            var fullPipeline = pipeline.Append(new Ova(Env, sdcaTrainer, useProbabilities: false))
                                       .Append(new KeyToValueEstimator(Env, "PredictedLabel"));

            TestEstimatorCore(fullPipeline, data);
            Done();
        }
Beispiel #13
0
        /// <summary>
        /// Passes to TestEstimatorCore the multi-class pipeline extended with a Pkpd estimator
        /// around a LinearClassificationTrainer, followed by a KeyToValueEstimator on
        /// "PredictedLabel".
        /// </summary>
        public void Pkpd()
        {
            var (pipeline, data) = GetMultiClassPipeline();

            var sdcaTrainer = new LinearClassificationTrainer(Env, "Features", "Label", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; });

            // Test the chain returned by Append: passing 'pipeline' itself is not guaranteed
            // to include the Pkpd and key-to-value stages added here.
            var fullPipeline = pipeline.Append(new Pkpd(Env, sdcaTrainer))
                                       .Append(new KeyToValueEstimator(Env, "PredictedLabel"));

            TestEstimatorCore(fullPipeline, data);
            Done();
        }
Beispiel #14
0
        /// <summary>
        /// Passes to TestEstimatorCore the multi-class pipeline extended with a Pkpd estimator
        /// around a LinearClassificationTrainer (configured through its Arguments object),
        /// followed by a KeyToValueEstimator on "PredictedLabel".
        /// </summary>
        public void Pkpd()
        {
            var (pipeline, data) = GetMultiClassPipeline();

            var sdcaTrainer = new LinearClassificationTrainer(Env, new LinearClassificationTrainer.Arguments {
                MaxIterations = 100, Shuffle = true, NumThreads = 1
            }, "Features", "Label");

            // Test the chain returned by Append: passing 'pipeline' itself is not guaranteed
            // to include the Pkpd and key-to-value stages added here.
            var fullPipeline = pipeline.Append(new Pkpd(Env, sdcaTrainer))
                                       .Append(new KeyToValueEstimator(Env, "PredictedLabel"));

            TestEstimatorCore(fullPipeline, data);
            Done();
        }
Beispiel #15
0
        /// <summary>
        /// Passes to TestEstimatorCore the multi-class pipeline extended with an Ova estimator
        /// (useProbabilities: false) around a LinearClassificationTrainer whose Arguments set
        /// Calibrator to null, followed by a KeyToValueEstimator on "PredictedLabel".
        /// </summary>
        public void OVAUncalibrated()
        {
            var (pipeline, data) = GetMultiClassPipeline();

            var sdcaTrainer = new LinearClassificationTrainer(Env, new LinearClassificationTrainer.Arguments {
                MaxIterations = 100, Shuffle = true, NumThreads = 1, Calibrator = null
            }, "Features", "Label");

            // Test the chain returned by Append: passing 'pipeline' itself is not guaranteed
            // to include the Ova and key-to-value stages added here.
            var fullPipeline = pipeline.Append(new Ova(Env, sdcaTrainer, useProbabilities: false))
                                       .Append(new KeyToValueEstimator(Env, "PredictedLabel"));

            TestEstimatorCore(fullPipeline, data);
            Done();
        }
        /// <summary>
        /// Passes to TestEstimatorCore the multi-class pipeline extended with an Ova estimator
        /// that is given an explicit PavCalibratorTrainer and a maxCalibrationExamples of
        /// 990000, followed by a KeyToValueEstimator on "PredictedLabel".
        /// </summary>
        public void OVAWithExplicitCalibrator()
        {
            var (pipeline, data) = GetMultiClassPipeline();
            var calibrator = new PavCalibratorTrainer(Env);

            var sdcaTrainer = new LinearClassificationTrainer(Env, new LinearClassificationTrainer.Arguments {
                MaxIterations = 100, Shuffle = true, NumThreads = 1
            }, "Features", "Label");

            // Test the chain returned by Append: passing 'pipeline' itself is not guaranteed
            // to include the Ova and key-to-value stages added here.
            var fullPipeline = pipeline.Append(new Ova(Env, sdcaTrainer, "Label", calibrator: calibrator, maxCalibrationExamples: 990000))
                                       .Append(new KeyToValueEstimator(Env, "PredictedLabel"));

            TestEstimatorCore(fullPipeline, data);
            Done();
        }
Beispiel #17
0
        /// <summary>
        /// Fits a pipeline on the iris training file that concatenates the four measurement
        /// columns into "Features", applies a TermEstimator to "Label", trains an Ova estimator
        /// around a LinearClassificationTrainer and applies a KeyToValueEstimator to
        /// "PredictedLabel".
        /// </summary>
        public void New_Metacomponents()
        {
            using (var env = new LocalEnvironment())
            {
                var reader = new TextLoader(env, MakeIrisTextLoaderArgs());
                var data   = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.irisData.trainFilename)));

                // Binary trainer wrapped by Ova below.
                var sdcaTrainer = new LinearClassificationTrainer(env, "Features", "Label", advancedSettings: s =>
                {
                    s.MaxIterations = 100;
                    s.Shuffle       = true;
                    s.NumThreads    = 1;
                });

                // Build the chain one stage at a time.
                var concat       = new ConcatEstimator(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
                var withLabelMap = concat.Append(new TermEstimator(env, "Label"), TransformerScope.TrainTest);
                var withOva      = withLabelMap.Append(new Ova(env, sdcaTrainer));
                var pipeline     = withOva.Append(new KeyToValueEstimator(env, "PredictedLabel"));

                var model = pipeline.Fit(data);
            }
        }
        /// <summary>
        /// Trains a linear classifier on the featurized sentiment data, saves the model to a
        /// temporary file, loads it back as a prediction engine and checks the predictions for
        /// the first five test examples.
        /// </summary>
        public void TrainSaveModelAndPredict()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath); // not referenced again in this method

            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Load the text and apply the text transform.
                var loaderArgs = MakeSentimentTextLoaderArgs();
                var textData   = TextLoader.ReadFile(env, loaderArgs, new MultiFileSource(dataPath));

                var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(), textData);

                // Train on a cached copy of the featurized data.
                var trainerArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
                var trainer     = new LinearClassificationTrainer(env, trainerArgs);

                var cache      = new CacheDataView(env, featurized, prefetch: null);
                var trainRoles = new RoleMappedData(cache, label: "Label", feature: "Features");
                var predictor  = trainer.Train(new Runtime.TrainContext(trainRoles));

                // Round-trip the model through a temporary file.
                PredictionEngine <SentimentData, SentimentPrediction> engine;
                using (var modelFile = env.CreateTempFile())
                {
                    // Save model.
                    var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
                    using (var channel = env.Start("saving"))
                    {
                        TrainUtils.SaveModel(env, channel, modelFile, predictor, scoreRoles);
                    }

                    // Load model.
                    using (var modelStream = modelFile.OpenReadStream())
                    {
                        engine = env.CreatePredictionEngine <SentimentData, SentimentPrediction>(modelStream);
                    }
                }

                // Run predictions on the first five examples of the test file.
                var testLoader   = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(GetDataPath(SentimentTestPath)));
                var testExamples = testLoader.AsEnumerable <SentimentData>(env, false);
                foreach (var example in testExamples.Take(5))
                {
                    var prediction = engine.Predict(example);

                    // The predicted label must match the input label.
                    Assert.Equal(example.Sentiment, prediction.Sentiment);

                    // The score must be beyond +1 for positive inputs and beyond -1 for negative ones.
                    bool scoreIsSeparated = example.Sentiment
                        ? prediction.Score > 1
                        : prediction.Score < -1;
                    Assert.True(scoreIsSeparated);
                }
            }
        }
        /// <summary>
        /// Fits a pipeline on the iris data that concatenates the four measurement columns into
        /// "Features", applies a TermEstimator to "Label", trains an Ova estimator around a
        /// LinearClassificationTrainer and applies a KeyToValueEstimator to "PredictedLabel".
        /// </summary>
        public void New_Metacomponents()
        {
            var dataPath = GetDataPath(IrisDataPath);
            using (var env = new TlcEnvironment())
            {
                var reader = new TextLoader(env, MakeIrisTextLoaderArgs());
                var data   = reader.Read(new MultiFileSource(dataPath));

                // Binary trainer wrapped by Ova below.
                var sdcaArgs = new LinearClassificationTrainer.Arguments
                {
                    MaxIterations = 100,
                    Shuffle       = true,
                    NumThreads    = 1
                };
                var sdcaTrainer = new LinearClassificationTrainer(env, sdcaArgs, "Features", "Label");

                // Build the chain one stage at a time.
                var concat       = new ConcatEstimator(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
                var withLabelMap = concat.Append(new TermEstimator(env, "Label"), TransformerScope.TrainTest);
                var withOva      = withLabelMap.Append(new Ova(env, sdcaTrainer));
                var pipeline     = withOva.Append(new KeyToValueEstimator(env, "PredictedLabel"));

                var model = pipeline.Fit(data);
            }
        }
Beispiel #20
0
        /// <summary>
        /// Passes to TestEstimatorCore a pipeline on the iris data that applies a TermEstimator
        /// to "Label", an Ova estimator (useProbabilities: false) around a
        /// LinearClassificationTrainer whose calibrator is null, and a KeyToValueEstimator on
        /// "PredictedLabel".
        /// </summary>
        public void OVAUncalibrated()
        {
            var dataPath = GetDataPath(IrisDataPath);

            using (var env = new TlcEnvironment())
            {
                var reader = new TextLoader(env, GetIrisLoaderArgs());
                var data   = reader.Read(new MultiFileSource(dataPath));

                // Binary trainer with no calibrator, wrapped by Ova below.
                var sdcaArgs = new LinearClassificationTrainer.Arguments
                {
                    MaxIterations = 100,
                    Shuffle       = true,
                    NumThreads    = 1,
                    Calibrator    = null
                };
                var sdcaTrainer = new LinearClassificationTrainer(env, sdcaArgs, "Features", "Label");

                // Build the chain one stage at a time.
                var labelMap = new TermEstimator(env, "Label");
                var withOva  = labelMap.Append(new Ova(env, sdcaTrainer, useProbabilities: false));
                var pipeline = withOva.Append(new KeyToValueEstimator(env, "PredictedLabel"));

                TestEstimatorCore(pipeline, data);
            }
        }
        /// <summary>
        /// Predict a target using a linear binary classification model trained with the SDCA trainer, using the
        /// trainer's default loss (no custom loss is set on the arguments).
        /// </summary>
        /// <param name="ctx">The binary classification context trainer object.</param>
        /// <param name="label">The label, or dependent variable. Must not be null.</param>
        /// <param name="features">The features, or independent variables. Must not be null.</param>
        /// <param name="weights">The optional example weights.</param>
        /// <param name="l2Const">The L2 regularization hyperparameter. Must not be negative, if specified.</param>
        /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.
        /// Must not be negative, if specified.</param>
        /// <param name="maxIterations">The maximum number of passes to perform over the data. Must be positive, if specified.</param>
        /// <param name="onFit">A delegate that is called every time the
        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}.Fit(DataView{TTupleInShape})"/> method is called on the
        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}"/> instance created out of this. This delegate will receive
        /// the linear model that was trained, as well as the calibrator on top of that model. The trained model is cast to
        /// <see cref="ParameterMixingCalibratedPredictor"/> before the delegate is invoked. Note that this action cannot change the
        /// result in any way; it is only a way for the caller to be informed about what was learnt.</param>
        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
        public static (Scalar <float> score, Scalar <float> probability, Scalar <bool> predictedLabel) Sdca(
            this BinaryClassificationContext.BinaryClassificationTrainers ctx,
            Scalar <bool> label, Vector <float> features, Scalar <float> weights = null,
            float?l2Const     = null,
            float?l1Threshold = null,
            int?maxIterations = null,
            Action <LinearBinaryPredictor, ParameterMixingCalibratedPredictor> onFit = null)
        {
            // Argument validation. The negated comparisons let unspecified (null) values pass.
            Contracts.CheckValue(label, nameof(label));
            Contracts.CheckValue(features, nameof(features));
            Contracts.CheckValueOrNull(weights);
            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative, if specified.");
            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative, if specified.");
            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
            Contracts.CheckValueOrNull(onFit);

            var trainerArgs = new LinearClassificationTrainer.Arguments();
            trainerArgs.L2Const       = l2Const;
            trainerArgs.L1Threshold   = l1Threshold;
            trainerArgs.MaxIterations = maxIterations;

            var reconciler = new TrainerEstimatorReconciler.BinaryClassifier(
                (env, labelName, featuresName, weightsName) =>
                {
                    var sdca = new LinearClassificationTrainer(env, trainerArgs, featuresName, labelName, weightsName);
                    if (onFit == null)
                    {
                        return(sdca);
                    }

                    return(sdca.WithOnFitDelegate(fitted =>
                    {
                        // Under the default log-loss we assume a calibrated predictor,
                        // hence the direct casts.
                        var calibrated = (ParameterMixingCalibratedPredictor)fitted.Model;
                        var linear     = (LinearBinaryPredictor)calibrated.SubPredictor;
                        onFit(linear, calibrated);
                    }));
                },
                label, features, weights);

            return(reconciler.Output);
        }
Beispiel #22
0
        /// <summary>
        /// Fits the text transform on the sentiment training file, applies it to both the
        /// training and the test file, and trains a LinearClassificationTrainer with the
        /// transformed test data passed as the validation set.
        /// </summary>
        public void New_TrainWithValidationSet()
        {
            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Reader and text transform ("SentimentText" in, "Features" out).
                var reader     = new TextLoader(env, MakeSentimentTextLoaderArgs());
                var featurizer = new TextTransform(env, "SentimentText", "Features");

                // Fit the transform on the training file only.
                var trainRaw         = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)));
                var fittedFeaturizer = featurizer.Fit(trainRaw);
                var trainData        = fittedFeaturizer.Transform(trainRaw);

                // The test file goes through the same fitted transform and serves as validation data.
                var validRaw  = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.testFilename)));
                var validData = fittedFeaturizer.Transform(validRaw);

                // Train model with validation set.
                var trainerArgs = new LinearClassificationTrainer.Arguments();
                var trainer     = new LinearClassificationTrainer(env, trainerArgs, "Features", "Label");
                var model       = trainer.Train(trainData, validData);
            }
        }
Beispiel #23
0
        /// <summary>
        /// Scenario: featurize the sentiment training data, save it to the binary file "i.idv",
        /// load that file back with a <c>BinaryLoader</c>, and train a linear classifier on the loaded data.
        /// </summary>
        void New_FileBasedSavingOfData()
        {
            const string binaryFileName = "i.idv";

            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Loader followed by the text featurizer, fitted and applied to the training file in one step.
                var readerEstimator = new TextLoader(env, MakeSentimentTextLoaderArgs())
                    .Append(new TextTransform(env, "SentimentText", "Features"));
                var trainSource = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
                var featurized = readerEstimator.FitAndRead(trainSource);

                // Persist the featurized data in binary form.
                using (var outputFile = env.CreateOutputFile(binaryFileName))
                {
                    var writeStream = outputFile.CreateWriteStream();
                    featurized.SaveAsBinary(env, writeStream);
                }

                var learner = new LinearClassificationTrainer(
                    env, "Features", "Label",
                    advancedSettings: settings => settings.NumThreads = 1);
                var reloaded = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource(binaryFileName));

                // Train on the reloaded data, then remove the temporary file.
                var trainRoles = new RoleMappedData(reloaded, DefaultColumnNames.Label, DefaultColumnNames.Features);
                var model = learner.Train(trainRoles);
                DeleteOutputPath(binaryFileName);
            }
        }
Beispiel #24
0
        /// <summary>
        /// Runs the estimator test harness on an <c>Ova</c> pipeline over the iris data, where the
        /// underlying linear classification trainer is paired with an explicitly supplied
        /// <c>PavCalibratorTrainer</c>.
        /// </summary>
        public void OVAWithExplicitCalibrator()
        {
            var irisPath = GetDataPath(IrisDataPath);

            using (var env = new TlcEnvironment())
            {
                var pavCalibrator = new PavCalibratorTrainer(env);

                var irisLoader = new TextLoader(env, GetIrisLoaderArgs());
                var irisData = irisLoader.Read(new MultiFileSource(irisPath));

                // Binary trainer that is handed to Ova.
                var binaryArgs = new LinearClassificationTrainer.Arguments
                {
                    MaxIterations = 100,
                    Shuffle = true,
                    NumThreads = 1
                };
                var binaryTrainer = new LinearClassificationTrainer(env, binaryArgs, "Features", "Label");

                // Label -> key, Ova with the explicit calibrator, then predicted key -> value.
                var labelToKey = new TermEstimator(env, "Label");
                var ova = new Ova(env, binaryTrainer, "Label", calibrator: pavCalibrator, maxCalibrationExamples: 990000);
                var withOva = labelToKey.Append(ova);
                var pipeline = withOva.Append(new KeyToValueEstimator(env, "PredictedLabel"));

                TestEstimatorCore(pipeline, irisData);
            }
        }
Beispiel #25
0
        /// <summary>
        /// Scenario: train a linear classifier on the featurized sentiment data, attach a scorer to the
        /// featurized pipeline, and compute binary classification metrics from the scored data.
        /// </summary>
        public void Evaluation()
        {
            var trainPath = GetDataPath(SentimentDataPath);
            // Not consumed below; the test loader resolves the test path again itself.
            var testPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline: text loader followed by the text featurizer.
                var trainLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(trainPath));
                var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(), trainLoader);

                // Train on a cached view of the featurized data.
                var trainerArgs = new LinearClassificationTrainer.Arguments
                {
                    NumThreads = 1
                };
                var learner = new LinearClassificationTrainer(env, trainerArgs);

                var cachedView = new CacheDataView(env, featurized, prefetch: null);
                var trainRoles = new RoleMappedData(cachedView, label: "Label", feature: "Features");
                var predictor = learner.Train(new Runtime.TrainContext(trainRoles));

                // The scorer is bound to the uncached featurized data.
                var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
                IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

                // Create prediction engine on top of the scorer.
                var engine = env.CreatePredictionEngine<SentimentData, SentimentPrediction>(scorer);

                // Expose the test data as an enumerable of typed rows.
                var testSource = new MultiFileSource(GetDataPath(SentimentTestPath));
                var testLoader = new TextLoader(env, MakeSentimentTextLoaderArgs(), testSource);
                var testRows = testLoader.AsEnumerable<SentimentData>(env, false);

                // Evaluate the scored data.
                var evalRoles = new RoleMappedData(scorer, label: "Label", feature: "Features", opt: true);
                var evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments());
                var evalOutput = evaluator.Evaluate(evalRoles);

                var overall = evalOutput["OverallMetrics"];
                var confusion = evalOutput["ConfusionMatrix"];
                var metrics = BinaryClassificationMetrics.FromMetrics(env, overall, confusion)[0];
            }
        }
Beispiel #26
0
        /// <summary>
        /// Scenario: train a linear classifier on the sentiment data, build a prediction engine, and
        /// call it from <c>Parallel.ForEach</c> over the test rows, taking a lock on the engine around
        /// each prediction.
        /// </summary>
        void MultithreadedPrediction()
        {
            var trainPath = GetDataPath(SentimentDataPath);
            // Not consumed below; the test loader resolves the test path again itself.
            var testPath = GetDataPath(SentimentTestPath);

            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Pipeline: text loader followed by the text featurizer.
                var trainLoader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(trainPath));
                var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(), trainLoader);

                // Train on a cached view of the featurized data.
                var trainerArgs = new LinearClassificationTrainer.Arguments
                {
                    NumThreads = 1
                };
                var learner = new LinearClassificationTrainer(env, trainerArgs);

                var cachedView = new CacheDataView(env, featurized, prefetch: null);
                var trainRoles = new RoleMappedData(cachedView, label: "Label", feature: "Features");
                var predictor = learner.Train(new Runtime.TrainContext(trainRoles));

                // The scorer is bound to the uncached featurized data.
                var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
                IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

                // Create prediction engine on top of the scorer.
                var engine = env.CreatePredictionEngine<SentimentData, SentimentPrediction>(scorer);

                // Expose the test data as an enumerable of typed rows.
                var testSource = new MultiFileSource(GetDataPath(SentimentTestPath));
                var testLoader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), testSource);
                var testRows = testLoader.AsEnumerable<SentimentData>(env, false);

                // Each worker takes the lock on the engine before predicting.
                Parallel.ForEach(testRows, row =>
                {
                    lock (engine)
                    {
                        var prediction = engine.Predict(row);
                    }
                });
            }
        }
        /// <summary>
        /// Scenario: fit a loader-plus-text-transform pipeline on the sentiment training file, read both
        /// the training and validation files through the fitted reader, and train a linear classifier
        /// with the validation set supplied.
        /// </summary>
        public void New_TrainWithValidationSet()
        {
            var trainPath = GetDataPath(SentimentDataPath);
            var validPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline: loader with the text transform appended.
                var textLoader = new MyTextLoader(env, MakeSentimentTextLoaderArgs());
                var readerEstimator = textLoader.Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

                // Fit on the training file, then read both files with the fitted reader.
                var fittedReader = readerEstimator.Fit(new MultiFileSource(trainPath));
                var trainSet = fittedReader.Read(new MultiFileSource(trainPath));
                var validSet = fittedReader.Read(new MultiFileSource(validPath));

                // Train with the validation set supplied alongside the training set.
                var learner = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments(), "Features", "Label");
                var model = learner.Train(trainSet, validSet);
            }
        }
        /// <summary>
        /// Scenario: train a linear classifier on the sentiment data, build a prediction engine, and
        /// check the first five test rows: the predicted sentiment must equal the input sentiment and
        /// the score must be above 1 for positive rows or below -1 for negative rows.
        /// </summary>
        public void SimpleTrainAndPredict()
        {
            var trainPath = GetDataPath(SentimentDataPath);
            // Not consumed below; the test loader resolves the test path again itself.
            var testPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline: text loader followed by the text featurizer.
                var trainLoader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(trainPath));
                var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(), trainLoader);

                // Train on a cached view of the featurized data.
                var trainerArgs = new LinearClassificationTrainer.Arguments
                {
                    NumThreads = 1
                };
                var learner = new LinearClassificationTrainer(env, trainerArgs);

                var cachedView = new CacheDataView(env, featurized, prefetch: null);
                var trainRoles = new RoleMappedData(cachedView, label: "Label", feature: "Features");
                var predictor = learner.Train(new Runtime.TrainContext(trainRoles));

                // The scorer is bound to the uncached featurized data.
                var scoreRoles = new RoleMappedData(featurized, label: "Label", feature: "Features");
                IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

                // Create prediction engine on top of the scorer.
                var engine = env.CreatePredictionEngine<SentimentData, SentimentPrediction>(scorer);

                // Expose the test data as an enumerable of typed rows.
                var testSource = new MultiFileSource(GetDataPath(SentimentTestPath));
                var testLoader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), testSource);
                var testRows = testLoader.AsEnumerable<SentimentData>(env, false);

                foreach (var row in testRows.Take(5))
                {
                    var prediction = engine.Predict(row);

                    // Predicted label must match the input label.
                    Assert.Equal(row.Sentiment, prediction.Sentiment);

                    // Score must be separated from zero on the side that matches the input label.
                    bool scoreSeparated = row.Sentiment
                        ? prediction.Score > 1
                        : prediction.Score < -1;
                    Assert.True(scoreSeparated);
                }
            }
        }
Beispiel #29
0
        /// <summary>
        /// Scenario: train models on word-bag + LDA features of the sentiment data and read back their
        /// internals: the feature weights of the linear predictor, the LDA topic summary, and the
        /// per-node and per-leaf key/value details of every tree in the FastTree predictor.
        /// </summary>
        public void IntrospectiveTraining()
        {
            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Pipeline: loader -> word bag ("Tokenize") -> LDA ("Features").
                var loader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)));

                var wordBagColumn = new WordBagTransform.Column()
                {
                    Name = "Tokenize",
                    Source = new[] { "SentimentText" }
                };
                var wordBagArgs = new WordBagTransform.Arguments()
                {
                    NgramLength = 1,
                    Column = new[] { wordBagColumn }
                };
                var wordBag = WordBagTransform.Create(env, wordBagArgs, loader);

                var ldaColumn = new LdaTransform.Column
                {
                    Source = "Tokenize",
                    Name = "Features"
                };
                var ldaArgs = new LdaTransform.Arguments()
                {
                    NumTopic = 10,
                    NumIterations = 3,
                    NumThreads = 1,
                    Column = new[] { ldaColumn }
                };
                var lda = new LdaTransform(env, ldaArgs, wordBag);

                // The LDA output is the training data; both trainers consume a cached view of it.
                var cachedTrain = new CacheDataView(env, lda, prefetch: null);

                // Train the linear predictor and pull out its feature weights.
                var linearArgs = new LinearClassificationTrainer.Arguments
                {
                    NumThreads = 1
                };
                var linearTrainer = new LinearClassificationTrainer(env, linearArgs);
                var trainRoles = new RoleMappedData(cachedTrain, label: "Label", feature: "Features");
                var linearPredictor = linearTrainer.Train(new Runtime.TrainContext(trainRoles));
                VBuffer<float> featureWeights = default;
                linearPredictor.GetFeatureWeights(ref featureWeights);

                var topicSummary = lda.GetTopicSummary();

                // Train the tree predictor; unwrap it when the trainer returned a calibrated predictor.
                var treeTrainer = new FastTreeBinaryClassificationTrainer(env, DefaultColumnNames.Label, DefaultColumnNames.Features, numTrees: 2);
                var trainedTreeModel = treeTrainer.Train(new Runtime.TrainContext(trainRoles));
                FastTreeBinaryPredictor treePredictor = trainedTreeModel is CalibratedPredictorBase calibrated
                    ? (FastTreeBinaryPredictor)calibrated.SubPredictor
                    : (FastTreeBinaryPredictor)trainedTreeModel;

                var featureNames = FeatureNameCollection.Create(trainRoles.Schema);
                foreach (var tree in treePredictor.GetTrees())
                {
                    var lteChild = tree.LteChild;
                    var gtChild = tree.GtChild;

                    // Interior nodes.
                    for (var nodeIdx = 0; nodeIdx < tree.NumNodes; nodeIdx++)
                    {
                        var node = tree.GetNode(nodeIdx, false, featureNames);
                        var details = node.KeyValues;
                        var gainValue = GetValue<double>(details, "GainValue");
                        var splitGain = GetValue<double>(details, "SplitGain");
                        var featureName = GetValue<string>(details, "SplitName");
                        var previousLeafValue = GetValue<double>(details, "PreviousLeafValue");
                        // Keep only what follows the first space of the "Threshold" text.
                        var thresholdText = GetValue<string>(details, "Threshold");
                        var threshold = thresholdText.Split(new[] { ' ' }, 2)[1];
                        var nodeIndex = nodeIdx;
                    }

                    // Leaves; the leaf's node index is recorded as the bitwise complement of its position.
                    for (var leafIdx = 0; leafIdx < tree.NumLeaves; leafIdx++)
                    {
                        var leaf = tree.GetNode(leafIdx, true, featureNames);
                        var details = leaf.KeyValues;
                        var leafValue = GetValue<double>(details, "LeafValue");
                        var extras = GetValue<string>(details, "Extras");
                        var nodeIndex = ~leafIdx;
                    }
                }
            }
        }
Beispiel #30
0
        /// <summary>
        /// Scenario: manual 5-fold cross-validation of a linear classifier on the sentiment data.
        /// Folds are selected by range-filtering a generated "StratificationColumn": the complement of
        /// the fold's range is used for training and the range itself for testing. One set of binary
        /// classification metrics is collected per fold.
        /// </summary>
        void CrossValidation()
        {
            var trainPath = GetDataPath(SentimentDataPath);
            // Not consumed below.
            var testPath = GetDataPath(SentimentTestPath);

            int numFolds = 5;

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline: loader -> text featurizer -> generated number column used to pick folds.
                var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(trainPath));
                var featurized = TextTransform.Create(env, MakeSentimentTextTransformArgs(false), loader);
                IDataView withFoldColumn = new GenerateNumberTransform(env, featurized, "StratificationColumn");

                // One trainer instance is reused for every fold.
                var trainerArgs = new LinearClassificationTrainer.Arguments
                {
                    NumThreads = 1,
                    ConvergenceTolerance = 1f
                };
                var learner = new LinearClassificationTrainer(env, trainerArgs);

                var perFoldMetrics = new List<BinaryClassificationMetrics>();
                for (int fold = 0; fold < numFolds; fold++)
                {
                    // Bounds of this fold's slice of the fold column.
                    double foldMin = (double)fold / numFolds;
                    double foldMax = (double)(fold + 1) / numFolds;

                    // Training rows: everything outside the fold's range.
                    var trainFilterArgs = new RangeFilter.Arguments()
                    {
                        Column = "StratificationColumn",
                        Min = foldMin,
                        Max = foldMax,
                        Complement = true
                    };
                    IDataView trainFilter = new RangeFilter(env, trainFilterArgs, withFoldColumn);
                    IDataView trainPipe = new OpaqueDataView(trainFilter);
                    var trainRoles = new RoleMappedData(trainPipe, label: "Label", feature: "Features");

                    // Auto-normalization.
                    NormalizeTransform.CreateIfNeeded(env, ref trainRoles, learner);
                    var beforeCaching = trainRoles;

                    // Auto-caching.
                    if (learner.Info.WantCaching)
                    {
                        var roleColumnIndices = trainRoles.Schema.GetColumnRoles().Select(role => role.Value.Index).ToArray();
                        var cachedView = new CacheDataView(env, trainRoles.Data, roleColumnIndices);
                        // Because the prefetching worked, we know that these are valid columns.
                        trainRoles = new RoleMappedData(cachedView, trainRoles.Schema.GetColumnRoleNames());
                    }

                    var predictor = learner.Train(new Runtime.TrainContext(trainRoles));

                    // Test rows: everything inside the fold's range.
                    var testFilterArgs = new RangeFilter.Arguments()
                    {
                        Column = "StratificationColumn",
                        Min = foldMin,
                        Max = foldMax,
                        Complement = false
                    };
                    IDataView testFilter = new RangeFilter(env, testFilterArgs, withFoldColumn);
                    IDataView testPipe = new OpaqueDataView(testFilter);

                    // Apply the transforms of the pre-caching training data to the test rows.
                    var transformedTest = ApplyTransformUtils.ApplyAllTransformsToData(env, beforeCaching.Data, testPipe, trainPipe);
                    var testRoles = new RoleMappedData(transformedTest, trainRoles.Schema.GetColumnRoleNames());

                    IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, testRoles, env, testRoles.Schema);

                    // Evaluate the scored fold and keep its single metrics entry.
                    var evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments());
                    var evalRoles = new RoleMappedData(scorer, testRoles.Schema.GetColumnRoleNames(), opt: true);
                    var evalOutput = evaluator.Evaluate(evalRoles);
                    var foldMetrics = BinaryClassificationMetrics.FromMetrics(env, evalOutput["OverallMetrics"], evalOutput["ConfusionMatrix"]);
                    perFoldMetrics.Add(foldMetrics.Single());
                }
            }
        }