Example #1
0
        /// <summary>
        /// Trains <paramref name="trainer"/> on <paramref name="data"/> (optionally continuing from
        /// <paramref name="inputPredictor"/> when the trainer supports incremental training) and then
        /// calibrates the resulting predictor if a calibrator factory was supplied.
        /// </summary>
        private static IPredictor TrainCore(IHostEnvironment env, IChannel ch, RoleMappedData data, ITrainer trainer, RoleMappedData validData,
                                            IComponentFactory <ICalibratorTrainer> calibrator, int maxCalibrationExamples, bool?cacheData, IPredictor inputPredictor = null)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ch, nameof(ch));
            ch.CheckValue(data, nameof(data));
            ch.CheckValue(trainer, nameof(trainer));
            ch.CheckValueOrNull(validData);
            ch.CheckValueOrNull(inputPredictor);

            // Optionally wrap the training (and validation) data in a cache when requested/beneficial.
            AddCacheIfWanted(env, ch, trainer, ref data, cacheData);
            ch.Trace("Training");
            if (validData != null)
                AddCacheIfWanted(env, ch, trainer, ref validData, cacheData);

            // An input model only makes sense for trainers that can continue training from it.
            if (inputPredictor != null && !trainer.Info.SupportsIncrementalTraining)
            {
                ch.Warning("Ignoring " + nameof(TrainCommand.Arguments.InputModelFile) +
                           ": Trainer does not support incremental training.");
                inputPredictor = null;
            }
            ch.Assert(validData == null || trainer.Info.SupportsValidation);

            var trainedPredictor  = trainer.Train(new TrainContext(data, validData, inputPredictor));
            var calibratorTrainer = calibrator?.CreateComponent(env);
            return CalibratorUtils.TrainCalibratorIfNeeded(env, ch, calibratorTrainer, maxCalibrationExamples, trainer, trainedPredictor, data);
        }
Example #2
0
        /// <summary>
        /// Trains one binary classifier distinguishing class <paramref name="cls"/> from the rest.
        /// When probabilities are requested, the model is calibrated (unless it already produces
        /// distributions) before being wrapped in a prediction transformer.
        /// </summary>
        private ISingleFeaturePredictionTransformer <TScalarPredictor> TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls)
        {
            var binaryView = MapLabels(data, cls);
            string labelName = data.Schema.Label.Name;

            // REVIEW: In principle we could support validation sets and the like via the train context, but
            // this is currently unsupported.
            var transformer = trainer.Fit(binaryView);

            if (!_args.UseProbabilities)
                return new BinaryPredictionTransformer <TScalarPredictor>(Host, transformer.Model, binaryView.Schema, transformer.FeatureColumn);

            // REVIEW: restoring the RoleMappedData, as much as we can.
            // not having the weight column on the data passed to the TrainCalibrator should be addressed.
            var trainedData = new RoleMappedData(binaryView, label: labelName, feature: transformer.FeatureColumn);

            // Reuse the model directly when it already yields distributions; otherwise train a calibrator.
            var calibratedModel = transformer.Model as TDistPredictor
                ?? CalibratorUtils.TrainCalibrator(Host, ch, Calibrator, Args.MaxCalibrationExamples, transformer.Model, trainedData) as TDistPredictor;

            Host.Check(calibratedModel != null, "Calibrated predictor does not implement the expected interface");
            return new BinaryPredictionTransformer <TScalarPredictor>(Host, calibratedModel, trainedData.Data.Schema, transformer.FeatureColumn);
        }
Example #3
0
        /// <summary>
        /// Trains a single pairwise (<paramref name="cls1"/> vs. <paramref name="cls2"/>) binary
        /// classifier and calibrates it so the result exposes the distribution interface.
        /// </summary>
        private TDistPredictor TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls1, int cls2)
        {
            string dstName;
            var pairView = MapLabels(data, cls1, cls2, out dstName);

            // Rebind the label role to the freshly created pairwise label column.
            var roles = data.Schema.GetColumnRoleNames()
                        .Where(kvp => kvp.Key.Value != CR.Label.Value)
                        .Prepend(CR.Label.Bind(dstName));
            var trainData = RoleMappedData.Create(pairView, roles);

            trainer.Train(trainData);

            // A null calibrator means "no calibration requested".
            ICalibratorTrainer calibrator = Args.Calibrator.IsGood()
                ? Args.Calibrator.CreateInstance(Host)
                : (ICalibratorTrainer)null;

            TScalarPredictor predictor = trainer.CreatePredictor();
            var calibrated = CalibratorUtils.TrainCalibratorIfNeeded(Host, ch, calibrator, Args.MaxCalibrationExamples,
                                                                     trainer, predictor, trainData);
            var dist = calibrated as TDistPredictor;

            Host.Check(dist != null, "Calibrated predictor does not implement the expected interface");
            Host.Check(dist is IValueMapperDist, "Calibrated predictor does not implement the IValueMapperDist interface");
            return dist;
        }
Example #4
0
        /// <summary>
        /// Trains one one-vs-rest binary classifier for class <paramref name="cls"/>, optionally
        /// calibrating it when <c>Args.UseProbabilities</c> is set.
        /// </summary>
        private TScalarPredictor TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls)
        {
            string dstName;
            var mappedView = MapLabels(data, cls, out dstName);

            // Rebind the label role to the freshly created one-vs-rest label column.
            var roles = data.Schema.GetColumnRoleNames()
                        .Where(kvp => kvp.Key.Value != CR.Label.Value)
                        .Prepend(CR.Label.Bind(dstName));
            var trainData = new RoleMappedData(mappedView, roles);

            // REVIEW: In principle we could support validation sets and the like via the train context, but
            // this is currently unsupported.
            var predictor = trainer.Train(trainData);

            if (Args.UseProbabilities)
            {
                // A null calibrator means "no calibration requested".
                ICalibratorTrainer calibrator = Args.Calibrator.IsGood()
                    ? Args.Calibrator.CreateInstance(Host)
                    : (ICalibratorTrainer)null;
                var calibrated = CalibratorUtils.TrainCalibratorIfNeeded(Host, ch, calibrator, Args.MaxCalibrationExamples,
                                                                         trainer, predictor, trainData);
                predictor = calibrated as TScalarPredictor;
                Host.Check(predictor != null, "Calibrated predictor does not implement the expected interface");
            }
            return predictor;
        }
        // cls is the "class id", zero-based.
        /// <summary>
        /// Trains one one-vs-rest binary classifier for class <paramref name="cls"/>, optionally
        /// inserting a caching transform first and calibrating the result when requested.
        /// </summary>
        private TScalarPredictor TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls)
        {
            string dstName;
            var mappedView = MapLabels(data, cls, out dstName, ch);

            // Optionally insert a caching transform ahead of training.
            if (_args.cacheTransform != null)
            {
                var cacheFactory = ScikitSubComponent <IDataTransform, SignatureDataTransform> .AsSubComponent(_args.cacheTransform);
                mappedView = cacheFactory.CreateInstance(Host, mappedView);
            }

            // Rebind the label role to the freshly created one-vs-rest label column.
            var roles = data.Schema.GetColumnRoleNames()
                        .Where(kvp => kvp.Key.Value != CR.Label.Value)
                        .Prepend(CR.Label.Bind(dstName));
            var trainData = new RoleMappedData(mappedView, roles);

            var predictor = trainer.Train(trainData);

            if (_args.useProbabilities)
            {
                var calibratorFactory = ScikitSubComponent <ICalibratorTrainer, SignatureCalibrator> .AsSubComponent(_args.calibratorType);
                var calibrator = calibratorFactory.CreateInstance(Host);
                var calibrated = CalibratorUtils.TrainCalibratorIfNeeded(Host, ch,
                                                                         calibrator, _args.maxCalibrationExamples,
                                                                         trainer, predictor, trainData);
                predictor = calibrated as TScalarPredictor;
                Host.Check(predictor != null, "Calibrated predictor does not implement the expected interface");
            }
            return predictor;
        }
 /// <summary>
 /// Fits the scored <see cref="IDataView"/> creating a <see cref="CalibratorTransformer{TICalibrator}"/> that can transform the data by adding a
 /// <see cref="DefaultColumnNames.Probability"/> column containing the calibrated <see cref="DefaultColumnNames.Score"/>.
 /// </summary>
 /// <param name="input">The scored <see cref="IDataView"/> used to train the calibrator.</param>
 /// <returns>A trained <see cref="CalibratorTransformer{TICalibrator}"/> that will transform the data by adding the
 /// <see cref="DefaultColumnNames.Probability"/> column.</returns>
 public CalibratorTransformer <TICalibrator> Fit(IDataView input)
 {
     using (var ch = Host.Start("Creating calibrator."))
     {
         // Train the calibrator on the configured label/score/weight columns of the input.
         var trained = (TICalibrator)CalibratorUtils.TrainCalibrator(Host, ch,
                                                                     _calibratorTrainer, input, LabelColumn.Name, ScoreColumn.Name, WeightColumn.Name);
         return Create(Host, trained);
     }
 }
        // Test-style method: trains a sentiment classifier, calibrates it, evaluates it, then
        // rebinds the SAME predictor to a scorer with a reconfigured probability threshold (0.01)
        // and re-evaluates — exercising "reconfigurable" prediction without retraining.
        // NOTE(review): testDataPath, metrics and newMetrics are computed but never consumed here —
        // presumably this method only checks that the whole pipeline runs; verify against the suite.
        void ReconfigurablePrediction()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new TlcEnvironment(seed: 1, conc: 1))
            {
                // Pipeline: load the sentiment data and apply the text featurization transform.
                var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));

                var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

                // Train a linear classifier on the cached, featurized data.
                var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
                {
                    NumThreads = 1
                });

                var        cached     = new CacheDataView(env, trans, prefetch: null);
                var        trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
                IPredictor predictor  = trainer.Train(new Runtime.TrainContext(trainRoles));
                // Wrap the raw predictor with a Platt (sigmoid) calibrator trained on the training data.
                using (var ch = env.Start("Calibrator training"))
                {
                    predictor = CalibratorUtils.TrainCalibrator(env, ch, new PlattCalibratorTrainer(env), int.MaxValue, predictor, trainRoles);
                }

                // Score with default settings and evaluate to get baseline metrics.
                var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
                IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

                var dataEval = new RoleMappedData(scorer, label: "Label", feature: "Features", opt: true);

                var evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments()
                {
                });
                var metricsDict = evaluator.Evaluate(dataEval);

                var metrics = BinaryClassificationMetrics.FromMetrics(env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"])[0];

                // Rebind the already-trained predictor to a new scorer with a custom decision
                // threshold (0.01 on the Probability column) — no retraining involved.
                var bindable  = ScoreUtils.GetSchemaBindableMapper(env, predictor, null);
                var mapper    = bindable.Bind(env, trainRoles.Schema);
                var newScorer = new BinaryClassifierScorer(env, new BinaryClassifierScorer.Arguments {
                    Threshold = 0.01f, ThresholdColumn = DefaultColumnNames.Probability
                },
                                                           scoreRoles.Data, mapper, trainRoles.Schema);

                // Re-evaluate with the matching threshold so the evaluator agrees with the scorer.
                dataEval = new RoleMappedData(newScorer, label: "Label", feature: "Features", opt: true);
                var newEvaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments()
                {
                    Threshold = 0.01f, UseRawScoreThreshold = false
                });
                metricsDict = newEvaluator.Evaluate(dataEval);
                var newMetrics = BinaryClassificationMetrics.FromMetrics(env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"])[0];
            }
        }
        /// <summary>
        /// Trains one pairwise (<paramref name="cls1"/> vs. <paramref name="cls2"/>) binary classifier
        /// and wraps it in a prediction transformer, calibrating the model first when it does not
        /// already produce distributions.
        /// </summary>
        private ISingleFeaturePredictionTransformer<TDistPredictor> TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls1, int cls2)
        {
            // this should not be necessary when the legacy constructor doesn't exist, and the label column is not an optional parameter on the
            // MetaMulticlassTrainer constructor.
            string labelName = data.Schema.Label.Value.Name;

            var pairView = MapLabels(data, cls1, cls2);
            var transformer = trainer.Fit(pairView);

            // the validations in the calibrator check for the feature column, in the RoleMappedData
            var trainedData = new RoleMappedData(pairView, label: labelName, feature: transformer.FeatureColumnName);

            // Reuse the model directly when it already yields distributions; otherwise calibrate it.
            var calibratedModel = transformer.Model as TDistPredictor
                ?? CalibratorUtils.GetCalibratedPredictor(Host, ch, Calibrator, transformer.Model, trainedData, Args.MaxCalibrationExamples) as TDistPredictor;

            return new BinaryPredictionTransformer<TDistPredictor>(Host, calibratedModel, trainedData.Data.Schema, transformer.FeatureColumnName);
        }
Example #9
0
        /// <summary>
        /// Fits the scored <see cref="IDataView"/> creating a <see cref="CalibratorTransformer{TICalibrator}"/> that can transform the data by adding a
        /// <see cref="DefaultColumnNames.Probability"/> column containing the calibrated <see cref="DefaultColumnNames.Score"/>.
        /// </summary>
        /// <param name="input">The scored <see cref="IDataView"/> used to train the calibrator.</param>
        /// <returns>A trained <see cref="CalibratorTransformer{TICalibrator}"/> that will transform the data by adding the
        /// <see cref="DefaultColumnNames.Probability"/> column.</returns>
        public CalibratorTransformer <TICalibrator> Fit(IDataView input)
        {
            // Bind the score/label/feature (and optional weight) roles the calibrator trainer expects.
            var roles = new List <KeyValuePair <RoleMappedSchema.ColumnRole, string> >
            {
                RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Score, DefaultColumnNames.Score),
                RoleMappedSchema.ColumnRole.Label.Bind(LabelColumn.Name),
                RoleMappedSchema.ColumnRole.Feature.Bind(FeatureColumn.Name),
            };
            if (WeightColumn.IsValid)
                roles.Add(RoleMappedSchema.ColumnRole.Weight.Bind(WeightColumn.Name));

            var roleMappedData = new RoleMappedData(input, opt: false, roles.ToArray());

            TICalibrator calibrator;
            using (var ch = Host.Start("Creating calibrator."))
                calibrator = (TICalibrator)CalibratorUtils.TrainCalibrator(Host, ch, CalibratorTrainer, Predictor, roleMappedData);

            return Create(Host, calibrator);
        }
Example #10
0
        /// <summary>
        /// Trains <paramref name="trainer"/> (which must implement <c>ITrainer&lt;RoleMappedData&gt;</c>)
        /// on <paramref name="data"/> by invoking a generic TrainCore helper through reflection,
        /// then calibrates the resulting predictor when a <paramref name="calibrator"/> is supplied.
        /// </summary>
        /// <exception cref="Exception">Throws a user-argument error when the trainer does not accept
        /// <see cref="RoleMappedData"/>.</exception>
        private static IPredictor TrainCore(IHostEnvironment env, IChannel ch, RoleMappedData data, ITrainer trainer, string name, RoleMappedData validData,
                                            ICalibratorTrainer calibrator, int maxCalibrationExamples, bool?cacheData, IPredictor inpPredictor = null)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ch, nameof(ch));
            ch.CheckValue(data, nameof(data));
            ch.CheckValue(trainer, nameof(trainer));
            ch.CheckNonEmpty(name, nameof(name));
            ch.CheckValueOrNull(validData);
            ch.CheckValueOrNull(inpPredictor);

            var trainerRmd = trainer as ITrainer <RoleMappedData>;

            if (trainerRmd == null)
            {
                throw ch.ExceptUserArg(nameof(TrainCommand.Arguments.Trainer), "Trainer '{0}' does not accept known training data type", name);
            }

            // The generic TrainCore overload's type arguments (data type and input-predictor type)
            // are only known at run time, hence the MakeGenericMethod/Invoke dance below.
            Action <IChannel, ITrainer, Action <object>, object, object, object> trainCoreAction = TrainCore;
            IPredictor predictor;

            // Optionally wrap the training (and validation) data in a cache before training.
            AddCacheIfWanted(env, ch, trainer, ref data, cacheData);
            ch.Trace("Training");
            if (validData != null)
            {
                AddCacheIfWanted(env, ch, trainer, ref validData, cacheData);
            }

            // Close the generic method over the concrete runtime types; fall back to IPredictor
            // when no input predictor was given.
            var genericExam = trainCoreAction.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(
                typeof(RoleMappedData),
                inpPredictor != null ? inpPredictor.GetType() : typeof(IPredictor));
            Action <RoleMappedData> trainExam = trainerRmd.Train;

            // Static generic invocation: runs the actual training through trainExam.
            genericExam.Invoke(null, new object[] { ch, trainerRmd, trainExam, data, validData, inpPredictor });

            ch.Trace("Constructing predictor");
            predictor = trainerRmd.CreatePredictor();
            return(CalibratorUtils.TrainCalibratorIfNeeded(env, ch, calibrator, maxCalibrationExamples, trainer, predictor, data));
        }
        /// <summary>
        /// Trains a model.
        /// </summary>
        /// <param name="env">host</param>
        /// <param name="ch">channel</param>
        /// <param name="data">training data</param>
        /// <param name="validData">validation data</param>
        /// <param name="calibrator">calibrator</param>
        /// <param name="maxCalibrationExamples">number of examples used to calibrate</param>
        /// <param name="cacheData">cache training data</param>
        /// <param name="inputPredictor">for continuous training, initial state</param>
        /// <returns>predictor</returns>
        public IPredictor Train(IHostEnvironment env, IChannel ch, RoleMappedData data, RoleMappedData validData = null,
                                ICalibratorTrainer calibrator = null, int maxCalibrationExamples = 0,
                                bool?cacheData = null, IPredictor inputPredictor = null)
        {
            // Inlined equivalent of:
            //   TrainUtils.Train(env, ch, data, Trainer, LoadName, validData, calibrator,
            //                    maxCalibrationExamples, cacheData, inputPredictor);

            var trainer = Trainer;
            var name    = LoadName;

            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ch, nameof(ch));
            ch.CheckValue(data, nameof(data));
            ch.CheckValue(trainer, nameof(trainer));
            ch.CheckNonEmpty(name, nameof(name));
            ch.CheckValueOrNull(validData);
            ch.CheckValueOrNull(inputPredictor);

            // Optionally wrap the training (and validation) data in a cache.
            AddCacheIfWanted(env, ch, trainer, ref data, cacheData);
            ch.Trace(MessageSensitivity.None, "Training");
            if (validData != null)
                AddCacheIfWanted(env, ch, trainer, ref validData, cacheData);

            // An input model only makes sense for trainers that can continue training from it.
            if (inputPredictor != null && !trainer.Info.SupportsIncrementalTraining)
            {
                ch.Warning(MessageSensitivity.None, "Ignoring " + nameof(TrainCommand.Arguments.InputModelFile) +
                           ": Trainer does not support incremental training.");
                inputPredictor = null;
            }
            ch.Assert(validData == null || trainer.Info.SupportsValidation);

            var predictor = trainer.Train(new TrainContext(data, validData, null, inputPredictor));
            return CalibratorUtils.TrainCalibratorIfNeeded(env, ch, calibrator, maxCalibrationExamples, trainer, predictor, data);
        }
Example #12
0
            /// <summary>
            /// Trains <paramref name="caliTrainer"/> on the scores this model produces over
            /// <paramref name="data"/> and returns the model wrapped as a calibrated predictor.
            /// If the calibrator trainer needs no training, the data pass is skipped entirely.
            /// </summary>
            /// <param name="ch">Channel used for argument checks and for finishing the calibrator.</param>
            /// <param name="data">Examples whose scores/labels/weights feed the calibrator.</param>
            /// <param name="caliTrainer">The calibrator trainer to fit.</param>
            /// <param name="maxRows">Cap on the number of examples consumed; values &lt;= 0 mean no cap.</param>
            public IPredictor Calibrate(IChannel ch, IDataView data, ICalibratorTrainer caliTrainer, int maxRows)
            {
                Host.CheckValue(ch, nameof(ch));
                ch.CheckValue(data, nameof(data));
                ch.CheckValue(caliTrainer, nameof(caliTrainer));

                if (caliTrainer.NeedsTraining)
                {
                    // Bind this model to the data's schema so we can pull scores row by row.
                    var bound = new Bound(this, new RoleMappedSchema(data.Schema));
                    using (var curs = data.GetRowCursor(col => true))
                    {
                        var scoreGetter = (ValueGetter <Single>)bound.CreateScoreGetter(curs, col => true, out Action disposer);

                        // We assume that we can use the label column of the first predictor, since if the labels are not identical
                        // then the whole model is garbage anyway.
                        var labelGetter = bound.GetLabelGetter(curs, 0, out Action disp);
                        disposer += disp;
                        var weightGetter = bound.GetWeightGetter(curs, 0, out disp);
                        disposer += disp;
                        try
                        {
                            int num = 0;
                            while (curs.MoveNext())
                            {
                                // Rows with a non-finite label, score, or weight are skipped:
                                // the calibrator only sees finite training examples.
                                Single label = 0;
                                labelGetter(ref label);
                                if (!FloatUtils.IsFinite(label))
                                {
                                    continue;
                                }
                                Single score = 0;
                                scoreGetter(ref score);
                                if (!FloatUtils.IsFinite(score))
                                {
                                    continue;
                                }
                                Single weight = 0;
                                weightGetter(ref weight);
                                if (!FloatUtils.IsFinite(weight))
                                {
                                    continue;
                                }

                                // label > 0 is treated as the positive class.
                                caliTrainer.ProcessTrainingExample(score, label > 0, weight);

                                // Stop once the example cap is reached (maxRows <= 0 disables the cap).
                                if (maxRows > 0 && ++num >= maxRows)
                                {
                                    break;
                                }
                            }
                        }
                        finally
                        {
                            // Run the accumulated disposers for the score/label/weight getters.
                            disposer?.Invoke();
                        }
                    }
                }

                var calibrator = caliTrainer.FinishTraining(ch);

                return(CalibratorUtils.CreateCalibratedPredictor(Host, this, calibrator));
            }