private static IPredictor TrainCore(IHostEnvironment env, IChannel ch, RoleMappedData data, ITrainer trainer,
    RoleMappedData validData, IComponentFactory<ICalibratorTrainer> calibrator, int maxCalibrationExamples,
    bool? cacheData, IPredictor inputPredictor = null)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));
    ch.CheckValue(data, nameof(data));
    ch.CheckValue(trainer, nameof(trainer));
    ch.CheckValueOrNull(validData);
    ch.CheckValueOrNull(inputPredictor);

    AddCacheIfWanted(env, ch, trainer, ref data, cacheData);
    ch.Trace("Training");
    if (validData != null)
        AddCacheIfWanted(env, ch, trainer, ref validData, cacheData);

    if (inputPredictor != null && !trainer.Info.SupportsIncrementalTraining)
    {
        ch.Warning("Ignoring " + nameof(TrainCommand.Arguments.InputModelFile) +
            ": Trainer does not support incremental training.");
        inputPredictor = null;
    }
    ch.Assert(validData == null || trainer.Info.SupportsValidation);
    var predictor = trainer.Train(new TrainContext(data, validData, inputPredictor));
    var caliTrainer = calibrator?.CreateComponent(env);
    return CalibratorUtils.TrainCalibratorIfNeeded(env, ch, caliTrainer, maxCalibrationExamples, trainer, predictor, data);
}
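The `TrainCalibratorIfNeeded` call at the end only trains a calibrator when one is actually required. A minimal, self-contained sketch of that calibrate-if-needed pattern, using hypothetical stand-in types rather than the ML.NET ones (the real check also consults `trainer.Info` and whether the predictor already implements a probabilistic interface):

// Sketch of the calibrate-if-needed pattern (hypothetical stand-in types,
// not the ML.NET implementation).
using System;

interface IScorer { float Score(float[] features); }

sealed class CalibratedScorer : IScorer
{
    private readonly IScorer _inner;
    private readonly Func<float, float> _toProbability;

    public CalibratedScorer(IScorer inner, Func<float, float> toProbability)
    {
        _inner = inner;
        _toProbability = toProbability;
    }

    // Wrap the raw score with the trained calibration function.
    public float Score(float[] features) => _toProbability(_inner.Score(features));
}

static class CalibrationSketch
{
    // Leave the scorer untouched when no calibrator was configured.
    public static IScorer CalibrateIfNeeded(IScorer scorer, Func<float, float> calibrator)
        => calibrator == null ? scorer : new CalibratedScorer(scorer, calibrator);
}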
private ISingleFeaturePredictionTransformer<TScalarPredictor> TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls)
{
    var view = MapLabels(data, cls);
    string trainerLabel = data.Schema.Label.Name;

    // REVIEW: In principle we could support validation sets and the like via the train context, but
    // this is currently unsupported.
    var transformer = trainer.Fit(view);

    if (_args.UseProbabilities)
    {
        var calibratedModel = transformer.Model as TDistPredictor;

        // REVIEW: Restoring the RoleMappedData, as much as we can.
        // Not having the weight column on the data passed to the TrainCalibrator should be addressed.
        var trainedData = new RoleMappedData(view, label: trainerLabel, feature: transformer.FeatureColumn);

        if (calibratedModel == null)
            calibratedModel = CalibratorUtils.TrainCalibrator(Host, ch, Calibrator, Args.MaxCalibrationExamples, transformer.Model, trainedData) as TDistPredictor;

        Host.Check(calibratedModel != null, "Calibrated predictor does not implement the expected interface");
        return new BinaryPredictionTransformer<TScalarPredictor>(Host, calibratedModel, trainedData.Data.Schema, transformer.FeatureColumn);
    }

    return new BinaryPredictionTransformer<TScalarPredictor>(Host, transformer.Model, view.Schema, transformer.FeatureColumn);
}
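The `MapLabels` call above is what turns the multiclass problem into a per-class binary one. A conceptual sketch of that one-vs-all relabeling, written as a hypothetical standalone helper rather than the lazy `IDataView`-based implementation:

// One-vs-all relabeling, conceptually: for class 'cls', an example is positive
// exactly when its multiclass label equals cls. (Hypothetical helper; the real
// MapLabels computes this lazily over an IDataView label column.)
static bool[] MapLabelsOneVsAll(int[] multiclassLabels, int cls)
{
    var binaryLabels = new bool[multiclassLabels.Length];
    for (int i = 0; i < multiclassLabels.Length; i++)
        binaryLabels[i] = multiclassLabels[i] == cls;
    return binaryLabels;
}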
private TDistPredictor TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls1, int cls2)
{
    string dstName;
    var view = MapLabels(data, cls1, cls2, out dstName);
    var roles = data.Schema.GetColumnRoleNames()
        .Where(kvp => kvp.Key.Value != CR.Label.Value)
        .Prepend(CR.Label.Bind(dstName));
    var td = RoleMappedData.Create(view, roles);
    trainer.Train(td);

    ICalibratorTrainer calibrator;
    if (!Args.Calibrator.IsGood())
        calibrator = null;
    else
        calibrator = Args.Calibrator.CreateInstance(Host);

    TScalarPredictor predictor = trainer.CreatePredictor();
    var res = CalibratorUtils.TrainCalibratorIfNeeded(Host, ch, calibrator, Args.MaxCalibrationExamples, trainer, predictor, td);
    var dist = res as TDistPredictor;
    Host.Check(dist != null, "Calibrated predictor does not implement the expected interface");
    Host.Check(dist is IValueMapperDist, "Calibrated predictor does not implement the IValueMapperDist interface");
    return dist;
}
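Here `MapLabels` takes a pair of classes, the pairwise-coupling variant: the binary label marks membership in the pair {cls1, cls2}. A conceptual sketch, again as a hypothetical standalone helper (the exact semantics of the real `MapLabels` may differ):

// Pairwise relabeling, conceptually: an example is positive when its label is
// either of the two classes of the pair. (Hypothetical helper; the real
// implementation maps the key-valued label column lazily.)
static bool[] MapLabelsPairwise(int[] multiclassLabels, int cls1, int cls2)
{
    var binaryLabels = new bool[multiclassLabels.Length];
    for (int i = 0; i < multiclassLabels.Length; i++)
        binaryLabels[i] = multiclassLabels[i] == cls1 || multiclassLabels[i] == cls2;
    return binaryLabels;
}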
private TScalarPredictor TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls)
{
    string dstName;
    var view = MapLabels(data, cls, out dstName);
    var roles = data.Schema.GetColumnRoleNames()
        .Where(kvp => kvp.Key.Value != CR.Label.Value)
        .Prepend(CR.Label.Bind(dstName));
    var td = new RoleMappedData(view, roles);

    // REVIEW: In principle we could support validation sets and the like via the train context, but
    // this is currently unsupported.
    var predictor = trainer.Train(td);

    if (Args.UseProbabilities)
    {
        ICalibratorTrainer calibrator;
        if (!Args.Calibrator.IsGood())
            calibrator = null;
        else
            calibrator = Args.Calibrator.CreateInstance(Host);

        var res = CalibratorUtils.TrainCalibratorIfNeeded(Host, ch, calibrator, Args.MaxCalibrationExamples, trainer, predictor, td);
        predictor = res as TScalarPredictor;
        Host.Check(predictor != null, "Calibrated predictor does not implement the expected interface");
    }
    return predictor;
}
// cls is the "class id", zero-based.
private TScalarPredictor TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls)
{
    string dstName;
    var view = MapLabels(data, cls, out dstName, ch);

    if (_args.cacheTransform != null)
    {
        var sub = ScikitSubComponent<IDataTransform, SignatureDataTransform>.AsSubComponent(_args.cacheTransform);
        view = sub.CreateInstance(Host, view);
    }

    var roles = data.Schema.GetColumnRoleNames()
        .Where(kvp => kvp.Key.Value != CR.Label.Value)
        .Prepend(CR.Label.Bind(dstName));
    var td = new RoleMappedData(view, roles);
    var predictor = trainer.Train(td);

    if (_args.useProbabilities)
    {
        var calSett = ScikitSubComponent<ICalibratorTrainer, SignatureCalibrator>.AsSubComponent(_args.calibratorType);
        var calibrator = calSett.CreateInstance(Host);
        var res = CalibratorUtils.TrainCalibratorIfNeeded(Host, ch, calibrator, _args.maxCalibrationExamples, trainer, predictor, td);
        predictor = res as TScalarPredictor;
        Host.Check(predictor != null, "Calibrated predictor does not implement the expected interface");
    }
    return predictor;
}
/// <summary>
/// Fits the scored <see cref="IDataView"/> creating a <see cref="CalibratorTransformer{TICalibrator}"/> that can transform the data by adding a
/// <see cref="DefaultColumnNames.Probability"/> column containing the calibrated <see cref="DefaultColumnNames.Score"/>.
/// </summary>
/// <param name="input">The scored <see cref="IDataView"/>.</param>
/// <returns>A trained <see cref="CalibratorTransformer{TICalibrator}"/> that will transform the data by adding the
/// <see cref="DefaultColumnNames.Probability"/> column.</returns>
public CalibratorTransformer<TICalibrator> Fit(IDataView input)
{
    using (var ch = Host.Start("Creating calibrator."))
    {
        var calibrator = (TICalibrator)CalibratorUtils.TrainCalibrator(Host, ch,
            _calibratorTrainer, input, LabelColumn.Name, ScoreColumn.Name, WeightColumn.Name);
        return Create(Host, calibrator);
    }
}
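For callers, this `Fit` is typically reached through the public calibrator estimators. A minimal usage sketch, assuming the `MLContext` surface of ML.NET 1.x and the default "Label"/"Score" column names; `scoredData` is a placeholder for any `IDataView` that already contains raw scores:

// Minimal usage sketch (assumes ML.NET 1.x; 'scoredData' stands for the output
// of a non-calibrated binary classifier).
var mlContext = new MLContext(seed: 1);

var calibratorEstimator = mlContext.BinaryClassification.Calibrators.Platt(
    labelColumnName: "Label", scoreColumnName: "Score");

// Fit trains the calibrator on the scored data; the resulting transformer
// appends a Probability column holding the calibrated scores.
var calibratorTransformer = calibratorEstimator.Fit(scoredData);
IDataView calibratedData = calibratorTransformer.Transform(scoredData);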
void ReconfigurablePrediction()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline
        var loader = new TextLoader(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
        var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

        // Train
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
        {
            NumThreads = 1
        });

        var cached = new CacheDataView(env, trans, prefetch: null);
        var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
        IPredictor predictor = trainer.Train(new Runtime.TrainContext(trainRoles));
        using (var ch = env.Start("Calibrator training"))
        {
            predictor = CalibratorUtils.TrainCalibrator(env, ch, new PlattCalibratorTrainer(env), int.MaxValue, predictor, trainRoles);
        }

        var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
        IDataScorerTransform scorer = ScoreUtils.GetScorer(predictor, scoreRoles, env, trainRoles.Schema);

        var dataEval = new RoleMappedData(scorer, label: "Label", feature: "Features", opt: true);
        var evaluator = new BinaryClassifierMamlEvaluator(env, new BinaryClassifierMamlEvaluator.Arguments() { });
        var metricsDict = evaluator.Evaluate(dataEval);
        var metrics = BinaryClassificationMetrics.FromMetrics(env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"])[0];

        var bindable = ScoreUtils.GetSchemaBindableMapper(env, predictor, null);
        var mapper = bindable.Bind(env, trainRoles.Schema);
        var newScorer = new BinaryClassifierScorer(env,
            new BinaryClassifierScorer.Arguments { Threshold = 0.01f, ThresholdColumn = DefaultColumnNames.Probability },
            scoreRoles.Data, mapper, trainRoles.Schema);

        dataEval = new RoleMappedData(newScorer, label: "Label", feature: "Features", opt: true);
        var newEvaluator = new BinaryClassifierMamlEvaluator(env,
            new BinaryClassifierMamlEvaluator.Arguments() { Threshold = 0.01f, UseRawScoreThreshold = false });
        metricsDict = newEvaluator.Evaluate(dataEval);
        var newMetrics = BinaryClassificationMetrics.FromMetrics(env, metricsDict["OverallMetrics"], metricsDict["ConfusionMatrix"])[0];
    }
}
private ISingleFeaturePredictionTransformer<TDistPredictor> TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls1, int cls2)
{
    // This should not be necessary when the legacy constructor doesn't exist, and the label column
    // is not an optional parameter on the MetaMulticlassTrainer constructor.
    string trainerLabel = data.Schema.Label.Value.Name;

    var view = MapLabels(data, cls1, cls2);
    var transformer = trainer.Fit(view);

    // The validations in the calibrator check for the feature column, in the RoleMappedData.
    var trainedData = new RoleMappedData(view, label: trainerLabel, feature: transformer.FeatureColumnName);

    var calibratedModel = transformer.Model as TDistPredictor;
    if (calibratedModel == null)
        calibratedModel = CalibratorUtils.GetCalibratedPredictor(Host, ch, Calibrator, transformer.Model, trainedData, Args.MaxCalibrationExamples) as TDistPredictor;

    return new BinaryPredictionTransformer<TDistPredictor>(Host, calibratedModel, trainedData.Data.Schema, transformer.FeatureColumnName);
}
/// <summary>
/// Fits the scored <see cref="IDataView"/> creating a <see cref="CalibratorTransformer{TICalibrator}"/> that can transform the data by adding a
/// <see cref="DefaultColumnNames.Probability"/> column containing the calibrated <see cref="DefaultColumnNames.Score"/>.
/// </summary>
/// <param name="input">The scored <see cref="IDataView"/>.</param>
/// <returns>A trained <see cref="CalibratorTransformer{TICalibrator}"/> that will transform the data by adding the
/// <see cref="DefaultColumnNames.Probability"/> column.</returns>
public CalibratorTransformer<TICalibrator> Fit(IDataView input)
{
    TICalibrator calibrator = null;

    var roles = new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>();
    roles.Add(RoleMappedSchema.CreatePair(MetadataUtils.Const.ScoreValueKind.Score, DefaultColumnNames.Score));
    roles.Add(RoleMappedSchema.ColumnRole.Label.Bind(LabelColumn.Name));
    roles.Add(RoleMappedSchema.ColumnRole.Feature.Bind(FeatureColumn.Name));
    if (WeightColumn.IsValid)
        roles.Add(RoleMappedSchema.ColumnRole.Weight.Bind(WeightColumn.Name));

    var roleMappedData = new RoleMappedData(input, opt: false, roles.ToArray());

    using (var ch = Host.Start("Creating calibrator."))
        calibrator = (TICalibrator)CalibratorUtils.TrainCalibrator(Host, ch, CalibratorTrainer, Predictor, roleMappedData);

    return Create(Host, calibrator);
}
private static IPredictor TrainCore(IHostEnvironment env, IChannel ch, RoleMappedData data, ITrainer trainer, string name,
    RoleMappedData validData, ICalibratorTrainer calibrator, int maxCalibrationExamples, bool? cacheData, IPredictor inpPredictor = null)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));
    ch.CheckValue(data, nameof(data));
    ch.CheckValue(trainer, nameof(trainer));
    ch.CheckNonEmpty(name, nameof(name));
    ch.CheckValueOrNull(validData);
    ch.CheckValueOrNull(inpPredictor);

    var trainerRmd = trainer as ITrainer<RoleMappedData>;
    if (trainerRmd == null)
        throw ch.ExceptUserArg(nameof(TrainCommand.Arguments.Trainer), "Trainer '{0}' does not accept known training data type", name);

    Action<IChannel, ITrainer, Action<object>, object, object, object> trainCoreAction = TrainCore;
    IPredictor predictor;
    AddCacheIfWanted(env, ch, trainer, ref data, cacheData);
    ch.Trace("Training");
    if (validData != null)
        AddCacheIfWanted(env, ch, trainer, ref validData, cacheData);

    var genericExam = trainCoreAction.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(
        typeof(RoleMappedData),
        inpPredictor != null ? inpPredictor.GetType() : typeof(IPredictor));
    Action<RoleMappedData> trainExam = trainerRmd.Train;
    genericExam.Invoke(null, new object[] { ch, trainerRmd, trainExam, data, validData, inpPredictor });

    ch.Trace("Constructing predictor");
    predictor = trainerRmd.CreatePredictor();
    return CalibratorUtils.TrainCalibratorIfNeeded(env, ch, calibrator, maxCalibrationExamples, trainer, predictor, data);
}
/// <summary>
/// Trains a model.
/// </summary>
/// <param name="env">host</param>
/// <param name="ch">channel</param>
/// <param name="data">training data</param>
/// <param name="validData">validation data</param>
/// <param name="calibrator">calibrator</param>
/// <param name="maxCalibrationExamples">number of examples used to calibrate</param>
/// <param name="cacheData">cache training data</param>
/// <param name="inputPredictor">for continuous training, initial state</param>
/// <returns>predictor</returns>
public IPredictor Train(IHostEnvironment env, IChannel ch, RoleMappedData data, RoleMappedData validData = null,
    ICalibratorTrainer calibrator = null, int maxCalibrationExamples = 0,
    bool? cacheData = null, IPredictor inputPredictor = null)
{
    /*
    return TrainUtils.Train(env, ch, data, Trainer, LoadName, validData, calibrator, maxCalibrationExamples,
        cacheData, inpPredictor);
    */
    var trainer = Trainer;
    var name = LoadName;
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));
    ch.CheckValue(data, nameof(data));
    ch.CheckValue(trainer, nameof(trainer));
    ch.CheckNonEmpty(name, nameof(name));
    ch.CheckValueOrNull(validData);
    ch.CheckValueOrNull(inputPredictor);

    AddCacheIfWanted(env, ch, trainer, ref data, cacheData);
    ch.Trace(MessageSensitivity.None, "Training");
    if (validData != null)
        AddCacheIfWanted(env, ch, trainer, ref validData, cacheData);

    if (inputPredictor != null && !trainer.Info.SupportsIncrementalTraining)
    {
        ch.Warning(MessageSensitivity.None, "Ignoring " + nameof(TrainCommand.Arguments.InputModelFile) +
            ": Trainer does not support incremental training.");
        inputPredictor = null;
    }
    ch.Assert(validData == null || trainer.Info.SupportsValidation);
    var predictor = trainer.Train(new TrainContext(data, validData, null, inputPredictor));
    return CalibratorUtils.TrainCalibratorIfNeeded(env, ch, calibrator, maxCalibrationExamples, trainer, predictor, data);
}
public IPredictor Calibrate(IChannel ch, IDataView data, ICalibratorTrainer caliTrainer, int maxRows)
{
    Host.CheckValue(ch, nameof(ch));
    ch.CheckValue(data, nameof(data));
    ch.CheckValue(caliTrainer, nameof(caliTrainer));

    if (caliTrainer.NeedsTraining)
    {
        var bound = new Bound(this, new RoleMappedSchema(data.Schema));
        using (var curs = data.GetRowCursor(col => true))
        {
            var scoreGetter = (ValueGetter<Single>)bound.CreateScoreGetter(curs, col => true, out Action disposer);

            // We assume that we can use the label column of the first predictor, since if the labels are not identical
            // then the whole model is garbage anyway.
            var labelGetter = bound.GetLabelGetter(curs, 0, out Action disp);
            disposer += disp;
            var weightGetter = bound.GetWeightGetter(curs, 0, out disp);
            disposer += disp;
            try
            {
                int num = 0;
                while (curs.MoveNext())
                {
                    // Skip examples whose label, score, or weight is not finite.
                    Single label = 0;
                    labelGetter(ref label);
                    if (!FloatUtils.IsFinite(label))
                        continue;
                    Single score = 0;
                    scoreGetter(ref score);
                    if (!FloatUtils.IsFinite(score))
                        continue;
                    Single weight = 0;
                    weightGetter(ref weight);
                    if (!FloatUtils.IsFinite(weight))
                        continue;

                    caliTrainer.ProcessTrainingExample(score, label > 0, weight);

                    if (maxRows > 0 && ++num >= maxRows)
                        break;
                }
            }
            finally
            {
                disposer?.Invoke();
            }
        }
    }
    var calibrator = caliTrainer.FinishTraining(ch);
    return CalibratorUtils.CreateCalibratedPredictor(Host, this, calibrator);
}
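Once `FinishTraining` returns, the calibrator itself is just a learned scalar map from raw score to probability. For Platt scaling, the most common choice, that map is a fitted sigmoid; a minimal sketch of what it computes (the helper is illustrative, though ML.NET's `PlattCalibrator` exposes similar slope/offset parameters):

// What a trained Platt calibrator computes, conceptually: a sigmoid whose
// parameters were fitted from the (score, label, weight) stream fed to
// ProcessTrainingExample above. (Illustrative helper, not the ML.NET class.)
static float PlattProbability(float score, float slope, float offset)
    => 1f / (1f + (float)Math.Exp(slope * score + offset));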