private protected SingleFeaturePredictionTransformerBase(IHost host, ModelLoadContext ctx) : base(host, ctx) { FeatureColumnName = ctx.LoadStringOrNull(); if (FeatureColumnName == null) { FeatureColumnType = null; } else if (!TrainSchema.TryGetColumnIndex(FeatureColumnName, out int col)) { throw Host.ExceptSchemaMismatch(nameof(FeatureColumnName), "feature", FeatureColumnName); } else { FeatureColumnType = TrainSchema[col].Type; } BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, ModelAsPredictor); }
internal SingleFeaturePredictionTransformerBase(IHost host, ModelLoadContext ctx) : base(host, ctx) { FeatureColumn = ctx.LoadStringOrNull(); if (FeatureColumn == null) { FeatureColumnType = null; } else if (!TrainSchema.TryGetColumnIndex(FeatureColumn, out int col)) { throw Host.ExceptSchemaMismatch(nameof(FeatureColumn), RoleMappedSchema.ColumnRole.Feature.Value, FeatureColumn); } else { FeatureColumnType = TrainSchema[col].Type; } BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, Model); }
/// <summary> /// Creates a data scorer from the 'LoadName{settings}' string. /// </summary> /// <param name="env">The host environment to use.</param> /// <param name="settings">The settings string.</param> /// <param name="data">The data to score.</param> /// <param name="predictor">The predictor to score.</param> /// <param name="trainSchema">The training data schema from which the scorer can optionally extract /// additional information, for example, label names. If this is <c>null</c>, no information will be /// extracted.</param> /// <returns>The scored data.</returns> internal static IDataScorerTransform CreateScorer(this IHostEnvironment env, string settings, RoleMappedData data, IPredictor predictor, RoleMappedSchema trainSchema = null) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(data, nameof(data)); env.CheckValue(predictor, nameof(predictor)); env.CheckValueOrNull(trainSchema); Type factoryType = typeof(IComponentFactory <IDataView, ISchemaBoundMapper, RoleMappedSchema, IDataScorerTransform>); Type signatureType = typeof(SignatureDataScorer); ICommandLineComponentFactory scorerFactorySettings = CmdParser.CreateComponentFactory( factoryType, signatureType, settings); var bindable = ScoreUtils.GetSchemaBindableMapper(env, predictor, scorerFactorySettings: scorerFactorySettings); var mapper = bindable.Bind(env, data.Schema); return(CreateCore <IDataScorerTransform>(env, factoryType, signatureType, settings, data.Data, mapper, trainSchema)); }
/// <summary> /// Initializes a new reference of <see cref="SingleFeaturePredictionTransformerBase{TModel, TScorer}"/>. /// </summary> /// <param name="host">The local instance of <see cref="IHost"/>.</param> /// <param name="model">The model used for scoring.</param> /// <param name="trainSchema">The schema of the training data.</param> /// <param name="featureColumn">The feature column name.</param> public SingleFeaturePredictionTransformerBase(IHost host, TModel model, Schema trainSchema, string featureColumn) : base(host, model, trainSchema) { FeatureColumn = featureColumn; FeatureColumn = featureColumn; if (featureColumn == null) { FeatureColumnType = null; } else if (!trainSchema.TryGetColumnIndex(featureColumn, out int col)) { throw Host.ExceptSchemaMismatch(nameof(featureColumn), RoleMappedSchema.ColumnRole.Feature.Value, featureColumn); } else { FeatureColumnType = trainSchema[col].Type; } BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model); }
public BindableMapper(IHostEnvironment env, IFeatureContributionMapper predictor, int topContributionsCount, int bottomContributionsCount, bool normalize, bool stringify) { Contracts.CheckValue(env, nameof(env)); _env = env; _env.CheckValue(predictor, nameof(predictor)); if (topContributionsCount < 0) { throw env.Except($"Number of top contribution must be non negative"); } if (bottomContributionsCount < 0) { throw env.Except($"Number of bottom contribution must be non negative"); } _topContributionsCount = topContributionsCount; _bottomContributionsCount = bottomContributionsCount; _normalize = normalize; Stringify = stringify; Predictor = predictor; GenericMapper = ScoreUtils.GetSchemaBindableMapper(_env, Predictor, null); }
// Factory method for SignatureDataScorer. private static IDataScorerTransform Create(IHostEnvironment env, Arguments args, IDataView data, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(data, nameof(data)); env.CheckValue(mapper, nameof(mapper)); if (args.Top < 0) { throw env.Except($"Number of top contribution must be non negative"); } if (args.Bottom < 0) { throw env.Except($"Number of bottom contribution must be non negative"); } var contributionMapper = mapper as RowMapper; env.CheckParam(mapper != null, nameof(mapper), "Unexpected mapper"); var scorer = ScoreUtils.GetScorerComponent(env, contributionMapper); var scoredPipe = scorer.CreateComponent(env, data, contributionMapper, trainSchema); return(scoredPipe); }
public BindableMapper(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); _env = env; _env.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); // *** Binary format *** // IFeatureContributionMapper: Predictor // int: topContributionsCount // int: bottomContributionsCount // bool: normalize // bool: stringify ctx.LoadModel <IFeatureContributionMapper, SignatureLoadModel>(env, out Predictor, ModelFileUtils.DirPredictor); GenericMapper = ScoreUtils.GetSchemaBindableMapper(_env, Predictor, null); _topContributionsCount = ctx.Reader.ReadInt32(); Contracts.CheckDecode(0 <= _topContributionsCount); _bottomContributionsCount = ctx.Reader.ReadInt32(); Contracts.CheckDecode(0 <= _bottomContributionsCount); _normalize = ctx.Reader.ReadBoolByte(); Stringify = ctx.Reader.ReadBoolByte(); }
private void RunCore(IChannel ch) { Host.AssertValue(ch); ch.Trace("Creating loader"); LoadModelObjects(ch, true, out var predictor, true, out var trainSchema, out var loader); ch.AssertValue(predictor); ch.AssertValueOrNull(trainSchema); ch.AssertValue(loader); ch.Trace("Creating pipeline"); var scorer = ImplOptions.Scorer; ch.Assert(scorer == null || scorer is ICommandLineComponentFactory, "ScoreCommand should only be used from the command line."); var bindable = ScoreUtils.GetSchemaBindableMapper(Host, predictor, scorerFactorySettings: scorer as ICommandLineComponentFactory); ch.AssertValue(bindable); // REVIEW: We probably ought to prefer role mappings from the training schema. string feat = TrainUtils.MatchNameOrDefaultOrNull(ch, loader.Schema, nameof(ImplOptions.FeatureColumn), ImplOptions.FeatureColumn, DefaultColumnNames.Features); string group = TrainUtils.MatchNameOrDefaultOrNull(ch, loader.Schema, nameof(ImplOptions.GroupColumn), ImplOptions.GroupColumn, DefaultColumnNames.GroupId); var customCols = TrainUtils.CheckAndGenerateCustomColumns(ch, ImplOptions.CustomColumns); var schema = new RoleMappedSchema(loader.Schema, label: null, feature: feat, group: group, custom: customCols, opt: true); var mapper = bindable.Bind(Host, schema); if (scorer == null) { scorer = ScoreUtils.GetScorerComponent(Host, mapper); } loader = LegacyCompositeDataLoader.ApplyTransform(Host, loader, "Scorer", scorer.ToString(), (env, view) => scorer.CreateComponent(env, view, mapper, trainSchema)); loader = LegacyCompositeDataLoader.Create(Host, loader, ImplOptions.PostTransform); if (!string.IsNullOrWhiteSpace(ImplOptions.OutputModelFile)) { ch.Trace("Saving the data pipe"); SaveLoader(loader, ImplOptions.OutputModelFile); } ch.Trace("Creating saver"); IDataSaver writer; if (ImplOptions.Saver == null) { var ext = Path.GetExtension(ImplOptions.OutputDataFile); var isText = ext == ".txt" || ext == ".tlc"; if (isText) { writer = new TextSaver(Host, new TextSaver.Arguments()); } else { writer = new BinarySaver(Host, new BinarySaver.Arguments()); } } else { writer = ImplOptions.Saver.CreateComponent(Host); } ch.Assert(writer != null); var outputIsBinary = writer is BinaryWriter; bool outputAllColumns = ImplOptions.OutputAllColumns == true || (ImplOptions.OutputAllColumns == null && Utils.Size(ImplOptions.OutputColumns) == 0 && outputIsBinary); bool outputNamesAndLabels = ImplOptions.OutputAllColumns == true || Utils.Size(ImplOptions.OutputColumns) == 0; if (ImplOptions.OutputAllColumns == true && Utils.Size(ImplOptions.OutputColumns) != 0) { ch.Warning(nameof(ImplOptions.OutputAllColumns) + "=+ always writes all columns irrespective of " + nameof(ImplOptions.OutputColumns) + " specified."); } if (!outputAllColumns && Utils.Size(ImplOptions.OutputColumns) != 0) { foreach (var outCol in ImplOptions.OutputColumns) { if (!loader.Schema.TryGetColumnIndex(outCol, out int dummyColIndex)) { throw ch.ExceptUserArg(nameof(Arguments.OutputColumns), "Column '{0}' not found.", outCol); } } } uint maxScoreId = 0; if (!outputAllColumns) { maxScoreId = loader.Schema.GetMaxAnnotationKind(out int colMax, AnnotationUtils.Kinds.ScoreColumnSetId); } ch.Assert(outputAllColumns || maxScoreId > 0); // score set IDs are one-based var cols = new List <int>(); for (int i = 0; i < loader.Schema.Count; i++) { if (!ImplOptions.KeepHidden && loader.Schema[i].IsHidden) { continue; } if (!(outputAllColumns || ShouldAddColumn(loader.Schema, i, maxScoreId, outputNamesAndLabels))) { continue; } var type = loader.Schema[i].Type; if (writer.IsColumnSavable(type)) { cols.Add(i); } else { ch.Warning("The column '{0}' will not be written as it has unsavable column type.", loader.Schema[i].Name); } } ch.Check(cols.Count > 0, "No valid columns to save"); ch.Trace("Scoring and saving data"); using (var file = Host.CreateOutputFile(ImplOptions.OutputDataFile)) using (var stream = file.CreateWriteStream()) writer.SaveData(stream, loader, cols.ToArray()); }
private void RunCore(IChannel ch, string cmd) { Host.AssertValue(ch); Host.AssertNonEmpty(cmd); ch.Trace("Constructing trainer"); ITrainer trainer = ImplOptions.Trainer.CreateComponent(Host); IPredictor inputPredictor = null; if (ImplOptions.ContinueTrain && !TrainUtils.TryLoadPredictor(ch, Host, ImplOptions.InputModelFile, out inputPredictor)) { ch.Warning("No input model file specified or model file did not contain a predictor. The model state cannot be initialized."); } ch.Trace("Constructing the training pipeline"); IDataView trainPipe = CreateLoader(); var schema = trainPipe.Schema; string label = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.LabelColumn), ImplOptions.LabelColumn, DefaultColumnNames.Label); string features = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.FeatureColumn), ImplOptions.FeatureColumn, DefaultColumnNames.Features); string group = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.GroupColumn), ImplOptions.GroupColumn, DefaultColumnNames.GroupId); string weight = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.WeightColumn), ImplOptions.WeightColumn, DefaultColumnNames.Weight); string name = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(Arguments.NameColumn), ImplOptions.NameColumn, DefaultColumnNames.Name); TrainUtils.AddNormalizerIfNeeded(Host, ch, trainer, ref trainPipe, features, ImplOptions.NormalizeFeatures); ch.Trace("Binding columns"); var customCols = TrainUtils.CheckAndGenerateCustomColumns(ch, ImplOptions.CustomColumns); var data = new RoleMappedData(trainPipe, label, features, group, weight, name, customCols); RoleMappedData validData = null; if (!string.IsNullOrWhiteSpace(ImplOptions.ValidationFile)) { if (!trainer.Info.SupportsValidation) { ch.Warning("Ignoring validationFile: Trainer does not accept validation dataset."); } else { ch.Trace("Constructing the validation pipeline"); IDataView validPipe = CreateRawLoader(dataFile: ImplOptions.ValidationFile); validPipe = ApplyTransformUtils.ApplyAllTransformsToData(Host, trainPipe, validPipe); validData = new RoleMappedData(validPipe, data.Schema.GetColumnRoleNames()); } } // In addition to the training set, some trainers can accept two data sets, validation set and test set, // in training phase. The major difference between validation set and test set is that training process may // indirectly use validation set to improve the model but the learned model should totally independent of test set. // Similar to validation set, the trainer can report the scores computed using test set. RoleMappedData testDataUsedInTrainer = null; if (!string.IsNullOrWhiteSpace(ImplOptions.TestFile)) { // In contrast to the if-else block for validation above, we do not throw a warning if test file is provided // because this is TrainTest command. if (trainer.Info.SupportsTest) { ch.Trace("Constructing the test pipeline"); IDataView testPipeUsedInTrainer = CreateRawLoader(dataFile: ImplOptions.TestFile); testPipeUsedInTrainer = ApplyTransformUtils.ApplyAllTransformsToData(Host, trainPipe, testPipeUsedInTrainer); testDataUsedInTrainer = new RoleMappedData(testPipeUsedInTrainer, data.Schema.GetColumnRoleNames()); } } var predictor = TrainUtils.Train(Host, ch, data, trainer, validData, ImplOptions.Calibrator, ImplOptions.MaxCalibrationExamples, ImplOptions.CacheData, inputPredictor, testDataUsedInTrainer); ILegacyDataLoader testPipe; bool hasOutfile = !string.IsNullOrEmpty(ImplOptions.OutputModelFile); var tempFilePath = hasOutfile ? null : Path.GetTempFileName(); using (var file = new SimpleFileHandle(ch, hasOutfile ? ImplOptions.OutputModelFile : tempFilePath, true, !hasOutfile)) { TrainUtils.SaveModel(Host, ch, file, predictor, data, cmd); ch.Trace("Constructing the testing pipeline"); using (var stream = file.OpenReadStream()) using (var rep = RepositoryReader.Open(stream, ch)) testPipe = LoadLoader(rep, ImplOptions.TestFile, true); } // Score. ch.Trace("Scoring and evaluating"); ch.Assert(ImplOptions.Scorer == null || ImplOptions.Scorer is ICommandLineComponentFactory, "TrainTestCommand should only be used from the command line."); IDataScorerTransform scorePipe = ScoreUtils.GetScorer(ImplOptions.Scorer, predictor, testPipe, features, group, customCols, Host, data.Schema); // Evaluate. var evaluator = ImplOptions.Evaluator?.CreateComponent(Host) ?? EvaluateUtils.GetEvaluator(Host, scorePipe.Schema); var dataEval = new RoleMappedData(scorePipe, label, features, group, weight, name, customCols, opt: true); var metrics = evaluator.Evaluate(dataEval); MetricWriter.PrintWarnings(ch, metrics); evaluator.PrintFoldResults(ch, metrics); if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out var overall)) { throw ch.Except("No overall metrics found"); } overall = evaluator.GetOverallResults(overall); MetricWriter.PrintOverallMetrics(Host, ch, ImplOptions.SummaryFilename, overall, 1); evaluator.PrintAdditionalMetrics(ch, metrics); Dictionary <string, IDataView>[] metricValues = { metrics }; SendTelemetryMetric(metricValues); if (!string.IsNullOrWhiteSpace(ImplOptions.OutputDataFile)) { var perInst = evaluator.GetPerInstanceMetrics(dataEval); var perInstData = new RoleMappedData(perInst, label, null, group, weight, name, customCols); var idv = evaluator.GetPerInstanceDataViewToSave(perInstData); MetricWriter.SavePerInstance(Host, ch, ImplOptions.OutputDataFile, idv); } }
private FoldResult RunFold(int fold) { var host = GetHost(); host.Assert(0 <= fold && fold <= _numFolds); // REVIEW: Make channels buffered in multi-threaded environments. using (var ch = host.Start($"Fold {fold}")) { ch.Trace("Constructing trainer"); ITrainer trainer = _trainer.CreateComponent(host); // Train pipe. var trainFilter = new RangeFilter.Options(); trainFilter.Column = _splitColumn; trainFilter.Min = (Double)fold / _numFolds; trainFilter.Max = (Double)(fold + 1) / _numFolds; trainFilter.Complement = true; IDataView trainPipe = new RangeFilter(host, trainFilter, _inputDataView); trainPipe = new OpaqueDataView(trainPipe); var trainData = _createExamples(host, ch, trainPipe, trainer); // Test pipe. var testFilter = new RangeFilter.Options(); testFilter.Column = trainFilter.Column; testFilter.Min = trainFilter.Min; testFilter.Max = trainFilter.Max; ch.Assert(!testFilter.Complement); IDataView testPipe = new RangeFilter(host, testFilter, _inputDataView); testPipe = new OpaqueDataView(testPipe); var testData = _applyTransformsToTestData(host, ch, testPipe, trainData, trainPipe); // Validation pipe and examples. RoleMappedData validData = null; if (_getValidationDataView != null) { ch.Assert(_applyTransformsToValidationData != null); if (!trainer.Info.SupportsValidation) { ch.Warning("Trainer does not accept validation dataset."); } else { ch.Trace("Constructing the validation pipeline"); IDataView validLoader = _getValidationDataView(); var validPipe = ApplyTransformUtils.ApplyAllTransformsToData(host, _inputDataView, validLoader); validPipe = new OpaqueDataView(validPipe); validData = _applyTransformsToValidationData(host, ch, validPipe, trainData, trainPipe); } } // Train. var predictor = TrainUtils.Train(host, ch, trainData, trainer, validData, _calibrator, _maxCalibrationExamples, _cacheData, _inputPredictor); // Score. ch.Trace("Scoring and evaluating"); ch.Assert(_scorer == null || _scorer is ICommandLineComponentFactory, "CrossValidationCommand should only be used from the command line."); var bindable = ScoreUtils.GetSchemaBindableMapper(host, predictor, scorerFactorySettings: _scorer as ICommandLineComponentFactory); ch.AssertValue(bindable); var mapper = bindable.Bind(host, testData.Schema); var scorerComp = _scorer ?? ScoreUtils.GetScorerComponent(host, mapper); IDataScorerTransform scorePipe = scorerComp.CreateComponent(host, testData.Data, mapper, trainData.Schema); // Save per-fold model. string modelFileName = ConstructPerFoldName(_outputModelFile, fold); if (modelFileName != null && _loader != null) { using (var file = host.CreateOutputFile(modelFileName)) { var rmd = new RoleMappedData( CompositeDataLoader.ApplyTransform(host, _loader, null, null, (e, newSource) => ApplyTransformUtils.ApplyAllTransformsToData(e, trainData.Data, newSource)), trainData.Schema.GetColumnRoleNames()); TrainUtils.SaveModel(host, ch, file, predictor, rmd, _cmd); } } // Evaluate. var eval = _evaluator?.CreateComponent(host) ?? EvaluateUtils.GetEvaluator(host, scorePipe.Schema); // Note that this doesn't require the provided columns to exist (because of the "opt" parameter). // We don't normally expect the scorer to drop columns, but if it does, we should not require // all the columns in the test pipeline to still be present. var dataEval = new RoleMappedData(scorePipe, testData.Schema.GetColumnRoleNames(), opt: true); var dict = eval.Evaluate(dataEval); RoleMappedData perInstance = null; if (_savePerInstance) { var perInst = eval.GetPerInstanceMetrics(dataEval); perInstance = new RoleMappedData(perInst, dataEval.Schema.GetColumnRoleNames(), opt: true); } return(new FoldResult(dict, dataEval.Schema.Schema, perInstance, trainData.Schema)); } }