/// <summary>
/// Reads serialized bindings from <paramref name="ctx"/> and binds them against
/// <paramref name="input"/> using <paramref name="bindable"/>.
/// </summary>
/// <param name="ctx">The model load context to deserialize from.</param>
/// <param name="input">The input schema the mapper is bound to.</param>
/// <param name="env">The host environment used for checks and passed to the bind call.</param>
/// <param name="bindable">The bindable mapper; the result of binding it must be an
/// <see cref="ISchemaBoundRowMapper"/>.</param>
/// <param name="outputTypeMatches">Predicate applied to the score column type; decoding
/// fails if it returns <c>false</c>.</param>
/// <param name="getPredColType">Computes the predicted column type from the score column type
/// and the bound row mapper.</param>
/// <returns>The bindings built from the loaded information.</returns>
public static BindingsImpl Create(ModelLoadContext ctx, ISchema input,
    IHostEnvironment env, ISchemaBindableMapper bindable,
    Func<ColumnType, bool> outputTypeMatches, Func<ColumnType, ISchemaBoundRowMapper, ColumnType> getPredColType)
{
    Contracts.AssertValue(env);
    env.AssertValue(ctx);
    // Debug-only sanity checks on everything that is dereferenced or invoked below.
    env.AssertValue(input);
    env.AssertValue(bindable);
    env.AssertValue(outputTypeMatches);
    env.AssertValue(getPredColType);

    // *** Binary format ***
    // <base info>
    // int: id of the scores column kind (metadata output)
    // int: id of the column used for deriving the predicted label column

    string suffix;
    var roles = LoadBaseInfo(ctx, out suffix);

    string scoreKind = ctx.LoadNonEmptyString();
    string scoreCol = ctx.LoadNonEmptyString();

    var mapper = bindable.Bind(env, RoleMappedSchema.Create(input, roles));
    var rowMapper = mapper as ISchemaBoundRowMapper;
    // The requirement is on the bound mapper's type, so name that interface in the message.
    env.CheckParam(rowMapper != null, nameof(bindable), "Bindable expected to be an " + nameof(ISchemaBoundRowMapper) + "!");

    // Find the score column of the mapper.
    int scoreColIndex;
    env.CheckDecode(mapper.OutputSchema.TryGetColumnIndex(scoreCol, out scoreColIndex));

    var scoreType = mapper.OutputSchema.GetColumnType(scoreColIndex);
    env.CheckDecode(outputTypeMatches(scoreType));
    var predColType = getPredColType(scoreType, rowMapper);

    // NOTE(review): the literal 'false' is presumably the "user" flag seen on sibling
    // factory methods; confirm against the BindingsImpl constructor.
    return new BindingsImpl(input, rowMapper, suffix, scoreKind, false, scoreColIndex, predColType);
}
/// <summary>
/// Builds a new <see cref="BindingsImpl"/> over a different input schema, carrying over this
/// instance's suffix, score column kind and predicted column type.
/// </summary>
/// <param name="input">The schema to bind against.</param>
/// <param name="bindable">The bindable mapper to bind to <paramref name="input"/>.</param>
/// <param name="env">The host environment used for checks and passed to the bind call.</param>
/// <returns>Bindings whose score column index is resolved by name in the newly bound
/// mapper's output schema.</returns>
public BindingsImpl ApplyToSchema(ISchema input, ISchemaBindableMapper bindable, IHostEnvironment env)
{
    Contracts.AssertValue(env);
    env.AssertValue(input);
    env.AssertValue(bindable);

    // Capture the score column by name so it can be located again in the new mapper.
    string scoreName = RowMapper.OutputSchema.GetColumnName(ScoreColumnIndex);
    var roleMapped = RoleMappedSchema.Create(input, RowMapper.GetInputColumnRoles());

    // Checks compatibility of the predictor input types.
    var bound = bindable.Bind(env, roleMapped) as ISchemaBoundRowMapper;
    env.CheckParam(bound != null, nameof(bindable), "Mapper must implement ISchemaBoundRowMapper");

    int scoreIndex;
    env.Check(bound.OutputSchema.TryGetColumnIndex(scoreName, out scoreIndex), "Mapper doesn't have expected score column");

    return new BindingsImpl(input, bound, Suffix, ScoreColumnKind, true, scoreIndex, PredColType);
}
/// <summary>
/// Binds <paramref name="bindable"/> to the role-mapped form of <paramref name="input"/> and
/// builds the bindings from the resulting row mapper.
/// </summary>
/// <param name="env">The host environment passed to the bind call.</param>
/// <param name="bindable">The bindable mapper; binding it must yield an
/// <see cref="ISchemaBoundRowMapper"/>.</param>
/// <param name="input">The input schema.</param>
/// <param name="roles">The role/column-name pairs used to build the role-mapped schema.</param>
/// <param name="suffix">The column name suffix; may be <c>null</c>.</param>
/// <param name="user">Forwarded unchanged to the row-mapper based overload.</param>
private static Bindings Create(IHostEnvironment env, ISchemaBindableMapper bindable, ISchema input,
    IEnumerable<KeyValuePair<RoleMappedSchema.ColumnRole, string>> roles, string suffix, bool user = true)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(bindable);
    Contracts.AssertValue(input);
    Contracts.AssertValue(roles);
    Contracts.AssertValueOrNull(suffix);

    var roleMapped = RoleMappedSchema.Create(input, roles);
    var bound = bindable.Bind(env, roleMapped);

    // Nothing below relies on this, but a failure here indicates the bindable
    // did not bind against the schema it was handed.
    Contracts.Assert(bound.InputSchema.Schema == input);

    var asRowMapper = bound as ISchemaBoundRowMapper;
    Contracts.Check(asRowMapper != null, "Predictor expected to be a RowMapper!");

    return Create(input, asRowMapper, suffix, user);
}
/// <summary>
/// Wraps <paramref name="data"/> in a RoleMappedData whose schema is built from the data's
/// schema and the supplied role/column-name pairs.
/// Null or empty column names are skipped.
/// </summary>
/// <param name="data">The data view to wrap; must not be <c>null</c>.</param>
/// <param name="roles">The role/column-name pairs; must not be <c>null</c>.</param>
public static RoleMappedData Create(IDataView data, IEnumerable<KeyValuePair<RoleMappedSchema.ColumnRole, string>> roles)
{
    Contracts.CheckValue(data, nameof(data));
    Contracts.CheckValue(roles, nameof(roles));

    var roleMapped = RoleMappedSchema.Create(data.Schema, roles);
    return new RoleMappedData(data, roleMapped);
}
/// <summary>
/// Wraps <paramref name="data"/> in a RoleMappedData that carries no column role assignments.
/// </summary>
/// <param name="data">The data view to wrap; must not be <c>null</c>.</param>
public static RoleMappedData Create(IDataView data)
{
    Contracts.CheckValue(data, nameof(data));

    var roleMapped = RoleMappedSchema.Create(data.Schema);
    return new RoleMappedData(data, roleMapped);
}
/// <summary>
/// Opens the input model file and loads from it the artifacts requested by the caller,
/// combined with what the command line arguments specify.
/// </summary>
/// <param name="ch">The channel used for trace output.</param>
/// <param name="wantPredictor">Controls predictor loading. <c>false</c>: do not attempt to
/// load one. <c>null</c>: load it if present. <c>true</c>: load it, and fail if it
/// cannot be loaded.</param>
/// <param name="predictor">The loaded predictor, or <c>null</c> when
/// <paramref name="wantPredictor"/> was <c>false</c>, or was <c>null</c> and the model
/// held no predictor.</param>
/// <param name="wantTrainSchema">Whether the training schema should be loaded. There is no
/// "fail if absent" mode: even when <c>true</c>, <paramref name="trainSchema"/> stays
/// <c>null</c> if the model has no role mappings.</param>
/// <param name="trainSchema">The training schema, when <paramref name="wantTrainSchema"/>
/// is <c>true</c> and role mappings were stored in the model; otherwise <c>null</c>.</param>
/// <param name="pipe">The data pipe built from the model and the command line arguments.</param>
protected void LoadModelObjects(
    IChannel ch,
    bool? wantPredictor, out IPredictor predictor,
    bool wantTrainSchema, out RoleMappedSchema trainSchema,
    out IDataLoader pipe)
{
    // The input model file is opened unconditionally; everything below reads from it.
    using (var file = Host.OpenInputFile(Args.InputModelFile))
    using (var strm = file.OpenReadStream())
    using (var rep = RepositoryReader.Open(strm, Host))
    {
        // Step 1: the predictor.
        if (wantPredictor != false)
        {
            ch.Trace("Loading predictor");
            predictor = ModelFileUtils.LoadPredictorOrNull(Host, rep);
            if (wantPredictor == true)
            {
                Host.Check(predictor != null, "Could not load predictor from model file");
            }
        }
        else
        {
            predictor = null;
        }

        // Step 2: the loader.
        var sub = Args.Loader;
        IDataLoader trainPipe;
        if (!sub.IsGood())
        {
            // No loader on the command line: take it from the model.
            bool withTransforms = Args.LoadTransforms ?? true;
            pipe = LoadLoader(rep, Args.DataFile, withTransforms);
            // Only a pipe that includes the transforms can double as the training pipe.
            trainPipe = withTransforms ? pipe : null;
        }
        else
        {
            // The loader is overridden from the command line.
            trainPipe = null;
            pipe = sub.CreateInstance(Host, new MultiFileSource(Args.DataFile));
            if (Args.LoadTransforms == true)
            {
                Host.CheckUserArg(!string.IsNullOrWhiteSpace(Args.InputModelFile), nameof(Args.InputModelFile));
                pipe = LoadTransformChain(pipe);
            }
        }

        if (Utils.Size(Args.Transform) > 0)
        {
            pipe = CompositeDataLoader.Create(Host, pipe, Args.Transform);
        }

        // Step 3: the training data's role mapped schema.
        trainSchema = null;
        if (!wantTrainSchema)
        {
            return;
        }

        // First try to get the role mappings.
        var trainRoleMappings = ModelFileUtils.LoadRoleMappingsOrNull(Host, rep);
        if (trainRoleMappings == null)
        {
            // Failing here would be an alternative. However, the scorer should still
            // succeed, possibly with reduced functionality, when the training schema is
            // null: not every version of TLC models is believed to preserve the role
            // mappings, and backwards compatibility must be maintained.
            return;
        }

        // If the pipe loaded above is the training pipe, reuse it. Otherwise load the full
        // pipeline from the model, relying on the fact that all loaders can be loaded with
        // no data at all in order to obtain their schemas.
        trainPipe = trainPipe ?? ModelFileUtils.LoadLoader(Host, rep, new MultiFileSource(null), loadTransforms: true);
        trainSchema = RoleMappedSchema.Create(trainPipe.Schema, trainRoleMappings);
    }
}