/// <summary>
/// Builds a batch prediction engine from a serialized model stream. The transforms are loaded on top of
/// an in-memory data view created from an empty array of <typeparamref name="TSrc"/>; when the stream
/// also yields a predictor, the default scorer for it is appended to the pipe.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="modelStream">The stream the transforms, predictor and role mappings are loaded from.</param>
/// <param name="ignoreMissingColumns">Passed through to the output pipe engine.</param>
/// <param name="inputSchemaDefinition">Optional schema definition used when creating the source data view.</param>
/// <param name="outputSchemaDefinition">Optional schema definition passed to the output pipe engine.</param>
internal BatchPredictionEngine(IHostEnvironment env, Stream modelStream, bool ignoreMissingColumns,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(modelStream);
    Contracts.AssertValueOrNull(inputSchemaDefinition);
    Contracts.AssertValueOrNull(outputSchemaDefinition);

    // Initialize the pipe with an empty source view.
    _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[0], inputSchemaDefinition);

    // Load the transforms on top of the source view.
    var pipe = env.LoadTransforms(modelStream, _srcDataView);

    // Load the predictor (if present) and apply the default scorer.
    // REVIEW: distinguish the case of predictor / no predictor?
    var predictor = env.LoadPredictorOrNull(modelStream);
    if (predictor != null)
    {
        var roles = ModelFileUtils.LoadRoleMappingsOrNull(env, modelStream);
        if (roles != null)
        {
            pipe = env.CreateDefaultScorer(RoleMappedData.CreateOpt(pipe, roles), predictor);
        }
        else
        {
            // No stored role mappings: fall back to examples built with the "Features" column name.
            pipe = env.CreateDefaultScorer(env.CreateExamples(pipe, "Features"), predictor);
        }
    }

    _pipeEngine = new PipeEngine<TDst>(env, pipe, ignoreMissingColumns, outputSchemaDefinition);
}
/// <summary>
/// Wraps an in-memory collection of user-defined items in a new <see cref="IDataView"/>.
/// Ownership of <paramref name="data"/> stays with the caller, and the resulting data view
/// never alters the contents of <paramref name="data"/>.
/// Because an <see cref="IDataView"/> is assumed to be immutable, the caller is expected not to
/// modify <paramref name="data"/> while the data view is being actively cursored.
///
/// A typical usage of an in-memory data view: create the data view and train a predictor;
/// once that predictor is fully trained, modify the underlying collection and train another one.
/// </summary>
/// <typeparam name="TRow">The user-defined item type.</typeparam>
/// <param name="env">The host environment used to create the data view.</param>
/// <param name="data">The collection to wrap.</param>
/// <param name="schemaDefinition">Optional schema definition of the data view to create. When <c>null</c>,
/// the schema definition is inferred from <typeparamref name="TRow"/>.</param>
/// <returns>The constructed <see cref="IDataView"/>.</returns>
public static IDataView CreateDataView<TRow>(this IHostEnvironment env, IList<TRow> data, SchemaDefinition schemaDefinition = null)
    where TRow : class
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(data, nameof(data));
    env.CheckValueOrNull(schemaDefinition);

    var dataView = DataViewConstructionUtils.CreateFromList(env, data, schemaDefinition);
    return dataView;
}
/// <summary>
/// Returns a delegate that, given an input schema, loads the pipe with predictor from
/// <paramref name="modelStream"/> over an empty data view of that schema and produces its row-to-row mapper.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="modelStream">The stream the pipe is loaded from; checked for null up front.</param>
/// <returns>A function mapping an input schema to an <see cref="IRowToRowMapper"/>.</returns>
private static Func<Schema, IRowToRowMapper> StreamChecker(IHostEnvironment env, Stream modelStream)
{
    env.CheckValue(modelStream, nameof(modelStream));

    // Nothing is read from the stream here; loading happens when the returned delegate is invoked.
    Func<Schema, IRowToRowMapper> makeMapper = inputSchema =>
    {
        var emptySource = new EmptyDataView(env, inputSchema);
        var loadedPipe = DataViewConstructionUtils.LoadPipeWithPredictor(env, modelStream, emptySource);
        var transformer = new TransformWrapper(env, loadedPipe);

        // NOTE(review): the name reported here is the local 'transformer', which is not a parameter
        // of this method - confirm whether 'modelStream' was intended.
        env.CheckParam(transformer.IsRowToRowMapper, nameof(transformer), "Must be a row to row mapper");
        return transformer.GetRowToRowMapper(inputSchema);
    };

    return makeMapper;
}
/// <summary>
/// Initializes the prediction engine: stores <paramref name="transformer"/>, creates the input row for
/// <typeparamref name="TSrc"/>, obtains the mapper for that row's schema, and hands everything to
/// <c>PredictionEngineCore</c>, which sets the disposer and the output row.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="transformer">The transformer this engine wraps.</param>
/// <param name="ignoreMissingColumns">Passed through to <c>PredictionEngineCore</c>.</param>
/// <param name="inputSchemaDefinition">Optional schema definition used to create the input row.</param>
/// <param name="outputSchemaDefinition">Optional schema definition passed through to <c>PredictionEngineCore</c>.</param>
private protected PredictionEngineBase(IHostEnvironment env, ITransformer transformer, bool ignoreMissingColumns,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.CheckValue(env, nameof(env));
    env.AssertValue(transformer);
    Transformer = transformer;

    var mapperFactory = TransformerChecker(env, transformer);
    env.AssertValue(mapperFactory);

    _inputRow = DataViewConstructionUtils.CreateInputRow<TSrc>(env, inputSchemaDefinition);

    // The mapper is built against the schema of the freshly created input row.
    var mapper = mapperFactory(_inputRow.Schema);
    PredictionEngineCore(env, _inputRow, mapper, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition,
        out _disposer, out _outputRow);
}
/// <summary>
/// The 'reapply' constructor: creates a transform with the same filter function, state initializer,
/// input schema definition and added schema as <paramref name="transform"/>, bound to
/// <paramref name="newSource"/>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="transform">The existing transform whose configuration is copied.</param>
/// <param name="newSource">The data view the new transform reads from.</param>
private StatefulFilterTransform(IHostEnvironment env, StatefulFilterTransform<TSrc, TDst, TState> transform, IDataView newSource)
    : base(env, RegistrationName, transform)
{
    Host.AssertValue(transform);
    Host.AssertValue(newSource);

    _source = newSource;
    _filterFunc = transform._filterFunc;

    // The primary constructor stores both of these. Copy them as well, so the reapplied transform
    // keeps the state initializer and so that reapplying it again still sees the original
    // input schema definition instead of null.
    _initStateAction = transform._initStateAction;
    _inputSchemaDefinition = transform._inputSchemaDefinition;

    _typedSource = TypedCursorable<TSrc>.Create(Host, newSource, false, transform._inputSchemaDefinition);

    _addedSchema = transform._addedSchema;
    _bindings = new ColumnBindings(Schema.Create(newSource.Schema), DataViewConstructionUtils.GetSchemaColumns(_addedSchema));
}
/// <summary>
/// The 'reapply' constructor: creates a transform with the same map action, input schema definition
/// and added schema as <paramref name="transform"/>, bound to <paramref name="newSource"/>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="transform">The existing transform whose configuration is copied.</param>
/// <param name="newSource">The data view the new transform reads from.</param>
private MapTransform(IHostEnvironment env, MapTransform<TSrc, TDst> transform, IDataView newSource)
    : base(env, RegistrationName, transform)
{
    Host.AssertValue(transform);
    Host.AssertValue(newSource);

    Source = newSource;
    _mapAction = transform._mapAction;

    // The primary constructor stores the input schema definition. Copy it here too, so that
    // reapplying the resulting transform again does not read a null definition.
    _inputSchemaDefinition = transform._inputSchemaDefinition;

    _typedSource = TypedCursorable<TSrc>.Create(Host, newSource, false, transform._inputSchemaDefinition);

    _addedSchema = transform._addedSchema;
    _bindings = new ColumnBindings(Data.Schema.Create(newSource.Schema), DataViewConstructionUtils.GetSchemaColumns(_addedSchema));
}
/// <summary>
/// Builds a batch prediction engine by applying all transforms of <paramref name="dataPipeline"/>
/// to an in-memory data view created from an empty array of <typeparamref name="TSrc"/>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="dataPipeline">The data pipeline whose transforms are applied to the source view.</param>
/// <param name="ignoreMissingColumns">Passed through to the output pipe engine.</param>
/// <param name="inputSchemaDefinition">Optional schema definition used when creating the source data view.</param>
/// <param name="outputSchemaDefinition">Optional schema definition passed to the output pipe engine.</param>
internal BatchPredictionEngine(IHostEnvironment env, IDataView dataPipeline, bool ignoreMissingColumns,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(dataPipeline);
    Contracts.AssertValueOrNull(inputSchemaDefinition);
    Contracts.AssertValueOrNull(outputSchemaDefinition);

    // Initialize the pipe with an empty source view.
    _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[0], inputSchemaDefinition);

    var reboundPipe = ApplyTransformUtils.ApplyAllTransformsToData(env, dataPipeline, _srcDataView);
    _pipeEngine = new PipeEngine<TDst>(env, reboundPipe, ignoreMissingColumns, outputSchemaDefinition);
}
/// <summary>
/// Builds a batch prediction engine from a serialized model stream: the pipe (with predictor) is loaded
/// on top of an in-memory data view created from an empty array of <typeparamref name="TSrc"/>.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="modelStream">The stream the pipe is loaded from.</param>
/// <param name="ignoreMissingColumns">Passed through to the output pipe engine.</param>
/// <param name="inputSchemaDefinition">Optional schema definition used when creating the source data view.</param>
/// <param name="outputSchemaDefinition">Optional schema definition passed to the output pipe engine.</param>
internal BatchPredictionEngine(IHostEnvironment env, Stream modelStream, bool ignoreMissingColumns,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
{
    Contracts.AssertValue(env);
    Contracts.AssertValue(modelStream);
    Contracts.AssertValueOrNull(inputSchemaDefinition);
    Contracts.AssertValueOrNull(outputSchemaDefinition);

    // Initialize the pipe with an empty source view.
    _srcDataView = DataViewConstructionUtils.CreateFromEnumerable(env, new TSrc[0], inputSchemaDefinition);

    var loadedPipe = DataViewConstructionUtils.LoadPipeWithPredictor(env, modelStream, _srcDataView);
    _pipeEngine = new PipeEngine<TDst>(env, loadedPipe, ignoreMissingColumns, outputSchemaDefinition);
}
/// <summary>
/// Initializes the engine: creates the input row for <typeparamref name="TSrc"/>, builds the mapper
/// from that row's schema, and binds a typed output row for <typeparamref name="TDst"/> to the mapper's output.
/// </summary>
/// <param name="env">The host environment; must not be null.</param>
/// <param name="makeMapper">Produces the row-to-row mapper for a given input schema.</param>
/// <param name="ignoreMissingColumns">Passed through when creating the typed cursorable for the output.</param>
/// <param name="inputSchemaDefinition">Schema definition used to create the input row; may be null.</param>
/// <param name="outputSchemaDefinition">Schema definition used for the typed output; may be null.</param>
private PredictionEngine(IHostEnvironment env, Func<ISchema, IRowToRowMapper> makeMapper, bool ignoreMissingColumns,
    SchemaDefinition inputSchemaDefinition, SchemaDefinition outputSchemaDefinition)
{
    Contracts.CheckValue(env, nameof(env));
    env.AssertValue(makeMapper);

    _inputRow = DataViewConstructionUtils.CreateInputRow<TSrc>(env, inputSchemaDefinition);
    var rowMapper = makeMapper(_inputRow.Schema);

    // The typed cursorable is created over an empty view that has the mapper's output schema.
    var emptyOutput = new EmptyDataView(env, rowMapper.Schema);
    var typedOutput = TypedCursorable<TDst>.Create(env, emptyOutput, ignoreMissingColumns, outputSchemaDefinition);

    // Request every output column from the mapper.
    var mappedRow = rowMapper.GetRow(_inputRow, column => true, out _disposer);
    _outputRow = typedOutput.GetRow(mappedRow);
}
/// <summary>
/// Creates a map transform. The transform is savable iff both <paramref name="saveAction"/> and
/// <paramref name="loadFunc"/> are not null.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="source">The data view on which the transform is constructed.</param>
/// <param name="mapAction">The action that maps source columns to destination columns.</param>
/// <param name="saveAction">An action that saves state to the serialization stream. May be
/// null simultaneously with <paramref name="loadFunc"/>.</param>
/// <param name="loadFunc">A function that, given the serialization stream and a data view, returns
/// an <see cref="ITransformTemplate"/>. The intent is that the returned object is itself a
/// <see cref="MapTransform{TSrc,TDst}"/>, but this is not strictly necessary. This delegate should be
/// a static non-lambda method that this assembly can legally call. May be null simultaneously with
/// <paramref name="saveAction"/>.</param>
/// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/>.</param>
/// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/>.</param>
public MapTransform(IHostEnvironment env, IDataView source, Action<TSrc, TDst> mapAction,
    Action<BinaryWriter> saveAction, LambdaTransform.LoadDelegate loadFunc,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
    : base(env, RegistrationName, saveAction, loadFunc)
{
    Host.AssertValue(source);
    Host.AssertValue(mapAction);
    Host.AssertValueOrNull(inputSchemaDefinition);
    Host.AssertValueOrNull(outputSchemaDefinition);

    Source = source;
    _mapAction = mapAction;
    _inputSchemaDefinition = inputSchemaDefinition;
    _typedSource = TypedCursorable<TSrc>.Create(Host, Source, false, inputSchemaDefinition);

    // Without an explicit output definition, build the added schema from TDst with the Write direction.
    if (outputSchemaDefinition == null)
    {
        _addedSchema = InternalSchemaDefinition.Create(typeof(TDst), SchemaDefinition.Direction.Write);
    }
    else
    {
        _addedSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
    }

    var addedColumns = DataViewConstructionUtils.GetSchemaColumns(_addedSchema);
    _bindings = new ColumnBindings(Data.Schema.Create(Source.Schema), addedColumns);
}
/// <summary>
/// Create a filter transform that is savable iff <paramref name="saveAction"/> and <paramref name="loadFunc"/> are
/// not null.
/// </summary>
/// <param name="env">The host environment</param>
/// <param name="source">The dataview upon which we construct the transform</param>
/// <param name="filterFunc">The function by which we transform source to destination columns and decide whether
/// to keep the row.</param>
/// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
/// <param name="saveAction">An action that allows us to save state to the serialization stream. May be
/// null simultaneously with <paramref name="loadFunc"/>.</param>
/// <param name="loadFunc">A function that given the serialization stream and a data view, returns
/// an <see cref="ITransformTemplate"/>. The intent is, this returned object should itself be a
/// <see cref="StatefulFilterTransform{TSrc,TDst,TState}"/>, but this is not strictly necessary. This delegate should be
/// a static non-lambda method that this assembly can legally call. May be null simultaneously with
/// <paramref name="saveAction"/>.</param>
/// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/></param>
/// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/></param>
public StatefulFilterTransform(IHostEnvironment env, IDataView source, Func<TSrc, TDst, TState, bool> filterFunc,
    Action<TState> initStateAction,
    Action<BinaryWriter> saveAction, LambdaTransform.LoadDelegate loadFunc,
    SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
    : base(env, RegistrationName, saveAction, loadFunc)
{
    // nameof keeps the reported names in sync with the parameters if they are ever renamed.
    Host.AssertValue(source, nameof(source));
    Host.AssertValue(filterFunc, nameof(filterFunc));
    Host.AssertValueOrNull(initStateAction);
    Host.AssertValueOrNull(inputSchemaDefinition);
    Host.AssertValueOrNull(outputSchemaDefinition);

    _source = source;
    _filterFunc = filterFunc;
    _initStateAction = initStateAction;
    _inputSchemaDefinition = inputSchemaDefinition;
    _typedSource = TypedCursorable<TSrc>.Create(Host, Source, false, inputSchemaDefinition);

    var outSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
    _addedSchema = outSchema;
    _bindings = new ColumnBindings(Schema.Create(Source.Schema), DataViewConstructionUtils.GetSchemaColumns(outSchema));
}