/// <summary>
/// Builds the output schema shape for the given input shape. Every column listed in the input schema
/// definition for <c>TSrc</c> must be present in <paramref name="inputSchema"/> with a compatible shape;
/// the columns added by the transformer are then laid over the input columns, replacing any of the same name.
/// </summary>
/// <param name="inputSchema">The schema shape of the incoming data.</param>
/// <returns>A schema shape holding the input columns plus (or overridden by) the added columns.</returns>
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
    var transformerColumns = DataViewConstructionUtils.GetSchemaColumns(Transformer.AddedSchema);
    var transformerShape = SchemaShape.Create(SchemaBuilder.MakeSchema(transformerColumns));

    var columnsByName = inputSchema.Columns.ToDictionary(c => c.Name);
    var expectedInput = InternalSchemaDefinition.Create(typeof(TSrc), Transformer.InputSchemaDefinition);

    foreach (var expected in expectedInput.Columns)
    {
        // The expected column has to exist in the incoming schema at all.
        if (!columnsByName.TryGetValue(expected.ColumnName, out var actual))
        {
            throw Contracts.ExceptSchemaMismatch(nameof(inputSchema), "input", expected.ColumnName);
        }

        SchemaShape.GetColumnTypeShape(expected.ColumnType, out var expectedKind, out var expectedItemType, out var expectedIsKey);

        // Item type and key-ness must agree before the vector kind is even considered.
        bool mismatch = expectedItemType != actual.ItemType || expectedIsKey != actual.IsKey;
        if (!mismatch)
        {
            var actualKind = actual.Kind;
            if (expectedKind == SchemaShape.Column.VectorKind.Scalar)
            {
                mismatch = actualKind != SchemaShape.Column.VectorKind.Scalar;
            }
            else if (expectedKind == SchemaShape.Column.VectorKind.Vector)
            {
                mismatch = actualKind != SchemaShape.Column.VectorKind.Vector;
            }
            else if (expectedKind == SchemaShape.Column.VectorKind.VariableVector)
            {
                // Special treatment for vectors: when a variable vector is expected,
                // a fixed-size vector is accepted as well; only a scalar is rejected.
                mismatch = actualKind == SchemaShape.Column.VectorKind.Scalar;
            }
        }

        if (mismatch)
        {
            throw Contracts.ExceptSchemaMismatch(nameof(inputSchema), "input", expected.ColumnName, expected.ColumnType.ToString(), actual.GetTypeString());
        }
    }

    // Added columns win over input columns that share their name.
    foreach (var added in transformerShape.Columns)
    {
        columnsByName[added.Name] = added;
    }

    return new SchemaShape(columnsByName.Values);
}
/// <summary>
/// The 'reapply' constructor: creates a transform that reuses the filter function, state initializer,
/// input schema definition and added schema of <paramref name="transform"/>, bound to a new source.
/// </summary>
/// <param name="env">The host environment used to register this transform's host.</param>
/// <param name="transform">The existing transform whose configuration is reused.</param>
/// <param name="newSource">The data view the new transform reads from.</param>
private StatefulFilterTransform(IHostEnvironment env, StatefulFilterTransform<TSrc, TDst, TState> transform, IDataView newSource)
{
    // Register the host first: it is used by the assertions below and passed to TypedCursorable.Create.
    _host = env.Register(RegistrationName);
    _host.AssertValue(transform);
    _host.AssertValue(newSource);

    _source = newSource;
    _filterFunc = transform._filterFunc;
    // Carry over the remaining configuration so that the reapplied transform keeps its state
    // initializer and can itself be reapplied without losing the input schema overrides.
    _initStateAction = transform._initStateAction;
    _inputSchemaDefinition = transform._inputSchemaDefinition;
    _typedSource = TypedCursorable<TSrc>.Create(_host, newSource, false, transform._inputSchemaDefinition);

    _addedSchema = transform._addedSchema;
    _bindings = new ColumnBindings(newSource.Schema, DataViewConstructionUtils.GetSchemaColumns(_addedSchema));
}
/// <summary>
/// Builds the output schema shape by laying the columns added by the transformer over the columns of
/// <paramref name="inputSchema"/>; an added column replaces an input column of the same name.
/// </summary>
/// <param name="inputSchema">The schema shape of the incoming data.</param>
/// <returns>A schema shape holding the input columns plus (or overridden by) the added columns.</returns>
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
    var transformerColumns = DataViewConstructionUtils.GetSchemaColumns(Transformer.AddedSchema);
    var transformerShape = SchemaShape.Create(new Schema(transformerColumns));

    var columnsByName = inputSchema.Columns.ToDictionary(c => c.Name);
    foreach (var added in transformerShape.Columns)
    {
        columnsByName[added.Name] = added;
    }

    return new SchemaShape(columnsByName.Values);
}
/// <summary>
/// Creates a stateful filter transform over <paramref name="source"/>.
/// </summary>
/// <param name="env">The host environment; a host is registered on it under the transform's registration name.</param>
/// <param name="source">The data view upon which the transform is constructed.</param>
/// <param name="filterFunc">The function that maps source to destination columns and decides whether
/// to keep the row.</param>
/// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
/// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/>. Can be null.</param>
/// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/>. Can be null.</param>
public StatefulFilterTransform(
    IHostEnvironment env,
    IDataView source,
    Func<TSrc, TDst, TState, bool> filterFunc,
    Action<TState> initStateAction,
    SchemaDefinition inputSchemaDefinition = null,
    SchemaDefinition outputSchemaDefinition = null)
{
    _host = env.Register(RegistrationName);

    // Required arguments.
    _host.AssertValue(source, nameof(source));
    _host.AssertValue(filterFunc, nameof(filterFunc));

    // Optional arguments.
    _host.AssertValueOrNull(initStateAction);
    _host.AssertValueOrNull(inputSchemaDefinition);
    _host.AssertValueOrNull(outputSchemaDefinition);

    _source = source;
    _filterFunc = filterFunc;
    _initStateAction = initStateAction;
    _inputSchemaDefinition = inputSchemaDefinition;
    _typedSource = TypedCursorable<TSrc>.Create(_host, Source, false, inputSchemaDefinition);

    // The columns contributed by TDst are bound on top of the source schema.
    var destinationSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
    _addedSchema = destinationSchema;
    _bindings = new ColumnBindings(Source.Schema, DataViewConstructionUtils.GetSchemaColumns(destinationSchema));
}
/// <summary>
/// Creates a stateful filter transform that is savable iff <paramref name="saveAction"/> and
/// <paramref name="loadFunc"/> are not null.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="source">The data view upon which the transform is constructed.</param>
/// <param name="filterFunc">The function that maps source to destination columns and decides whether
/// to keep the row.</param>
/// <param name="initStateAction">The function that is called once per cursor to initialize state. Can be null.</param>
/// <param name="saveAction">An action that saves state to the serialization stream. May be
/// null simultaneously with <paramref name="loadFunc"/>.</param>
/// <param name="loadFunc">A function that, given the serialization stream and a data view, returns
/// an <see cref="ITransformTemplate"/>. The intent is that the returned object should itself be a
/// <see cref="CustomMappingTransformer{TSrc,TDst}"/>, but this is not strictly necessary. This delegate should be
/// a static non-lambda method that this assembly can legally call. May be null simultaneously with
/// <paramref name="saveAction"/>.</param>
/// <param name="inputSchemaDefinition">The schema definition overrides for <typeparamref name="TSrc"/>. Can be null.</param>
/// <param name="outputSchemaDefinition">The schema definition overrides for <typeparamref name="TDst"/>. Can be null.</param>
public StatefulFilterTransform(
    IHostEnvironment env,
    IDataView source,
    Func<TSrc, TDst, TState, bool> filterFunc,
    Action<TState> initStateAction,
    Action<BinaryWriter> saveAction,
    LambdaTransform.LoadDelegate loadFunc,
    SchemaDefinition inputSchemaDefinition = null,
    SchemaDefinition outputSchemaDefinition = null)
    : base(env, RegistrationName, saveAction, loadFunc)
{
    // Required arguments.
    Host.AssertValue(source, nameof(source));
    Host.AssertValue(filterFunc, nameof(filterFunc));

    // Optional arguments.
    Host.AssertValueOrNull(initStateAction);
    Host.AssertValueOrNull(inputSchemaDefinition);
    Host.AssertValueOrNull(outputSchemaDefinition);

    _source = source;
    _filterFunc = filterFunc;
    _initStateAction = initStateAction;
    _inputSchemaDefinition = inputSchemaDefinition;
    _typedSource = TypedCursorable<TSrc>.Create(Host, Source, false, inputSchemaDefinition);

    // The columns contributed by TDst are bound on top of the source schema.
    var destinationSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
    _addedSchema = destinationSchema;
    _bindings = new ColumnBindings(Schema.Create(Source.Schema), DataViewConstructionUtils.GetSchemaColumns(destinationSchema));
}