/// <summary>
/// Computes the <see cref="SchemaShape"/> that this transformer's output will have for the given
/// <paramref name="inputSchema"/>. Used for schema propagation and verification in a pipeline.
/// </summary>
/// <param name="inputSchema">Shape of the incoming data.</param>
/// <returns>
/// A shape holding every input column plus one column per configured column pair. An output column
/// whose name matches an already present column replaces that column.
/// </returns>
/// <remarks>
/// Raises the host's schema-mismatch error when a configured input column is missing from
/// <paramref name="inputSchema"/>, and the host's user-argument error when that column's item type is
/// rejected by <c>CountFeatureSelectionUtils.IsValidColumnType</c>.
/// </remarks>
public SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
    _host.CheckValue(inputSchema, nameof(inputSchema));

    // Seed the output with the input columns, keyed by name, so that produced columns
    // can either be appended or overwrite an existing entry of the same name.
    var outputColumns = inputSchema.ToDictionary(column => column.Name);

    foreach (var mapping in _columns)
    {
        var sourceName = mapping.InputColumnName;

        bool sourceExists = inputSchema.TryFindColumn(sourceName, out var sourceColumn);
        if (!sourceExists)
        {
            throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", sourceName);
        }

        bool typeIsSupported = CountFeatureSelectionUtils.IsValidColumnType(sourceColumn.ItemType);
        if (!typeIsSupported)
        {
            throw _host.ExceptUserArg(
                nameof(inputSchema),
                "Column '{0}' does not have compatible type. Expected types are float, double or string.",
                sourceName);
        }

        // Carry over the slot-name and categorical-slot-range annotations when the source column has them.
        var annotations = new List<SchemaShape.Column>();
        var sourceAnnotations = sourceColumn.Annotations;

        if (sourceAnnotations.TryFindColumn(AnnotationUtils.Kinds.SlotNames, out var slotNames))
        {
            annotations.Add(slotNames);
        }

        if (sourceAnnotations.TryFindColumn(AnnotationUtils.Kinds.CategoricalSlotRanges, out var categoricalRanges))
        {
            annotations.Add(categoricalRanges);
        }

        // A scalar Boolean IsNormalized annotation is declared on every produced column,
        // whether or not the source column carried one.
        var isNormalized = new SchemaShape.Column(
            AnnotationUtils.Kinds.IsNormalized,
            SchemaShape.Column.VectorKind.Scalar,
            BooleanDataViewType.Instance,
            false);
        annotations.Add(isNormalized);

        // The produced column keeps the source column's vector kind and item type.
        var outputName = mapping.Name;
        var annotationShape = new SchemaShape(annotations.ToArray());
        outputColumns[outputName] = new SchemaShape.Column(
            outputName,
            sourceColumn.Kind,
            sourceColumn.ItemType,
            false,
            annotationShape);
    }

    return new SchemaShape(outputColumns.Values);
}
/// <summary> /// Trains and returns a <see cref="ITransformer"/>. /// </summary> public ITransformer Fit(IDataView input) { _host.CheckValue(input, nameof(input)); int[] colSizes; var scores = CountFeatureSelectionUtils.Train(_host, input, _columns.Select(column => column.InputColumnName).ToArray(), out colSizes); var size = _columns.Length; using (var ch = _host.Start("Dropping Slots")) { // If no slots should be dropped from a column, use copy column to generate the corresponding output column. SlotsDroppingTransformer.ColumnOptions[] dropSlotsColumns; (string outputColumnName, string inputColumnName)[] copyColumnsPairs;