/// <summary>
/// Computes the <see cref="SchemaShape"/> that this transformer will produce for a given input shape.
/// Used for schema propagation and verification in a pipeline.
/// </summary>
/// <param name="inputSchema">Shape of the incoming data; checked via the host before use.</param>
/// <returns>
/// A shape holding every input column, with one entry added or replaced per configured column pair
/// under the pair's output name.
/// </returns>
/// <remarks>
/// Throws through the host when a configured input column is missing from <paramref name="inputSchema"/>
/// or when its item type is rejected by <c>CountFeatureSelectionUtils.IsValidColumnType</c>.
/// </remarks>
public SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
    _host.CheckValue(inputSchema, nameof(inputSchema));

    // Start from the input columns keyed by name; output columns overwrite or extend this map.
    var columnsByName = inputSchema.ToDictionary(column => column.Name);

    foreach (var pair in _columns)
    {
        bool found = inputSchema.TryFindColumn(pair.InputColumnName, out var source);
        if (!found)
        {
            throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", pair.InputColumnName);
        }

        bool typeIsSupported = CountFeatureSelectionUtils.IsValidColumnType(source.ItemType);
        if (!typeIsSupported)
        {
            throw _host.ExceptUserArg(nameof(inputSchema), "Column '{0}' does not have compatible type. Expected types are float, double or string.", pair.InputColumnName);
        }

        // Carry over only the annotations that are present on the source column.
        var annotations = new List<SchemaShape.Column>();

        if (source.Annotations.TryFindColumn(AnnotationUtils.Kinds.SlotNames, out var slotNames))
        {
            annotations.Add(slotNames);
        }

        if (source.Annotations.TryFindColumn(AnnotationUtils.Kinds.CategoricalSlotRanges, out var categoricalRanges))
        {
            annotations.Add(categoricalRanges);
        }

        // The IsNormalized annotation is forwarded only when the source column reports itself as normalized.
        if (source.IsNormalized())
        {
            if (source.Annotations.TryFindColumn(AnnotationUtils.Kinds.IsNormalized, out var normalizedFlag))
            {
                annotations.Add(normalizedFlag);
            }
        }

        var annotationShape = new SchemaShape(annotations.ToArray());
        var outputColumn = new SchemaShape.Column(pair.Name, source.Kind, source.ItemType, false, annotationShape);
        columnsByName[pair.Name] = outputColumn;
    }

    return new SchemaShape(columnsByName.Values);
}
/// <summary> /// Trains and returns a <see cref="ITransformer"/>. /// </summary> public ITransformer Fit(IDataView input) { _host.CheckValue(input, nameof(input)); int[] colSizes; var scores = CountFeatureSelectionUtils.Train(_host, input, _columns.Select(column => column.InputColumnName).ToArray(), out colSizes); var size = _columns.Length; using (var ch = _host.Start("Dropping Slots")) { // If no slots should be dropped from a column, use copy column to generate the corresponding output column. SlotsDroppingTransformer.ColumnOptions[] dropSlotsColumns; (string outputColumnName, string inputColumnName)[] copyColumnsPairs;
/// <summary>
/// Create method corresponding to SignatureDataTransform.
/// </summary>
/// <param name="env">Host environment; used to register a host under <c>RegistrationName</c>.</param>
/// <param name="args">Transform arguments; must have at least one column and a positive <c>Count</c>.</param>
/// <param name="input">The data the scores are trained on and the returned transform is applied to.</param>
/// <returns>
/// The result of <c>NopTransform.CreateIfNeeded</c> when no drop-slot columns were produced;
/// otherwise a new <c>DropSlotsTransform</c> over <paramref name="input"/>.
/// </returns>
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(RegistrationName);

    host.CheckValue(args, nameof(args));
    host.CheckValue(input, nameof(input));
    host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));
    host.CheckUserArg(args.Count > 0, nameof(args.Count));

    var scores = CountFeatureSelectionUtils.Train(host, input, args.Column, out int[] slotsPerColumn);
    var columnCount = args.Column.Length;

    using (var channel = host.Start("Dropping Slots"))
    {
        var dropColumns = CreateDropSlotsColumns(args, columnCount, scores, out int[] keptPerColumn);

        // Nothing to drop: hand the input back through the no-op transform helper.
        if (dropColumns.Count <= 0)
        {
            channel.Info("No features are being dropped.");
            return NopTransform.CreateIfNeeded(host, input);
        }

        // Per-column report; flagged as schema-sensitive because it includes column names.
        for (int index = 0; index < keptPerColumn.Length; index++)
        {
            channel.Info(MessageSensitivity.Schema, "Selected {0} slots out of {1} in column '{2}'", keptPerColumn[index], slotsPerColumn[index], args.Column[index]);
        }

        channel.Info("Total number of slots selected: {0}", keptPerColumn.Sum());

        var dropArgs = new DropSlotsTransform.Arguments
        {
            Column = dropColumns.ToArray()
        };
        return new DropSlotsTransform(host, dropArgs, input);
    }
}