/// <summary>
/// Initializes the data transform by registering a host named after this type on
/// <paramref name="env"/> and storing the transformer, mapper and input view.
/// </summary>
/// <param name="env">Host environment; validated with <c>Contracts.CheckRef</c>.</param>
/// <param name="transform">The column-selecting transformer kept by this instance.</param>
/// <param name="mapper">The mapper kept by this instance.</param>
/// <param name="input">The data view exposed as <c>Source</c>.</param>
public SelectColumnsDataTransform(IHostEnvironment env, ColumnSelectingTransformer transform, Mapper mapper, IDataView input)
{
    // Validate the environment first; the host is registered on the validated reference.
    var checkedEnv = Contracts.CheckRef(env, nameof(env));
    _host = checkedEnv.Register(nameof(SelectColumnsDataTransform));

    Source = input;
    _mapper = mapper;
    _transform = transform;
}
/// <summary>
/// Factory method for SignatureDataTransform. Builds a <see cref="ColumnSelectingTransformer"/>
/// from <paramref name="options"/> and wraps it, together with a mapper bound to the schema of
/// <paramref name="input"/>, in a <see cref="SelectColumnsDataTransform"/>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="options">Options supplying KeepColumns, DropColumns, KeepHidden and IgnoreMissing; must not be null.</param>
/// <param name="input">Input data view whose schema the mapper is built against; must not be null.</param>
/// <returns>The data transform applied over <paramref name="input"/>.</returns>
private static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(options, nameof(options));
    // input.Schema is dereferenced below, so reject a null input up front instead of
    // failing later with a NullReferenceException.
    env.CheckValue(input, nameof(input));

    var transform = new ColumnSelectingTransformer(env, options.KeepColumns, options.DropColumns, options.KeepHidden, options.IgnoreMissing);
    return new SelectColumnsDataTransform(env, transform, new Mapper(transform, input.Schema), input);
}
/// <summary>
/// Creates a mapper for <paramref name="transform"/> over <paramref name="inputSchema"/>:
/// registers a child host on the transformer's host, records the input schema, builds the
/// output-to-input column map and derives the output schema from that map.
/// </summary>
/// <param name="transform">Transformer supplying SelectColumns, KeepColumns and KeepHidden.</param>
/// <param name="inputSchema">Schema of the data the mapper is applied to.</param>
public Mapper(ColumnSelectingTransformer transform, DataViewSchema inputSchema)
{
    _inputSchema = inputSchema;
    _host = transform._host.Register(nameof(Mapper));

    // The map must exist before the output schema can be generated from it.
    OutputToInputMap = BuildOutputToInputMap(
        transform.SelectColumns,
        transform.KeepColumns,
        transform.KeepHidden,
        inputSchema);

    OutputSchema = GenerateOutputSchema(OutputToInputMap, inputSchema);
}
/// <summary>
/// Factory method for SignatureDataTransform. Composes a chain of transforms over
/// <paramref name="input"/> that replaces missing values and, for columns that request it,
/// concatenates an "is missing" indicator onto the replaced values.
/// </summary>
/// <remarks>
/// For each column that wants an indicator, temporary columns are created for the indicator and
/// the replaced values, the indicator is converted to the input item type when that conversion is
/// not the identity, both are concatenated into the requested output column, and the temporary
/// columns are dropped at the end.
/// </remarks>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="options">Options; must not be null and must contain at least one column.</param>
/// <param name="input">Input data view; must not be null.</param>
/// <returns>The last transform of the composed chain.</returns>
internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register("Categorical");
    h.CheckValue(options, nameof(options));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns));

    var replaceCols = new List<MissingValueReplacingEstimator.ColumnInfo>();
    var naIndicatorCols = new List<MissingValueIndicatorTransformer.Column>();
    var naConvCols = new List<TypeConvertingEstimator.ColumnInfo>();
    var concatCols = new List<ColumnConcatenatingTransformer.TaggedColumn>();
    var dropCols = new List<string>();
    var tmpIsMissingColNames = input.Schema.GetTempColumnNames(options.Columns.Length, "IsMissing");
    var tmpReplaceColNames = input.Schema.GetTempColumnNames(options.Columns.Length, "Replace");

    for (int i = 0; i < options.Columns.Length; i++)
    {
        var column = options.Columns[i];

        // The per-column setting wins over the global one.
        var addInd = column.ConcatIndicator ?? options.Concat;
        if (!addInd)
        {
            // No indicator requested: replace straight into the final output column.
            replaceCols.Add(new MissingValueReplacingEstimator.ColumnInfo(column.Name, column.Source,
                (MissingValueReplacingEstimator.ColumnInfo.ReplacementMode)(column.Kind ?? options.ReplaceWith),
                column.ImputeBySlot ?? options.ImputeBySlot));
            continue;
        }

        // Check that the indicator column has a type that can be converted to the NAReplaceTransform output type,
        // so that they can be concatenated.
        if (!input.Schema.TryGetColumnIndex(column.Source, out int inputCol))
        {
            throw h.Except("Column '{0}' does not exist", column.Source);
        }

        var replaceType = input.Schema[inputCol].Type;
        var replaceItemType = replaceType.GetItemType();
        if (!Data.Conversion.Conversions.Instance.TryGetStandardConversion(BooleanDataViewType.Instance, replaceItemType, out Delegate conv, out bool identity))
        {
            throw h.Except("Cannot concatenate indicator column of type '{0}' to input column of type '{1}'",
                BooleanDataViewType.Instance, replaceItemType);
        }

        // Find a temporary name for the NAReplaceTransform and NAIndicatorTransform output columns.
        var tmpIsMissingColName = tmpIsMissingColNames[i];
        var tmpReplacementColName = tmpReplaceColNames[i];

        // Add the missing-value indicator column, written to its temporary name.
        naIndicatorCols.Add(new MissingValueIndicatorTransformer.Column() { Name = tmpIsMissingColName, Source = column.Source });

        // Add a ConvertTransform column if necessary.
        if (!identity)
        {
            if (!replaceItemType.RawType.TryGetDataKind(out DataKind replaceItemTypeKind))
            {
                throw h.Except("Cannot get a DataKind for type '{0}'", replaceItemType.RawType);
            }

            // The conversion overwrites the temporary indicator column in place.
            naConvCols.Add(new TypeConvertingEstimator.ColumnInfo(tmpIsMissingColName, replaceItemTypeKind, tmpIsMissingColName));
        }

        // Add the NAReplaceTransform column.
        replaceCols.Add(new MissingValueReplacingEstimator.ColumnInfo(tmpReplacementColName, column.Source,
            (MissingValueReplacingEstimator.ColumnInfo.ReplacementMode)(column.Kind ?? options.ReplaceWith),
            column.ImputeBySlot ?? options.ImputeBySlot));

        // Add the ConcatTransform column.
        if (replaceType is VectorType)
        {
            concatCols.Add(new ColumnConcatenatingTransformer.TaggedColumn()
            {
                Name = column.Name,
                Source = new[]
                {
                    new KeyValuePair<string, string>(tmpReplacementColName, tmpReplacementColName),
                    new KeyValuePair<string, string>("IsMissing", tmpIsMissingColName)
                }
            });
        }
        else
        {
            concatCols.Add(new ColumnConcatenatingTransformer.TaggedColumn()
            {
                Name = column.Name,
                Source = new[]
                {
                    new KeyValuePair<string, string>(column.Source, tmpReplacementColName),
                    new KeyValuePair<string, string>(string.Format("IsMissing.{0}", column.Source), tmpIsMissingColName),
                }
            });
        }

        // Add the temp column to the list of columns to drop at the end.
        dropCols.Add(tmpIsMissingColName);
        dropCols.Add(tmpReplacementColName);
    }

    IDataTransform output = null;

    // Create the indicator columns.
    if (naIndicatorCols.Count > 0)
    {
        output = MissingValueIndicatorTransformer.Create(h, new MissingValueIndicatorTransformer.Options() { Columns = naIndicatorCols.ToArray() }, input);
    }

    // Convert the indicator columns to the correct type so that they can be concatenated to the NAReplace outputs.
    if (naConvCols.Count > 0)
    {
        h.AssertValue(output);
        //REVIEW: all this need to be converted to estimatorChain as soon as we done with dropcolumns.
        // Direct cast rather than 'as': a null result here would make the 'output ?? input'
        // below silently fall back to the raw input and drop the indicator/convert stages.
        output = (IDataTransform)new TypeConvertingTransformer(h, naConvCols.ToArray()).Transform(output);
    }

    // Create the NAReplace transform.
    output = MissingValueReplacingTransformer.Create(env, output ?? input, replaceCols.ToArray());

    // Concat the NAReplaceTransform output and the NAIndicatorTransform output.
    if (naIndicatorCols.Count > 0)
    {
        output = ColumnConcatenatingTransformer.Create(h, new ColumnConcatenatingTransformer.TaggedOptions() { Columns = concatCols.ToArray() }, output);
    }

    // Finally, drop the temporary indicator columns.
    if (dropCols.Count > 0)
    {
        // CreateDrop already returns IDataTransform, so no conversion is needed.
        output = ColumnSelectingTransformer.CreateDrop(h, output, dropCols.ToArray());
    }

    return output;
}
/// <summary>
/// Creates a data transform over <paramref name="input"/> from a
/// <see cref="ColumnSelectingTransformer"/> constructed with no keep list and
/// <paramref name="dropColumns"/> as its drop list.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Input data view whose schema the mapper is built against; must not be null.</param>
/// <param name="dropColumns">Names of the columns passed to the transformer as columns to drop.</param>
/// <returns>The data transform applied over <paramref name="input"/>.</returns>
public static IDataTransform CreateDrop(IHostEnvironment env, IDataView input, params string[] dropColumns)
{
    // input.Schema is dereferenced below; validate arguments at this public entry point
    // so callers get an argument exception instead of a NullReferenceException.
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(input, nameof(input));

    var transform = new ColumnSelectingTransformer(env, null, dropColumns);
    return new SelectColumnsDataTransform(env, transform, new Mapper(transform, input.Schema), input);
}
/// <summary>
/// Creates a data transform over <paramref name="input"/> from a
/// <see cref="ColumnSelectingTransformer"/> constructed with <paramref name="keepColumns"/> as its
/// keep list and no drop list.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Input data view whose schema the mapper is built against; must not be null.</param>
/// <param name="keepColumns">Names of the columns passed to the transformer as columns to keep.</param>
/// <param name="keepHidden">Forwarded to the transformer's keepHidden argument; defaults to false.</param>
/// <returns>The data transform applied over <paramref name="input"/>.</returns>
public static IDataTransform CreateKeep(IHostEnvironment env, IDataView input, string[] keepColumns, bool keepHidden = false)
{
    // input.Schema is dereferenced below; validate arguments at this public entry point
    // so callers get an argument exception instead of a NullReferenceException.
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(input, nameof(input));

    var transform = new ColumnSelectingTransformer(env, keepColumns, null, keepHidden);
    return new SelectColumnsDataTransform(env, transform, new Mapper(transform, input.Schema), input);
}