/// <summary>
/// Wires a <see cref="SelectColumnsTransform"/> and its <see cref="Mapper"/> onto an input data view.
/// </summary>
/// <param name="env">Host environment; passed through Contracts.CheckRef before a host is registered on it.</param>
/// <param name="transform">The column-selection transform this data transform wraps.</param>
/// <param name="mapper">Mapper stored alongside the transform.</param>
/// <param name="input">Data view exposed as <c>Source</c>.</param>
public SelectColumnsDataTransform(IHostEnvironment env, SelectColumnsTransform transform, Mapper mapper, IDataView input)
 {
     // Check the environment first, then register a host named after this type.
     var checkedEnv = Contracts.CheckRef(env, nameof(env));
     _host = checkedEnv.Register(nameof(SelectColumnsDataTransform));

     // The remaining arguments are stored as-is.
     _transform = transform;
     _mapper = mapper;
     Source = input;
 }
        // Factory method for SignatureDataTransform.
        /// <summary>
        /// Builds a <see cref="SelectColumnsDataTransform"/> over <paramref name="input"/> using the
        /// keep/drop settings carried by <paramref name="args"/>.
        /// </summary>
        /// <param name="env">Host environment; validated with Contracts.CheckValue.</param>
        /// <param name="args">Supplies KeepColumns, DropColumns, KeepHidden and IgnoreMissing.</param>
        /// <param name="input">Data view whose schema the mapper is built against.</param>
        /// <returns>The data transform wrapping <paramref name="input"/>.</returns>
        private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(args, nameof(args));
            // input.Schema is dereferenced below, so validate it the same way as the other
            // arguments instead of letting a null surface as a NullReferenceException.
            env.CheckValue(input, nameof(input));

            var transform = new SelectColumnsTransform(env, args.KeepColumns, args.DropColumns,
                                                       args.KeepHidden, args.IgnoreMissing);
            var mapper = new Mapper(transform, input.Schema);

            return new SelectColumnsDataTransform(env, transform, mapper, input);
        }
            /// <summary>
            /// Creates a mapper for <paramref name="transform"/> against <paramref name="inputSchema"/>:
            /// registers a child host, converts the input schema, builds the output-to-input
            /// column map and then generates the output schema from that map.
            /// </summary>
            /// <param name="transform">Transform providing the host and the SelectColumns / KeepColumns / KeepHidden settings.</param>
            /// <param name="inputSchema">Schema of the incoming data.</param>
            public Mapper(SelectColumnsTransform transform, ISchema inputSchema)
            {
                // Child host registered under the owning transform's host.
                var parentHost = transform._host;
                _host = parentHost.Register(nameof(Mapper));

                _inputSchema = Runtime.Data.Schema.Create(inputSchema);

                // Read the selection settings in the same order they are passed below.
                var selectedColumns = transform.SelectColumns;
                var keepColumns = transform.KeepColumns;
                var keepHidden = transform.KeepHidden;
                _outputToInputMap = BuildOutputToInputMap(selectedColumns, keepColumns, keepHidden, _inputSchema);

                // The output schema is derived from the map that was just built.
                Schema = GenerateOutputSchema(_outputToInputMap, _inputSchema);
            }
        /// <summary>
        /// Creates a data transform over <paramref name="input"/> configured with
        /// <paramref name="dropColumns"/> as the drop list and no keep list.
        /// </summary>
        /// <param name="env">Host environment; validated with Contracts.CheckValue.</param>
        /// <param name="input">Data view whose schema the mapper is built against.</param>
        /// <param name="dropColumns">Names of the columns to drop.</param>
        /// <returns>The data transform wrapping <paramref name="input"/>.</returns>
        public static IDataTransform CreateDrop(IHostEnvironment env, IDataView input, params string[] dropColumns)
        {
            // Validate up front, consistent with the Create factories: input.Schema is
            // dereferenced below and would otherwise fail with a NullReferenceException.
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));

            var transform = new SelectColumnsTransform(env, null, dropColumns);
            var mapper = new Mapper(transform, input.Schema);

            return new SelectColumnsDataTransform(env, transform, mapper, input);
        }
        /// <summary>
        /// Creates a data transform over <paramref name="input"/> configured with
        /// <paramref name="keepColumns"/> as the keep list and no drop list.
        /// </summary>
        /// <param name="env">Host environment; validated with Contracts.CheckValue.</param>
        /// <param name="input">Data view whose schema the mapper is built against.</param>
        /// <param name="keepColumns">Names of the columns to keep.</param>
        /// <param name="keepHidden">Forwarded to the <see cref="SelectColumnsTransform"/> constructor; defaults to false.</param>
        /// <returns>The data transform wrapping <paramref name="input"/>.</returns>
        public static IDataTransform CreateKeep(IHostEnvironment env, IDataView input, string[] keepColumns, bool keepHidden = false)
        {
            // Validate up front, consistent with the Create factories: input.Schema is
            // dereferenced below and would otherwise fail with a NullReferenceException.
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));

            var transform = new SelectColumnsTransform(env, keepColumns, null, keepHidden);
            var mapper = new Mapper(transform, input.Schema);

            return new SelectColumnsDataTransform(env, transform, mapper, input);
        }
// Example #6
0
        /// <summary>
        /// Composes the transform chain for missing-value handling over <paramref name="input"/>.
        /// Each requested column gets an NAReplaceTransform entry. Columns that also ask for an
        /// indicator get an NAIndicatorTransform entry (converted to the input item type when the
        /// conversion is not an identity), and the two outputs are concatenated under the
        /// requested name. Temporary columns are dropped at the end.
        /// </summary>
        /// <param name="env">Host environment; validated with Contracts.CheckValue.</param>
        /// <param name="args">Transform arguments; <c>args.Column</c> must contain at least one entry.</param>
        /// <param name="input">Data view the chain is built on.</param>
        /// <returns>The last transform in the composed chain.</returns>
        /// <remarks>
        /// Throws (via <c>h.Except</c>) when a source column does not exist in the input schema or
        /// when no standard conversion from bool to the source column's item type is available.
        /// </remarks>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            // NOTE(review): the host is registered as "Categorical"; this looks inherited from
            // another transform. Left unchanged because the name may appear in logs; confirm.
            var h = env.Register("Categorical");

            h.CheckValue(args, nameof(args));
            h.CheckValue(input, nameof(input));
            h.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));

            var replaceCols = new List<NAReplaceTransform.ColumnInfo>();
            var naIndicatorCols = new List<NAIndicatorTransform.Column>();
            var naConvCols = new List<ConvertingTransform.ColumnInfo>();
            var concatCols = new List<ConcatTransform.TaggedColumn>();
            var dropCols = new List<string>();
            // One temporary name per requested column is reserved for each intermediate output.
            var tmpIsMissingColNames = input.Schema.GetTempColumnNames(args.Column.Length, "IsMissing");
            var tmpReplaceColNames = input.Schema.GetTempColumnNames(args.Column.Length, "Replace");

            for (int i = 0; i < args.Column.Length; i++)
            {
                var column = args.Column[i];

                // Per-column settings override the transform-wide defaults.
                var replacementMode = (NAReplaceTransform.ColumnInfo.ReplacementMode)(column.Kind ?? args.ReplaceWith);
                var imputeBySlot = column.ImputeBySlot ?? args.ImputeBySlot;

                var addInd = column.ConcatIndicator ?? args.Concat;
                if (!addInd)
                {
                    // No indicator requested: replace straight into the final column name.
                    replaceCols.Add(new NAReplaceTransform.ColumnInfo(column.Source, column.Name, replacementMode, imputeBySlot));
                    continue;
                }

                // Check that the indicator column has a type that can be converted to the NAReplaceTransform output type,
                // so that they can be concatenated.
                if (!input.Schema.TryGetColumnIndex(column.Source, out int inputCol))
                {
                    throw h.Except("Column '{0}' does not exist", column.Source);
                }
                var replaceType = input.Schema.GetColumnType(inputCol);
                // Only the existence of a conversion and whether it is an identity matter here;
                // the delegate itself is not used, hence the typed discard.
                if (!Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion(BoolType.Instance, replaceType.ItemType, out Delegate _, out bool identity))
                {
                    throw h.Except("Cannot concatenate indicator column of type '{0}' to input column of type '{1}'",
                                   BoolType.Instance, replaceType.ItemType);
                }

                // Temporary names for the NAReplaceTransform and NAIndicatorTransform output columns.
                var tmpIsMissingColName = tmpIsMissingColNames[i];
                var tmpReplacementColName = tmpReplaceColNames[i];

                // Add an NAIndicatorTransform column.
                naIndicatorCols.Add(new NAIndicatorTransform.Column()
                {
                    Name = tmpIsMissingColName,
                    Source = column.Source
                });

                // Add a ConvertingTransform column if necessary (converted in place, same name).
                if (!identity)
                {
                    naConvCols.Add(new ConvertingTransform.ColumnInfo(tmpIsMissingColName, tmpIsMissingColName, replaceType.ItemType.RawKind));
                }

                // Add the NAReplaceTransform column.
                replaceCols.Add(new NAReplaceTransform.ColumnInfo(column.Source, tmpReplacementColName, replacementMode, imputeBySlot));

                // Add the ConcatTransform column. Vector and non-vector inputs differ only in the
                // keys paired with the two source columns.
                if (replaceType.IsVector)
                {
                    concatCols.Add(new ConcatTransform.TaggedColumn()
                    {
                        Name = column.Name,
                        Source = new[]
                        {
                            new KeyValuePair<string, string>(tmpReplacementColName, tmpReplacementColName),
                            new KeyValuePair<string, string>("IsMissing", tmpIsMissingColName)
                        }
                    });
                }
                else
                {
                    concatCols.Add(new ConcatTransform.TaggedColumn()
                    {
                        Name = column.Name,
                        Source = new[]
                        {
                            new KeyValuePair<string, string>(column.Source, tmpReplacementColName),
                            new KeyValuePair<string, string>($"IsMissing.{column.Source}", tmpIsMissingColName),
                        }
                    });
                }

                // Add the temp columns to the list of columns to drop at the end.
                dropCols.Add(tmpIsMissingColName);
                dropCols.Add(tmpReplacementColName);
            }

            IDataTransform output = null;

            // Create the indicator columns.
            if (naIndicatorCols.Count > 0)
            {
                output = NAIndicatorTransform.Create(h, new NAIndicatorTransform.Arguments()
                {
                    Column = naIndicatorCols.ToArray()
                }, input);
            }

            // Convert the indicator columns to the correct type so that they can be concatenated to the NAReplace outputs.
            if (naConvCols.Count > 0)
            {
                h.AssertValue(output);
                //REVIEW: all of this needs to be converted to an estimator chain once drop-columns is done.
                // Direct cast rather than 'as': a failed 'as' would leave output null, and the
                // 'output ?? input' below would then silently discard the indicator/convert steps.
                output = (IDataTransform)new ConvertingTransform(h, naConvCols.ToArray()).Transform(output);
            }

            // Create the NAReplace transform, on top of the indicator chain when there is one.
            // NOTE(review): this call passes env while the other steps pass h; left as in the original.
            output = NAReplaceTransform.Create(env, output ?? input, replaceCols.ToArray());

            // Concat the NAReplaceTransform output and the NAIndicatorTransform output.
            if (naIndicatorCols.Count > 0)
            {
                output = ConcatTransform.Create(h, new ConcatTransform.TaggedArguments()
                {
                    Column = concatCols.ToArray()
                }, output);
            }

            // Finally, drop the temporary indicator and replacement columns.
            if (dropCols.Count > 0)
            {
                output = SelectColumnsTransform.CreateDrop(h, output, dropCols.ToArray());
            }

            return output;
        }