// Entry point for the NA replace transform.
// Creates a host named "NAReplace" from `env` and `input`, builds the NAReplaceTransform over
// `input.Data`, and returns it both as the output data view and wrapped in a TransformModel.
public static CommonOutputs.TransformOutput Replace(IHostEnvironment env, NAReplaceTransform.Arguments input)
{
    var host = EntryPointUtils.CheckArgsAndCreateHost(env, "NAReplace", input);
    var transform = NAReplaceTransform.Create(host, input, input.Data);

    var result = new CommonOutputs.TransformOutput();
    // The model is built from the same transform that is exposed as the output data.
    result.Model = new TransformModel(host, transform, input.Data);
    result.OutputData = transform;
    return result;
}
// Constructs a slot cursor over `cursor` for the column at index `iinfo` of `parent`.
//   parent - the owning transform; supplies the host for the base cursor and the per-column info.
//   iinfo  - index of the column; asserted to lie within parent.Infos.
//   cursor - the underlying slot cursor whose values are read.
//   type   - the vector type stored in _type and forwarded to CreateGetter.
public SlotCursor(NAReplaceTransform parent, int iinfo, ISlotCursor cursor, VectorType type)
    : base(parent.Host, cursor)
{
    Ch.Assert(0 <= iinfo && iinfo < parent.Infos.Length);
    Ch.AssertValue(cursor);
    Ch.AssertValue(type);

    _type = type;
    // CreateGetter requests its own source getter from the cursor, so no getter is fetched here.
    _getter = CreateGetter(parent, iinfo, cursor, type);
}
// Builds the getter that reads a slot from `cursor` and writes it to the destination buffer via
// parent.FillValues, using the replacement value(s) recorded for column `iinfo`.
//   parent - the owning transform; supplies the NA predicate, the replacement value(s) and FillValues.
//   iinfo  - index of the column within the parent's per-column arrays.
//   cursor - the slot cursor providing the source getter.
//   type   - the vector type; its ItemType selects the is-default predicate.
// Returns a getter that fills a VBuffer<T> for the cursor's current slot.
private ValueGetter<VBuffer<T>> CreateGetter(NAReplaceTransform parent, int iinfo, ISlotCursor cursor, VectorType type)
{
    // Scratch buffer captured by the returned delegate and refilled on every call.
    var src = default(VBuffer<T>);
    var getSrc = cursor.GetGetter<T>();
    var isNA = (RefPredicate<T>)parent._isNAs[iinfo];
    var isDefault = Conversions.Instance.GetIsDefaultPredicate<T>(type.ItemType);

    if (parent._repIsDefault[iinfo] == null)
    {
        // One replacement value for all slots.
        Ch.Assert(parent._repValues[iinfo] is T);
        T rep = (T)parent._repValues[iinfo];
        // The replacement is the same for every slot, so test it against default only once.
        bool repIsDefault = isDefault(ref rep);

        return (ref VBuffer<T> dst) =>
        {
            getSrc(ref src);
            parent.FillValues(ref src, ref dst, isNA, rep, repIsDefault);
        };
    }

    // Replacement values by slot.
    Ch.Assert(parent._repValues[iinfo] is T[]);
    // The replacement array.
    T[] repArray = (T[])parent._repValues[iinfo];

    return (ref VBuffer<T> dst) =>
    {
        getSrc(ref src);
        // The cursor position indexes into the replacement array.
        Ch.Check(0 <= Position && Position < repArray.Length);
        T slotRep = repArray[(int)Position];
        parent.FillValues(ref src, ref dst, isNA, slotRep, isDefault(ref slotRep));
    };
}
// Creates the typed SlotCursor<T> for column `iinfo` of `parent`, wrapping the given slot cursor.
// All four arguments are forwarded unchanged to the SlotCursor<T> constructor.
private ISlotCursor GetSlotCursorCore<T>(NAReplaceTransform parent, int iinfo, ISlotCursor cursor, VectorType type)
{
    var slotCursor = new SlotCursor<T>(parent, iinfo, cursor, type);
    return slotCursor;
}
// Builds the chain of transforms that replaces missing values and, where requested, concatenates
// a missing-value indicator to each output column.
//   env   - host environment; must not be null.
//   args  - transform arguments; args.Column must contain at least one column.
//   input - the input data view; must not be null.
// Returns the last transform in the chain. The stages, in order, are:
//   1. NAIndicatorTransform (only if some column asks for an indicator),
//   2. ConvertTransform (only if an indicator needs converting to the column's item type),
//   3. NAReplaceTransform (always),
//   4. ConcatTransform (only if indicators were created),
//   5. DropColumnsTransform for the temporary columns (only if any were created).
// Throws (via the host) if a source column does not exist or if the indicator type cannot be
// converted to the source column's item type.
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    // NOTE(review): the registration name "Categorical" looks like a copy-paste leftover from
    // another transform; left unchanged here - confirm the intended host name.
    var h = env.Register("Categorical");
    h.CheckValue(args, nameof(args));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));

    var replaceCols = new List<NAReplaceTransform.ColumnInfo>();
    var naIndicatorCols = new List<NAIndicatorTransform.Column>();
    var naConvCols = new List<ConvertTransform.Column>();
    var concatCols = new List<ConcatTransform.TaggedColumn>();
    var dropCols = new List<string>();
    var tmpIsMissingColNames = input.Schema.GetTempColumnNames(args.Column.Length, "IsMissing");
    var tmpReplaceColNames = input.Schema.GetTempColumnNames(args.Column.Length, "Replace");

    for (int i = 0; i < args.Column.Length; i++)
    {
        var column = args.Column[i];

        // Per-column settings fall back to the transform-level defaults.
        var replaceMode = (NAReplaceTransform.ColumnInfo.ReplacementMode)(column.Kind ?? args.ReplaceWith);
        var imputeBySlot = column.ImputeBySlot ?? args.ImputeBySlot;
        var addInd = column.ConcatIndicator ?? args.Concat;

        if (!addInd)
        {
            // No indicator requested: replace directly into the final output column.
            replaceCols.Add(new NAReplaceTransform.ColumnInfo(column.Source, column.Name, replaceMode, imputeBySlot));
            continue;
        }

        // Check that the indicator column has a type that can be converted to the NAReplaceTransform output type,
        // so that they can be concatenated.
        if (!input.Schema.TryGetColumnIndex(column.Source, out int inputCol))
        {
            throw h.Except("Column '{0}' does not exist", column.Source);
        }

        var replaceType = input.Schema.GetColumnType(inputCol);
        if (!Conversions.Instance.TryGetStandardConversion(BoolType.Instance, replaceType.ItemType, out Delegate conv, out bool identity))
        {
            throw h.Except("Cannot concatenate indicator column of type '{0}' to input column of type '{1}'",
                BoolType.Instance, replaceType.ItemType);
        }

        // Find a temporary name for the NAReplaceTransform and NAIndicatorTransform output columns.
        var tmpIsMissingColName = tmpIsMissingColNames[i];
        var tmpReplacementColName = tmpReplaceColNames[i];

        // Add an NAIndicatorTransform column.
        naIndicatorCols.Add(new NAIndicatorTransform.Column() { Name = tmpIsMissingColName, Source = column.Source });

        // Add a ConvertTransform column if necessary.
        if (!identity)
        {
            naConvCols.Add(new ConvertTransform.Column()
            {
                Name = tmpIsMissingColName,
                Source = tmpIsMissingColName,
                ResultType = replaceType.ItemType.RawKind
            });
        }

        // Add the NAReplaceTransform column.
        replaceCols.Add(new NAReplaceTransform.ColumnInfo(column.Source, tmpReplacementColName, replaceMode, imputeBySlot));

        // Add the ConcatTransform column.
        if (replaceType.IsVector)
        {
            concatCols.Add(new ConcatTransform.TaggedColumn()
            {
                Name = column.Name,
                Source = new[]
                {
                    new KeyValuePair<string, string>(tmpReplacementColName, tmpReplacementColName),
                    new KeyValuePair<string, string>("IsMissing", tmpIsMissingColName)
                }
            });
        }
        else
        {
            concatCols.Add(new ConcatTransform.TaggedColumn()
            {
                Name = column.Name,
                Source = new[]
                {
                    new KeyValuePair<string, string>(column.Source, tmpReplacementColName),
                    new KeyValuePair<string, string>(string.Format("IsMissing.{0}", column.Source), tmpIsMissingColName),
                }
            });
        }

        // Add the temp column to the list of columns to drop at the end.
        dropCols.Add(tmpIsMissingColName);
        dropCols.Add(tmpReplacementColName);
    }

    IDataTransform output = null;

    // Create the indicator columns.
    if (naIndicatorCols.Count > 0)
    {
        output = NAIndicatorTransform.Create(h, new NAIndicatorTransform.Arguments() { Column = naIndicatorCols.ToArray() }, input);
    }

    // Convert the indicator columns to the correct type so that they can be concatenated to the NAReplace outputs.
    if (naConvCols.Count > 0)
    {
        // Conversion columns are only ever added together with an indicator column.
        h.AssertValue(output);
        output = new ConvertTransform(h, new ConvertTransform.Arguments() { Column = naConvCols.ToArray() }, output);
    }

    // Create the NAReplace transform. Use the registered host, like every other stage of this chain.
    output = NAReplaceTransform.Create(h, output ?? input, replaceCols.ToArray());

    // Concat the NAReplaceTransform output and the NAIndicatorTransform output.
    if (naIndicatorCols.Count > 0)
    {
        output = ConcatTransform.Create(h, new ConcatTransform.TaggedArguments() { Column = concatCols.ToArray() }, output);
    }

    // Finally, drop the temporary indicator columns.
    if (dropCols.Count > 0)
    {
        output = new DropColumnsTransform(h, new DropColumnsTransform.Arguments() { Column = dropCols.ToArray() }, output);
    }

    return output;
}