/// <summary>
/// Determines the output type of the column at index <paramref name="iinfo"/>, whether that column
/// is suppressed, and its categorical feature indices. While doing so it registers the column's
/// output metadata: the IsNormalized and KeyValues kinds are handed to the metadata builder, and
/// getters for slot names and categorical slot ranges are added when applicable.
/// The slotsMin and slotsMax arrays should be sorted and the intervals should not overlap.
/// </summary>
/// <param name="input">The input schema</param>
/// <param name="slotsMin">The beginning indices of the ranges of slots to be dropped</param>
/// <param name="slotsMax">The end indices of the ranges of slots to be dropped</param>
/// <param name="iinfo">The column index in Infos</param>
/// <param name="slotDropper">The slot dropper built for this column</param>
/// <param name="suppressed">Whether the column is suppressed (all slots dropped)</param>
/// <param name="type">The output column type</param>
/// <param name="categoricalRanges">Categorical feature indices of the source column; null when the
/// column is suppressed</param>
private void ComputeType(Schema input, int[] slotsMin, int[] slotsMax, int iinfo,
    SlotDropper slotDropper, out bool suppressed, out ColumnType type, out int[] categoricalRanges)
{
    Contracts.AssertValue(slotDropper);
    Contracts.AssertValue(input);
    Contracts.AssertNonEmpty(slotsMin);
    Contracts.AssertNonEmpty(slotsMax);
    Contracts.Assert(slotsMin.Length == slotsMax.Length);
    Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);

    categoricalRanges = null;

    var info = Infos[iinfo];
    var typeSrc = info.TypeSrc;

    // Register for metadata. Propagate the IsNormalized metadata.
    using (var bldr = Metadata.BuildMetadata(iinfo, input, info.Source,
        MetadataUtils.Kinds.IsNormalized, MetadataUtils.Kinds.KeyValues))
    {
        if (typeSrc.IsVector && typeSrc.IsKnownSizeVector)
        {
            // Known-size vector: the output is a vector of the remaining slots. The declared size is
            // never smaller than 1; a column that keeps no slot at all is reported as suppressed.
            var keptSlots = slotDropper.DstLength;
            var srcHasSlotNames = input.HasSlotNames(info.Source, typeSrc.VectorSize);

            type = new VectorType(typeSrc.ItemType.AsPrimitive, Math.Max(keptSlots, 1));
            suppressed = keptSlots == 0;

            if (srcHasSlotNames && keptSlots > 0)
            {
                // Add slot name metadata.
                bldr.AddGetter<VBuffer<ReadOnlyMemory<char>>>(
                    MetadataUtils.Kinds.SlotNames,
                    new VectorType(TextType.Instance, keptSlots),
                    GetSlotNames);
            }
        }
        else
        {
            // Scalars and variable-size vectors keep the source type. Only a scalar can be suppressed
            // here, and only when the first dropped range starts at slot 0.
            type = typeSrc;
            suppressed = !typeSrc.IsVector && slotsMin.Length > 0 && slotsMin[0] == 0;
        }

        // NOTE(review): this lookup goes through Source.Schema rather than the 'input' parameter;
        // presumably both refer to the same schema - confirm against the base class.
        if (!suppressed
            && MetadataUtils.TryGetCategoricalFeatureIndices(Source.Schema, info.Source, out categoricalRanges))
        {
            // REVIEW: cache the computed ranges as opposed to calculating them again in the getter.
            var remappedRanges = default(VBuffer<int>);
            GetCategoricalSlotRangesCore(iinfo, slotDropper.SlotsMin, slotDropper.SlotsMax,
                categoricalRanges, ref remappedRanges);

            if (remappedRanges.Length > 0)
            {
                // Ranges are stored as (min, max) pairs, hence the even length.
                Contracts.Assert(remappedRanges.Length % 2 == 0);
                bldr.AddGetter<VBuffer<int>>(
                    MetadataUtils.Kinds.CategoricalSlotRanges,
                    MetadataUtils.GetCategoricalType(remappedRanges.Length / 2),
                    GetCategoricalSlotRanges);
            }
        }
    }
}
/// <summary>
/// Deserializing constructor: rebuilds the per-column drop information from the model context.
/// </summary>
private DropSlotsTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, null)
{
    Host.AssertValue(ctx);

    // *** Binary format ***
    // <base>
    // for each added column
    //   int[]: slotsMin
    //   int[]: slotsMax (no count)
    Host.AssertNonEmpty(Infos);

    _exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
    {
        int[] mins = ctx.Reader.ReadIntArray();
        Host.CheckDecode(Utils.Size(mins) > 0);
        // slotsMax is stored without its own count; it has as many entries as slotsMin.
        int[] maxes = ctx.Reader.ReadIntArray(mins.Length);

        SlotDropper dropper = new SlotDropper(Infos[iinfo].TypeSrc.ValueCount, mins, maxes);

        bool isSuppressed;
        ColumnType dstType;
        int[] catRanges;
        ComputeType(input.Schema, mins, maxes, iinfo, dropper, out isSuppressed, out dstType, out catRanges);

        _exes[iinfo] = new ColInfoEx(dropper, isSuppressed, dstType, catRanges);

        // The range check runs only after the entry has been stored
        // (presumably AreRangesValid inspects _exes[iinfo] - confirm).
        Host.CheckDecode(AreRangesValid(iinfo));
    }

    Metadata.Seal();
}
/// <summary>
/// Bundles the per-column information of the transform.
/// </summary>
/// <param name="slotDropper">The slot dropper of the column; asserted to be a valid value</param>
/// <param name="suppressed">Whether the column is suppressed</param>
/// <param name="typeDst">The output type of the column</param>
/// <param name="categoricalRanges">The categorical ranges of the column</param>
public ColInfoEx(SlotDropper slotDropper, bool suppressed, ColumnType typeDst, int[] categoricalRanges)
{
    Contracts.AssertValue(slotDropper);

    TypeDst = typeDst;
    Suppressed = suppressed;
    CategoricalRanges = categoricalRanges;
    SlotDropper = slotDropper;
}
/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// Builds, for every requested column, the slot dropper and the derived output information.
/// </summary>
public DropSlotsTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column, input, null)
{
    Host.CheckNonEmpty(args.Column, nameof(args.Column));

    _exes = new ColInfoEx[Infos.Length];
    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
    {
        // Extract the slot ranges to drop from the column's arguments.
        int[] mins;
        int[] maxes;
        GetSlotsMinMax(args.Column[iinfo], out mins, out maxes);

        SlotDropper dropper = new SlotDropper(Infos[iinfo].TypeSrc.ValueCount, mins, maxes);

        bool isSuppressed;
        ColumnType dstType;
        int[] catRanges;
        ComputeType(Source.Schema, mins, maxes, iinfo, dropper, out isSuppressed, out dstType, out catRanges);

        _exes[iinfo] = new ColInfoEx(dropper, isSuppressed, dstType, catRanges);
    }

    Metadata.Seal();
}