/// <summary>
/// Determines the output column type, whether the column is suppressed, and the categorical
/// feature indices of the source column, registering the slot-name and categorical-slot-range
/// metadata getters for column <paramref name="iinfo"/> along the way.
/// The <paramref name="slotsMin"/> and <paramref name="slotsMax"/> arrays should be sorted and
/// the intervals they describe should not overlap.
/// </summary>
/// <param name="input">The input schema.</param>
/// <param name="slotsMin">The beginning indices of the ranges of slots to be dropped.</param>
/// <param name="slotsMax">The end indices of the ranges of slots to be dropped.</param>
/// <param name="iinfo">The column index in Infos.</param>
/// <param name="slotDropper">The slot dropper for the column; supplies the destination length
/// and the slot ranges used when computing the categorical slot ranges.</param>
/// <param name="suppressed">Whether the column is suppressed. For a non-vector column this is
/// true when the first dropped range starts at slot 0; for a vector of unknown size it is
/// always false; for a known-size vector it is true when the destination length is 0.</param>
/// <param name="type">The output column type. Non-vector and unknown-size vector columns keep
/// the source type; known-size vectors get a vector of the source item type whose length is the
/// destination length, but never less than 1.</param>
/// <param name="categoricalRanges">The categorical feature indices retrieved for the source
/// column. Left as null when the column is suppressed.</param>
private void ComputeType(Schema input, int[] slotsMin, int[] slotsMax, int iinfo,
    SlotDropper slotDropper, out bool suppressed, out ColumnType type, out int[] categoricalRanges)
{
    Contracts.AssertValue(slotDropper);
    Contracts.AssertValue(input);
    Contracts.AssertNonEmpty(slotsMin);
    Contracts.AssertNonEmpty(slotsMax);
    Contracts.Assert(slotsMin.Length == slotsMax.Length);
    Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);

    categoricalRanges = null;

    var info = Infos[iinfo];

    // Register metadata for this column. The IsNormalized and KeyValues kinds are handed to the
    // builder so that they are propagated from the source column.
    using (var builder = Metadata.BuildMetadata(iinfo, input, info.Source,
        MetadataUtils.Kinds.IsNormalized, MetadataUtils.Kinds.KeyValues))
    {
        var srcType = info.TypeSrc;

        if (!srcType.IsVector)
        {
            // Scalar column: it disappears only if the very first slot is among those dropped.
            type = srcType;
            suppressed = slotsMin.Length > 0 && slotsMin[0] == 0;
        }
        else if (srcType.IsKnownSizeVector)
        {
            Host.Assert(srcType.IsKnownSizeVector);

            var remaining = slotDropper.DstLength;
            var slotNamesPresent = input.HasSlotNames(info.Source, srcType.VectorSize);

            // The declared length never goes below 1, even when every slot is dropped;
            // that situation is reported through 'suppressed' instead.
            type = new VectorType(srcType.ItemType.AsPrimitive, Math.Max(remaining, 1));
            suppressed = remaining == 0;

            if (remaining > 0 && slotNamesPresent)
            {
                // Expose slot names for the slots that survive.
                builder.AddGetter<VBuffer<ReadOnlyMemory<char>>>(
                    MetadataUtils.Kinds.SlotNames,
                    new VectorType(TextType.Instance, remaining),
                    GetSlotNames);
            }
        }
        else
        {
            // Vector of unknown size: the type is unchanged and the column is never suppressed.
            type = srcType;
            suppressed = false;
        }

        if (!suppressed
            && MetadataUtils.TryGetCategoricalFeatureIndices(Source.Schema, info.Source, out categoricalRanges))
        {
            var ranges = default(VBuffer<int>);
            GetCategoricalSlotRangesCore(iinfo, slotDropper.SlotsMin, slotDropper.SlotsMax, categoricalRanges, ref ranges);

            // REVIEW: cache the computed ranges as opposed to calculating them again.
            if (ranges.Length > 0)
            {
                // The buffer holds (start, end) pairs, so its length must be even.
                Contracts.Assert(ranges.Length % 2 == 0);

                builder.AddGetter<VBuffer<int>>(
                    MetadataUtils.Kinds.CategoricalSlotRanges,
                    MetadataUtils.GetCategoricalType(ranges.Length / 2),
                    GetCategoricalSlotRanges);
            }
        }
    }
}
private void CacheTypes(out ColumnType[] types, out ColumnType[] typesSlotNames, out bool[] echoSrc, out bool[] isNormalized, out ColumnType[] typesCategoricals) { Contracts.AssertNonEmpty(Infos); echoSrc = new bool[Infos.Length]; isNormalized = new bool[Infos.Length]; types = new ColumnType[Infos.Length]; typesSlotNames = new ColumnType[Infos.Length]; typesCategoricals = new ColumnType[Infos.Length]; for (int i = 0; i < Infos.Length; i++) { var info = Infos[i]; // REVIEW: Add support for implicit conversions? if (info.SrcTypes.Length == 1 && info.SrcTypes[0].IsVector) { // All meta-data is passed through in this case, so don't need the slot names type. echoSrc[i] = true; DvBool b = DvBool.False; isNormalized[i] = info.SrcTypes[0].ItemType.IsNumber && Input.TryGetMetadata(BoolType.Instance, MetadataUtils.Kinds.IsNormalized, info.SrcIndices[0], ref b) && b.IsTrue; types[i] = info.SrcTypes[0]; continue; } // The single scalar and multiple vector case. isNormalized[i] = info.SrcTypes[0].ItemType.IsNumber; if (isNormalized[i]) { foreach (var srcCol in info.SrcIndices) { DvBool b = DvBool.False; if (!Input.TryGetMetadata(BoolType.Instance, MetadataUtils.Kinds.IsNormalized, srcCol, ref b) || !b.IsTrue) { isNormalized[i] = false; break; } } } types[i] = new VectorType(info.SrcTypes[0].ItemType.AsPrimitive, info.SrcSize); if (info.SrcSize == 0) { continue; } bool hasCategoricals = false; int catCount = 0; for (int j = 0; j < info.SrcTypes.Length; j++) { if (info.SrcTypes[j].ValueCount == 0) { hasCategoricals = false; break; } if (MetadataUtils.TryGetCategoricalFeatureIndices(Input, info.SrcIndices[j], out int[] typeCat))