コード例 #1
0
        /// <summary>
        /// Works out, for the column at <paramref name="iinfo"/>, the output column type, whether the column
        /// is suppressed, and its categorical feature indices. While doing so it registers the column's
        /// metadata: the IsNormalized and KeyValues kinds are handed to the metadata builder, and getters for
        /// slot names and categorical slot ranges are added when applicable.
        /// The slotsMin and slotsMax arrays are expected to be sorted, with non-overlapping intervals.
        /// </summary>
        /// <param name="input">The input schema.</param>
        /// <param name="slotsMin">Start indices of the slot ranges to drop.</param>
        /// <param name="slotsMax">End indices of the slot ranges to drop.</param>
        /// <param name="iinfo">Index of the column in Infos.</param>
        /// <param name="slotDropper">The slot dropper for this column; supplies the destination length
        /// and the ranges used when computing categorical slot ranges.</param>
        /// <param name="suppressed">Set to whether the column is suppressed (all slots dropped).</param>
        /// <param name="type">Set to the output column type.</param>
        /// <param name="categoricalRanges">Set to the categorical feature indices of the source column,
        /// or null when the column is suppressed or none could be retrieved.</param>
        private void ComputeType(Schema input, int[] slotsMin, int[] slotsMax, int iinfo,
                                 SlotDropper slotDropper, out bool suppressed, out ColumnType type, out int[] categoricalRanges)
        {
            Contracts.AssertValue(slotDropper);
            Contracts.AssertValue(input);
            Contracts.AssertNonEmpty(slotsMin);
            Contracts.AssertNonEmpty(slotsMax);
            Contracts.Assert(slotsMin.Length == slotsMax.Length);
            Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);

            categoricalRanges = null;

            var colInfo = Infos[iinfo];

            // Register for metadata, passing along the IsNormalized and KeyValues kinds.
            using (var builder = Metadata.BuildMetadata(iinfo, input, colInfo.Source,
                MetadataUtils.Kinds.IsNormalized, MetadataUtils.Kinds.KeyValues))
            {
                var srcType = colInfo.TypeSrc;

                if (srcType.IsVector && srcType.IsKnownSizeVector)
                {
                    // Known-size vector: the output length comes from the slot dropper.
                    Host.Assert(srcType.IsKnownSizeVector);
                    var keptLength = slotDropper.DstLength;
                    var namesPresent = input.HasSlotNames(colInfo.Source, colInfo.TypeSrc.VectorSize);

                    // A fully dropped vector still gets a type of length one, but is flagged as suppressed.
                    type = new VectorType(srcType.ItemType.AsPrimitive, Math.Max(keptLength, 1));
                    suppressed = keptLength == 0;

                    if (namesPresent && keptLength > 0)
                    {
                        // Add slot name metadata.
                        builder.AddGetter<VBuffer<ReadOnlyMemory<char>>>(
                            MetadataUtils.Kinds.SlotNames,
                            new VectorType(TextType.Instance, keptLength),
                            GetSlotNames);
                    }
                }
                else
                {
                    // Non-vector and variable-size vector columns keep their source type.
                    // Only a non-vector column can be suppressed here, and only when the first range starts at slot 0.
                    type = srcType;
                    suppressed = !srcType.IsVector && slotsMin.Length > 0 && slotsMin[0] == 0;
                }

                if (!suppressed &&
                    MetadataUtils.TryGetCategoricalFeatureIndices(Source.Schema, colInfo.Source, out categoricalRanges))
                {
                    var ranges = default(VBuffer<int>);
                    GetCategoricalSlotRangesCore(iinfo, slotDropper.SlotsMin, slotDropper.SlotsMax, categoricalRanges, ref ranges);

                    // REVIEW: cache the computed ranges as opposed to calculating them again.
                    if (ranges.Length > 0)
                    {
                        // Ranges are stored as (min, max) pairs, so the length must be even.
                        Contracts.Assert(ranges.Length % 2 == 0);

                        builder.AddGetter<VBuffer<int>>(
                            MetadataUtils.Kinds.CategoricalSlotRanges,
                            MetadataUtils.GetCategoricalType(ranges.Length / 2),
                            GetCategoricalSlotRanges);
                    }
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Model-loading constructor. After the base class has read its part of the model, reads the
        /// slotsMin/slotsMax arrays of every column from <paramref name="ctx"/> and rebuilds the
        /// per-column information from them.
        /// </summary>
        /// <param name="host">The host passed on to the base constructor.</param>
        /// <param name="ctx">The model load context to read from.</param>
        /// <param name="input">The input data view; its schema is used to compute the column types.</param>
        private DropSlotsTransform(IHost host, ModelLoadContext ctx, IDataView input)
            : base(host, ctx, input, null)
        {
            Host.AssertValue(ctx);

            // *** Binary format ***
            // <base>
            // for each added column
            //   int[]: slotsMin
            //   int[]: slotsMax (no count)
            Host.AssertNonEmpty(Infos);

            var count = Infos.Length;
            _exes = new ColInfoEx[count];

            for (int iinfo = 0; iinfo < count; iinfo++)
            {
                // slotsMin is read together with its count; slotsMax reuses that count.
                int[] mins = ctx.Reader.ReadIntArray();
                Host.CheckDecode(Utils.Size(mins) > 0);
                int[] maxes = ctx.Reader.ReadIntArray(mins.Length);

                var dropper = new SlotDropper(Infos[iinfo].TypeSrc.ValueCount, mins, maxes);

                bool isSuppressed;
                ColumnType dstType;
                int[] catRanges;
                ComputeType(input.Schema, mins, maxes, iinfo, dropper, out isSuppressed, out dstType, out catRanges);

                _exes[iinfo] = new ColInfoEx(dropper, isSuppressed, dstType, catRanges);

                // The ranges are validated only after the column entry has been stored.
                Host.CheckDecode(AreRangesValid(iinfo));
            }

            Metadata.Seal();
        }
コード例 #3
0
 /// <summary>
 /// Stores the per-column results: the slot dropper, the suppressed flag, the destination type
 /// and the categorical ranges.
 /// </summary>
 /// <param name="slotDropper">The slot dropper for the column; asserted to be a valid value.</param>
 /// <param name="suppressed">Whether the column is suppressed.</param>
 /// <param name="typeDst">The destination column type.</param>
 /// <param name="categoricalRanges">The categorical ranges for the column; stored as given.</param>
 public ColInfoEx(SlotDropper slotDropper, bool suppressed, ColumnType typeDst, int[] categoricalRanges)
 {
     Contracts.AssertValue(slotDropper);

     TypeDst = typeDst;
     Suppressed = suppressed;
     CategoricalRanges = categoricalRanges;
     SlotDropper = slotDropper;
 }
コード例 #4
0
        /// <summary>
        /// Public constructor corresponding to SignatureDataTransform. Builds the per-column
        /// information from the column specifications in <paramref name="args"/>.
        /// </summary>
        /// <param name="env">The host environment; checked for null.</param>
        /// <param name="args">The transform arguments; its Column array is checked to be non-empty.</param>
        /// <param name="input">The input data view passed on to the base constructor.</param>
        public DropSlotsTransform(IHostEnvironment env, Arguments args, IDataView input)
            : base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column, input, null)
        {
            Host.CheckNonEmpty(args.Column, nameof(args.Column));

            var count = Infos.Length;
            _exes = new ColInfoEx[count];

            for (int iinfo = 0; iinfo < count; iinfo++)
            {
                // Obtain the drop ranges for this column from its specification.
                int[] mins;
                int[] maxes;
                GetSlotsMinMax(args.Column[iinfo], out mins, out maxes);

                var dropper = new SlotDropper(Infos[iinfo].TypeSrc.ValueCount, mins, maxes);

                bool isSuppressed;
                ColumnType dstType;
                int[] catRanges;
                ComputeType(Source.Schema, mins, maxes, iinfo, dropper, out isSuppressed, out dstType, out catRanges);

                _exes[iinfo] = new ColInfoEx(dropper, isSuppressed, dstType, catRanges);
            }

            Metadata.Seal();
        }