コード例 #1
0
        /// <summary>
        /// Works out, for the column at <paramref name="iinfo"/>, the output column type, whether the
        /// column is suppressed, and the categorical feature indices of its source column. While doing
        /// so it registers the slot-name and categorical-slot-range metadata getters on the column's
        /// metadata builder.
        /// The slotsMin and slotsMax arrays should be sorted and the intervals should not overlap.
        /// </summary>
        /// <param name="input">The input schema</param>
        /// <param name="slotsMin">The beginning indices of the ranges of slots to be dropped</param>
        /// <param name="slotsMax">The end indices of the ranges of slots to be dropped</param>
        /// <param name="iinfo">The column index in Infos</param>
        /// <param name="slotDropper">The slot dropper; supplies the destination length and the slot ranges</param>
        /// <param name="suppressed">Set to true when the column is suppressed (all slots dropped)</param>
        /// <param name="type">Receives the output column type</param>
        /// <param name="categoricalRanges">Receives the categorical feature indices, or null when none are found</param>
        private void ComputeType(Schema input, int[] slotsMin, int[] slotsMax, int iinfo,
                                 SlotDropper slotDropper, out bool suppressed, out ColumnType type, out int[] categoricalRanges)
        {
            Contracts.AssertValue(slotDropper);
            Contracts.AssertValue(input);
            Contracts.AssertNonEmpty(slotsMin);
            Contracts.AssertNonEmpty(slotsMax);
            Contracts.Assert(slotsMin.Length == slotsMax.Length);
            Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);

            // Stays null unless categorical feature indices are found below.
            categoricalRanges = null;

            var info = Infos[iinfo];

            // Register for metadata; the IsNormalized and KeyValues kinds are handed to the builder
            // together with the source column index.
            using (var builder = Metadata.BuildMetadata(iinfo, input, info.Source,
                                                        MetadataUtils.Kinds.IsNormalized, MetadataUtils.Kinds.KeyValues))
            {
                var srcType = info.TypeSrc;
                if (srcType.IsVector && srcType.IsKnownSizeVector)
                {
                    Host.Assert(srcType.IsKnownSizeVector);

                    int keptLength = slotDropper.DstLength;
                    bool srcHasSlotNames = input.HasSlotNames(info.Source, info.TypeSrc.VectorSize);

                    // The vector type is given a size of at least one, even when every slot is dropped.
                    suppressed = keptLength == 0;
                    type = new VectorType(srcType.ItemType.AsPrimitive, Math.Max(keptLength, 1));

                    if (srcHasSlotNames && keptLength > 0)
                    {
                        // Slot names are exposed with one text entry per remaining slot.
                        var slotNamesType = new VectorType(TextType.Instance, keptLength);
                        builder.AddGetter<VBuffer<ReadOnlyMemory<char>>>(MetadataUtils.Kinds.SlotNames,
                                                                         slotNamesType, GetSlotNames);
                    }
                }
                else
                {
                    // Scalar or variable-size vector: the source type is kept unchanged. Only a
                    // non-vector column can be suppressed here, when the first dropped range starts at 0.
                    type = srcType;
                    suppressed = !srcType.IsVector && slotsMin.Length > 0 && slotsMin[0] == 0;
                }

                if (!suppressed &&
                    MetadataUtils.TryGetCategoricalFeatureIndices(Source.Schema, info.Source, out categoricalRanges))
                {
                    var remappedRanges = default(VBuffer<int>);
                    GetCategoricalSlotRangesCore(iinfo, slotDropper.SlotsMin, slotDropper.SlotsMax,
                                                 categoricalRanges, ref remappedRanges);

                    // REVIEW: cache the computed ranges instead of calculating them again in the getter.
                    if (remappedRanges.Length > 0)
                    {
                        // Ranges are stored as (min, max) pairs, hence the even length.
                        Contracts.Assert(remappedRanges.Length % 2 == 0);

                        int rangeCount = remappedRanges.Length / 2;
                        builder.AddGetter<VBuffer<int>>(MetadataUtils.Kinds.CategoricalSlotRanges,
                                                        MetadataUtils.GetCategoricalType(rangeCount), GetCategoricalSlotRanges);
                    }
                }
            }
        }
コード例 #2
0
        // Determines the output vector type for the column and reports through 'concat' whether the
        // per-value indicator vectors have to be concatenated (as opposed to producing one vector).
        // Slot-name, categorical-slot-range and IsNormalized metadata are registered along the way.
        private static void ComputeType(KeyToVectorTransform trans, ISchema input, int iinfo, ColInfo info, bool bag,
                                        MetadataDispatcher md, out VectorType type, out bool concat)
        {
            Contracts.AssertValue(trans);
            Contracts.AssertValue(input);
            Contracts.AssertValue(info);
            Contracts.Assert(info.TypeSrc.ItemType.IsKey);
            Contracts.AssertValue(md);

            int keyCount = info.TypeSrc.ItemType.KeyCount;

            Contracts.Assert(keyCount > 0);

            // Look for key names on the source column. They are only usable when they form a
            // known-size text vector with exactly one entry per key value.
            var keyNamesType = input.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, info.Source);
            bool keyNamesUsable = keyNamesType != null
                                  && keyNamesType.IsKnownSizeVector
                                  && keyNamesType.ItemType.IsText
                                  && keyNamesType.VectorSize == keyCount;

            if (!keyNamesUsable)
            {
                keyNamesType = null;
            }

            // Don't pass through any source column metadata.
            using (var builder = md.BuildMetadata(iinfo))
            {
                int valueCount = info.TypeSrc.ValueCount;

                concat = !bag && valueCount != 1;
                if (concat)
                {
                    // Output is the concatenation of the multiple output indicator vectors.
                    type = new VectorType(NumberType.Float, valueCount, keyCount);
                    if (keyNamesType != null && type.VectorSize > 0)
                    {
                        var slotNamesType = new VectorType(TextType.Instance, type);
                        builder.AddGetter<VBuffer<DvText>>(MetadataUtils.Kinds.SlotNames,
                                                           slotNamesType, trans.GetSlotNames);
                    }
                }
                else
                {
                    // Output is a single vector computed as the sum of the output indicator vectors.
                    type = new VectorType(NumberType.Float, keyCount);
                    if (keyNamesType != null)
                    {
                        builder.AddGetter<VBuffer<DvText>>(MetadataUtils.Kinds.SlotNames, keyNamesType, trans.GetKeyNames);
                    }
                }

                // Categorical slot ranges are registered for non-bagged output with at least one value.
                if (!bag && valueCount > 0)
                {
                    builder.AddGetter<VBuffer<DvInt4>>(MetadataUtils.Kinds.CategoricalSlotRanges,
                                                       MetadataUtils.GetCategoricalType(valueCount), trans.GetCategoricalSlotRanges);
                }

                // IsNormalized is set to true unless the output is a bag over something other than
                // exactly one value.
                if (!bag || valueCount == 1)
                {
                    builder.AddPrimitive(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, DvBool.True);
                }
            }
        }