コード例 #1
0
        /// <summary>
        /// Works out, for the column at <paramref name="iinfo"/>, the output column type, whether the
        /// column is suppressed, and the categorical feature indices of its source column. While doing
        /// so it registers the slot-name and categorical-slot-range metadata getters on the metadata
        /// builder for that column when they apply.
        /// The slotsMin and slotsMax arrays are expected to be sorted and to describe non-overlapping intervals.
        /// </summary>
        /// <remarks>
        /// Three cases are distinguished by the source type:
        /// a non-vector source keeps its type and is suppressed when the first dropped range starts at slot 0;
        /// a vector of unknown size keeps its type and is never suppressed;
        /// a vector of known size gets a new vector type of length max(DstLength, 1) and is suppressed
        /// when the slot dropper reports a destination length of 0.
        /// </remarks>
        /// <param name="input">The input schema</param>
        /// <param name="slotsMin">The beginning indices of the ranges of slots to be dropped</param>
        /// <param name="slotsMax">The end indices of the ranges of slots to be dropped</param>
        /// <param name="iinfo">The column index in Infos</param>
        /// <param name="slotDropper">The slot dropper for this column; its DstLength, SlotsMin and SlotsMax are read here.</param>
        /// <param name="suppressed">Whether the column is suppressed (all slots dropped)</param>
        /// <param name="type">The column type</param>
        /// <param name="categoricalRanges">Null when the column is suppressed; otherwise whatever
        /// MetadataUtils.TryGetCategoricalFeatureIndices produces for the source column.</param>
        private void ComputeType(Schema input, int[] slotsMin, int[] slotsMax, int iinfo,
                                 SlotDropper slotDropper, out bool suppressed, out ColumnType type, out int[] categoricalRanges)
        {
            Contracts.AssertValue(slotDropper);
            Contracts.AssertValue(input);
            Contracts.AssertNonEmpty(slotsMin);
            Contracts.AssertNonEmpty(slotsMax);
            Contracts.Assert(slotsMin.Length == slotsMax.Length);
            Contracts.Assert(0 <= iinfo && iinfo < Infos.Length);

            categoricalRanges = null;
            var colInfo = Infos[iinfo];

            // Open the metadata builder for this column, handing it the IsNormalized and KeyValues kinds.
            using (var builder = Metadata.BuildMetadata(iinfo, input, colInfo.Source,
                                                        MetadataUtils.Kinds.IsNormalized, MetadataUtils.Kinds.KeyValues))
            {
                var srcType = colInfo.TypeSrc;
                if (!srcType.IsVector)
                {
                    // Non-vector source: the type is unchanged; the column is suppressed when
                    // the first dropped range begins at slot 0.
                    suppressed = slotsMin.Length > 0 && slotsMin[0] == 0;
                    type = srcType;
                }
                else if (srcType.IsKnownSizeVector)
                {
                    Host.Assert(srcType.IsKnownSizeVector);
                    var keptLength = slotDropper.DstLength;
                    var srcHasSlotNames = input.HasSlotNames(colInfo.Source, colInfo.TypeSrc.VectorSize);

                    // The output vector length is clamped to at least 1, even when nothing is kept.
                    type = new VectorType(srcType.ItemType.AsPrimitive, Math.Max(keptLength, 1));
                    suppressed = keptLength == 0;

                    if (srcHasSlotNames && keptLength > 0)
                    {
                        // Add slot name metadata.
                        builder.AddGetter<VBuffer<ReadOnlyMemory<char>>>(
                            MetadataUtils.Kinds.SlotNames,
                            new VectorType(TextType.Instance, keptLength),
                            GetSlotNames);
                    }
                }
                else
                {
                    // Vector of unknown size: the type is unchanged and the column is never suppressed.
                    type = srcType;
                    suppressed = false;
                }

                // Nothing more to register for a suppressed column.
                if (suppressed)
                {
                    return;
                }

                if (!MetadataUtils.TryGetCategoricalFeatureIndices(Source.Schema, colInfo.Source, out categoricalRanges))
                {
                    return;
                }

                var slotRanges = default(VBuffer<int>);
                GetCategoricalSlotRangesCore(iinfo, slotDropper.SlotsMin, slotDropper.SlotsMax, categoricalRanges, ref slotRanges);
                // REVIEW: cache slotRanges as opposed to calculating it again.
                if (slotRanges.Length > 0)
                {
                    // The buffer is asserted to have even length; half of it is the count handed to GetCategoricalType.
                    Contracts.Assert(slotRanges.Length % 2 == 0);

                    builder.AddGetter<VBuffer<int>>(
                        MetadataUtils.Kinds.CategoricalSlotRanges,
                        MetadataUtils.GetCategoricalType(slotRanges.Length / 2),
                        GetCategoricalSlotRanges);
                }
            }
        }
コード例 #2
0
            private void CacheTypes(out ColumnType[] types, out ColumnType[] typesSlotNames, out bool[] echoSrc,
                                    out bool[] isNormalized, out ColumnType[] typesCategoricals)
            {
                Contracts.AssertNonEmpty(Infos);
                echoSrc           = new bool[Infos.Length];
                isNormalized      = new bool[Infos.Length];
                types             = new ColumnType[Infos.Length];
                typesSlotNames    = new ColumnType[Infos.Length];
                typesCategoricals = new ColumnType[Infos.Length];

                for (int i = 0; i < Infos.Length; i++)
                {
                    var info = Infos[i];
                    // REVIEW: Add support for implicit conversions?
                    if (info.SrcTypes.Length == 1 && info.SrcTypes[0].IsVector)
                    {
                        // All meta-data is passed through in this case, so don't need the slot names type.
                        echoSrc[i] = true;
                        DvBool b = DvBool.False;
                        isNormalized[i] =
                            info.SrcTypes[0].ItemType.IsNumber &&
                            Input.TryGetMetadata(BoolType.Instance, MetadataUtils.Kinds.IsNormalized, info.SrcIndices[0], ref b) &&
                            b.IsTrue;
                        types[i] = info.SrcTypes[0];
                        continue;
                    }

                    // The single scalar and multiple vector case.
                    isNormalized[i] = info.SrcTypes[0].ItemType.IsNumber;
                    if (isNormalized[i])
                    {
                        foreach (var srcCol in info.SrcIndices)
                        {
                            DvBool b = DvBool.False;
                            if (!Input.TryGetMetadata(BoolType.Instance, MetadataUtils.Kinds.IsNormalized, srcCol, ref b) ||
                                !b.IsTrue)
                            {
                                isNormalized[i] = false;
                                break;
                            }
                        }
                    }

                    types[i] = new VectorType(info.SrcTypes[0].ItemType.AsPrimitive, info.SrcSize);
                    if (info.SrcSize == 0)
                    {
                        continue;
                    }

                    bool hasCategoricals = false;
                    int  catCount        = 0;
                    for (int j = 0; j < info.SrcTypes.Length; j++)
                    {
                        if (info.SrcTypes[j].ValueCount == 0)
                        {
                            hasCategoricals = false;
                            break;
                        }

                        if (MetadataUtils.TryGetCategoricalFeatureIndices(Input, info.SrcIndices[j], out int[] typeCat))