示例#1
0
        /// <summary>
        /// Rebuilds <paramref name="sb"/> as a '|'-separated rendering of the first
        /// <paramref name="count"/> entries of <paramref name="ngram"/>.
        /// </summary>
        /// <param name="ngram">The entries to render; only the first <paramref name="count"/> are read.</param>
        /// <param name="count">Number of leading entries of <paramref name="ngram"/> to render.</param>
        /// <param name="sb">Destination builder. It is cleared before anything is written.</param>
        /// <param name="keyCount">Largest entry value that is resolved through <paramref name="termGetter"/>.</param>
        /// <param name="termGetter">Invoked with the zero-based index (entry - 1) to fetch the term to append.</param>
        private void ComposeNgramString(uint[] ngram, int count, StringBuilder sb, int keyCount, TermGetter termGetter)
        {
            Host.AssertValue(sb);
            Host.AssertValue(ngram);
            Host.Assert(keyCount > 0);

            sb.Clear();
            DvText text = default(DvText);

            for (int i = 0; i < count; i++)
            {
                // The separator goes between entries only, never before the first one.
                if (i > 0)
                {
                    sb.Append("|");
                }

                uint key = ngram[i];
                bool inRange = key >= 1 && key <= keyCount;
                if (inRange)
                {
                    // Entries are one-based; the getter takes a zero-based index.
                    termGetter((int)key - 1, ref text);
                    text.AddToStringBuilder(sb);
                }
                else
                {
                    // Zero or above keyCount: emit a placeholder instead of a term.
                    sb.Append("*");
                }
            }
        }
示例#2
0
        /// <summary>
        /// Gets the mapping from T into a StringBuilder representation, using various heuristics.
        /// This StringBuilder representation will be a component of the composed KeyValues for the
        /// hash outputs.
        /// </summary>
        /// <param name="schema">Schema that describes the column; must not be null.</param>
        /// <param name="col">Index of the column in <paramref name="schema"/>; must be in [0, ColumnCount).</param>
        /// <returns>
        /// For a non-key item type, the standard string conversion. For a key type with key names,
        /// a mapper that writes the name stored at (key - 1) and leaves the cleared destination
        /// untouched for key 0. Otherwise, the key string conversion.
        /// </returns>
        public static ValueMapper<T, StringBuilder> GetSimpleMapper<T>(ISchema schema, int col)
        {
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.ColumnCount);

            var itemType = schema.GetColumnType(col).ItemType;
            Contracts.Assert(itemType.RawType == typeof(T));

            var conversions = Conversion.Conversions.Instance;

            // First: if not key, then get the standard string conversion.
            if (!itemType.IsKey)
            {
                return conversions.GetStringConversion<T>(itemType);
            }

            // Last resort for keys that carry no key names: just use the key value itself,
            // subject to offsetting by the min.
            if (!schema.HasKeyNames(col, itemType.KeyCount))
            {
                return conversions.GetKeyStringConversion<T>(itemType.AsKey);
            }

            // Preferred choice for keys: utilize the KeyValues metadata for that key.
            // REVIEW: Non-textual KeyValues are certainly possible. Should we handle them?
            VBuffer<DvText> names = default(VBuffer<DvText>);
            schema.GetMetadata(MetadataUtils.Kinds.KeyValues, col, ref names);

            // Scratch value captured by the mapper below and overwritten on each lookup.
            DvText name = default(DvText);

            // REVIEW: We could optimize for identity, but it's probably not worthwhile.
            bool isIdentity;
            var toU4 = conversions.GetStandardConversion<T, uint>(itemType, NumberType.U4, out isIdentity);

            ValueMapper<T, StringBuilder> mapper = (ref T src, ref StringBuilder dst) =>
            {
                ClearDst(ref dst);

                uint key = 0;
                toU4(ref src, ref key);

                // Key 0 produces no text; any other key is looked up at zero-based index (key - 1).
                if (key != 0)
                {
                    names.GetItemOrDefault((int)(key - 1), ref name);
                    name.AddToStringBuilder(dst);
                }
            };

            return mapper;
        }