/// <summary>
/// Rebuilds <paramref name="sb"/> so that it holds a "|"-delimited rendering of the first
/// <paramref name="count"/> slots of <paramref name="ngram"/>.
/// </summary>
/// <param name="ngram">The key values making up the n-gram; slot values are one-based.</param>
/// <param name="count">How many leading slots of <paramref name="ngram"/> to render.</param>
/// <param name="sb">The builder that is cleared and then filled with the result.</param>
/// <param name="keyCount">The largest key value that is considered valid.</param>
/// <param name="termGetter">Invoked with the zero-based key index to fetch the term text.</param>
private void ComposeNgramString(uint[] ngram, int count, StringBuilder sb, int keyCount, TermGetter termGetter)
{
    Host.AssertValue(sb);
    Host.AssertValue(ngram);
    Host.Assert(keyCount > 0);

    sb.Clear();
    DvText text = default(DvText);
    for (int i = 0; i < count; i++)
    {
        // Every slot after the first is preceded by the delimiter.
        if (i > 0)
        {
            sb.Append("|");
        }

        uint key = ngram[i];
        if (key >= 1 && key <= keyCount)
        {
            // Keys are one-based, while the getter takes a zero-based index.
            termGetter((int)key - 1, ref text);
            text.AddToStringBuilder(sb);
        }
        else
        {
            // Zero or out-of-range keys are rendered as a placeholder.
            sb.Append("*");
        }
    }
}
/// <summary>
/// Builds a mapper that renders values of column <paramref name="col"/> into a StringBuilder.
/// The rendering strategy is chosen from the column's item type: non-key types use the string
/// conversion, keys with key names are rendered through their KeyValues metadata, and all other
/// keys use the key string conversion. The resulting text is used as a component of the composed
/// KeyValues for the hash outputs.
/// </summary>
/// <param name="schema">The schema that contains the column.</param>
/// <param name="col">The index of the column within <paramref name="schema"/>.</param>
/// <returns>A mapper from <typeparamref name="T"/> to its StringBuilder representation.</returns>
public static ValueMapper<T, StringBuilder> GetSimpleMapper<T>(ISchema schema, int col)
{
    Contracts.AssertValue(schema);
    Contracts.Assert(0 <= col && col < schema.ColumnCount);
    var itemType = schema.GetColumnType(col).ItemType;
    Contracts.Assert(itemType.RawType == typeof(T));
    var conversions = Conversion.Conversions.Instance;

    // Non-key types: the standard string conversion is all that is needed.
    if (!itemType.IsKey)
    {
        return conversions.GetStringConversion<T>(itemType);
    }

    // Keys without key names: fall back to the key string conversion.
    if (!schema.HasKeyNames(col, itemType.KeyCount))
    {
        return conversions.GetKeyStringConversion<T>(itemType.AsKey);
    }

    // Keys with key names: look the text up in the KeyValues metadata.
    // REVIEW: Non-textual KeyValues are certainly possible. Should we handle them?
    VBuffer<DvText> names = default(VBuffer<DvText>);
    schema.GetMetadata(MetadataUtils.Kinds.KeyValues, col, ref names);

    // Scratch value captured by the returned delegate and reused on every invocation.
    DvText name = default(DvText);

    // REVIEW: We could optimize for identity, but it's probably not worthwhile.
    bool identity;
    var toKey = conversions.GetStandardConversion<T, uint>(itemType, NumberType.U4, out identity);

    return (ref T src, ref StringBuilder dst) =>
    {
        ClearDst(ref dst);
        uint key = 0;
        toKey(ref src, ref key);

        // A key of zero produces no text beyond what ClearDst leaves in dst.
        if (key != 0)
        {
            // Keys are one-based; the metadata buffer is indexed from zero.
            names.GetItemOrDefault((int)(key - 1), ref name);
            name.AddToStringBuilder(dst);
        }
    };
}