/// <summary>
/// Renders the values held in <paramref name="pairs"/> as a single piece of text and
/// empties the set. A lone value is rendered by the stringify mapper alone; several values
/// are rendered as <c>{v1,v2,...}</c>, ordered by each pair's <c>Order</c>.
/// </summary>
/// <param name="sb">Reusable builder for the multi-value case. May be null on entry, in which
/// case one is allocated; it is cleared again before returning.</param>
/// <param name="temp">Reusable builder that receives each individual value's text. May be null.</param>
/// <param name="cbuffer">Reusable character scratch buffer handed to <c>InvertHashUtils.AppendToEnd</c>. May be null.</param>
/// <param name="buffer">Reusable array the pairs are copied into before sorting. May be null.</param>
/// <param name="pairs">The pairs to render. Asserted to be non-empty; cleared by this call.</param>
/// <returns>The rendered text.</returns>
private ReadOnlyMemory<char> Textify(ref StringBuilder sb, ref StringBuilder temp, ref char[] cbuffer, ref Pair[] buffer, HashSet<Pair> pairs)
{
    Contracts.AssertValueOrNull(sb);
    Contracts.AssertValueOrNull(temp);
    Contracts.AssertValueOrNull(cbuffer);
    Contracts.AssertValueOrNull(buffer);
    Contracts.Assert(Utils.Size(pairs) > 0);

    // Snapshot the set into the reusable array so it can be sorted, then release the set.
    int pairCount = pairs.Count;
    Utils.EnsureSize(ref buffer, pairCount);
    pairs.CopyTo(buffer);
    pairs.Clear();

    // Fast path: with exactly one value there is nothing to sort or join, so the
    // shared string builder is never touched.
    if (pairCount == 1)
    {
        var single = buffer[0].Value;
        _stringifyMapper(ref single, ref temp);
        if (Utils.Size(temp) > 0)
        {
            return temp.ToString().AsMemory();
        }
        else
        {
            return String.Empty.AsMemory();
        }
    }

    // Restore insertion order by sorting on the Order field.
    Array.Sort(buffer, 0, pairCount, Comparer<Pair>.Create((left, right) => left.Order - right.Order));

    if (sb == null)
    {
        sb = new StringBuilder();
    }
    Contracts.Assert(sb.Length == 0);

    // General collision case: a brace-delimited, comma-separated list.
    sb.Append('{');
    for (int idx = 0; idx < pairCount; idx++)
    {
        if (idx != 0)
        {
            sb.Append(',');
        }
        var item = buffer[idx].Value;
        _stringifyMapper(ref item, ref temp);
        InvertHashUtils.AppendToEnd(temp, sb, ref cbuffer);
    }
    sb.Append('}');

    // Materialize the text first, then reset the builder so the caller can reuse it.
    string joined = sb.ToString();
    sb.Clear();
    return joined.AsMemory();
}
/// <summary>
/// Wraps <paramref name="finder"/> so that every ngram it maps to a slot is also recorded in
/// a newly created invert-hash collector for column info <paramref name="iinfo"/>. The
/// collector is stored in <c>_iinfoToCollector[iinfo]</c>.
/// </summary>
/// <param name="iinfo">Index of the column info. Asserted to be in range and to have no
/// collector assigned yet.</param>
/// <param name="finder">The ngram-to-slot finder to wrap. Asserted to be non-null.</param>
/// <returns>A finder that returns exactly what <paramref name="finder"/> returns, and that
/// additionally adds the ngram to the collector whenever the result is not -1.</returns>
public NgramIdFinder Decorate(int iinfo, NgramIdFinder finder)
{
    Contracts.Assert(0 <= iinfo && iinfo < _parent._bindings.InfoCount);
    Contracts.Assert(_iinfoToCollector[iinfo] == null);
    Contracts.AssertValue(finder);

    var srcIndices = _parent._bindings.Infos[iinfo].SrcIndices;

    // Define the mapper from the ngram, to text.
    ValueMapper<NGram, StringBuilder> stringMapper;
    // Scratch state captured by the mapper lambdas below and reused across invocations.
    StringBuilder temp = null;
    char[] buffer = null;

    if (srcIndices.Length == 1)
    {
        // No need to include the column name. This will just be "A" or "A|B|C" depending
        // on the n-arity of the ngram.
        var srcMap = _srcTextGetters[srcIndices[0]];
        Contracts.AssertValue(srcMap);

        stringMapper =
            (ref NGram src, ref StringBuilder dst) =>
            {
                Contracts.Assert(src.ISrcCol == 0);
                if (src.Lim == 1)
                {
                    // A unigram can be written by the source mapper straight into dst.
                    srcMap(ref src.Grams[0], ref dst);
                    return;
                }
                ClearDst(ref dst);
                for (int i = 0; i < src.Lim; ++i)
                {
                    if (i > 0)
                    {
                        dst.Append('|');
                    }
                    srcMap(ref src.Grams[i], ref temp);
                    InvertHashUtils.AppendToEnd(temp, dst, ref buffer);
                }
            };
    }
    else
    {
        Contracts.Assert(srcIndices.Length > 1);

        // Prefer the friendly names supplied for this column info. When there are none
        // (including when no friendly-name table exists at all), fall back to the source
        // schema's column names.
        string[] srcNames = _friendlyNames?[iinfo];
        if (srcNames == null)
        {
            srcNames = new string[srcIndices.Length];
            for (int i = 0; i < srcIndices.Length; ++i)
            {
                srcNames[i] = _parent.Source.Schema.GetColumnName(srcIndices[i]);
            }
        }
        Contracts.Assert(Utils.Size(srcNames) == srcIndices.Length);

        // We need to disambiguate the column name. This will be the same as the above format,
        // just instead of "<Stuff>" it would be with "ColumnName:<Stuff>".
        stringMapper =
            (ref NGram src, ref StringBuilder dst) =>
            {
                var srcMap = _srcTextGetters[srcIndices[src.ISrcCol]];
                Contracts.AssertValue(srcMap);
                ClearDst(ref dst);
                dst.Append(srcNames[src.ISrcCol]);
                dst.Append(':');
                for (int i = 0; i < src.Lim; ++i)
                {
                    if (i > 0)
                    {
                        dst.Append('|');
                    }
                    srcMap(ref src.Grams[i], ref temp);
                    InvertHashUtils.AppendToEnd(temp, dst, ref buffer);
                }
            };
    }

    var collector = _iinfoToCollector[iinfo] = new InvertHashCollector<NGram>(
        _parent._bindings.Types[iinfo].VectorSize,
        _invertHashMaxCounts[iinfo],
        stringMapper,
        EqualityComparer<NGram>.Default,
        (ref NGram src, ref NGram dst) => dst = src.Clone());

    return
        (uint[] ngram, int lim, int icol, ref bool more) =>
        {
            Contracts.Assert(0 <= icol && icol < srcIndices.Length);
            Contracts.AssertValue(_srcTextGetters[srcIndices[icol]]);
            var result = finder(ngram, lim, icol, ref more);
            // For the hashing NgramIdFinder, a result of -1 indicates that
            // a slot does not exist for the given ngram. We do not pass ngrams
            // that do not have a slot to the InvertHash collector.
            if (result != -1)
            {
                // The following ngram is "unsafe", in that the ngram array is actually
                // re-used. The collector will utilize its copier to make it safe, in
                // the event that this is a key it needs to keep.
                var ngramObj = new NGram(ngram, lim, icol);
                collector.Add(result, ngramObj);
            }
            return result;
        };
}