/// <summary>
/// Registers <paramref name="word"/> and appends its vector to <c>WordVectors</c>,
/// unless the word is already present in the pool.
/// </summary>
/// <param name="ch">Channel used to assert that the vector has the expected length.</param>
/// <param name="word">The word to register.</param>
/// <param name="wordVector">The vector for the word; asserted to hold exactly <c>Dimension</c> values.</param>
public void AddWordVector(IChannel ch, string word, float[] wordVector)
{
    ch.Assert(wordVector.Length == Dimension);

    // A word that is already pooled is left untouched: nothing is added for it.
    bool isNewWord = _pool.Get(word) == null;
    if (!isNewWord)
    {
        return;
    }

    _pool.Add(word);
    WordVectors.AddRange(wordVector, Dimension);
}
/// <summary>
/// Looks this value's characters up in <paramref name="pool"/>.
/// </summary>
/// <param name="pool">The pool to search; must not be null.</param>
/// <returns>
/// <c>null</c> when <c>IsNA</c> is true; otherwise the result of
/// <c>pool.Get</c> for the buffer range from <c>_ichMin</c> to <c>IchLim</c>.
/// </returns>
public NormStr FindInPool(NormStr.Pool pool)
{
    Contracts.CheckValue(pool, nameof(pool));

    // The pool is validated first, even for NA values, so a null pool always throws.
    return IsNA ? null : pool.Get(_outerBuffer, _ichMin, IchLim);
}
/// <summary>
/// Deserialization constructor. After the base class has loaded its own state, this reads
/// the "Stopwords" sub-model and rebuilds the stop word pool from it.
/// </summary>
/// <param name="host">Host forwarded to the base constructor.</param>
/// <param name="ctx">Model load context to read from.</param>
/// <param name="input">Input data view forwarded to the base constructor.</param>
private CustomStopWordsRemoverTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, ctx, input, TestIsTextVector)
{
    Host.AssertValue(ctx);

    using (var ch = Host.Start("Deserialization"))
    {
        // *** Binary format ***
        // <base>
        ch.AssertNonEmpty(Infos);

        const string subModelDir = "Stopwords";
        NormStr.Pool stopWords = null;

        // NOTE(review): inside the callback the count and the strings are read through the
        // outer ctx rather than the sub-model context c. This is preserved as-is; confirm
        // against the matching save code before changing it, since the two must agree.
        bool subModelFound = ctx.TryProcessSubModel(subModelDir, c =>
        {
            Host.CheckValue(c, nameof(ctx));
            c.CheckAtModel(GetStopwrodsManagerVersionInfo());

            // *** Binary format ***
            // int: number of stopwords
            // int[]: stopwords string ids
            int count = ctx.Reader.ReadInt32();
            Host.CheckDecode(count > 0);

            stopWords = new NormStr.Pool();
            for (int expectedId = 0; expectedId < count; expectedId++)
            {
                // Each added string is checked to have received the id matching its position.
                var added = stopWords.Add(ctx.LoadString());
                Host.CheckDecode(added.Id == expectedId);
            }

            // All stopwords are distinct.
            Host.CheckDecode(stopWords.Count == count);

            // The deserialized pool should not have the empty string.
            Host.CheckDecode(stopWords.Get("") == null);
        });

        // A missing sub-model is treated as a decode failure.
        if (!subModelFound)
        {
            throw Host.ExceptDecode();
        }

        _stopWordsMap = stopWords;
        ch.Done();
    }

    Metadata.Seal();
}
/// <summary>
/// Builds the getter for output column <paramref name="iinfo"/>. The returned delegate reads the
/// source text vector and copies to the destination only those items that are non-empty and whose
/// lower-cased form is not found in <c>_stopWordsMap</c>.
/// </summary>
/// <param name="ch">Optional channel; may be null.</param>
/// <param name="input">Row to read the source column from.</param>
/// <param name="iinfo">Index into <c>Infos</c> of the column to produce.</param>
/// <param name="disposer">Always set to null; this getter registers no disposal action.</param>
/// <returns>A <c>ValueGetter</c> over a <c>VBuffer</c> of <c>ReadOnlyMemory&lt;char&gt;</c>.</returns>
protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
{
    Host.AssertValueOrNull(ch);
    Host.AssertValue(input);
    Host.Assert(0 <= iinfo && iinfo < Infos.Length);
    // Short-circuit && (rather than &) so the item type is only inspected for vector sources.
    Host.Assert(Infos[iinfo].TypeSrc.IsVector && Infos[iinfo].TypeSrc.ItemType.IsText);
    disposer = null;

    var getSrc = GetSrcGetter<VBuffer<ReadOnlyMemory<char>>>(input, iinfo);

    // Scratch state captured by the delegate and reused on every call.
    var src = default(VBuffer<ReadOnlyMemory<char>>);
    var buffer = new StringBuilder();
    var list = new List<ReadOnlyMemory<char>>();

    ValueGetter<VBuffer<ReadOnlyMemory<char>>> del =
        (ref VBuffer<ReadOnlyMemory<char>> dst) =>
        {
            getSrc(ref src);
            list.Clear();

            var srcValues = src.GetValues();
            for (int i = 0; i < srcValues.Length; i++)
            {
                if (srcValues[i].IsEmpty)
                {
                    continue;
                }

                // The lookup key is the lower-cased text; the item kept in the output is the original.
                buffer.Clear();
                ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(srcValues[i].Span, buffer);

                // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.)
                if (_stopWordsMap.Get(buffer) == null)
                {
                    list.Add(srcValues[i]);
                }
            }

            VBufferUtils.Copy(list, ref dst, list.Count);
        };

    return del;
}
/// <summary>
/// Looks <paramref name="memory"/> up in <paramref name="pool"/>.
/// </summary>
/// <param name="memory">The characters to search for.</param>
/// <param name="pool">The pool to search; must not be null.</param>
/// <returns>Whatever <c>pool.Get</c> returns for <paramref name="memory"/>.</returns>
public static NormStr FindInPool(ReadOnlyMemory<char> memory, NormStr.Pool pool)
{
    Contracts.CheckValue(pool, nameof(pool));

    var match = pool.Get(memory);
    return match;
}