/// <summary>
/// Builds a getter for a vector-of-text column. Each explicit value of the source
/// vector is passed through <c>NormalizeSrc</c>, and only results that are non-empty
/// are copied to the destination.
/// </summary>
/// <param name="input">Row from which the source column getter is obtained.</param>
/// <param name="iinfo">Index that is mapped to the source column through <c>ColMapNewToOld</c>.</param>
/// <returns>A getter that fills its destination buffer with the retained items.</returns>
private ValueGetter<VBuffer<ReadOnlyMemory<char>>> MakeGetterVec(DataViewRow input, int iinfo)
{
    var srcColumn = input.Schema[ColMapNewToOld[iinfo]];
    var getSrc = input.GetGetter<VBuffer<ReadOnlyMemory<char>>>(srcColumn);
    Host.AssertValue(getSrc);

    // Scratch state captured by the delegate and reused across invocations.
    var kept = new List<ReadOnlyMemory<char>>();
    var scratch = new StringBuilder();
    var normalized = default(ReadOnlyMemory<char>);
    var src = default(VBuffer<ReadOnlyMemory<char>>);

    ValueGetter<VBuffer<ReadOnlyMemory<char>>> getter =
        (ref VBuffer<ReadOnlyMemory<char>> dst) =>
        {
            getSrc(ref src);
            kept.Clear();

            var srcValues = src.GetValues();
            for (int idx = 0; idx < srcValues.Length; idx++)
            {
                NormalizeSrc(in srcValues[idx], ref normalized, scratch);
                if (normalized.IsEmpty)
                {
                    // Nothing left of this item after NormalizeSrc; leave it out.
                    continue;
                }

                kept.Add(normalized);
            }

            VBufferUtils.Copy(kept, ref dst, kept.Count);
        };

    return getter;
}
/// <summary>
/// Builds a getter for a vector-of-text column. The first <c>Count</c> entries of the
/// source buffer's <c>Values</c> array are each passed through <c>NormalizeSrc</c>, and
/// only results that are non-empty are copied to the destination.
/// </summary>
/// <param name="input">Row from which the source getter is obtained via <c>GetSrcGetter</c>.</param>
/// <param name="iinfo">Column info index forwarded to <c>GetSrcGetter</c>.</param>
/// <returns>A getter that fills its destination buffer with the retained items.</returns>
private ValueGetter<VBuffer<ReadOnlyMemory<char>>> MakeGetterVec(IRow input, int iinfo)
{
    var getSrc = GetSrcGetter<VBuffer<ReadOnlyMemory<char>>>(input, iinfo);
    Host.AssertValue(getSrc);

    // Scratch state captured by the delegate and reused across invocations.
    var kept = new List<ReadOnlyMemory<char>>();
    var scratch = new StringBuilder();
    var normalized = default(ReadOnlyMemory<char>);
    var src = default(VBuffer<ReadOnlyMemory<char>>);

    ValueGetter<VBuffer<ReadOnlyMemory<char>>> getter =
        (ref VBuffer<ReadOnlyMemory<char>> dst) =>
        {
            getSrc(ref src);
            kept.Clear();

            // Neither the count nor the array reference changes while iterating.
            int count = src.Count;
            var values = src.Values;
            for (int idx = 0; idx < count; idx++)
            {
                NormalizeSrc(ref values[idx], ref normalized, scratch);
                if (normalized.IsEmpty)
                {
                    // Nothing left of this item after NormalizeSrc; leave it out.
                    continue;
                }

                kept.Add(normalized);
            }

            VBufferUtils.Copy(kept, ref dst, kept.Count);
        };

    return getter;
}
/// <summary>
/// Creates the getter for a vector-of-text column that filters its tokens against a
/// per-language stop-word collection. A token is kept when it is non-empty and its
/// lower-cased form is not found in <c>StopWords</c> for the language in effect.
/// </summary>
/// <param name="ch">Optional channel; may be null.</param>
/// <param name="input">Row supplying the source column and, when configured, the language column.</param>
/// <param name="iinfo">Column info index; must lie within <c>Infos</c>.</param>
/// <param name="disposer">Always set to null.</param>
/// <returns>A <c>ValueGetter</c> over a <c>VBuffer</c> of text, returned as a <see cref="Delegate"/>.</returns>
protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
{
    Host.AssertValueOrNull(ch);
    Host.AssertValue(input);
    Host.Assert(0 <= iinfo && iinfo < Infos.Length);
    Host.Assert(Infos[iinfo].TypeSrc.IsVector & Infos[iinfo].TypeSrc.ItemType.IsText);
    disposer = null;

    var ex = _exes[iinfo];
    Language stopWordslang = ex.Lang;
    var lang = default(ReadOnlyMemory<char>);

    // A language getter exists only when a language column index is configured.
    ValueGetter<ReadOnlyMemory<char>> getLang = null;
    if (ex.LangsColIndex >= 0)
    {
        getLang = input.GetGetter<ReadOnlyMemory<char>>(ex.LangsColIndex);
    }

    var getSrc = GetSrcGetter<VBuffer<ReadOnlyMemory<char>>>(input, iinfo);

    // Scratch state captured by the delegate and reused across invocations.
    var src = default(VBuffer<ReadOnlyMemory<char>>);
    var lowered = new StringBuilder();
    var kept = new List<ReadOnlyMemory<char>>();

    ValueGetter<VBuffer<ReadOnlyMemory<char>>> del =
        (ref VBuffer<ReadOnlyMemory<char>> dst) =>
        {
            // Start from the configured language; UpdateLanguage receives it by ref
            // together with the optional language getter.
            var langToUse = stopWordslang;
            UpdateLanguage(ref langToUse, getLang, ref lang);

            getSrc(ref src);
            kept.Clear();

            var srcValues = src.GetValues();
            for (int idx = 0; idx < srcValues.Length; idx++)
            {
                var token = srcValues[idx];
                if (!token.IsEmpty)
                {
                    lowered.Clear();
                    ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(token.Span, lowered);

                    // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.)
                    bool isStopWord = StopWords[(int)langToUse].Get(lowered) != null;
                    if (!isStopWord)
                    {
                        kept.Add(token);
                    }
                }
            }

            VBufferUtils.Copy(kept, ref dst, kept.Count);
        };

    return del;
}
/// <summary>
/// Creates the getter for a vector-of-text column that filters its tokens against
/// <c>_stopWordsMap</c>. A token is kept when it has characters and its lower-cased
/// form is not found in the map.
/// </summary>
/// <param name="ch">Optional channel; may be null.</param>
/// <param name="input">Row supplying the source column.</param>
/// <param name="iinfo">Column info index; must lie within <c>Infos</c>.</param>
/// <param name="disposer">Always set to null.</param>
/// <returns>A <c>ValueGetter</c> over a <c>VBuffer</c> of <c>DvText</c>, returned as a <see cref="Delegate"/>.</returns>
protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
{
    Host.AssertValueOrNull(ch);
    Host.AssertValue(input);
    Host.Assert(0 <= iinfo && iinfo < Infos.Length);
    Host.Assert(Infos[iinfo].TypeSrc.IsVector & Infos[iinfo].TypeSrc.ItemType.IsText);
    disposer = null;

    var getSrc = GetSrcGetter<VBuffer<DvText>>(input, iinfo);

    // Scratch state captured by the delegate and reused across invocations.
    var src = default(VBuffer<DvText>);
    var lowered = new StringBuilder();
    var kept = new List<DvText>();

    ValueGetter<VBuffer<DvText>> del =
        (ref VBuffer<DvText> dst) =>
        {
            getSrc(ref src);
            kept.Clear();

            for (int idx = 0; idx < src.Count; idx++)
            {
                if (src.Values[idx].HasChars)
                {
                    lowered.Clear();
                    src.Values[idx].AddLowerCaseToStringBuilder(lowered);

                    // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.)
                    bool isStopWord = _stopWordsMap.Get(lowered) != null;
                    if (!isStopWord)
                    {
                        kept.Add(src.Values[idx]);
                    }
                }
            }

            VBufferUtils.Copy(kept, ref dst, kept.Count);
        };

    return del;
}
/// <summary>
/// Copies the accumulated token list into <c>_tokensBuffer</c> and, when a types list
/// exists, copies it into <c>_typesBuffer</c> as well.
/// </summary>
private void CopyLists()
{
    VBufferUtils.Copy(_tokensList, ref _tokensBuffer, _tokensList.Count);

    if (_typesList == null)
    {
        // No types list to copy.
        return;
    }

    VBufferUtils.Copy(_typesList, ref _typesBuffer, _typesList.Count);
}