Пример #1
0
            /// <summary>
            /// Builds a getter for a vector-of-text column. Each item of the source vector is passed
            /// through <c>NormalizeSrc</c>, and only results that are not empty are written to the output.
            /// </summary>
            /// <param name="input">Row that supplies the source column.</param>
            /// <param name="iinfo">Column index; mapped through <c>ColMapNewToOld</c> to find the source column in the input schema.</param>
            /// <returns>A getter that fills its destination buffer with the non-empty normalized items.</returns>
            private ValueGetter<VBuffer<ReadOnlyMemory<char>>> MakeGetterVec(DataViewRow input, int iinfo)
            {
                var sourceColumn = input.Schema[ColMapNewToOld[iinfo]];
                var getSrc = input.GetGetter<VBuffer<ReadOnlyMemory<char>>>(sourceColumn);
                Host.AssertValue(getSrc);

                // Scratch state captured by the delegate and reused on every invocation.
                var source = default(VBuffer<ReadOnlyMemory<char>>);
                var normalized = default(ReadOnlyMemory<char>);
                var scratch = new StringBuilder();
                var kept = new List<ReadOnlyMemory<char>>();

                ValueGetter<VBuffer<ReadOnlyMemory<char>>> getter = (ref VBuffer<ReadOnlyMemory<char>> dst) =>
                {
                    getSrc(ref source);
                    kept.Clear();

                    var tokens = source.GetValues();
                    for (int index = 0; index < tokens.Length; index++)
                    {
                        NormalizeSrc(in tokens[index], ref normalized, scratch);
                        if (normalized.IsEmpty)
                        {
                            // Nothing left after normalization: leave this item out of the output.
                            continue;
                        }

                        kept.Add(normalized);
                    }

                    VBufferUtils.Copy(kept, ref dst, kept.Count);
                };

                return getter;
            }
        /// <summary>
        /// Builds a getter for a vector-of-text column. The first <c>Count</c> entries of the source
        /// buffer's <c>Values</c> are each passed through <c>NormalizeSrc</c>, and only results that
        /// are not empty are written to the output.
        /// </summary>
        /// <param name="input">Row that supplies the source column.</param>
        /// <param name="iinfo">Column index handed to <c>GetSrcGetter</c>.</param>
        /// <returns>A getter that fills its destination buffer with the non-empty normalized items.</returns>
        private ValueGetter<VBuffer<ReadOnlyMemory<char>>> MakeGetterVec(IRow input, int iinfo)
        {
            var getSrc = GetSrcGetter<VBuffer<ReadOnlyMemory<char>>>(input, iinfo);
            Host.AssertValue(getSrc);

            // Scratch state captured by the delegate and reused on every invocation.
            var source = default(VBuffer<ReadOnlyMemory<char>>);
            var normalized = default(ReadOnlyMemory<char>);
            var scratch = new StringBuilder();
            var kept = new List<ReadOnlyMemory<char>>();

            ValueGetter<VBuffer<ReadOnlyMemory<char>>> getter = (ref VBuffer<ReadOnlyMemory<char>> dst) =>
            {
                getSrc(ref source);
                kept.Clear();

                var values = source.Values;
                for (int index = 0; index < source.Count; index++)
                {
                    NormalizeSrc(ref values[index], ref normalized, scratch);
                    if (normalized.IsEmpty)
                    {
                        // Nothing left after normalization: leave this item out of the output.
                        continue;
                    }

                    kept.Add(normalized);
                }

                VBufferUtils.Copy(kept, ref dst, kept.Count);
            };

            return getter;
        }
Пример #3
0
        /// <summary>
        /// Creates the getter for a vector-of-text column. Empty items are skipped; every other item
        /// is lower-cased into a scratch buffer and kept only when the <c>StopWords</c> entry for the
        /// language in use has no match for it.
        /// </summary>
        /// <param name="ch">Channel; may be null.</param>
        /// <param name="input">Row that supplies the source column and, optionally, a language column.</param>
        /// <param name="iinfo">Column index; must lie within <c>Infos</c>.</param>
        /// <param name="disposer">Always set to null.</param>
        /// <returns>A <c>ValueGetter</c> over a vector of text producing the filtered items.</returns>
        protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
        {
            Host.AssertValueOrNull(ch);
            Host.AssertValue(input);
            Host.Assert(0 <= iinfo && iinfo < Infos.Length);
            Host.Assert(Infos[iinfo].TypeSrc.IsVector & Infos[iinfo].TypeSrc.ItemType.IsText);
            disposer = null;

            var columnExtra = _exes[iinfo];
            Language configuredLang = columnExtra.Lang;
            var langText = default(ReadOnlyMemory<char>);

            // A language getter exists only when a language column index is set (non-negative).
            ValueGetter<ReadOnlyMemory<char>> getLang = null;
            if (columnExtra.LangsColIndex >= 0)
            {
                getLang = input.GetGetter<ReadOnlyMemory<char>>(columnExtra.LangsColIndex);
            }

            var getSrc = GetSrcGetter<VBuffer<ReadOnlyMemory<char>>>(input, iinfo);

            // Scratch state captured by the delegate and reused on every invocation.
            var source = default(VBuffer<ReadOnlyMemory<char>>);
            var lowered = new StringBuilder();
            var kept = new List<ReadOnlyMemory<char>>();

            ValueGetter<VBuffer<ReadOnlyMemory<char>>> getter = (ref VBuffer<ReadOnlyMemory<char>> dst) =>
            {
                // Start from the configured language on every call; UpdateLanguage receives it by ref.
                var langToUse = configuredLang;
                UpdateLanguage(ref langToUse, getLang, ref langText);

                getSrc(ref source);
                kept.Clear();

                foreach (var token in source.GetValues())
                {
                    if (!token.IsEmpty)
                    {
                        lowered.Clear();
                        ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(token.Span, lowered);

                        // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.)
                        var match = StopWords[(int)langToUse].Get(lowered);
                        if (match == null)
                        {
                            kept.Add(token);
                        }
                    }
                }

                VBufferUtils.Copy(kept, ref dst, kept.Count);
            };

            return getter;
        }
Пример #4
0
        /// <summary>
        /// Creates the getter for a vector-of-text column. Items without characters are skipped;
        /// every other item is lower-cased into a scratch buffer and kept only when
        /// <c>_stopWordsMap</c> has no match for it.
        /// </summary>
        /// <param name="ch">Channel; may be null.</param>
        /// <param name="input">Row that supplies the source column.</param>
        /// <param name="iinfo">Column index; must lie within <c>Infos</c>.</param>
        /// <param name="disposer">Always set to null.</param>
        /// <returns>A <c>ValueGetter</c> over a vector of <c>DvText</c> producing the filtered items.</returns>
        protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
        {
            Host.AssertValueOrNull(ch);
            Host.AssertValue(input);
            Host.Assert(0 <= iinfo && iinfo < Infos.Length);
            Host.Assert(Infos[iinfo].TypeSrc.IsVector & Infos[iinfo].TypeSrc.ItemType.IsText);
            disposer = null;

            var getSrc = GetSrcGetter<VBuffer<DvText>>(input, iinfo);

            // Scratch state captured by the delegate and reused on every invocation.
            var source = default(VBuffer<DvText>);
            var lowered = new StringBuilder();
            var kept = new List<DvText>();

            ValueGetter<VBuffer<DvText>> getter = (ref VBuffer<DvText> dst) =>
            {
                getSrc(ref source);
                kept.Clear();

                var values = source.Values;
                for (int index = 0; index < source.Count; index++)
                {
                    if (values[index].HasChars)
                    {
                        lowered.Clear();
                        values[index].AddLowerCaseToStringBuilder(lowered);

                        // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.)
                        var match = _stopWordsMap.Get(lowered);
                        if (match == null)
                        {
                            kept.Add(values[index]);
                        }
                    }
                }

                VBufferUtils.Copy(kept, ref dst, kept.Count);
            };

            return getter;
        }
 /// <summary>
 /// Copies the contents of <c>_tokensList</c> into <c>_tokensBuffer</c> and, when
 /// <c>_typesList</c> is not null, the contents of <c>_typesList</c> into <c>_typesBuffer</c>.
 /// </summary>
 private void CopyLists()
 {
     VBufferUtils.Copy(_tokensList, ref _tokensBuffer, _tokensList.Count);

     if (_typesList == null)
     {
         // No types list: only the tokens are copied.
         return;
     }

     VBufferUtils.Copy(_typesList, ref _typesBuffer, _typesList.Count);
 }