예제 #1
0
 /// <summary>
 /// Registers <paramref name="word"/> together with its embedding, unless the word
 /// is already present in the pool, in which case the call changes nothing.
 /// </summary>
 /// <param name="ch">Channel used to assert the vector length.</param>
 /// <param name="word">The word to register.</param>
 /// <param name="wordVector">The embedding; its length is asserted to equal <c>Dimension</c>.</param>
 public void AddWordVector(IChannel ch, string word, float[] wordVector)
 {
     ch.Assert(wordVector.Length == Dimension);

     // A word that is already pooled keeps the vector it was first added with.
     if (_pool.Get(word) != null)
     {
         return;
     }

     _pool.Add(word);
     WordVectors.AddRange(wordVector, Dimension);
 }
예제 #2
0
 /// <summary>
 /// Looks up the text covered by this instance in <paramref name="pool"/>.
 /// </summary>
 /// <param name="pool">The pool to search; checked for null.</param>
 /// <returns>
 /// <c>null</c> when this instance is NA; otherwise whatever <c>pool.Get</c> returns
 /// for the buffer range [<c>_ichMin</c>, <c>IchLim</c>).
 /// </returns>
 public NormStr FindInPool(NormStr.Pool pool)
 {
     Contracts.CheckValue(pool, nameof(pool));

     if (!IsNA)
     {
         return pool.Get(_outerBuffer, _ichMin, IchLim);
     }

     // NA values are never looked up.
     return null;
 }
예제 #3
0
        /// <summary>
        /// Deserialization constructor. After the base class has consumed its part of
        /// <paramref name="ctx"/>, this loads the stop-word pool from the "Stopwords"
        /// sub-model, validates it, and stores it in <c>_stopWordsMap</c>.
        /// </summary>
        /// <param name="host">Host forwarded to the base constructor.</param>
        /// <param name="ctx">Model load context to read the transform from.</param>
        /// <param name="input">Input data view forwarded to the base constructor.</param>
        private CustomStopWordsRemoverTransform(IHost host, ModelLoadContext ctx, IDataView input)
            : base(host, ctx, input, TestIsTextVector)
        {
            Host.AssertValue(ctx);

            using (var ch = Host.Start("Deserialization"))
            {
                // *** Binary format ***
                // <base>
                ch.AssertNonEmpty(Infos);

                const string dir       = "Stopwords";
                // Filled in by the sub-model callback below; stays null if the callback never runs.
                NormStr.Pool stopwrods = null;
                bool         res       = ctx.TryProcessSubModel(dir,
                                                                c =>
                {
                    Host.CheckValue(c, nameof(ctx));
                    c.CheckAtModel(GetStopwrodsManagerVersionInfo());

                    // NOTE(review): the reads below go through the outer 'ctx' rather than the
                    // sub-model context 'c'. Confirm against the matching save code before
                    // changing this; switching readers may break the on-disk format.

                    // *** Binary format ***
                    // int: number of stopwords
                    // int[]: stopwords string ids
                    int cstr = ctx.Reader.ReadInt32();
                    Host.CheckDecode(cstr > 0);

                    stopwrods = new NormStr.Pool();
                    for (int istr = 0; istr < cstr; istr++)
                    {
                        // Each string must receive the id equal to its position in the stream;
                        // anything else is rejected as a decode failure.
                        var nstr = stopwrods.Add(ctx.LoadString());
                        Host.CheckDecode(nstr.Id == istr);
                    }

                    // All stopwords are distinct.
                    Host.CheckDecode(stopwrods.Count == cstr);
                    // The deserialized pool should not have the empty string.
                    Host.CheckDecode(stopwrods.Get("") == null);
                });
                // A false result is treated as a corrupt model (presumably the sub-model is missing - verify).
                if (!res)
                {
                    throw Host.ExceptDecode();
                }

                _stopWordsMap = stopwrods;
                ch.Done();
            }
            Metadata.Seal();
        }
예제 #4
0
        /// <summary>
        /// Builds the getter for output column <paramref name="iinfo"/>. The getter reads the
        /// source text vector, drops empty items and items whose lower-cased form is found in
        /// <c>_stopWordsMap</c>, and copies the surviving items (in their original casing) to
        /// the destination buffer.
        /// </summary>
        /// <param name="ch">Optional channel; may be null.</param>
        /// <param name="input">Row to read the source column from.</param>
        /// <param name="iinfo">Index into <c>Infos</c>; asserted to be in range.</param>
        /// <param name="disposer">Always set to null; the getter owns nothing that needs disposal.</param>
        /// <returns>A <c>ValueGetter</c> over a <c>VBuffer</c> of <c>ReadOnlyMemory&lt;char&gt;</c>.</returns>
        protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
        {
            Host.AssertValueOrNull(ch);
            Host.AssertValue(input);
            Host.Assert(0 <= iinfo && iinfo < Infos.Length);
            // Short-circuit AND: the item type is only inspected once the source is known to be a vector.
            Host.Assert(Infos[iinfo].TypeSrc.IsVector && Infos[iinfo].TypeSrc.ItemType.IsText);
            disposer = null;

            var getSrc = GetSrcGetter<VBuffer<ReadOnlyMemory<char>>>(input, iinfo);

            // Scratch state captured by the delegate and reused across invocations.
            var src    = default(VBuffer<ReadOnlyMemory<char>>);
            var buffer = new StringBuilder();
            var list   = new List<ReadOnlyMemory<char>>();

            ValueGetter<VBuffer<ReadOnlyMemory<char>>> del =
                (ref VBuffer<ReadOnlyMemory<char>> dst) =>
                {
                    getSrc(ref src);
                    list.Clear();

                    var srcValues = src.GetValues();
                    for (int i = 0; i < srcValues.Length; i++)
                    {
                        // Empty tokens are never emitted.
                        if (srcValues[i].IsEmpty)
                        {
                            continue;
                        }

                        // The pool lookup is done on the lower-cased token.
                        buffer.Clear();
                        ReadOnlyMemoryUtils.AddLowerCaseToStringBuilder(srcValues[i].Span, buffer);

                        // REVIEW nihejazi: Consider using a trie for string matching (Aho-Corasick, etc.)
                        if (_stopWordsMap.Get(buffer) == null)
                        {
                            // Not a stop word: keep the token as it appeared in the source.
                            list.Add(srcValues[i]);
                        }
                    }

                    VBufferUtils.Copy(list, ref dst, list.Count);
                };

            return del;
        }
 /// <summary>
 /// Looks up the characters of <paramref name="memory"/> in <paramref name="pool"/>.
 /// </summary>
 /// <param name="memory">The text to look up.</param>
 /// <param name="pool">The pool to search; checked for null.</param>
 /// <returns>The result of <c>pool.Get(memory)</c>.</returns>
 public static NormStr FindInPool(ReadOnlyMemory<char> memory, NormStr.Pool pool)
 {
     Contracts.CheckValue(pool, nameof(pool));

     NormStr match = pool.Get(memory);
     return match;
 }