Пример #1
0
        /// <summary>
        /// Convenience wrapper around <c>FillHashset</c>: allocates a fresh, empty
        /// set, lets <c>FillHashset</c> populate it from <paramref name="text"/>,
        /// and hands the set back to the caller.
        /// </summary>
        /// <param name="text">Input text, forwarded unchanged to <c>FillHashset</c>.</param>
        /// <param name="ngramsType">N-gram mode, forwarded unchanged to <c>FillHashset</c>.</param>
        /// <returns>The newly created set after <c>FillHashset</c> has filled it.</returns>
        public HashSet<string> ToHashset(string text, NGramsType ngramsType)
        {
            var result = new HashSet<string>();
            FillHashset(result, text, ngramsType);

            return result;
        }
Пример #2
0
        /// <summary>
        /// Replaces the contents of <paramref name="hs"/> with the non-null terms
        /// returned by <c>Run(text)</c> plus, depending on
        /// <paramref name="ngramsType"/>, the n-grams built from those terms.
        /// </summary>
        /// <param name="hs">Set to fill. It is cleared only after <c>Run</c> has returned.</param>
        /// <param name="text">Input passed to <c>Run</c>.</param>
        /// <param name="ngramsType">
        /// <c>NGram_2</c> adds the output of <c>GetNGrams_2</c>; <c>NGram_3</c> adds
        /// <c>GetNGrams_2</c> followed by <c>GetNGrams_3</c>; <c>NGram_4</c> adds
        /// <c>GetNGrams_2</c>, <c>GetNGrams_3</c> and <c>GetNGrams_4</c>. Any other
        /// value leaves only the single terms in the set.
        /// </summary>
        public void FillHashset(HashSet<string> hs, string text, NGramsType ngramsType)
        {
            var terms = Run(text);

            hs.Clear();

            // Single terms are always included; null entries are skipped.
            foreach (var term in terms)
            {
                if (term == null)
                {
                    continue;
                }
                hs.Add(term);
            }

            // Higher n-gram modes are cumulative: each one also contains the shorter n-grams.
            IEnumerable<string> extra = null;
            if (ngramsType == NGramsType.NGram_2)
            {
                extra = GetNGrams_2(terms);
            }
            else if (ngramsType == NGramsType.NGram_3)
            {
                extra = GetNGrams_2(terms).Concat(GetNGrams_3(terms));
            }
            else if (ngramsType == NGramsType.NGram_4)
            {
                var upToThree = GetNGrams_2(terms).Concat(GetNGrams_3(terms));
                extra = upToThree.Concat(GetNGrams_4(terms));
            }

            if (extra != null)
            {
                // UnionWith adds every element of the sequence, ignoring duplicates.
                hs.UnionWith(extra);
            }
        }
Пример #3
0
        /// <summary>
        /// Rebuilds <paramref name="tfDictionary"/> as an occurrence-count table:
        /// every non-null term returned by <c>Run(text)</c> is counted, and,
        /// depending on <paramref name="ngramsType"/>, so is every n-gram built
        /// from those terms.
        /// </summary>
        /// <param name="tfDictionary">
        /// Dictionary to fill. It is cleared only after <c>Run</c> has returned;
        /// afterwards each key maps to the number of times it was encountered.
        /// </param>
        /// <param name="text">Input passed to <c>Run</c>.</param>
        /// <param name="ngramsType">
        /// <c>NGram_2</c> counts the output of <c>GetNGrams_2</c>; <c>NGram_3</c>
        /// counts <c>GetNGrams_2</c> followed by <c>GetNGrams_3</c>; <c>NGram_4</c>
        /// counts <c>GetNGrams_2</c>, <c>GetNGrams_3</c> and <c>GetNGrams_4</c>.
        /// Any other value counts the single terms only.
        /// </param>
        public void Fill_TF_Dictionary(Dictionary<string, int> tfDictionary, string text, NGramsType ngramsType)
        {
            var terms = Run(text);

            tfDictionary.Clear();
            int count;

            // Single terms are always counted.
            foreach (var term in terms)
            {
                // A null key makes Dictionary throw ArgumentNullException, so null
                // terms are skipped here, matching what FillHashset does.
                if (term == null)
                {
                    continue;
                }

                // TryGetValue sets count to 0 when the key is absent, so a single
                // indexer assignment covers both "first seen" and "seen before".
                tfDictionary.TryGetValue(term, out count);
                tfDictionary[term] = count + 1;
            }

            // Higher n-gram modes are cumulative: each one also contains the shorter n-grams.
            IEnumerable<string> ngrams = null;
            switch (ngramsType)
            {
                case NGramsType.NGram_2:
                    ngrams = GetNGrams_2(terms);
                    break;

                case NGramsType.NGram_3:
                    ngrams = GetNGrams_2(terms).Concat(GetNGrams_3(terms));
                    break;

                case NGramsType.NGram_4:
                    ngrams = GetNGrams_2(terms).Concat(GetNGrams_3(terms)).Concat(GetNGrams_4(terms));
                    break;
            }

            if (ngrams == null)
            {
                return;
            }

            foreach (var ngram in ngrams)
            {
                tfDictionary.TryGetValue(ngram, out count);
                tfDictionary[ngram] = count + 1;
            }
        }