/// <summary>
/// Creates a new set and populates it from <paramref name="text"/> by delegating to
/// <see cref="FillHashset"/>.
/// </summary>
/// <param name="text">Text passed through to <see cref="FillHashset"/>.</param>
/// <param name="ngramsType">N-gram level passed through to <see cref="FillHashset"/>.</param>
/// <returns>The newly created, populated set.</returns>
public HashSet<string> ToHashset(string text, NGramsType ngramsType)
{
    var result = new HashSet<string>();

    FillHashset(result, text, ngramsType);

    return result;
}
/// <summary>
/// Replaces the contents of <paramref name="hs"/> with the non-null terms that
/// <c>Run</c> produces for <paramref name="text"/>, plus the n-grams selected by
/// <paramref name="ngramsType"/>.
/// </summary>
/// <param name="hs">Set to fill; it is cleared before anything is added.</param>
/// <param name="text">Text handed to <c>Run</c> to obtain the terms.</param>
/// <param name="ngramsType">
/// <c>NGram_2</c> adds the results of <c>GetNGrams_2</c>; <c>NGram_3</c> adds those of
/// <c>GetNGrams_2</c> and <c>GetNGrams_3</c>; <c>NGram_4</c> adds those of
/// <c>GetNGrams_2</c>, <c>GetNGrams_3</c> and <c>GetNGrams_4</c>. Any other value adds
/// the single terms only.
/// </param>
public void FillHashset(HashSet<string> hs, string text, NGramsType ngramsType)
{
    // Obtain the terms before clearing, so the set is untouched if Run throws.
    var tokens = Run(text);
    hs.Clear();

    // Single terms are always added, whatever ngramsType is; null entries are skipped.
    foreach (var token in tokens)
    {
        if (token == null)
        {
            continue;
        }

        hs.Add(token);
    }

    // Each level includes every lower level, so build the sequence cumulatively.
    bool upTo4 = ngramsType == NGramsType.NGram_4;
    bool upTo3 = upTo4 || ngramsType == NGramsType.NGram_3;
    bool upTo2 = upTo3 || ngramsType == NGramsType.NGram_2;

    IEnumerable<string> extra = null;
    if (upTo2)
    {
        extra = GetNGrams_2(tokens);
    }
    if (upTo3)
    {
        extra = extra.Concat(GetNGrams_3(tokens));
    }
    if (upTo4)
    {
        extra = extra.Concat(GetNGrams_4(tokens));
    }

    if (extra != null)
    {
        hs.UnionWith(extra);
    }
}
/// <summary>
/// Replaces the contents of <paramref name="tfDictionary"/> with occurrence counts of
/// the terms that <c>Run</c> produces for <paramref name="text"/>, plus counts of the
/// n-grams selected by <paramref name="ngramsType"/>.
/// </summary>
/// <param name="tfDictionary">
/// Dictionary to fill (key: term or n-gram, value: number of occurrences); it is
/// cleared before anything is counted.
/// </param>
/// <param name="text">Text handed to <c>Run</c> to obtain the terms.</param>
/// <param name="ngramsType">
/// <c>NGram_2</c> counts the results of <c>GetNGrams_2</c>; <c>NGram_3</c> counts those of
/// <c>GetNGrams_2</c> and <c>GetNGrams_3</c>; <c>NGram_4</c> counts those of
/// <c>GetNGrams_2</c>, <c>GetNGrams_3</c> and <c>GetNGrams_4</c>. Any other value counts
/// the single terms only.
/// </param>
public void Fill_TF_Dictionary(Dictionary<string, int> tfDictionary, string text, NGramsType ngramsType)
{
    // Obtain the terms before clearing, so the dictionary is untouched if Run throws.
    var tokens = Run(text);
    tfDictionary.Clear();

    int seen;

    // Single terms are always counted, whatever ngramsType is.
    // NOTE(review): unlike FillHashset, terms are not null-checked here, so a null term
    // would make TryGetValue throw - presumably Run never yields null; confirm.
    foreach (var token in tokens)
    {
        tfDictionary[token] = tfDictionary.TryGetValue(token, out seen) ? seen + 1 : 1;
    }

    // Each level includes every lower level, so build the sequence cumulatively.
    bool upTo4 = ngramsType == NGramsType.NGram_4;
    bool upTo3 = upTo4 || ngramsType == NGramsType.NGram_3;
    bool upTo2 = upTo3 || ngramsType == NGramsType.NGram_2;

    IEnumerable<string> extra = null;
    if (upTo2)
    {
        extra = GetNGrams_2(tokens);
    }
    if (upTo3)
    {
        extra = extra.Concat(GetNGrams_3(tokens));
    }
    if (upTo4)
    {
        extra = extra.Concat(GetNGrams_4(tokens));
    }

    if (extra == null)
    {
        return;
    }

    foreach (var gram in extra)
    {
        tfDictionary[gram] = tfDictionary.TryGetValue(gram, out seen) ? seen + 1 : 1;
    }
}