/// <summary>
/// Analyses each input word, merges analysed words that share the same <c>Value</c>,
/// and returns up to <paramref name="count"/> of the merged words accepted by
/// <c>wordSelectorSettings.CanUseThisWord</c>.
/// </summary>
/// <param name="words">Source words; each one is passed to <c>mystem.GetWords</c> separately.</param>
/// <param name="count">Maximum number of words to return.</param>
/// <returns>
/// The selected words, each with <c>Count</c> set to the size of its group and
/// <c>Frequency</c> set to <c>Count</c> divided by the number of input words that
/// produced a non-empty analysis.
/// </returns>
public Word[] GetWords(IEnumerable<string> words, int count)
{
    // Number of input words for which the analyser returned at least one result.
    var analysedTotal = 0;

    var parsed = words
        .Select(source => mystem.GetWords(source))
        .Where(models =>
        {
            var hasResult = models.Count != 0;
            if (hasResult)
            {
                analysedTotal++;
            }

            return hasResult;
        })
        // Only the first analysis result of every word is used.
        .Select(models => ParseWord(models[0]));

    // GroupBy consumes the whole sequence before yielding its first group, so
    // analysedTotal is final by the time any Frequency is computed below.
    var merged = parsed
        .GroupBy(word => word.Value)
        .Select(sameValue =>
        {
            var representative = sameValue.First();
            representative.Count = sameValue.Count();
            return representative;
        });

    var selected = merged
        .Where(wordSelectorSettings.CanUseThisWord)
        .Take(count);

    return selected
        .Select(word =>
        {
            word.Frequency = (double)word.Count / analysedTotal;
            return word;
        })
        .ToArray();
}
/// <summary>
/// Checks that analysing a fixed phrase with a new <c>Mysteam</c> instance
/// yields at least one result.
/// </summary>
public void GetResultTest()
{
    // Arrange
    string phrase = "включи тестовый режим";
    var stemmer = new Mysteam();

    // Act
    var analysis = stemmer.GetWords(phrase);

    // Assert
    Assert.IsTrue(analysis.Count > 0);
}
/// <summary>
/// Analyses the given words with a new <c>Mysteam</c> instance.
/// </summary>
/// <param name="words">
/// Words to analyse; they are converted to a single argument by <c>WordsToString</c>
/// before being handed to the analyser.
/// </param>
/// <returns>The list returned by <c>Mysteam.GetWords</c>.</returns>
private List<WordModel> GetInfoAboutWords(string[] words)
{
    // The previously allocated StringBuilder was never read or written, so it was dropped.
    var mst = new Mysteam();
    return mst.GetWords(WordsToString(words));
}
/// <summary>
/// Joins the tokens with single spaces, analyses the joined text with <c>mystem</c>,
/// and keeps the source text of the words that pass both filters.
/// </summary>
/// <param name="tokens">Tokens to analyse.</param>
/// <returns>
/// Source texts of the analysed words whose first lexeme has a <c>GramPart</c>
/// contained in <c>allowedWorldType</c> and whose text is longer than 3 characters.
/// </returns>
public IEnumerable<string> Filtering(IEnumerable<string> tokens)
{
    var joinedText = string.Join(" ", tokens);
    var analysedWords = mystem.GetWords(joinedText);

    // NOTE(review): indexing Lexems[0] assumes every analysed word has at least
    // one lexeme - confirm against the analyser's contract.
    return from analysed in analysedWords
           where allowedWorldType.Contains(analysed.Lexems[0].GramPart)
           select analysed.SourceWord.Text into text
           where text.Length > 3
           select text;
}
/// <summary>
/// Converts text into tokens by running it through <c>mystem</c>.
/// </summary>
/// <param name="str">Text to tokenise; every "\r\n" sequence is replaced with a space first.</param>
/// <returns>
/// One token per analysed word: the <c>Lex</c> of the word's first analysis entry,
/// or the lower-cased source text when there is no entry or its <c>Lex</c> is null.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="str"/> is null.</exception>
public IEnumerable<string> GetTokens(string str)
{
    if (str == null)
    {
        // Name the offending parameter so the failure is diagnosable from the message.
        throw new ArgumentNullException(nameof(str));
    }

    var replace = str.Replace("\r\n", " ");
    return mystem.GetWords(replace)
        .Select(el => el.SourceWord.Analysis.FirstOrDefault()?.Lex ?? el.SourceWord.Text.ToLower());
}
/// <summary>
/// Analyses the given words with a new <c>Mysteam</c> instance, running the
/// analysis inside <c>Result.Of</c>.
/// </summary>
/// <param name="words">
/// Words to analyse; they are converted to a single argument by <c>WordsToString</c>.
/// </param>
/// <returns>The value produced by <c>Result.Of</c> for the analysis delegate.</returns>
private Result<List<WordModel>> GetInfoAboutWords(string[] words)
{
    return Result.Of(() =>
    {
        // The analyser is created before the words are converted, as in the original order.
        var stemmer = new Mysteam();
        var text = WordsToString(words);
        return stemmer.GetWords(text);
    });
}
/// <summary>
/// Reads a text file and returns a lower-cased lexeme for every analysed word
/// whose source text is longer than 2 characters.
/// </summary>
/// <param name="path">Path of the file to read.</param>
/// <returns>
/// For each qualifying word, the lower-cased <c>Lex</c> of its first lexeme, or
/// the lower-cased source text when that lexeme cannot be read.
/// </returns>
public static List<string> StemSrcText(string path)
{
    var result = new List<string>();
    var mst = new Mysteam();

    var fileText = File.ReadAllLines(path);

    // Join lines with a space: an empty separator glued the last word of each
    // line to the first word of the next one before analysis.
    var text = string.Join(" ", fileText);

    foreach (var word in mst.GetWords(text).Where(x => x.SourceWord.Text.Length > 2))
    {
        try
        {
            result.Add(word.Lexems[0].SourceLex.Lex.ToLower());
        }
        catch
        {
            // Best-effort fallback kept from the original: if the first lexeme
            // cannot be read for any reason, use the word's source text instead.
            result.Add(word.SourceWord.Text.ToLower());
        }
    }

    return result;
}
/// <summary>
/// Decides whether a word should be excluded based on the <c>GramPart</c> of its first lexeme.
/// </summary>
/// <param name="word">Word to check.</param>
/// <returns>
/// <c>true</c> only when <c>IsCyrillicWord</c> accepts the word, <c>mystem</c> returns
/// exactly one result for it, and that result's first lexeme has a <c>GramPart</c>
/// not contained in <c>allowedGramParts</c>; otherwise <c>false</c>.
/// </returns>
public bool ShouldExclude(string word)
{
    var exclude = false;

    if (IsCyrillicWord(word))
    {
        var analysis = mystem.GetWords(word);

        // Anything other than exactly one analysis result is never excluded.
        if (analysis.Count == 1)
        {
            var gramPart = analysis[0].Lexems[0].GramPart;
            exclude = !allowedGramParts.Contains(gramPart);
        }
    }

    return exclude;
}
/// <summary>
/// Decides whether a word should be excluded based on the <c>GramPart</c> of its
/// first lexeme, reporting a failure when <c>mystem</c> is not available.
/// </summary>
/// <param name="word">Word to check.</param>
/// <returns>
/// A failed result when <c>mystem</c> is null. Otherwise a successful result holding
/// <c>true</c> only when <c>IsCyrillicWord</c> accepts the word, <c>mystem</c> returns
/// exactly one result for it, and that result's first lexeme has a <c>GramPart</c>
/// not contained in <c>allowedGramParts</c>; in all other cases it holds <c>false</c>.
/// </returns>
public Result<bool> ShouldExclude(string word)
{
    if (mystem == null)
    {
        return Result.Fail<bool>("Failed to load external library \"Mystem\"");
    }

    var exclude = false;

    if (IsCyrillicWord(word))
    {
        var analysis = mystem.GetWords(word);

        // Anything other than exactly one analysis result is never excluded.
        if (analysis.Count == 1)
        {
            var firstLexem = analysis[0].Lexems[0];
            exclude = !allowedGramParts.Contains(firstLexem.GramPart);
        }
    }

    return Result.Ok(exclude);
}