/// <summary>
/// Splits <paramref name="text"/> into tokens, discards stop words, strips ASCII digits
/// from the remaining tokens and pairs each surviving token with its stem.
/// </summary>
/// <param name="text">Raw input text. A null or empty value yields an empty list.</param>
/// <returns>
/// One <see cref="Word"/> per surviving token, in input order, with <c>sourceWord</c> set to
/// the digit-stripped token and <c>stemmedWord</c> set to the result of
/// <c>Porter.TransformingWord</c> for that token.
/// </returns>
public List<Word> NormalizeText(string text)
{
    List<Word> wordsList = new List<Word>();
    if (string.IsNullOrEmpty(text))
    {
        return wordsList;
    }

    // TODO: extend the list of delimiters.
    char[] delimiterChars = { ' ', ',', '.', ':', ';', '\t', '(', ')', '{', '}', '"', '–', '\n' };

    // Copy the stop words into a set once so each token is checked in O(1)
    // instead of rescanning the whole token list for every stop word.
    HashSet<string> stopWordSet = new HashSet<string>();
    foreach (string stopWord in StopWords)
    {
        stopWordSet.Add(stopWord);
    }

    // "+" rather than "*": same output, but no zero-length matches at every position.
    Regex digits = new Regex("[0-9]+");

    // TODO: extend the word list.
    foreach (string token in text.Split(delimiterChars))
    {
        // NOTE(review): stop words are filtered BEFORE digits are stripped, so a token
        // such as "<stopword>1" survives as "<stopword>". Kept as in the original
        // implementation; confirm whether this is intended.
        if (stopWordSet.Contains(token))
        {
            continue;
        }

        string cleaned = digits.Replace(token, "");

        // Adjacent delimiters and all-digit tokens both end up empty here.
        if (cleaned.Length == 0)
        {
            continue;
        }

        wordsList.Add(new Word
        {
            sourceWord = cleaned,
            stemmedWord = Porter.TransformingWord(cleaned)
        });
    }

    return wordsList;
}
/// <summary>
/// Builds a frequency table keyed by stem: each distinct stem maps to a new
/// <see cref="Word"/> carrying the first source form seen for that stem and the
/// number of items in <paramref name="words"/> that share it.
/// </summary>
/// <param name="words">
/// Words to count. As a side effect, every item's <c>stemmedWord</c> is overwritten with
/// <c>Porter.TransformingWord(sourceWord)</c>. A null list yields an empty table.
/// </param>
/// <returns>Dictionary from stem to its aggregated <see cref="Word"/> entry.</returns>
private Dictionary<string, Word> GetWordsTable(List<Word> words)
{
    Dictionary<string, Word> table = new Dictionary<string, Word>();
    if (words == null)
    {
        return table;
    }

    foreach (Word word in words)
    {
        // Stems are always recomputed from the source form, so the table does not
        // depend on whatever stemmedWord value the caller supplied.
        word.stemmedWord = Porter.TransformingWord(word.sourceWord);

        // Single lookup instead of ContainsKey followed by the indexer.
        Word entry;
        if (table.TryGetValue(word.stemmedWord, out entry))
        {
            entry.count++;
        }
        else
        {
            // Store a fresh Word so counting never mutates the caller's items.
            table.Add(word.stemmedWord, new Word
            {
                sourceWord = word.sourceWord,
                stemmedWord = word.stemmedWord,
                count = 1
            });
        }
    }

    return table;
}