/// <summary>
/// Registers the file in <c>Globals.allFiles</c>, reads its whole text and links every
/// distinct normalized token found in it to the file's description in <c>Globals.allWords</c>.
/// </summary>
/// <param name="fullName">
/// Path of the file to index; passed both to <c>DescriptionOfFile</c> and to
/// <see cref="File.ReadAllText(string)"/>.
/// </param>
/// <remarks>
/// The file description is appended to <c>Globals.allFiles.Files</c> before the file is
/// read, so if <see cref="File.ReadAllText(string)"/> throws, the description stays
/// registered with no words attached.
/// </remarks>
void SplitFileIntoWords(string fullName)
{
    var dof = new DescriptionOfFile(fullName);
    // The index is the position the description is about to take in the list.
    dof.Index = Globals.allFiles.Files.Count;
    Globals.allFiles.Files.AddLast(dof);

    // Per-file tally of normalized tokens. Only the distinct keys are consumed below;
    // the counts themselves are not read anywhere in this method.
    var words = new Dictionary<string, int>();
    string content = File.ReadAllText(fullName);

    // Pass 1: runs of Latin letters and underscores, case-insensitive.
    TallyNormalizedMatches(content, @"([a-z_]+)", RegexOptions.IgnoreCase | RegexOptions.Multiline, words);

    // Pass 2: runs of Cyrillic letters and underscores. The character class also lists
    // vowels that appear to carry a combining acute (stress) accent.
    // NOTE(review): '_' is in both the Latin and the Cyrillic class, so underscore runs
    // are picked up by this pass as well - confirm that is intended.
    TallyNormalizedMatches(content, @"([а-яёА-ЯЁ_а́еёио́у́ы́э́ю́я́]+)", RegexOptions.Multiline, words);

    // Pass 3: runs of decimal digits.
    TallyNormalizedMatches(content, @"([0-9]+)", RegexOptions.Multiline, words);

    // Pass 4: a fixed list of names containing punctuation that the passes above would
    // split: C#, C++, F#, F*, A*, C-- (matched case-sensitively).
    TallyNormalizedMatches(content, @"([C][#])|([C][+][+])|([F][#])|([F][*])|([A][*])|([C][\-][\-])", RegexOptions.Multiline, words);

    foreach (string uniqueWord in words.Keys)
    {
        var wrappedWord = Globals.allWords.NormalizeAndAdd(uniqueWord);
        wrappedWord.Occurences.Files.AddLast(dof);
    }
}

/// <summary>
/// Finds every match of <paramref name="pattern"/> in <paramref name="content"/>,
/// normalizes it with <c>IndexOfWords.GetNormalizedString</c> and increments its
/// entry in <paramref name="counts"/> (creating the entry at 1 on first sight).
/// </summary>
/// <param name="content">Text to scan.</param>
/// <param name="pattern">Regular expression whose whole match is taken as the token.</param>
/// <param name="options">Options forwarded to <see cref="Regex.Matches(string, string, RegexOptions)"/>.</param>
/// <param name="counts">Tally to update in place.</param>
private static void TallyNormalizedMatches(string content, string pattern, RegexOptions options, Dictionary<string, int> counts)
{
    foreach (Match match in Regex.Matches(content, pattern, options))
    {
        string word = IndexOfWords.GetNormalizedString(match.Value);

        // TryGetValue leaves 'seen' at 0 for a new word, so one lookup plus one
        // indexer write covers both the "first occurrence" and "seen before" cases.
        int seen;
        counts.TryGetValue(word, out seen);
        counts[word] = seen + 1;
    }
}
/// <summary>
/// Replaces <c>Globals.allWords</c> and <c>Globals.allFiles</c> with newly constructed,
/// instances, discarding whatever those fields referenced before.
/// </summary>
public static void InitIndex()
{
    // Word index is created and published first, then the file collection,
    // in the same order as before.
    var wordIndex = new IndexOfWords();
    Globals.allWords = wordIndex;

    var fileDescriptions = new CollectionOfFileDescriptions();
    Globals.allFiles = fileDescriptions;
}