/// <summary>
/// Registers the file in <c>Globals.allFiles</c>, reads its text, collects the distinct
/// normalized words it contains, and appends the file to the occurrence list of each of
/// those words in <c>Globals.allWords</c>.
/// </summary>
/// <param name="fullName">
/// Path of the file; passed to the <c>DescriptionOfFile</c> constructor and to
/// <c>File.ReadAllText</c>.
/// </param>
void SplitFileIntoWords(string fullName)
        {
            var dof = new DescriptionOfFile(fullName);

            // The index is the number of files registered so far, taken before this one is appended.
            dof.Index = Globals.allFiles.Files.Count;
            Globals.allFiles.Files.AddLast(dof);

            var    words   = new Dictionary <string, int> ();
            string content = File.ReadAllText(fullName);

            // Four independent passes over the same text; the order is kept because it
            // determines the order in which new words are first inserted into the dictionary.

            // Latin letters and underscores, case-insensitive.
            CountNormalizedMatches(content, @"([a-z_]+)", RegexOptions.IgnoreCase | RegexOptions.Multiline, words);

            // Cyrillic letters, underscore, and accented vowels.
            // NOTE(review): the underscore is in both this class and the Latin one, so a run
            // of underscores is matched by both passes - confirm this is intended.
            CountNormalizedMatches(content, @"([а-яёА-ЯЁ_а́еёио́у́ы́э́ю́я́]+)", RegexOptions.Multiline, words);

            // Runs of decimal digits.
            CountNormalizedMatches(content, @"([0-9]+)", RegexOptions.Multiline, words);

            // Fixed tokens containing punctuation that the passes above cannot capture as a whole:
            // C#, C++, F#, F*, A*, C--.
            CountNormalizedMatches(content, @"([C][#])|([C][+][+])|([F][#])|([F][*])|([A][*])|([C][\-][\-])", RegexOptions.Multiline, words);

            // Only the distinct keys are consumed here; the per-word counts are not read in this method.
            foreach (var uniqueWord in words.Keys)
            {
                var wrappedWord = Globals.allWords.NormalizeAndAdd(uniqueWord);
                wrappedWord.Occurences.Files.AddLast(dof);
            }
        }

        /// <summary>
        /// Finds every match of <paramref name="pattern"/> in <paramref name="content"/>,
        /// normalizes it with <c>IndexOfWords.GetNormalizedString</c>, and increments its
        /// counter in <paramref name="words"/> (inserting the word with a count of 1 when new).
        /// </summary>
        /// <param name="content">Text to scan.</param>
        /// <param name="pattern">Regular expression whose whole match is treated as one word.</param>
        /// <param name="options">Options passed to <c>Regex.Matches</c>.</param>
        /// <param name="words">Dictionary of normalized word to occurrence count; updated in place.</param>
        private static void CountNormalizedMatches(string content, string pattern, RegexOptions options, Dictionary<string, int> words)
        {
            // Explicit Match typing works whether or not MatchCollection offers a generic enumerator.
            foreach (Match match in Regex.Matches(content, pattern, options))
            {
                string word = IndexOfWords.GetNormalizedString(match.Value);

                // A single lookup: count stays 0 when the word is not present yet.
                int count;
                words.TryGetValue(word, out count);
                words [word] = count + 1;
            }
        }
Пример #2
0
 /// <summary>
 /// Replaces the global word index and the global file collection with new, freshly
 /// constructed instances.
 /// </summary>
 public static void InitIndex()
 {
     // Construct and publish the word index first, then the file collection.
     var freshWords = new IndexOfWords();
     Globals.allWords = freshWords;

     var freshFiles = new CollectionOfFileDescriptions();
     Globals.allFiles = freshFiles;
 }