예제 #1
0
        /// <summary>
        /// Reads <paramref name="filename"/> line by line and appends one
        /// <c>DescriptionOfFile</c> per line to <c>allFiles.Files</c>.
        /// </summary>
        /// <remarks>
        /// Each entry's <c>Index</c> is its zero-based line position within the
        /// file being read; numbering always starts at 0 for every call.
        /// </remarks>
        /// <param name="filename">Path of the text file to read, one entry per line.</param>
        public static void LoadFilenames(string filename)
        {
            int position = 0;

            foreach (string entry in File.ReadLines(filename))
            {
                var description = new DescriptionOfFile(entry) { Index = position };
                position++;

                allFiles.Files.AddLast(description);
            }
        }
        /// <summary>
        /// Registers the file <paramref name="fullName"/> in
        /// <c>Globals.allFiles</c>, extracts its words, and appends the file to
        /// the occurrence list of every distinct word found.
        /// </summary>
        /// <remarks>
        /// The content is scanned in four passes, in this order: Latin
        /// letters/underscore runs (case-insensitive), Cyrillic
        /// letters/underscore runs (including combining acute accents), digit
        /// runs, and a fixed set of symbol-bearing names (C#, C++, F#, F*, A*,
        /// C--). Every match is passed through
        /// <c>IndexOfWords.GetNormalizedString</c> before being counted.
        /// </remarks>
        /// <param name="fullName">Path of the file to read and index.</param>
        void SplitFileIntoWords(string fullName)
        {
            var dof = new DescriptionOfFile(fullName);

            // The new entry's index is its position in the global list, i.e. the
            // list size before the entry is appended.
            dof.Index = Globals.allFiles.Files.Count;
            Globals.allFiles.Files.AddLast(dof);

            // Per-file occurrence counts keyed by normalized word. Only the keys
            // are consumed below; the counts are maintained but not read here.
            var    words   = new Dictionary<string, int>();
            string content = File.ReadAllText(fullName);

            // The pass order is kept fixed so that keys are inserted into the
            // dictionary in the same sequence as before.
            CountNormalizedMatches(words, content, @"([a-z_]+)", RegexOptions.IgnoreCase | RegexOptions.Multiline);
            CountNormalizedMatches(words, content, @"([а-яёА-ЯЁ_а́еёио́у́ы́э́ю́я́]+)", RegexOptions.Multiline);
            CountNormalizedMatches(words, content, @"([0-9]+)", RegexOptions.Multiline);
            CountNormalizedMatches(words, content, @"([C][#])|([C][+][+])|([F][#])|([F][*])|([A][*])|([C][\-][\-])", RegexOptions.Multiline);

            foreach (var uniqueWord in words.Keys)
            {
                var wrappedWord = Globals.allWords.NormalizeAndAdd(uniqueWord);
                wrappedWord.Occurences.Files.AddLast(dof);
            }
        }

        /// <summary>
        /// Finds every match of <paramref name="pattern"/> in
        /// <paramref name="content"/>, normalizes the matched text with
        /// <c>IndexOfWords.GetNormalizedString</c>, and increments its counter
        /// in <paramref name="counts"/> (inserting it with a count of 1 when it
        /// is not present yet).
        /// </summary>
        /// <param name="counts">Dictionary of normalized word to occurrence count; updated in place.</param>
        /// <param name="content">Text to scan.</param>
        /// <param name="pattern">Regular expression whose whole match is treated as one word.</param>
        /// <param name="options">Options passed through to <c>Regex.Matches</c>.</param>
        private static void CountNormalizedMatches(Dictionary<string, int> counts, string content, string pattern, RegexOptions options)
        {
            foreach (Match match in Regex.Matches(content, pattern, options))
            {
                string word = IndexOfWords.GetNormalizedString(match.Value);

                // Single lookup: 'seen' stays 0 when the word is new, so the
                // assignment below either inserts 1 or stores the incremented count.
                int seen;
                counts.TryGetValue(word, out seen);
                counts[word] = seen + 1;
            }
        }