コード例 #1
0
        public static (int countTotal, int countDistinct) Count_words(string text)
        {
            var stopwordsDirectory = DataacessLayer.Load_stopwords();

            var candidateWords = text.Split(new[] { ' ', '\t', '\n', '\r' },
                                            StringSplitOptions.RemoveEmptyEntries);
            var countTotal    = 0;
            var countDistinct = 0;

            var distinctWords = new HashSet <string>();

            foreach (var wordCandidate in candidateWords)
            {
                foreach (var m in Regex.Matches(wordCandidate, @"[\w\-]*"))
                {
                    if (!string.IsNullOrWhiteSpace(m.ToString()))
                    {
                        var word = m.ToString();

                        if (!stopwordsDirectory.Contains(word))
                        {
                            countTotal++;

                            if (!distinctWords.Contains(word))
                            {
                                distinctWords.Add(word);
                                countDistinct++;
                            }
                        }
                    }
                }
            }

            return(countTotal, countDistinct);
        }
コード例 #2
0
        public static (int countTotal, int countDistinct) Count_words_in_file(string filename)
        {
            var text = DataacessLayer.Read_text(filename);

            return(Count_words(text));
        }