コード例 #1
0
        // probability for just one file
        /// <summary>
        /// Builds the metrics list for a category that is represented by a single training file.
        /// </summary>
        /// <param name="categoryName">Name stored in the first (category-level) entry of the result.</param>
        /// <param name="uniqueVocabulary">Vocabulary list; only its element count is used here.</param>
        /// <param name="categoryFile">
        /// Maps each word to an integer count (presumably its number of occurrences in the file; confirm against the caller).
        /// </param>
        /// <param name="numberOfTrainingFiles">
        /// Divisor of the category probability, which is computed as 1 / numberOfTrainingFiles.
        /// Zero is not rejected; floating-point division then yields an infinite probability.
        /// </param>
        /// <returns>
        /// A list whose first element describes the category itself (name, 1, category probability, vocabulary size,
        /// total count), followed by one element per entry of <paramref name="categoryFile"/> carrying the word,
        /// its count and the probability (count + 1) / (vocabulary size + total count).
        /// </returns>
        private List<WordMetrics> CalculateProbability(string categoryName, List<string> uniqueVocabulary,
                                                       Dictionary<string, int> categoryFile, int numberOfTrainingFiles)
        {
            // List<T> exposes Count as a property; the LINQ Count() extension is unnecessary.
            int uniqueWords = uniqueVocabulary.Count;

            // Total of all counts stored for this category.
            int allCatWords = 0;
            foreach (int occurrences in categoryFile.Values)
            {
                allCatWords += occurrences;
            }

            double categoryProbability = 1 / (double)numberOfTrainingFiles;

            // The final size is known up front: one category entry plus one entry per word.
            List<WordMetrics> l_wordMetrics = new List<WordMetrics>(categoryFile.Count + 1);

            // The first entry always describes the category and its probability.
            l_wordMetrics.Add(new WordMetrics(categoryName, 1, categoryProbability, uniqueWords, allCatWords));

            // Loop-invariant denominator; the formula has the shape of add-one smoothing.
            double denominator = uniqueWords + allCatWords;

            foreach (KeyValuePair<string, int> word in categoryFile)
            {
                double probabilityResult = (word.Value + 1) / denominator;
                l_wordMetrics.Add(new WordMetrics(word.Key, word.Value, probabilityResult, uniqueWords, allCatWords));
            }

            return l_wordMetrics;
        }
コード例 #2
0
        // probability for more than one file in the category
        /// <summary>
        /// Builds the metrics list for a category that is represented by several training files.
        /// The per-file dictionaries are merged first; counts of words that appear in more than one file are added up.
        /// </summary>
        /// <param name="categoryName">Name stored in the first (category-level) entry of the result.</param>
        /// <param name="uniqueVocabulary">Vocabulary list; only its element count is used here.</param>
        /// <param name="categoryFiles">
        /// One dictionary per file, each mapping a word to an integer count
        /// (presumably its number of occurrences in that file; confirm against the caller).
        /// </param>
        /// <param name="numberOfTrainingFiles">
        /// Divisor of the category probability, which is computed as categoryFiles.Count / numberOfTrainingFiles.
        /// Zero is not rejected; floating-point division then yields an infinite or NaN probability.
        /// </param>
        /// <returns>
        /// A list whose first element describes the category itself (name, number of files, category probability,
        /// vocabulary size, total count), followed by one element per distinct word carrying the word, its merged
        /// count and the probability (count + 1) / (vocabulary size + total count).
        /// </returns>
        private List<WordMetrics> CalculateProbability(string categoryName, List<string> uniqueVocabulary,
                                                       List<Dictionary<string, int>> categoryFiles, int numberOfTrainingFiles)
        {
            // List<T> exposes Count as a property; the LINQ Count() extension is unnecessary.
            int uniqueWords = uniqueVocabulary.Count;
            int fileCount   = categoryFiles.Count;

            double categoryProbability = fileCount / (double)numberOfTrainingFiles;

            // Merge the per-file dictionaries into one, summing the counts of repeated words.
            // The grand total is accumulated in the same pass: the sum of the merged values
            // equals the sum of every individual value.
            Dictionary<string, int> dictCopy = new Dictionary<string, int>();
            int allCatWords = 0;

            foreach (Dictionary<string, int> dictionary in categoryFiles)
            {
                foreach (KeyValuePair<string, int> kvp in dictionary)
                {
                    // TryGetValue leaves 0 in existingCount for an unseen word, so one lookup
                    // plus one indexer write covers both the "new key" and "existing key" cases.
                    int existingCount;
                    dictCopy.TryGetValue(kvp.Key, out existingCount);
                    dictCopy[kvp.Key] = existingCount + kvp.Value;

                    allCatWords += kvp.Value;
                }
            }

            // The final size is known up front: one category entry plus one entry per distinct word.
            List<WordMetrics> l_wordMetrics = new List<WordMetrics>(dictCopy.Count + 1);

            // The first entry always describes the category and its probability.
            l_wordMetrics.Add(new WordMetrics(categoryName, fileCount, categoryProbability, uniqueWords, allCatWords));

            // Loop-invariant denominator; the formula has the shape of add-one smoothing.
            double denominator = uniqueWords + allCatWords;

            foreach (KeyValuePair<string, int> word in dictCopy)
            {
                double probabilityResult = (word.Value + 1) / denominator;
                l_wordMetrics.Add(new WordMetrics(word.Key, word.Value, probabilityResult, uniqueWords, allCatWords));
            }

            return l_wordMetrics;
        }