/// <summary>
/// Builds the probability table for a category that is backed by exactly one training file.
/// </summary>
/// <param name="categoryName">Name stored on the leading category entry of the result.</param>
/// <param name="uniqueVocabulary">Vocabulary of the training set; only its element count is used here.</param>
/// <param name="categoryFile">Word-to-frequency map of the category's single file.</param>
/// <param name="numberOfTrainingFiles">Total number of training files; divisor of the category probability.</param>
/// <returns>
/// A list whose first element describes the category itself (count 1 and its probability),
/// followed by one element per word in <paramref name="categoryFile"/>.
/// </returns>
private List<WordMetrics> CalculateProbability(string categoryName, List<string> uniqueVocabulary, Dictionary<string, int> categoryFile, int numberOfTrainingFiles)
{
    // Number of unique words in the training set.
    int uniqueWords = uniqueVocabulary.Count;

    // One file belongs to this category, so its share of the training set is 1 / total.
    // NOTE(review): numberOfTrainingFiles is not validated; the division is floating-point,
    // so a value of 0 produces a non-finite result rather than an exception.
    double categoryProbability = 1 / (double)numberOfTrainingFiles;

    // Total number of word occurrences in the category.
    int allCatWords = 0;
    foreach (KeyValuePair<string, int> entry in categoryFile)
    {
        allCatWords += entry.Value;
    }

    // The final size is known up front: one category entry plus one entry per word.
    List<WordMetrics> metrics = new List<WordMetrics>(categoryFile.Count + 1);

    // The first entry always describes the category and its probability.
    metrics.Add(new WordMetrics(categoryName, 1, categoryProbability, uniqueWords, allCatWords));

    // Add-one smoothed estimate per word: (count + 1) / (vocabulary size + words in category).
    double denominator = uniqueWords + allCatWords;
    foreach (KeyValuePair<string, int> entry in categoryFile)
    {
        double probabilityResult = (entry.Value + 1) / denominator;
        metrics.Add(new WordMetrics(entry.Key, entry.Value, probabilityResult, uniqueWords, allCatWords));
    }

    return metrics;
}
/// <summary>
/// Builds the probability table for a category that is backed by several training files.
/// The per-file frequency maps are merged first; frequencies of words that occur in more
/// than one file are added together.
/// </summary>
/// <param name="categoryName">Name stored on the leading category entry of the result.</param>
/// <param name="uniqueVocabulary">Vocabulary of the training set; only its element count is used here.</param>
/// <param name="categoryFiles">One word-to-frequency map per file in the category.</param>
/// <param name="numberOfTrainingFiles">Total number of training files; divisor of the category probability.</param>
/// <returns>
/// A list whose first element describes the category itself (its file count and probability),
/// followed by one element per distinct word across <paramref name="categoryFiles"/>.
/// </returns>
private List<WordMetrics> CalculateProbability(string categoryName, List<string> uniqueVocabulary, List<Dictionary<string, int>> categoryFiles, int numberOfTrainingFiles)
{
    // Number of unique words in the training set.
    int uniqueWords = uniqueVocabulary.Count;
    int fileCount = categoryFiles.Count;

    // Share of the training files that belong to this category.
    // NOTE(review): numberOfTrainingFiles is not validated; the division is floating-point,
    // so a value of 0 produces a non-finite result rather than an exception.
    double categoryProbability = fileCount / (double)numberOfTrainingFiles;

    // Merge all dictionaries of the category; equal keys have their values summed.
    Dictionary<string, int> merged = new Dictionary<string, int>();
    foreach (Dictionary<string, int> file in categoryFiles)
    {
        foreach (KeyValuePair<string, int> entry in file)
        {
            // TryGetValue leaves 'current' at 0 when the key is absent, so a single
            // lookup plus one indexer write covers both the insert and the update case.
            int current;
            merged.TryGetValue(entry.Key, out current);
            merged[entry.Key] = current + entry.Value;
        }
    }

    // Total number of word occurrences in the category.
    int allCatWords = 0;
    foreach (KeyValuePair<string, int> entry in merged)
    {
        allCatWords += entry.Value;
    }

    // The final size is known up front: one category entry plus one entry per distinct word.
    List<WordMetrics> metrics = new List<WordMetrics>(merged.Count + 1);

    // The first entry always describes the category and its probability.
    metrics.Add(new WordMetrics(categoryName, fileCount, categoryProbability, uniqueWords, allCatWords));

    // Add-one smoothed estimate per word: (count + 1) / (vocabulary size + words in category).
    double denominator = uniqueWords + allCatWords;
    foreach (KeyValuePair<string, int> entry in merged)
    {
        double probabilityResult = (entry.Value + 1) / denominator;
        metrics.Add(new WordMetrics(entry.Key, entry.Value, probabilityResult, uniqueWords, allCatWords));
    }

    return metrics;
}