/// <summary>
/// Computes the difference measure between the two files from their word frequencies.
/// </summary>
/// <param name="queue">Queue whose elements hold the frequencies of the words used in the files. The queue is emptied by this call.</param>
/// <returns>100 times the square root of the sum of the squared differences between element [0] and element [1] of every entry.</returns>
public static float GetDifference(MinPriorityQueue<float, WordFrequency> queue)
{
    float sumOfSquares = 0;

    // Drain the queue one entry at a time, accumulating the squared gap between the two frequencies.
    while (queue.Count > 0)
    {
        WordFrequency entry = queue.RemoveMinimumPriority();
        float gap = entry[0] - entry[1];
        sumOfSquares += gap * gap;
    } //END OF WHILE LOOP

    float root = (float)Math.Sqrt(sumOfSquares);
    return 100 * root;
} //END OF GetDifference METHOD
/// <summary>
/// Returns a float giving the difference measure.
/// </summary>
/// <param name="data">Min-priority queue pairing a float priority with a WordFrequency object. Every element is removed by this call.</param>
/// <returns>100 multiplied by the square root of the accumulated squared differences between the two frequencies of each element.</returns>
public static float GetDifference(MinPriorityQueue<float, WordFrequency> data)
{
    float total = 0;

    while (data.Count > 0)
    {
        WordFrequency current = data.RemoveMinimumPriority();
        float delta = current[0] - current[1];
        total += delta * delta;
    }

    // Math.Sqrt works in double; narrow back to float before scaling, as the result type is float.
    double root = Math.Sqrt(total);
    return 100 * (float)root;
}
/// <summary>
/// Computes the difference measure d = 100 * sqrt((x1 - y1)^2 + ... + (xn - yn)^2)
/// over the elements of the given min-priority queue.
/// </summary>
/// <param name="queue">Queue supplying the frequency pairs; it is empty when the method returns.</param>
/// <returns>The difference measure.</returns>
public static float GetDifference(MinPriorityQueue<float, WordFrequency> queue)
{
    float squaredDistance = 0;

    while (queue.Count > 0)
    {
        WordFrequency pair = queue.RemoveMinimumPriority();

        // (xi - yi)^2 for this element
        float offset = pair[0] - pair[1];
        squaredDistance += offset * offset;
    }

    float distance = (float)Math.Sqrt(squaredDistance);
    return 100 * distance;
}
} // end GetMostCommonWords

/// <summary>
/// Finds the difference measure between the two frequencies of each word in the queue.
/// </summary>
/// <param name="frequency">Min-priority queue of word frequencies. All of its elements are removed while computing the result.</param>
/// <returns>100 times the square root of the summed squared differences between the two frequencies.</returns>
public static float GetDifference(MinPriorityQueue<float, WordFrequency> frequency)
{
    float sumSquared = 0;

    while (frequency.Count > 0)
    {
        WordFrequency item = frequency.RemoveMinimumPriority();
        float difference = item[0] - item[1];
        sumSquared += difference * difference;
    }

    return 100 * (float)Math.Sqrt(sumSquared);
} //end GetDifference
/// <summary>
/// Computes the difference measure.
/// </summary>
/// <param name="freq">Contains the frequencies of the words to use in computing the difference measure. It is drained by this call.</param>
/// <returns>A float giving the difference measure: 100 times the square root of the summed squared frequency differences.</returns>
public static float GetDifference(MinPriorityQueue<float, WordFrequency> freq)
{
    float runningSum = 0;

    while (freq.Count > 0)
    {
        WordFrequency next = freq.RemoveMinimumPriority();
        float spread = next[0] - next[1];
        runningSum += spread * spread;
    }

    return 100 * (float)Math.Sqrt(runningSum);
}
} //END OF GetDifference METHOD

/// <summary>
/// Finds the words with the highest combined frequencies across the files.
/// </summary>
/// <param name="dictionary">Gives the number of occurrences of each word in each file.</param>
/// <param name="numberOfWords">Gives the number of words in each file.</param>
/// <param name="number">Number of words to get.</param>
/// <returns>A new queue of WordFrequency elements, each prioritized by the sum of its two frequencies. The minimum-priority element is removed whenever the queue grows past <paramref name="number"/> elements.</returns>
public static MinPriorityQueue<float, WordFrequency> GetMostCommonWord(Dictionary<string, WordCount> dictionary, int[] numberOfWords, int number)
{
    MinPriorityQueue<float, WordFrequency> result = new MinPriorityQueue<float, WordFrequency>();

    // Only the counts are needed, so walk the values directly.
    foreach (WordCount occurrences in dictionary.Values)
    {
        WordFrequency candidate = new WordFrequency(occurrences, numberOfWords);
        float combined = candidate[0] + candidate[1];
        result.Add(combined, candidate);

        // Evict the least common entry once the queue exceeds the requested size.
        if (result.Count > number)
        {
            result.RemoveMinimumPriority();
        } //END OF IF STATEMENT
    } //END OF FOREACH LOOP

    return result;
} //END OF GetMostCommonWord METHOD
/// <summary>
/// Returns a MinPriorityQueue whose elements contain the frequencies in each file of the most common words.
/// </summary>
/// <param name="input">Dictionary mapping a string to a WordCount object.</param>
/// <param name="words">int array passed to each WordFrequency together with the word's WordCount.</param>
/// <param name="num">Maximum number of elements kept in the queue; the queue count is compared against it after every insertion.</param>
/// <returns>The queue built from the dictionary, with each element prioritized by the sum of its two frequencies.</returns>
public static MinPriorityQueue<float, WordFrequency> GetMostCommonWord(Dictionary<string, WordCount> input, int[] words, int num)
{
    MinPriorityQueue<float, WordFrequency> kept = new MinPriorityQueue<float, WordFrequency>();

    foreach (WordCount count in input.Values)
    {
        WordFrequency frequency = new WordFrequency(count, words);
        float priority = frequency[0] + frequency[1];
        kept.Add(priority, frequency);

        if (kept.Count <= num)
        {
            continue;
        }

        // Over capacity: drop the element with the smallest priority.
        kept.RemoveMinimumPriority();
    }

    return kept;
}
} //end ProcessFile

/// <summary>
/// Builds a min-priority queue of word frequencies, removing the minimum-priority
/// element whenever the queue holds more than the requested number of words.
/// </summary>
/// <param name="wordDictionary">This dictionary holds words and WordCounts.</param>
/// <param name="wordCount">The count of words.</param>
/// <param name="wordsToGet">The number of words to get.</param>
/// <returns>Returns the frequencies in each file of the most common words.</returns>
public static MinPriorityQueue<float, WordFrequency> GetMostCommonWords(Dictionary<string, WordCount> wordDictionary, int[] wordCount, int wordsToGet)
{
    MinPriorityQueue<float, WordFrequency> mostCommon = new MinPriorityQueue<float, WordFrequency>();

    foreach (KeyValuePair<string, WordCount> entry in wordDictionary)
    {
        WordFrequency current = new WordFrequency(entry.Value, wordCount);

        // The priority is the combined frequency of the word.
        float combined = current[0] + current[1];
        mostCommon.Add(combined, current);

        if (mostCommon.Count > wordsToGet)
        {
            mostCommon.RemoveMinimumPriority();
        } //end if
    } // foreach

    return mostCommon;
} // end GetMostCommonWords
/// <summary>
/// Gets the words with the highest combined frequencies.
/// </summary>
/// <param name="d">A dictionary from string to WordCount giving the number of occurrences of each word in each file.</param>
/// <param name="wordCount">An int array of size 2 giving the number of words in each file.</param>
/// <param name="getNum">An int giving the number of words to get.</param>
/// <returns>A min-priority queue whose elements contain the frequencies in each file of the most common words, and whose priorities are the combined frequencies of each of these words.</returns>
public static MinPriorityQueue<float, WordFrequency> GetMostCommonWords(Dictionary<string, WordCount> d, int[] wordCount, int getNum)
{
    MinPriorityQueue<float, WordFrequency> topWords = new MinPriorityQueue<float, WordFrequency>();

    foreach (KeyValuePair<string, WordCount> pair in d)
    {
        WordFrequency wordFrequency = new WordFrequency(pair.Value, wordCount);
        float priority = wordFrequency[0] + wordFrequency[1];
        topWords.Add(priority, wordFrequency);

        // Keep at most getNum elements by discarding the lowest combined frequency.
        if (topWords.Count > getNum)
        {
            topWords.RemoveMinimumPriority();
        }
    }

    return topWords;
}
/// <summary>
/// Builds a MinPriorityQueue whose elements contain the frequencies in each file of the most common words,
/// and whose priorities are the combined frequencies of each of these words.
/// </summary>
/// <param name="dictionary">Dictionary to take words from.</param>
/// <param name="words">Total number of words.</param>
/// <param name="number">Number of words needed to get.</param>
/// <returns>Min-priority queue holding the frequencies.</returns>
public static MinPriorityQueue<float, WordFrequency> GetMostCommonWords(Dictionary<string, WordCount> dictionary, int[] words, int number)
{
    MinPriorityQueue<float, WordFrequency> frequencies = new MinPriorityQueue<float, WordFrequency>();

    foreach (KeyValuePair<string, WordCount> entry in dictionary)
    {
        WordFrequency candidate = new WordFrequency(entry.Value, words);
        float combinedFrequency = candidate[0] + candidate[1];
        frequencies.Add(combinedFrequency, candidate);

        if (frequencies.Count <= number)
        {
            continue;
        }

        // Too many elements: remove the one with the smallest combined frequency.
        frequencies.RemoveMinimumPriority();
    }

    return frequencies;
}