/// <summary>
/// Calculates a priority for a candidate word from how frequently the character it changes to acts as the 'changed from' character in substitutions between adjacent words.
/// </summary>
/// <param name="currentWord">The current word being traversed from.</param>
/// <param name="candidateWord">The candidate word to calculate the priority for.</param>
/// <param name="destinationWord">The destination word (not referenced by this calculation).</param>
/// <param name="distanceFromSourceToCandidateWord">The distance (in graph edges) from the source word to the candidate word (not referenced by this calculation).</param>
/// <returns>The priority, computed as 1.0 minus the ratio of the change-to character's 'from' frequency to the maximum 'from' character frequency.</returns>
protected Double CalculatePopularityOfChangeToCharacterPriority(String currentWord, String candidateWord, String destinationWord, Int32 distanceFromSourceToCandidateWord)
{
    // The second item of the differing character pair is the character the candidate word changes to
    var differingCharacters = wordUtilities.FindDifferingCharacters(currentWord, candidateWord);
    Char candidateCharacter = differingCharacters.Item2;

    // Express the frequency of that character as a fraction of the maximum 'from' character frequency
    Double characterFrequency = Convert.ToDouble(fromCharacterFrequencies.GetFrequency(candidateCharacter));
    Double maximumFrequency = Convert.ToDouble(maximumFromCharacterFrequency);
    Double relativeFrequency = characterFrequency / maximumFrequency;

    return 1.0 - relativeFrequency;
}
/// <summary>
/// Reads a set of words from a stream, and adds the words and statistics pertaining to the words to a set of data structures.
/// </summary>
/// <param name="reader">The stream reader used to read the set of words (allows specifying a mocked reader for unit testing).  The reader is disposed once all words have been read.</param>
/// <param name="trieBuilder">The trie builder to use to add the words to the trie (allows specifying a mocked builder for unit testing).</param>
/// <param name="wordFilterFunction">A Func to filter whether or not the specified word should be added to the trie.  Accepts the word as a parameter, and returns a boolean indicating whether that word should be added to the trie.</param>
/// <param name="allWordsTrieRoot">The root of a character trie to populate with the words.</param>
/// <param name="allWords">A HashSet to populate with the words.</param>
/// <param name="fromCharacterFrequencies">A FrequencyTable to populate with the number of times each character is the 'from' character in a substitution.</param>
/// <param name="characterSubstitutionFrequencies">A FrequencyTable to populate with the number of times each pair of characters in a substitution occur.</param>
public void PopulateAdjacentWordDataStructures(IStreamReader reader, ICharacterTrieBuilder trieBuilder, Func<String, Boolean> wordFilterFunction, Dictionary<Char, TrieNode<Char>> allWordsTrieRoot, HashSet<String> allWords, FrequencyTable<Char> fromCharacterFrequencies, FrequencyTable<CharacterSubstitution> characterSubstitutionFrequencies)
{
    // Read all words and add them to the HashSet and trie
    using (reader)
    {
        while (reader.EndOfStream == false)
        {
            String currentWord = reader.ReadLine();

            // HashSet.Add() returns false when the word is already present, so duplicate words are
            //   skipped without a separate Contains() lookup.  Words rejected by the filter are never added.
            if (wordFilterFunction.Invoke(currentWord) == true && allWords.Add(currentWord) == true)
            {
                trieBuilder.AddWord(allWordsTrieRoot, currentWord, true);
            }
        }
    }

    // Populate the frequency tables
    CharacterTrieUtilities trieUtilities = new CharacterTrieUtilities();
    WordUtilities wordUtilities = new WordUtilities();
    foreach (String currentWord in allWords)
    {
        foreach (String adjacentWord in trieUtilities.FindAdjacentWords(allWordsTrieRoot, currentWord))
        {
            // Find the characters which were substituted between the word and the adjacent word
            Tuple<Char, Char> differingCharacters = wordUtilities.FindDifferingCharacters(currentWord, adjacentWord);
            Char fromCharacter = differingCharacters.Item1;
            Char toCharacter = differingCharacters.Item2;

            // Record the substitution in both frequency tables
            fromCharacterFrequencies.Increment(fromCharacter);
            characterSubstitutionFrequencies.Increment(new CharacterSubstitution(fromCharacter, toCharacter));
        }
    }
}