/// <summary>
/// Initialises a new instance of the Algorithms.CandidateWordPriorityCalculator class.
/// </summary>
/// <param name="maximumSourceWordToCandidateWordDistance">The maximum possible distance between any two words in the graph.</param>
/// <param name="sourceWordToCandidateWordDistanceWeight">The weight which should be applied when calculating the overall priority, to the distance from the source word to the candidate word (the 'g(n)' score in the A* algorithm).</param>
/// <param name="numberOfCharactersMatchingDestinationWeight">The weight which should be applied when calculating the overall priority, to the number of matching characters between the candidate word and the destination word.</param>
/// <param name="popularityOfChangeToCharacterWeight">The weight which should be applied when calculating the overall priority, to the frequency that the character changed to in the candidate word is the 'changed from' character in a substitution between adjacent words.</param>
/// <param name="popularityOfCharacterChangeWeight">The weight which should be applied when calculating the overall priority, to the frequency/popularity of the character substitution (i.e. character changed from current word to candidate word).</param>
/// <param name="allWordsTrieRoot">The root of a character trie containing all the words in the graph.</param>
/// <param name="fromCharacterFrequencies">A FrequencyTable containing the number of times each character is the 'from' character in a substitution between adjacent words (i.e. represented by an edge of the graph).</param>
/// <param name="characterSubstitutionFrequencies">A FrequencyTable containing the number of times each pair of characters in a substitution between adjacent words (i.e. represented by an edge of the graph) occurs.</param>
/// <exception cref="System.ArgumentException">Parameter 'maximumSourceWordToCandidateWordDistance' is less than 1.</exception>
/// <exception cref="System.ArgumentException">Parameter 'sourceWordToCandidateWordDistanceWeight' is less than 0.</exception>
/// <exception cref="System.ArgumentException">Parameter 'numberOfCharactersMatchingDestinationWeight' is less than 0.</exception>
/// <exception cref="System.ArgumentException">Parameter 'popularityOfChangeToCharacterWeight' is less than 0.</exception>
/// <exception cref="System.ArgumentException">Parameter 'popularityOfCharacterChangeWeight' is less than 0.</exception>
/// <exception cref="System.ArgumentException">All of parameters 'sourceWordToCandidateWordDistanceWeight', 'numberOfCharactersMatchingDestinationWeight', 'popularityOfChangeToCharacterWeight', and 'popularityOfCharacterChangeWeight' are 0 (at least one must be greater than 0).</exception>
public CandidateWordPriorityCalculator(Int32 maximumSourceWordToCandidateWordDistance, Int32 sourceWordToCandidateWordDistanceWeight, Int32 numberOfCharactersMatchingDestinationWeight, Int32 popularityOfChangeToCharacterWeight, Int32 popularityOfCharacterChangeWeight, Dictionary<Char, TrieNode<Char>> allWordsTrieRoot, FrequencyTable<Char> fromCharacterFrequencies, FrequencyTable<CharacterSubstitution> characterSubstitutionFrequencies)
{
    // Validate the arguments.  The checks run in parameter order so that the first invalid argument is the one reported.
    if (maximumSourceWordToCandidateWordDistance < 1)
    {
        throw new ArgumentException("Parameter 'maximumSourceWordToCandidateWordDistance' must be greater than or equal to 1.", "maximumSourceWordToCandidateWordDistance");
    }
    if (sourceWordToCandidateWordDistanceWeight < 0)
    {
        throw new ArgumentException("Parameter 'sourceWordToCandidateWordDistanceWeight' must be greater than or equal to 0.", "sourceWordToCandidateWordDistanceWeight");
    }
    if (numberOfCharactersMatchingDestinationWeight < 0)
    {
        throw new ArgumentException("Parameter 'numberOfCharactersMatchingDestinationWeight' must be greater than or equal to 0.", "numberOfCharactersMatchingDestinationWeight");
    }
    if (popularityOfChangeToCharacterWeight < 0)
    {
        throw new ArgumentException("Parameter 'popularityOfChangeToCharacterWeight' must be greater than or equal to 0.", "popularityOfChangeToCharacterWeight");
    }
    if (popularityOfCharacterChangeWeight < 0)
    {
        throw new ArgumentException("Parameter 'popularityOfCharacterChangeWeight' must be greater than or equal to 0.", "popularityOfCharacterChangeWeight");
    }
    // The individual weights are all known to be non-negative here, so this condition means every weight is exactly 0
    if (sourceWordToCandidateWordDistanceWeight == 0 && numberOfCharactersMatchingDestinationWeight == 0 && popularityOfChangeToCharacterWeight == 0 && popularityOfCharacterChangeWeight == 0)
    {
        throw new ArgumentException("At least one of parameters 'sourceWordToCandidateWordDistanceWeight', 'numberOfCharactersMatchingDestinationWeight', 'popularityOfChangeToCharacterWeight', and 'popularityOfCharacterChangeWeight' must be greater than 0.");
    }

    // Initialize priority functions and weights.
    // The two arrays are parallel: the weight at index i belongs to the function at index i.
    Int32[] weights =
    {
        sourceWordToCandidateWordDistanceWeight,
        numberOfCharactersMatchingDestinationWeight,
        popularityOfChangeToCharacterWeight,
        popularityOfCharacterChangeWeight
    };
    Func<String, String, String, Int32, Double>[] functions =
    {
        CalculateSourceWordToCandidateWordDistancePriority,
        CalculateNumberOfCharactersMatchingDestinationPriority,
        CalculatePopularityOfChangeToCharacterPriority,
        CalculatePopularityOfCharacterChangePriority
    };

    priorityFunctions = new List<Func<String, String, String, Int32, Double>>();
    priorityFunctionWeights = new List<Int32>();
    functionWeightsTotal = 0;
    for (Int32 i = 0; i < weights.Length; i++)
    {
        priorityFunctionWeights.Add(weights[i]);
        priorityFunctions.Add(functions[i]);
        // Each weight is widened to Int64 before being added to the running total
        functionWeightsTotal += Convert.ToInt64(weights[i]);
    }

    this.maximumSourceWordToCandidateWordDistance = maximumSourceWordToCandidateWordDistance;
    this.allWordsTrieRoot = allWordsTrieRoot;
    this.fromCharacterFrequencies = fromCharacterFrequencies;
    this.characterSubstitutionFrequencies = characterSubstitutionFrequencies;
    wordUtilities = new WordUtilities();
    // Called last, after the frequency table fields above have been assigned
    PopulateMaximumFrequencyMembers();
}
/// <summary>
/// Reads a set of words from a stream, and adds the words and statistics pertaining to the words to a set of data structures.
/// </summary>
/// <remarks>The specified reader is disposed once all the words have been read from it.</remarks>
/// <param name="reader">The stream reader to use to read the set of words (allows specifying a mocked reader for unit testing).</param>
/// <param name="trieBuilder">The trie builder to use to add the words to the trie (allows specifying a mocked builder for unit testing).</param>
/// <param name="wordFilterFunction">A Func to filter whether or not the specified word should be added to the trie.  Accepts the word as a parameter, and returns a boolean indicating whether that word should be added to the trie.</param>
/// <param name="allWordsTrieRoot">The root of a character trie to populate with the words.</param>
/// <param name="allWords">A HashSet to populate with the words.</param>
/// <param name="fromCharacterFrequencies">A FrequencyTable to populate with the number of times each character is the 'from' character in a substitution.</param>
/// <param name="characterSubstitutionFrequencies">A FrequencyTable to populate with the number of times each pair of characters in a substitution occur.</param>
public void PopulateAdjacentWordDataStructures(IStreamReader reader, ICharacterTrieBuilder trieBuilder, Func<String, Boolean> wordFilterFunction, Dictionary<Char, TrieNode<Char>> allWordsTrieRoot, HashSet<String> allWords, FrequencyTable<Char> fromCharacterFrequencies, FrequencyTable<CharacterSubstitution> characterSubstitutionFrequencies)
{
    // Read all words and add them to the HashSet and trie
    using (reader)
    {
        while (reader.EndOfStream == false)
        {
            String currentWord = reader.ReadLine();
            // HashSet.Add() returns false when the word is already present, so a single call both
            //   detects duplicates and inserts new words.  The filter is evaluated first, meaning
            //   rejected words are never added to the set.
            if (wordFilterFunction(currentWord) && allWords.Add(currentWord))
            {
                trieBuilder.AddWord(allWordsTrieRoot, currentWord, true);
            }
        }
    }

    // Populate the frequency tables
    CharacterTrieUtilities trieUtilities = new CharacterTrieUtilities();
    WordUtilities wordUtilities = new WordUtilities();
    foreach (String currentWord in allWords)
    {
        foreach (String adjacentWord in trieUtilities.FindAdjacentWords(allWordsTrieRoot, currentWord))
        {
            // Find the character which was substituted between the word and the adjacent word
            //   (Item1 is treated as the 'from' character, and Item2 as the 'to' character)
            Tuple<Char, Char> differingCharacters = wordUtilities.FindDifferingCharacters(currentWord, adjacentWord);
            Char fromCharacter = differingCharacters.Item1;
            Char toCharacter = differingCharacters.Item2;

            // Increment the data structures
            fromCharacterFrequencies.Increment(fromCharacter);
            characterSubstitutionFrequencies.Increment(new CharacterSubstitution(fromCharacter, toCharacter));
        }
    }
}