/// <summary>
        /// Reads a set of words from a stream, and adds the words and statistics pertaining to the words to a set of data structures.
        /// </summary>
        /// <param name="reader">The stream reader used to read the set of words (allows specifying a mocked reader for unit testing).  The reader is disposed once all words have been read.</param>
        /// <param name="trieBuilder">The trie builder to use to add the words to the trie (allows specifying a mocked builder for unit testing).</param>
        /// <param name="wordFilterFunction">A Func to filter whether or not the specified word should be added to the trie.  Accepts the word as a parameter, and returns a boolean indicating whether that word should be added to the trie.</param>
        /// <param name="allWordsTrieRoot">The root of a character trie to populate with the words.</param>
        /// <param name="allWords">A HashSet to populate with the words.</param>
        /// <param name="fromCharacterFrequencies">A FrequencyTable to populate with the number of times each character is the 'from' character in a substitution.</param>
        /// <param name="characterSubstitutionFrequencies">A FrequencyTable to populate with the number of times each pair of characters in a substitution occur.</param>
        public void PopulateAdjacentWordDataStructures(IStreamReader reader, ICharacterTrieBuilder trieBuilder, Func <String, Boolean> wordFilterFunction, Dictionary <Char, TrieNode <Char> > allWordsTrieRoot, HashSet <String> allWords, FrequencyTable <Char> fromCharacterFrequencies, FrequencyTable <CharacterSubstitution> characterSubstitutionFrequencies)
        {
            // Read all words and add the ones accepted by the filter to the HashSet and trie
            using (reader)
            {
                while (reader.EndOfStream == false)
                {
                    String currentWord = reader.ReadLine();

                    // HashSet.Add() returns false when the word is already present, so each accepted word
                    //   is hashed once (rather than Contains() followed by Add()) and duplicates never reach the trie
                    if (wordFilterFunction(currentWord) && allWords.Add(currentWord))
                    {
                        trieBuilder.AddWord(allWordsTrieRoot, currentWord, true);
                    }
                }
            }

            // Populate the frequency tables
            CharacterTrieUtilities trieUtilities = new CharacterTrieUtilities();
            WordUtilities          wordUtilities = new WordUtilities();

            foreach (String currentWord in allWords)
            {
                foreach (String adjacentWord in trieUtilities.FindAdjacentWords(allWordsTrieRoot, currentWord))
                {
                    // Find the character which was substituted between the word and the adjacent word
                    Tuple <Char, Char> differingCharacters = wordUtilities.FindDifferingCharacters(currentWord, adjacentWord);
                    Char fromCharacter = differingCharacters.Item1;
                    Char toCharacter   = differingCharacters.Item2;

                    // Record the substitution in both frequency tables
                    fromCharacterFrequencies.Increment(fromCharacter);
                    characterSubstitutionFrequencies.Increment(new CharacterSubstitution(fromCharacter, toCharacter));
                }
            }
        }
        /// <summary>
        /// Searches a graph of adjacent words for a path between two words using A*.  The priority of each candidate word is assigned by class member 'priorityCalculator'.
        /// </summary>
        /// <param name="sourceWord">The source word to find a path from.</param>
        /// <param name="destinationWord">The destination word to find a path to.</param>
        /// <param name="numberOfEdgesExplored">Is reset to 0, and then populated with the number of graph edges explored while finding a path.</param>
        /// <returns>A linked list containing the path from the source word to the destination word.  The list will be empty if no path was found.</returns>
        /// <exception cref="System.ArgumentException">Parameter 'sourceWord' is the same word as parameter 'destinationWord'.</exception>
        /// <exception cref="System.ArgumentException">The length of parameter 'sourceWord' is less than 1.</exception>
        /// <exception cref="System.ArgumentException">Parameter 'sourceWord' has different length to parameter 'destinationWord'.</exception>
        /// <remarks>The graph of words is not pre-built, but rather is generated dynamically, using data structures in class members 'trieUtilities' and 'wordDictionaryTrieRoot'.</remarks>
        public LinkedList <String> FindPathViaAStar(String sourceWord, String destinationWord, ref Int32 numberOfEdgesExplored)
        {
            CheckFindPathParameters(sourceWord, destinationWord);

            // Words waiting to be expanded, ordered by priority (a lower value is dequeued first)
            PriorityQueue <String> openVertices = new PriorityQueue <String>();
            // Words whose adjacent words have all been examined
            HashSet <String> closedVertices = new HashSet <String>();
            // Maps a word (key) to the word preceding it (value) on the best route recorded so far
            Dictionary <String, String> predecessors = new Dictionary <String, String>();
            // Maps a word to the number of steps from the source on the route recorded for it (i.e. the g(n) score in A*)
            Dictionary <String, Int32> distancesFromSource = new Dictionary <String, Int32>();
            Boolean destinationReached = false;

            openVertices.Enqueue(sourceWord, 0.0);
            distancesFromSource.Add(sourceWord, 0);
            numberOfEdgesExplored = 0;

            // Keep expanding words until the queue is exhausted or the destination has been reached
            while (openVertices.Count > 0 && !destinationReached)
            {
                String expandedWord = openVertices.DequeueMinimum();
                // Every adjacent word is one step further from the source than the word being expanded
                Int32 neighbourDistance = distancesFromSource[expandedWord] + 1;

                foreach (String neighbour in trieUtilities.FindAdjacentWords(wordDictionaryTrieRoot, expandedWord))
                {
                    if (neighbour.Equals(destinationWord))
                    {
                        // Destination reached... record the final edge and stop searching
                        predecessors.Add(neighbour, expandedWord);
                        numberOfEdgesExplored++;
                        destinationReached = true;
                        break;
                    }

                    // Words which have already been expanded are not reconsidered
                    if (closedVertices.Contains(neighbour))
                    {
                        continue;
                    }

                    Double neighbourPriority = priorityCalculator.CalculatePriority(expandedWord, neighbour, destinationWord, neighbourDistance);
                    if (!openVertices.Contains(neighbour))
                    {
                        // First time this word has been seen... add it to the queue and other data structures
                        openVertices.Enqueue(neighbour, neighbourPriority);
                        predecessors.Add(neighbour, expandedWord);
                        distancesFromSource.Add(neighbour, neighbourDistance);
                    }
                    else if (neighbourPriority < openVertices.GetPriorityForItem(neighbour))
                    {
                        // The word is already queued, but this route gives it a numerically lower (i.e. better) priority, so replace the recorded route
                        openVertices.Remove(neighbour);
                        openVertices.Enqueue(neighbour, neighbourPriority);
                        predecessors[neighbour]        = expandedWord;
                        distancesFromSource[neighbour] = neighbourDistance;
                    }
                    numberOfEdgesExplored++;
                }

                closedVertices.Add(expandedWord);
            }

            // Rebuild the path by walking the predecessor links back from the destination
            LinkedList <String> returnList = new LinkedList <String>();

            if (destinationReached)
            {
                String pathWord = destinationWord;
                String precedingWord;
                while (predecessors.TryGetValue(pathWord, out precedingWord))
                {
                    returnList.AddFirst(pathWord);
                    pathWord = precedingWord;
                }
                // The word with no predecessor is the start of the path
                returnList.AddFirst(pathWord);
            }

            return(returnList);
        }
Пример #3
0
        /// <summary>
        /// Runs the graph traversal comparison.
        /// </summary>
        /// <remarks>Populates the word data structures from the dictionary file (keeping only words made up of exactly 4 letters), then, for each test word pair, writes to the console the path found and the number of edges explored by each search configuration.</remarks>
        public void Run()
        {
            // The path to a file containing a dictionary of words
            const String dictionaryFilePath = @"C:\Temp\words2.txt";
            // The assumed maximum distance from a source word to a candidate word (used in weighting of g(n) and h(n) scores)
            const Int32 maximumSourceWordToCandidateWordDistance = 30;

            // Setup the word dictionary tree and other supporting data structures
            Dictionary <Char, TrieNode <Char> > allWordsTrieRoot = new Dictionary <Char, TrieNode <Char> >();
            HashSet <String>      allWords = new HashSet <String>();
            FrequencyTable <Char> fromCharacterFrequencies = new FrequencyTable <Char>();
            FrequencyTable <CharacterSubstitution> characterSubstitutionFrequencies = new FrequencyTable <CharacterSubstitution>();

            // Populate the word dictionary tree and other supporting data structures
            System.IO.StreamReader  underlyingReader   = new System.IO.StreamReader(dictionaryFilePath);
            Algorithms.StreamReader reader             = new Algorithms.StreamReader(underlyingReader);
            CharacterTrieBuilder    trieBuilder        = new CharacterTrieBuilder();
            // Accept only words consisting of exactly 4 letters
            Func <String, Boolean>  wordFilterFunction = new Func <String, Boolean>((inputString) =>
            {
                foreach (Char currentCharacter in inputString)
                {
                    if (Char.IsLetter(currentCharacter) == false)
                    {
                        return(false);
                    }
                }
                return(inputString.Length == 4);
            });
            DataStructureUtilities dataStructureUtils = new DataStructureUtilities();

            dataStructureUtils.PopulateAdjacentWordDataStructures(reader, trieBuilder, wordFilterFunction, allWordsTrieRoot, allWords, fromCharacterFrequencies, characterSubstitutionFrequencies);
            CharacterTrieUtilities trieUtilities = new CharacterTrieUtilities();

            // Setup the test data (word pairs to find paths between)
            List <Tuple <String, String> > testData = new List <Tuple <String, String> >()
            {
                new Tuple <String, String>("role", "band"),
                new Tuple <String, String>("pack", "sill"),
                new Tuple <String, String>("debt", "tyre"),
                new Tuple <String, String>("duct", "grid")
            };

            // Creates a path finder for a given set of priority weights, sharing the data structures populated above
            Func <Int32, Int32, Int32, Int32, AdjacentWordGraphPathFinder> createPathFinder = CreateComparisonPathFinderFactory(maximumSourceWordToCandidateWordDistance, trieUtilities, allWordsTrieRoot, fromCharacterFrequencies, characterSubstitutionFrequencies);

            // Find paths
            foreach (Tuple <String, String> currentWordPair in testData)
            {
                // Only the source-to-candidate distance (g(n)) weight is non-zero for this path finder.
                //   NOTE(review): presumably this makes A* behave like a breadth-first search (as the console message below states) - confirm against CandidateWordPriorityCalculator.
                AdjacentWordGraphPathFinder pathFinder = createPathFinder(1, 0, 0, 0);

                WriteComparisonWordPairHeader(currentWordPair);

                // Find a path using A* weighted on g(n) only (reported as breadth-first search)
                Console.WriteLine("  Using breadth-first search...");
                Int32 numberOfEdgesExplored = 0;
                LinkedList <String> path    = pathFinder.FindPathViaAStar(currentWordPair.Item1, currentWordPair.Item2, ref numberOfEdgesExplored);
                WriteComparisonSearchResult(path, numberOfEdgesExplored, false);

                // Find the shortest path using Dijkstra's algorithm
                Console.WriteLine("  Using Dijkstra's algorithm...");
                numberOfEdgesExplored = 0;
                path = pathFinder.FindShortestPathViaDijkstrasAlgorithm(currentWordPair.Item1, currentWordPair.Item2, ref numberOfEdgesExplored);
                WriteComparisonSearchResult(path, numberOfEdgesExplored, false);

                // Find a path using bidirectional breadth-first search
                Console.WriteLine("  Using bidirectional breadth-first search...");
                numberOfEdgesExplored = 0;
                path = pathFinder.FindPathViaBidirectionalBreadthFirstSearch(currentWordPair.Item1, currentWordPair.Item2, ref numberOfEdgesExplored);
                WriteComparisonSearchResult(path, numberOfEdgesExplored, false);

                // Find paths using A* with differing ratios of the g(n) weight (first number) to the three h(n) weights (remaining numbers)
                RunComparisonAStarSearch("  Using A* ( 50% g(n) and 50% h(n) )...", currentWordPair, createPathFinder, 3, 1, 1, 1, false);
                RunComparisonAStarSearch("  Using A* ( 0% g(n) and 100% h(n) )...", currentWordPair, createPathFinder, 0, 1, 1, 1, false);
                RunComparisonAStarSearch("  Using A* ( 25% g(n) and 75% h(n) )...", currentWordPair, createPathFinder, 1, 1, 1, 1, false);
                RunComparisonAStarSearch("  Using A* ( 25% g(n) and 75% h(n) with custom h(n) weighting )...", currentWordPair, createPathFinder, 1, 2, 1, 0, false);

                Console.WriteLine();
            }

            // The ad-hoc test code and utility routines which used to follow an unconditional 'return' here (and so could never execute)
            //   now live in RunDiagnostics().  Call that method from here with the data structures above to use them.
        }

        /// <summary>
        /// Creates a function which builds an AdjacentWordGraphPathFinder (and the CandidateWordPriorityCalculator it uses) for a specified set of priority weights.
        /// </summary>
        /// <param name="maximumSourceWordToCandidateWordDistance">The assumed maximum distance from a source word to a candidate word, passed to each priority calculator.</param>
        /// <param name="trieUtilities">The trie utilities passed to each path finder.</param>
        /// <param name="allWordsTrieRoot">The root of the character trie containing all the words.</param>
        /// <param name="fromCharacterFrequencies">The 'from' character frequencies passed to each priority calculator.</param>
        /// <param name="characterSubstitutionFrequencies">The character substitution frequencies passed to each priority calculator.</param>
        /// <returns>A function accepting (in order) the source word to candidate word distance weight, the number of characters matching destination weight, the popularity of change to character weight, and the popularity of character change weight, and returning a new path finder configured with those weights.</returns>
        private Func <Int32, Int32, Int32, Int32, AdjacentWordGraphPathFinder> CreateComparisonPathFinderFactory(Int32 maximumSourceWordToCandidateWordDistance, CharacterTrieUtilities trieUtilities, Dictionary <Char, TrieNode <Char> > allWordsTrieRoot, FrequencyTable <Char> fromCharacterFrequencies, FrequencyTable <CharacterSubstitution> characterSubstitutionFrequencies)
        {
            Func <Int32, Int32, Int32, Int32, AdjacentWordGraphPathFinder> pathFinderFactory = (sourceWordToCandidateWordDistanceWeight, numberOfCharactersMatchingDestinationWeight, popularityOfChangeToCharacterWeight, popularityOfCharacterChangeWeight) =>
            {
                CandidateWordPriorityCalculator priorityCalculator = new CandidateWordPriorityCalculator(maximumSourceWordToCandidateWordDistance, sourceWordToCandidateWordDistanceWeight, numberOfCharactersMatchingDestinationWeight, popularityOfChangeToCharacterWeight, popularityOfCharacterChangeWeight, allWordsTrieRoot, fromCharacterFrequencies, characterSubstitutionFrequencies);
                return(new AdjacentWordGraphPathFinder(priorityCalculator, trieUtilities, allWordsTrieRoot));
            };

            return(pathFinderFactory);
        }

        /// <summary>
        /// Writes the banner introducing the results for a pair of words to the console.
        /// </summary>
        /// <param name="wordPair">The source (Item1) and destination (Item2) words.</param>
        private void WriteComparisonWordPairHeader(Tuple <String, String> wordPair)
        {
            Console.WriteLine("-----------------------------------------------");
            Console.WriteLine("  Finding paths for strings '{0}' and '{1}'", wordPair.Item1, wordPair.Item2);
            Console.WriteLine("-----------------------------------------------");
        }

        /// <summary>
        /// Writes the title of an A* configuration to the console, creates a path finder with the specified weights, finds a path between the specified words, and writes the result to the console.
        /// </summary>
        /// <param name="title">The line to write to the console before searching.</param>
        /// <param name="wordPair">The source (Item1) and destination (Item2) words.</param>
        /// <param name="createPathFinder">Function which creates a path finder from the four weights (in the order of the following parameters).</param>
        /// <param name="sourceWordToCandidateWordDistanceWeight">The source word to candidate word distance weight.</param>
        /// <param name="numberOfCharactersMatchingDestinationWeight">The number of characters matching destination weight.</param>
        /// <param name="popularityOfChangeToCharacterWeight">The popularity of change to character weight.</param>
        /// <param name="popularityOfCharacterChangeWeight">The popularity of character change weight.</param>
        /// <param name="includePathLength">Whether to also write the number of words in the path.</param>
        private void RunComparisonAStarSearch(String title, Tuple <String, String> wordPair, Func <Int32, Int32, Int32, Int32, AdjacentWordGraphPathFinder> createPathFinder, Int32 sourceWordToCandidateWordDistanceWeight, Int32 numberOfCharactersMatchingDestinationWeight, Int32 popularityOfChangeToCharacterWeight, Int32 popularityOfCharacterChangeWeight, Boolean includePathLength)
        {
            // The title is written before the path finder is created so that it is visible if setup fails
            Console.WriteLine(title);
            AdjacentWordGraphPathFinder pathFinder = createPathFinder(sourceWordToCandidateWordDistanceWeight, numberOfCharactersMatchingDestinationWeight, popularityOfChangeToCharacterWeight, popularityOfCharacterChangeWeight);
            Int32 numberOfEdgesExplored = 0;
            LinkedList <String> path = pathFinder.FindPathViaAStar(wordPair.Item1, wordPair.Item2, ref numberOfEdgesExplored);
            WriteComparisonSearchResult(path, numberOfEdgesExplored, includePathLength);
        }

        /// <summary>
        /// Writes the outcome of a single search to the console, followed by a blank line.
        /// </summary>
        /// <param name="path">The path returned by the search.</param>
        /// <param name="numberOfEdgesExplored">The number of graph edges explored by the search.</param>
        /// <param name="includePathLength">Whether to also write the number of words in the path.</param>
        private void WriteComparisonSearchResult(LinkedList <String> path, Int32 numberOfEdgesExplored, Boolean includePathLength)
        {
            Console.Write("    Path:  ");
            WritePathToConsole(path);
            Console.WriteLine("    Edges explored:  {0}", numberOfEdgesExplored);
            if (includePathLength == true)
            {
                Console.WriteLine("    Length of path:  {0}", path.Count);
            }
            Console.WriteLine();
        }

        #region Test Code and Utility Routines

        /// <summary>
        /// Ad-hoc routines for exploring the word graph: an interactive adjacent word lookup, graph size totals, frequency table dumps, and a comparison of each heuristic function in isolation.
        /// </summary>
        /// <param name="wordPairs">The word pairs to find paths between when comparing the heuristic functions.</param>
        /// <param name="trieUtilities">The trie utilities used to find adjacent words.</param>
        /// <param name="allWordsTrieRoot">The root of the character trie containing all the words.</param>
        /// <param name="allWords">The set of all the words.</param>
        /// <param name="fromCharacterFrequencies">The populated 'from' character frequencies.</param>
        /// <param name="characterSubstitutionFrequencies">The populated character substitution frequencies.</param>
        /// <remarks>Not called by Run()... invoke manually when investigating.</remarks>
        private void RunDiagnostics(List <Tuple <String, String> > wordPairs, CharacterTrieUtilities trieUtilities, Dictionary <Char, TrieNode <Char> > allWordsTrieRoot, HashSet <String> allWords, FrequencyTable <Char> fromCharacterFrequencies, FrequencyTable <CharacterSubstitution> characterSubstitutionFrequencies)
        {
            // Read a word from the console and find its adjacent words (until 'q' is entered)
            while (true)
            {
                Console.Write("Type a source word: ");
                String readWord = Console.ReadLine();
                // Console.ReadLine() returns null when no more input is available, so treat that as a request to quit too
                if (readWord == null || readWord.Equals("q"))
                {
                    break;
                }
                foreach (String currAdjacent in trieUtilities.FindAdjacentWords(allWordsTrieRoot, readWord))
                {
                    Console.WriteLine("  " + currAdjacent);
                }
            }

            // Find the total number of edges and vertices in the graph
            Int32 totalEdges = 0;
            foreach (String currWord in allWords)
            {
                foreach (String currAdj in trieUtilities.FindAdjacentWords(allWordsTrieRoot, currWord))
                {
                    totalEdges++;
                }
            }
            // NOTE(review): the division by 2 presumably compensates for each edge being counted once from each of its two words - confirm FindAdjacentWords() is symmetric
            Console.WriteLine("Total edges: " + totalEdges / 2);
            Console.WriteLine("Total vertices: " + allWords.Count);

            // Show contents of frequency tables
            foreach (KeyValuePair <Char, Int32> currKVP in fromCharacterFrequencies)
            {
                Console.WriteLine(currKVP.Key + ": " + currKVP.Value);
            }
            Console.WriteLine("t > b: " + characterSubstitutionFrequencies.GetFrequency(new CharacterSubstitution('t', 'b')));
            Console.WriteLine("n > f: " + characterSubstitutionFrequencies.GetFrequency(new CharacterSubstitution('n', 'f')));
            foreach (KeyValuePair <CharacterSubstitution, Int32> currKVP in characterSubstitutionFrequencies)
            {
                if (currKVP.Key.ToCharacter == 'y')
                {
                    Console.WriteLine(currKVP.Key.FromCharacter + " > " + currKVP.Key.ToCharacter + ": " + currKVP.Value);
                }
            }
            // Show all words whose second character is 'y'
            foreach (String currWord in allWords)
            {
                if (currWord[1] == 'y')
                {
                    Console.WriteLine(currWord);
                }
            }

            // Compare each heuristic function in isolation (the g(n) weight is always 0, and exactly one of the three h(n) weights is 1)
            const Int32 maximumSourceWordToCandidateWordDistance2 = 4000;
            Func <Int32, Int32, Int32, Int32, AdjacentWordGraphPathFinder> createPathFinder = CreateComparisonPathFinderFactory(maximumSourceWordToCandidateWordDistance2, trieUtilities, allWordsTrieRoot, fromCharacterFrequencies, characterSubstitutionFrequencies);

            foreach (Tuple <String, String> currentWordPair in wordPairs)
            {
                WriteComparisonWordPairHeader(currentWordPair);

                // Number of characters matching the destination only
                RunComparisonAStarSearch("  1 0 0...", currentWordPair, createPathFinder, 0, 1, 0, 0, true);
                // Popularity of the change to character only
                RunComparisonAStarSearch("  0 1 0...", currentWordPair, createPathFinder, 0, 0, 1, 0, true);
                // Popularity of the character change only
                RunComparisonAStarSearch("  0 0 1...", currentWordPair, createPathFinder, 0, 0, 0, 1, true);

                Console.WriteLine();
            }
        }

        #endregion