/// <summary>
        /// Reads a set of words from a stream, and adds the words and statistics pertaining to the words to a set of data structures.
        /// </summary>
        /// <remarks>The reader is disposed once all of its lines have been read.  The frequency tables are populated only after every word has been read, by enumerating the adjacent words of each word in <paramref name="allWords"/>.</remarks>
        /// <param name="reader">The stream reader used to read the set of words (allows specifying a mocked reader for unit testing).</param>
        /// <param name="trieBuilder">The trie builder to use to add the words to the trie (allows specifying a mocked builder for unit testing).</param>
        /// <param name="wordFilterFunction">A Func to filter whether or not the specified word should be added to the trie.  Accepts the word as a parameter, and returns a boolean indicating whether that word should be added to the trie.</param>
        /// <param name="allWordsTrieRoot">The root of a character trie to populate with the words.</param>
        /// <param name="allWords">A HashSet to populate with the words.</param>
        /// <param name="fromCharacterFrequencies">A FrequencyTable to populate with the number of times each character is the 'from' character in a substitution.</param>
        /// <param name="characterSubstitutionFrequencies">A FrequencyTable to populate with the number of times each pair of characters in a substitution occur.</param>
        public void PopulateAdjacentWordDataStructures(IStreamReader reader, ICharacterTrieBuilder trieBuilder, Func<String, Boolean> wordFilterFunction, Dictionary<Char, TrieNode<Char>> allWordsTrieRoot, HashSet<String> allWords, FrequencyTable<Char> fromCharacterFrequencies, FrequencyTable<CharacterSubstitution> characterSubstitutionFrequencies)
        {
            // Read all words and add them to the HashSet and trie
            using (reader)
            {
                while (reader.EndOfStream == false)
                {
                    String currentWord = reader.ReadLine();
                    // HashSet.Add() returns false when the word is already present, so a single call both checks for
                    //   and records the word (avoiding a separate Contains() lookup).  Words rejected by the filter are never added.
                    if (wordFilterFunction.Invoke(currentWord) == true && allWords.Add(currentWord) == true)
                    {
                        trieBuilder.AddWord(allWordsTrieRoot, currentWord, true);
                    }
                }
            }

            // Populate the frequency tables
            CharacterTrieUtilities trieUtilities = new CharacterTrieUtilities();
            WordUtilities wordUtilities = new WordUtilities();

            foreach (String currentWord in allWords)
            {
                foreach (String adjacentWord in trieUtilities.FindAdjacentWords(allWordsTrieRoot, currentWord))
                {
                    // Find the character which was substituted between the word and the adjacent word
                    Tuple<Char, Char> differingCharacters = wordUtilities.FindDifferingCharacters(currentWord, adjacentWord);
                    Char fromCharacter = differingCharacters.Item1;
                    Char toCharacter = differingCharacters.Item2;

                    // Increment the data structures
                    fromCharacterFrequencies.Increment(fromCharacter);
                    characterSubstitutionFrequencies.Increment(new CharacterSubstitution(fromCharacter, toCharacter));
                }
            }
        }
 /// <summary>
 /// Initialises a new instance of the Algorithms.AdjacentWordGraphPathFinder class.
 /// </summary>
 /// <param name="priorityCalculator">The calculator which assigns a priority to each candidate word.</param>
 /// <param name="trieUtilities">The utilities used to find the vertices adjacent to a word in the graph of words.</param>
 /// <param name="wordDictionaryTrieRoot">The root node of the character trie holding every word in the graph.</param>
 /// <remarks>Parameter 'wordDictionaryTrieRoot' should be the same character trie root that was set on the constructor of parameter 'priorityCalculator'.</remarks>
 public AdjacentWordGraphPathFinder(CandidateWordPriorityCalculator priorityCalculator, CharacterTrieUtilities trieUtilities, Dictionary<Char, TrieNode<Char>> wordDictionaryTrieRoot)
 {
     // Store references to the supplied objects (nothing is copied or validated here)
     this.wordDictionaryTrieRoot = wordDictionaryTrieRoot;
     this.trieUtilities = trieUtilities;
     this.priorityCalculator = priorityCalculator;
 }
Пример #3
0
        /// <summary>
        /// Runs the graph traversal comparison.
        /// </summary>
        /// <remarks>
        /// Reads the word dictionary from a hard-coded file path (passing a filter which accepts only 4-character words consisting solely of letters), and then, for each test word pair, writes to the console the path found and the number of edges explored by each search configuration.
        /// The methods in the 'Test Code and Utility Routines' region are not called by this method (their code previously followed an unconditional return statement and so never ran).  They are kept as private methods for ad-hoc use.
        /// </remarks>
        public void Run()
        {
            // The path to a file containing a dictionary of words
            const String dictionaryFilePath = @"C:\Temp\words2.txt";
            // The assumed maximum distance from a source word to a candidate word (used in weighting of g(n) and h(n) scores)
            const Int32 maximumSourceWordToCandidateWordDistance = 30;

            // Setup the word dictionary tree and other supporting data structures
            Dictionary<Char, TrieNode<Char>> allWordsTrieRoot = new Dictionary<Char, TrieNode<Char>>();
            HashSet<String> allWords = new HashSet<String>();
            FrequencyTable<Char> fromCharacterFrequencies = new FrequencyTable<Char>();
            FrequencyTable<CharacterSubstitution> characterSubstitutionFrequencies = new FrequencyTable<CharacterSubstitution>();

            // Populate the word dictionary tree and other supporting data structures
            System.IO.StreamReader underlyingReader = new System.IO.StreamReader(dictionaryFilePath);
            Algorithms.StreamReader reader = new Algorithms.StreamReader(underlyingReader);
            CharacterTrieBuilder trieBuilder = new CharacterTrieBuilder();
            // Accept only words which consist solely of letters and are exactly 4 characters long
            Func<String, Boolean> wordFilterFunction = (inputString) =>
            {
                foreach (Char currentCharacter in inputString)
                {
                    if (Char.IsLetter(currentCharacter) == false)
                    {
                        return false;
                    }
                }
                return (inputString.Length == 4);
            };
            DataStructureUtilities dataStructureUtils = new DataStructureUtilities();

            dataStructureUtils.PopulateAdjacentWordDataStructures(reader, trieBuilder, wordFilterFunction, allWordsTrieRoot, allWords, fromCharacterFrequencies, characterSubstitutionFrequencies);
            CharacterTrieUtilities trieUtilities = new CharacterTrieUtilities();

            // Builds a path finder with a new priority calculator.  The weights are passed in the order...
            //   source word to candidate word distance ( g(n) ), number of characters matching destination, popularity of change to character, popularity of character change
            Func<Int32, Int32, Int32, Int32, AdjacentWordGraphPathFinder> createPathFinder = (sourceWordToCandidateWordDistanceWeight, numberOfCharactersMatchingDestinationWeight, popularityOfChangeToCharacterWeight, popularityOfCharacterChangeWeight) =>
            {
                CandidateWordPriorityCalculator priorityCalculator = new CandidateWordPriorityCalculator(maximumSourceWordToCandidateWordDistance, sourceWordToCandidateWordDistanceWeight, numberOfCharactersMatchingDestinationWeight, popularityOfChangeToCharacterWeight, popularityOfCharacterChangeWeight, allWordsTrieRoot, fromCharacterFrequencies, characterSubstitutionFrequencies);
                return new AdjacentWordGraphPathFinder(priorityCalculator, trieUtilities, allWordsTrieRoot);
            };

            // Setup the test data (word pairs to find paths between)
            List<Tuple<String, String>> testData = new List<Tuple<String, String>>()
            {
                new Tuple<String, String>("role", "band"),
                new Tuple<String, String>("pack", "sill"),
                new Tuple<String, String>("debt", "tyre"),
                new Tuple<String, String>("duct", "grid")
            };

            // Find paths
            foreach (Tuple<String, String> currentWordPair in testData)
            {
                String sourceWord = currentWordPair.Item1;
                String destinationWord = currentWordPair.Item2;
                Int32 numberOfEdgesExplored;
                LinkedList<String> path;

                // Setup the graph path finder shared by the first three searches (priority weighted solely on g(n))
                AdjacentWordGraphPathFinder pathFinder = createPathFinder(1, 0, 0, 0);

                WriteWordPairHeader(sourceWord, destinationWord);

                // Find a path using A* weighted solely on g(n)
                // NOTE(review): the console output describes this as a breadth-first search, but the call is to FindPathViaAStar() - confirm this is intended
                Console.WriteLine("  Using breadth-first search...");
                numberOfEdgesExplored = 0;
                path = pathFinder.FindPathViaAStar(sourceWord, destinationWord, ref numberOfEdgesExplored);
                WriteSearchResult(path, numberOfEdgesExplored, false);

                // Find the shortest path using Dijkstras algorithm
                Console.WriteLine("  Using Dijkstra's algorithm...");
                numberOfEdgesExplored = 0;
                path = pathFinder.FindShortestPathViaDijkstrasAlgorithm(sourceWord, destinationWord, ref numberOfEdgesExplored);
                WriteSearchResult(path, numberOfEdgesExplored, false);

                // Find a path using bidirectional breadth-first search
                Console.WriteLine("  Using bidirectional breadth-first search...");
                numberOfEdgesExplored = 0;
                path = pathFinder.FindPathViaBidirectionalBreadthFirstSearch(sourceWord, destinationWord, ref numberOfEdgesExplored);
                WriteSearchResult(path, numberOfEdgesExplored, false);

                // Find a path using A* ( 50% g(n) and 50% h(n) )
                Console.WriteLine("  Using A* ( 50% g(n) and 50% h(n) )...");
                pathFinder = createPathFinder(3, 1, 1, 1);
                numberOfEdgesExplored = 0;
                path = pathFinder.FindPathViaAStar(sourceWord, destinationWord, ref numberOfEdgesExplored);
                WriteSearchResult(path, numberOfEdgesExplored, false);

                // Find a path using A* ( 0% g(n) and 100% h(n) )
                Console.WriteLine("  Using A* ( 0% g(n) and 100% h(n) )...");
                pathFinder = createPathFinder(0, 1, 1, 1);
                numberOfEdgesExplored = 0;
                path = pathFinder.FindPathViaAStar(sourceWord, destinationWord, ref numberOfEdgesExplored);
                WriteSearchResult(path, numberOfEdgesExplored, false);

                // Find a path using A* ( 25% g(n) and 75% h(n) )
                Console.WriteLine("  Using A* ( 25% g(n) and 75% h(n) )...");
                pathFinder = createPathFinder(1, 1, 1, 1);
                numberOfEdgesExplored = 0;
                path = pathFinder.FindPathViaAStar(sourceWord, destinationWord, ref numberOfEdgesExplored);
                WriteSearchResult(path, numberOfEdgesExplored, false);

                // Find a path using A* ( 25% g(n) and 75% h(n) with custom h(n) weighting )
                Console.WriteLine("  Using A* ( 25% g(n) and 75% h(n) with custom h(n) weighting )...");
                pathFinder = createPathFinder(1, 2, 1, 0);
                numberOfEdgesExplored = 0;
                path = pathFinder.FindPathViaAStar(sourceWord, destinationWord, ref numberOfEdgesExplored);
                WriteSearchResult(path, numberOfEdgesExplored, false);

                Console.WriteLine();
            }
        }

        /// <summary>
        /// Writes the banner which introduces the results for a pair of words to the console.
        /// </summary>
        /// <param name="sourceWord">The word the paths start at.</param>
        /// <param name="destinationWord">The word the paths end at.</param>
        private void WriteWordPairHeader(String sourceWord, String destinationWord)
        {
            Console.WriteLine("-----------------------------------------------");
            Console.WriteLine("  Finding paths for strings '{0}' and '{1}'", sourceWord, destinationWord);
            Console.WriteLine("-----------------------------------------------");
        }

        /// <summary>
        /// Writes the result of a single search to the console, followed by a blank line.
        /// </summary>
        /// <param name="path">The path which was found.</param>
        /// <param name="numberOfEdgesExplored">The number of edges explored during the search.</param>
        /// <param name="includePathLength">Whether to also write the number of words in the path.</param>
        private void WriteSearchResult(LinkedList<String> path, Int32 numberOfEdgesExplored, Boolean includePathLength)
        {
            Console.Write("    Path:  ");
            WritePathToConsole(path);
            Console.WriteLine("    Edges explored:  {0}", numberOfEdgesExplored);
            if (includePathLength == true)
            {
                Console.WriteLine("    Length of path:  {0}", path.Count);
            }
            Console.WriteLine();
        }

        #region Test Code and Utility Routines

        /// <summary>
        /// Repeatedly reads a word from the console and writes its adjacent words, until 'q' is entered or no more console input is available.
        /// </summary>
        /// <param name="trieUtilities">Used to find the adjacent words.</param>
        /// <param name="allWordsTrieRoot">The root of the character trie containing all the words.</param>
        private void FindAdjacentWordsInteractively(CharacterTrieUtilities trieUtilities, Dictionary<Char, TrieNode<Char>> allWordsTrieRoot)
        {
            while (true)
            {
                Console.Write("Type a source word: ");
                String readWord = Console.ReadLine();
                // Console.ReadLine() returns null when no more lines are available, which is treated the same as quitting
                if (readWord == null || readWord.Equals("q"))
                {
                    break;
                }
                foreach (String currAdjacent in trieUtilities.FindAdjacentWords(allWordsTrieRoot, readWord))
                {
                    Console.WriteLine("  " + currAdjacent);
                }
            }
        }

        /// <summary>
        /// Writes the total number of edges and vertices in the graph of words to the console.
        /// </summary>
        /// <param name="trieUtilities">Used to find the adjacent words.</param>
        /// <param name="allWordsTrieRoot">The root of the character trie containing all the words.</param>
        /// <param name="allWords">All the words in the graph.</param>
        private void WriteGraphSize(CharacterTrieUtilities trieUtilities, Dictionary<Char, TrieNode<Char>> allWordsTrieRoot, HashSet<String> allWords)
        {
            Int32 totalEdges = 0;
            foreach (String currWord in allWords)
            {
                foreach (String currAdj in trieUtilities.FindAdjacentWords(allWordsTrieRoot, currWord))
                {
                    totalEdges++;
                }
            }
            // The count is halved before being written, as the loop above visits adjacency from every word
            Console.WriteLine("Total edges: " + totalEdges / 2);
            Console.WriteLine("Total vertices: " + allWords.Count);
        }

        /// <summary>
        /// Writes the contents of the frequency tables, and the words whose second character is 'y', to the console.
        /// </summary>
        /// <param name="fromCharacterFrequencies">The frequencies of each 'from' character in a substitution.</param>
        /// <param name="characterSubstitutionFrequencies">The frequencies of each character substitution.</param>
        /// <param name="allWords">All the words in the graph.</param>
        private void WriteFrequencyTableContents(FrequencyTable<Char> fromCharacterFrequencies, FrequencyTable<CharacterSubstitution> characterSubstitutionFrequencies, HashSet<String> allWords)
        {
            foreach (KeyValuePair<Char, Int32> currKVP in fromCharacterFrequencies)
            {
                Console.WriteLine(currKVP.Key + ": " + currKVP.Value);
            }
            Console.WriteLine("t > b: " + characterSubstitutionFrequencies.GetFrequency(new CharacterSubstitution('t', 'b')));
            Console.WriteLine("n > f: " + characterSubstitutionFrequencies.GetFrequency(new CharacterSubstitution('n', 'f')));
            // Show only the substitutions which change to character 'y'
            foreach (KeyValuePair<CharacterSubstitution, Int32> currKVP in characterSubstitutionFrequencies)
            {
                if (currKVP.Key.ToCharacter == 'y')
                {
                    Console.WriteLine(currKVP.Key.FromCharacter + " > " + currKVP.Key.ToCharacter + ": " + currKVP.Value);
                }
            }
            foreach (String currWord in allWords)
            {
                if (currWord[1] == 'y')
                {
                    Console.WriteLine(currWord);
                }
            }
        }

        /// <summary>
        /// Runs A* for each test word pair using each of the three h(n) heuristic functions in isolation (g(n) weight of 0), and writes the results to the console.
        /// </summary>
        /// <param name="trieUtilities">Used to find adjacent vertices in the graph of words.</param>
        /// <param name="allWordsTrieRoot">The root of the character trie containing all the words.</param>
        /// <param name="fromCharacterFrequencies">The frequencies of each 'from' character in a substitution.</param>
        /// <param name="characterSubstitutionFrequencies">The frequencies of each character substitution.</param>
        private void CompareHeuristicFunctionsInIsolation(CharacterTrieUtilities trieUtilities, Dictionary<Char, TrieNode<Char>> allWordsTrieRoot, FrequencyTable<Char> fromCharacterFrequencies, FrequencyTable<CharacterSubstitution> characterSubstitutionFrequencies)
        {
            // The assumed maximum distance from a source word to a candidate word used for this comparison
            const Int32 maximumSourceWordToCandidateWordDistance = 4000;

            List<Tuple<String, String>> testData = new List<Tuple<String, String>>()
            {
                new Tuple<String, String>("role", "band"),
                new Tuple<String, String>("pack", "sill"),
                new Tuple<String, String>("debt", "tyre"),
                new Tuple<String, String>("duct", "grid")
            };

            // Each entry holds the console label, followed by the weights for...
            //   number of characters matching destination, popularity of change to character, popularity of character change
            List<Tuple<String, Int32, Int32, Int32>> heuristicWeightings = new List<Tuple<String, Int32, Int32, Int32>>()
            {
                new Tuple<String, Int32, Int32, Int32>("  1 0 0...", 1, 0, 0),
                new Tuple<String, Int32, Int32, Int32>("  0 1 0...", 0, 1, 0),
                new Tuple<String, Int32, Int32, Int32>("  0 0 1...", 0, 0, 1)
            };

            foreach (Tuple<String, String> currentWordPair in testData)
            {
                WriteWordPairHeader(currentWordPair.Item1, currentWordPair.Item2);

                foreach (Tuple<String, Int32, Int32, Int32> currentWeighting in heuristicWeightings)
                {
                    Console.WriteLine(currentWeighting.Item1);
                    // The g(n) weight is always 0 so that only the selected heuristic function contributes to the priority
                    CandidateWordPriorityCalculator priorityCalculator = new CandidateWordPriorityCalculator(maximumSourceWordToCandidateWordDistance, 0, currentWeighting.Item2, currentWeighting.Item3, currentWeighting.Item4, allWordsTrieRoot, fromCharacterFrequencies, characterSubstitutionFrequencies);
                    AdjacentWordGraphPathFinder pathFinder = new AdjacentWordGraphPathFinder(priorityCalculator, trieUtilities, allWordsTrieRoot);
                    Int32 numberOfEdgesExplored = 0;
                    LinkedList<String> path = pathFinder.FindPathViaAStar(currentWordPair.Item1, currentWordPair.Item2, ref numberOfEdgesExplored);
                    WriteSearchResult(path, numberOfEdgesExplored, true);
                }

                Console.WriteLine();
            }
        }

        #endregion