        /// <summary>
        /// Generate a probability hash from raw text data, and serialize
        /// the hash for later use.
        /// </summary>
        /// <param name="topic">The topic whose raw text files should be parsed.</param>
        /// <param name="order">The order of the Markov chain, i.e. the number of previous words that make up each state.</param>
        /// <returns>The populated, finalized probability hash.</returns>
        public static ProbabilityHash Parse(string topic, int order)
        {
            var probabilityHash = new ProbabilityHash(order);

            // This was a rushed design decision. I actually change the working directory to
            // get to the corpus data. A lock needs to be held because the client can be multithreaded,
            // and a race condition could cause the directory to be entered twice.
            lock (DataDirectories.DirectoryControl)
            {
                DataDirectories.EnterDirectory(RawTextWriter.DirectoryName);

                try
                {
                    // Find all subtopic files for the given topic
                    var filenames = FindFilenames(topic);

                    // If there are no subtopic files, then the topic must not
                    // exist on disk.
                    if (filenames.Count == 0)
                    {
                        throw new FileNotFoundException(
                            "No raw text files were found for topic: " + topic);
                    }

                    // Read each subtopic into the probability hash
                    foreach (var subtopic in filenames.Keys)
                    {
                        var data = new RawTextParseData()
                        {
                            ProbabilityHash = probabilityHash,
                            PreviousWords = new List<string>(),
                            Subtopic = subtopic
                        };

                        using (var file = File.OpenText(filenames[subtopic]))
                        {
                            while (!file.EndOfStream)
                            {
                                data.Line = file.ReadLine();
                                ParseLine(data);
                            }
                        }
                    }

                    // Finalize the probability hash so it can be used
                    // in the Markov chain.
                    probabilityHash.Finalize();
                }
                finally
                {
                    // Always restore the working directory, even if parsing throws,
                    // so the next caller starts from the expected location.
                    DataDirectories.LeaveDirectory();
                }
            }

            // Produce a serialized version of the probability hash for later use.
            CorpusWriter.Write(topic, probabilityHash);
            return probabilityHash;
        }
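
        // Usage sketch: how Parse is typically called from elsewhere in the client. The
        // topic "Physics" and the order of 2 are illustrative assumptions only; any topic
        // with raw text files on disk works, and the order sets how many previous words
        // make up each Markov state.
        //
        //     ProbabilityHash hash = Parse("Physics", 2);
        //     // "hash" is finalized and has been serialized by CorpusWriter, so a later
        //     // run can reload the corpus instead of re-parsing the raw text.
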
        /// <summary>
        /// Generate Markov states and state transitions from a line of raw text data.
        /// </summary>
        /// <param name="data">The parse state for the current subtopic: the probability hash being built, the sliding window of previous words, and the line to parse.</param>
        private static void ParseLine(RawTextParseData data)
        {
            // Remove citation references from the line
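            // (presumably bracketed markers such as "[12]" left over from Wikipedia text;
            // the exact pattern lives in wikiReferencePattern, which is defined elsewhere).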
            string line = wikiReferencePattern.Replace(data.Line, "");

            // Clean the words of other unwanted characters, such as double quotes.
            var words = TextCorpus.GetCleanWords(line);

            foreach (string word in words)
            {
                if (data.PreviousWords.Count < data.ProbabilityHash.Order)
                {
                    // Build the initial state by filling the "PreviousWords" window
                    // up to the chain's order.
                    data.PreviousWords.Add(word);
                }
                else
                {
                    // Once we have an initial state, we can start generating transition edges
                    // and new states based on each successive word
                    data.ProbabilityHash.Add(word, data.Subtopic, data.PreviousWords);
                    data.PreviousWords.Add(word);
                    data.PreviousWords.RemoveAt(0);
                }
            }
        }
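
        // Illustrative sketch (an assumption, not part of the parser): ProbabilityHash.Add
        // is assumed to do roughly the bookkeeping below: treat the current window of
        // "order" previous words as a state and count the word that follows it as an
        // outgoing transition. The real Add also tags each transition with a subtopic,
        // which is omitted here. This helper only makes the sliding-window idea concrete
        // with plain dictionaries; nothing in Parse or ParseLine calls it.
        private static Dictionary<string, Dictionary<string, int>> BuildTransitionCounts(
            IEnumerable<string> words, int order)
        {
            var counts = new Dictionary<string, Dictionary<string, int>>();
            var window = new List<string>();

            foreach (var word in words)
            {
                if (window.Count < order)
                {
                    // Still filling the initial state.
                    window.Add(word);
                    continue;
                }

                // The current window is a state; "word" is one transition out of it.
                var state = string.Join(" ", window);
                Dictionary<string, int> edges;
                if (!counts.TryGetValue(state, out edges))
                {
                    edges = new Dictionary<string, int>();
                    counts[state] = edges;
                }
                int seen;
                edges.TryGetValue(word, out seen);
                edges[word] = seen + 1;

                // Slide the window forward by one word.
                window.Add(word);
                window.RemoveAt(0);
            }

            return counts;
        }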