/// <summary>
/// Writes a single probability hash entry as a line
/// in the data file.
/// </summary>
/// <remarks>
/// The line is built as: the items of the key joined by <c>ItemDelimiter</c>,
/// then <c>TopicDelimiter</c>, the key's topic and <c>ListDelimiter</c>,
/// followed by every value stored under the key, each written as
/// word, <c>ProbabilityDelimiter</c>, probability and joined by <c>ItemDelimiter</c>.
/// </remarks>
/// <param name="data">
/// Writing state structure: the key to serialize, the probability hash it
/// belongs to and the writer that receives the finished line.
/// </param>
private static void WriteLine(CorpusWriteData data)
{
    StringBuilder sb = new StringBuilder();

    // String each key word together, separating them
    // with an item delimiter constant
    var key = data.Key;
    for (int i = 0; i < key.Length; i++)
    {
        sb.Append(key[i]);

        // No delimiter after the last key word
        if (i < key.Length - 1)
        {
            sb.Append(ItemDelimiter);
        }
    }

    // Add the sub-topic to the key portion of the line
    sb.Append(TopicDelimiter)
      .Append(key.Topic)
      .Append(ListDelimiter);

    // Look the entry up once; it does not change while the line is built,
    // so there is no need to index the hash again on every iteration.
    var words = data.ProbabilityHash[key];

    // String the values together along with their probabilities.
    for (int i = 0; i < words.Count; i++)
    {
        var word = words[i];

        sb.Append(word.Key)
          .Append(ProbabilityDelimiter)
          .Append(word.Value);

        // No delimiter after the last value
        if (i < words.Count - 1)
        {
            sb.Append(ItemDelimiter);
        }
    }

    data.Writer.WriteLine(sb.ToString());
}
/// <summary>
/// Serializes a probability hash and writes it to disk.
/// </summary>
/// <param name="topic">
/// Topic name. It is lower-cased and its spaces are replaced with hyphens
/// before being substituted into the filename pattern.
/// </param>
/// <param name="probabilityHash">
/// The hash to serialize. Its <c>Order</c> becomes part of the filename and
/// every key it contains is written as one line of the file.
/// </param>
/// <exception cref="ArgumentNullException">
/// <paramref name="topic"/> or <paramref name="probabilityHash"/> is null.
/// </exception>
public static void Write(string topic, ProbabilityHash probabilityHash)
{
    // Fail fast, before the lock is taken and the directory is entered.
    if (topic == null)
    {
        throw new ArgumentNullException("topic");
    }

    if (probabilityHash == null)
    {
        throw new ArgumentNullException("probabilityHash");
    }

    // Mangle the topic name to save the file with no spaces
    // in the filename.
    topic = topic.ToLower().Replace(" ", "-");

    // This was a rushed design decision. I actually change the working directory to
    // get to the corpus data. A lock needs to be held because the client can be multithreaded,
    // and a race condition could cause the directory to be entered twice.
    lock (DataDirectories.DirectoryControl)
    {
        // Create the corpus directory if it doesn't exist already.
        DataDirectories.ForceEnterDirectory(DirectoryName);

        try
        {
            // Create the filename with string substitution
            string filename = String.Format(FilenamePattern, topic, probabilityHash.Order);

            // I can't think of a reason why the filename would exist
            // already, but just in case, delete it.
            if (File.Exists(filename))
            {
                File.Delete(filename);
            }

            using (var writer = new StreamWriter(filename, false))
            {
                // Use a struct to simplify argument passing
                var data = new CorpusWriteData()
                {
                    Writer = writer,
                    ProbabilityHash = probabilityHash
                };

                // Write each probability hash entry as a
                // line in the data file
                foreach (var key in probabilityHash.Keys)
                {
                    data.Key = key;
                    WriteLine(data);
                }
            }
        }
        finally
        {
            // Always step back out of the corpus directory, even when the
            // delete or a write throws; otherwise the directory entered above
            // would stay in effect for every later caller.
            DataDirectories.LeaveDirectory();
        }
    }
}