コード例 #1
0
        /// <summary>
        /// Writes a single probability hash entry as one line of the data file.
        /// </summary>
        /// <remarks>
        /// The line is built as: the key's items separated by <c>ItemDelimiter</c>,
        /// then <c>TopicDelimiter</c>, the key's topic and <c>ListDelimiter</c>,
        /// followed by each entry's key and value (joined by
        /// <c>ProbabilityDelimiter</c>), with entries separated by
        /// <c>ItemDelimiter</c>.
        /// </remarks>
        /// <param name="data">
        /// Writing state structure; supplies the current key, the probability
        /// hash to read the key's entries from, and the writer to emit the line to.
        /// </param>
        private static void WriteLine(CorpusWriteData data)
        {
            StringBuilder sb = new StringBuilder();

            // String each key word together, separating them
            // with an item delimiter constant (no trailing delimiter).
            for (int i = 0; i < data.Key.Length; i++)
            {
                sb.Append(data.Key[i]);
                if (i < data.Key.Length - 1)
                {
                    sb.Append(ItemDelimiter);
                }
            }

            // Add the sub-topic to the key portion of the line
            sb.Append(TopicDelimiter)
                .Append(data.Key.Topic)
                .Append(ListDelimiter);

            // Look the entry list up once; the key does not change while this
            // line is being built, so repeating the keyed lookup on every
            // iteration is wasted work.
            var words = data.ProbabilityHash[data.Key];
            int lastIndex = words.Count - 1;

            // String the values together along with their probabilities.
            // NOTE(review): word.Value is appended with its default formatting;
            // if it is a floating-point type the output depends on the current
            // culture - confirm against the reader.
            for (int i = 0; i <= lastIndex; i++)
            {
                var word = words[i];
                sb.Append(word.Key)
                    .Append(ProbabilityDelimiter)
                    .Append(word.Value);
                if (i < lastIndex)
                {
                    sb.Append(ItemDelimiter);
                }
            }

            data.Writer.WriteLine(sb.ToString());
        }
コード例 #2
0
        /// <summary>
        /// Serializes a probability hash and writes it to disk, one line per key.
        /// </summary>
        /// <remarks>
        /// The whole operation runs while holding
        /// <c>DataDirectories.DirectoryControl</c>. The corpus directory is always
        /// left again before the lock is released, even when writing fails; any
        /// exception raised while writing still propagates to the caller.
        /// </remarks>
        /// <param name="topic">
        /// Topic name; it is lower-cased and its spaces are replaced with hyphens
        /// before being substituted into the filename pattern.
        /// </param>
        /// <param name="probabilityHash">
        /// The probability hash to serialize; its <c>Order</c> is substituted into
        /// the filename and each of its keys produces one line in the file.
        /// </param>
        public static void Write(string topic, ProbabilityHash probabilityHash)
        {
            // Mangle the topic name to save the file with no spaces
            // in the filename.
            topic = topic.ToLower().Replace(" ", "-");

            // This was a rushed design decision. I actually change the working directory to
            // get to the corpus data. A lock needs to be held because the client can be multithreaded,
            // and a race condition could cause the directory to be entered twice.
            lock (DataDirectories.DirectoryControl)
            {
                // Create the corpus directory if it doesn't exist already.
                DataDirectories.ForceEnterDirectory(DirectoryName);

                try
                {
                    // Create the filename with string substitution
                    string filename = String.Format(FilenamePattern, topic, probabilityHash.Order);

                    // I can't think of a reason why the filename would exist
                    // already, but just in case, delete it.
                    if (File.Exists(filename))
                    {
                        File.Delete(filename);
                    }

                    using (var writer = new StreamWriter(filename, false))
                    {
                        // Use a struct to simplify argument passing
                        var data = new CorpusWriteData()
                        {
                            Writer = writer,
                            ProbabilityHash = probabilityHash
                        };

                        // Write each probability hash entry as a
                        // line in the data file
                        foreach (var key in probabilityHash.Keys)
                        {
                            data.Key = key;
                            WriteLine(data);
                        }
                    }
                }
                finally
                {
                    // Always undo the directory change: without this, an I/O
                    // failure above would skip LeaveDirectory and leave the
                    // directory entered for every later caller.
                    DataDirectories.LeaveDirectory();
                }
            }
        }