예제 #1
0
 /// <summary>
 /// Builds a word dictionary from the default frequency list and the additional
 /// word list found under the project's <c>dict</c> folder, then saves the
 /// resulting DAWG data to <paramref name="file"/>.
 /// </summary>
 /// <param name="file">Destination handed to <c>WordDict.SaveTo</c>.</param>
 /// <remarks>
 /// A frequency line with fewer than two space-separated fields, or with a
 /// non-numeric second field, still throws; only blank lines are skipped.
 /// </remarks>
 static void BuildDawgFile(string file)
 {
     // Strip the test output folder suffix from the base directory to get the project root.
     var rootPath = AppDomain.CurrentDomain.BaseDirectory.Replace(@"\test\bin\Debug\", "");
     var wordUtil = new WordDict();

     // Load the default word frequencies; each line is "<word> <frequency>".
     using (var sr = new StreamReader(rootPath + @"\dict\cwsharp.freq", Encoding.UTF8))
     {
         string line;
         while ((line = sr.ReadLine()) != null)
         {
             // Skip blank lines, including whitespace-only ones, which would
             // otherwise fail while parsing the frequency field.
             if (string.IsNullOrWhiteSpace(line))
             {
                 continue;
             }

             var array = line.Split(' ');
             // The file is machine-readable data, so parse independently of the current culture.
             var frequency = int.Parse(array[1], System.Globalization.CultureInfo.InvariantCulture);
             wordUtil.Add(array[0], frequency);
         }
     }

     // Load the additional dictionary; each line is a single word.
     using (var sr = new StreamReader(rootPath + @"\dict\cwsharp.dic", Encoding.UTF8))
     {
         string line;
         while ((line = sr.ReadLine()) != null)
         {
             // A whitespace-only line is not a word; do not add it.
             if (string.IsNullOrWhiteSpace(line))
             {
                 continue;
             }

             wordUtil.Add(line);
         }
     }

     // Save the new DAWG file.
     wordUtil.SaveTo(file);
 }
예제 #2
0
        /// <summary>
        /// Reads the default frequency list and the extra word list from the project's
        /// <c>dict</c> folder into a <c>WordDict</c> and saves the resulting DAWG data
        /// to <paramref name="file"/>.
        /// </summary>
        /// <param name="file">Destination handed to <c>WordDict.SaveTo</c>.</param>
        /// <remarks>
        /// Blank lines are ignored. A frequency line that lacks a second field or whose
        /// second field is not an integer still throws.
        /// </remarks>
        static void BuildDawgFile(string file)
        {
            // The base directory is the test output folder; remove that suffix to reach the project root.
            var rootPath = AppDomain.CurrentDomain.BaseDirectory.Replace(@"\test\bin\Debug\", "");
            var wordUtil = new WordDict();

            // Load the default word frequencies ("<word> <frequency>" per line).
            using (var sr = new StreamReader(rootPath + @"\dict\cwsharp.freq", Encoding.UTF8))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Whitespace-only lines are treated like empty ones; they carry no
                    // frequency field and would otherwise make the parse below throw.
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    var array = line.Split(' ');
                    // Data file, not user input: parse with the invariant culture.
                    wordUtil.Add(
                        array[0],
                        int.Parse(array[1], System.Globalization.CultureInfo.InvariantCulture));
                }
            }

            // Load the new dictionary (one word per line).
            using (var sr = new StreamReader(rootPath + @"\dict\cwsharp.dic", Encoding.UTF8))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    // Do not register a whitespace-only line as a word.
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    wordUtil.Add(line);
                }
            }

            // Save the new DAWG file.
            wordUtil.SaveTo(file);
        }
예제 #3
0
        /// <summary>
        /// Adds every non-empty line of a UTF-8 text file to a <c>WordDict</c>, round-trips the
        /// dictionary through an in-memory stream, prints both elapsed times, and asserts that
        /// the reloaded dictionary's count equals the number of lines added.
        /// </summary>
        /// <param name="file">Path of the text file to read, one entry per line.</param>
        public void TestFromTxtFile(string file)
        {
            var source = new WordDict();
            var addedLines = 0;
            using (var reader = new StreamReader(file, Encoding.UTF8))
            {
                for (string text = reader.ReadLine(); text != null; text = reader.ReadLine())
                {
                    // Empty lines are neither added nor counted.
                    if (text.Length != 0)
                    {
                        source.Add(text);
                        addedLines++;
                    }
                }
            }

            // Time serialisation into memory (stream creation is inside the timed region).
            var timer = new System.Diagnostics.Stopwatch();
            timer.Start();
            var buffer = new MemoryStream();
            source.SaveTo(buffer);
            timer.Stop();

            double buildMillis = timer.Elapsed.TotalMilliseconds;
            Console.WriteLine("build dawg elapsed time:" + buildMillis + "'ms");

            // Time loading the dictionary back from the start of the same stream.
            timer.Reset();
            timer.Start();
            buffer.Position = 0;
            WordDict reloaded = WordDict.LoadFrom(buffer);
            timer.Stop();

            double loadMillis = timer.Elapsed.TotalMilliseconds;
            Console.WriteLine("load dawg file elapsed time:" + loadMillis + "'ms");

            Assert.AreEqual(addedLines, reloaded.Count);
        }
예제 #4
0
 /// <summary>
 /// Adds each of the given words to the dictionary, in order, by calling
 /// <c>Add</c> once per word.
 /// </summary>
 /// <param name="wordUtil">The dictionary receiving the words.</param>
 /// <param name="words">The words to add.</param>
 public static void AddWords(this WordDict wordUtil, params string[] words)
 {
     for (int index = 0; index < words.Length; index++)
     {
         string current = words[index];
         wordUtil.Add(current);
     }
 }
예제 #5
0
        /// <summary>
        /// Parses one space-separated word entry and adds it to the dictionary.
        /// The first field is the word, the second its integer frequency, and every
        /// remaining field is appended to the word's <c>ZhuyinList</c>.
        /// </summary>
        /// <param name="str">The entry text; at least three space-separated fields are required.</param>
        /// <returns>
        /// <c>true</c> if the entry was added or replaced an existing entry; <c>false</c> if
        /// <paramref name="str"/> is null or empty, has fewer than three fields, has a
        /// frequency that is not a valid integer, or has a lower frequency than the entry
        /// already stored for the same word.
        /// </returns>
        public bool AddWord(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return false;
            }

            string[] parts = str.Split(' ');

            // Reject malformed entries the same way in every case: too few fields or an
            // unparseable frequency are both logged and reported as false rather than
            // throwing. The invariant culture is used because this is data, not UI text.
            int freq;
            if (parts.Length < 3
                || !int.TryParse(
                    parts[1],
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out freq))
            {
                Logger.Warning($"無效的字詞: {str}");
                return false;
            }

            var key = parts[0]; // the word itself

            // If the word already exists, compare frequencies: the new entry overwrites
            // the existing one only when its frequency is greater than or equal to it.
            WordData word;
            if (WordDict.TryGetValue(key, out word)) // single lookup instead of ContainsKey + indexer
            {
                if (freq < word.Frequency)
                {
                    return false;
                }
                word.ZhuyinList.Clear();
            }
            else
            {
                word = new WordData();
                WordDict.Add(key, word);
            }

            word.Frequency = freq;
            for (int i = 2; i < parts.Length; i++)
            {
                word.ZhuyinList.Add(parts[i]);
            }
            Logger.Verbose("加入字詞: {Key} {@Word}", key, word);
            return true;
        }
예제 #6
0
        /// <summary>
        /// Round-trip check for <c>WordDict</c>: fills a dictionary from the non-empty lines of a
        /// UTF-8 text file, saves it to a memory stream, loads it back, reports the time taken by
        /// each step, and asserts the reloaded count matches the number of lines that were added.
        /// </summary>
        /// <param name="file">Path of the input text file, one entry per line.</param>
        public void TestFromTxtFile(string file)
        {
            var words    = new WordDict();
            var expected = 0;

            using (var input = new StreamReader(file, Encoding.UTF8))
            {
                while (true)
                {
                    string entry = input.ReadLine();
                    if (entry == null)
                    {
                        // End of file.
                        break;
                    }

                    // Only non-empty lines are added and counted.
                    if (entry != string.Empty)
                    {
                        words.Add(entry);
                        expected += 1;
                    }
                }
            }

            var stopwatch = new System.Diagnostics.Stopwatch();

            // Measure building/saving into memory.
            stopwatch.Start();
            var memory = new MemoryStream();
            words.SaveTo(memory);
            stopwatch.Stop();

            double saveElapsed = stopwatch.Elapsed.TotalMilliseconds;
            Console.WriteLine("build dawg elapsed time:" + saveElapsed + "'ms");

            // Measure loading from the beginning of the same stream.
            stopwatch.Reset();
            stopwatch.Start();
            memory.Position = 0;
            WordDict loaded = WordDict.LoadFrom(memory);
            stopwatch.Stop();

            double loadElapsed = stopwatch.Elapsed.TotalMilliseconds;
            Console.WriteLine("load dawg file elapsed time:" + loadElapsed + "'ms");

            Assert.AreEqual(expected, loaded.Count);
        }