/// <summary>
/// Builds a <c>WordDict</c> from the frequency file and the dictionary file found
/// under the project's <c>dict</c> directory, then saves it via <c>WordDict.SaveTo</c>.
/// </summary>
/// <param name="file">Destination handed to <c>WordDict.SaveTo</c>.</param>
static void BuildDawgFile(string file)
{
    // The application base directory with the "\test\bin\Debug\" segment removed
    // is used as the root under which the "dict" directory is looked up.
    var baseDir = AppDomain.CurrentDomain.BaseDirectory.Replace(@"\test\bin\Debug\", "");
    var freqPath = baseDir + @"\dict\cwsharp.freq";
    var dicPath = baseDir + @"\dict\cwsharp.dic";
    var dict = new WordDict();

    // Load the default word frequencies: the first space-separated field is the
    // word, the second is parsed as its integer frequency.
    using (var reader = new StreamReader(freqPath, Encoding.UTF8))
    {
        for (var entry = reader.ReadLine(); entry != null; entry = reader.ReadLine())
        {
            if (entry.Length != 0)
            {
                var fields = entry.Split(' ');
                dict.Add(fields[0], int.Parse(fields[1]));
            }
        }
    }

    // Load the new dictionary: every non-empty line is added as a word.
    using (var reader = new StreamReader(dicPath, Encoding.UTF8))
    {
        for (var entry = reader.ReadLine(); entry != null; entry = reader.ReadLine())
        {
            if (entry.Length != 0)
            {
                dict.Add(entry);
            }
        }
    }

    // Save the new dawg file.
    dict.SaveTo(file);
}
/// <summary>
/// Reads the word-frequency file and the dictionary file located under the
/// <c>dict</c> directory, feeds their contents into a fresh <c>WordDict</c>,
/// and saves the result via <c>WordDict.SaveTo</c>.
/// </summary>
/// <param name="file">Destination handed to <c>WordDict.SaveTo</c>.</param>
static void BuildDawgFile(string file)
{
    // Root directory: the application base directory minus "\test\bin\Debug\".
    var root = AppDomain.CurrentDomain.BaseDirectory.Replace(@"\test\bin\Debug\", "");
    var words = new WordDict();

    // Load the default word frequencies.
    using (var freqReader = new StreamReader(root + @"\dict\cwsharp.freq", Encoding.UTF8))
    {
        while (true)
        {
            var text = freqReader.ReadLine();
            if (text == null)
            {
                break;
            }

            if (text == string.Empty)
            {
                continue;
            }

            // Field 0 is the word, field 1 its frequency (parsed as an integer).
            var tokens = text.Split(' ');
            var word = tokens[0];
            var frequency = int.Parse(tokens[1]);
            words.Add(word, frequency);
        }
    }

    // Load the new dictionary.
    using (var dicReader = new StreamReader(root + @"\dict\cwsharp.dic", Encoding.UTF8))
    {
        while (true)
        {
            var text = dicReader.ReadLine();
            if (text == null)
            {
                break;
            }

            if (text == string.Empty)
            {
                continue;
            }

            words.Add(text);
        }
    }

    // Save the new dawg file.
    words.SaveTo(file);
}
/// <summary>
/// Loads every non-empty line of a UTF-8 text file into a <c>WordDict</c>, saves
/// it to an in-memory stream, loads it back, and asserts that the reloaded
/// dictionary's <c>Count</c> equals the number of lines that were added.
/// The elapsed time of the save and of the load is written to the console.
/// </summary>
/// <param name="file">Path of the text file to read, one word per line.</param>
public void TestFromTxtFile(string file)
{
    var source = new WordDict();
    var addedCount = 0;

    using (var reader = new StreamReader(file, Encoding.UTF8))
    {
        for (var entry = reader.ReadLine(); entry != null; entry = reader.ReadLine())
        {
            if (entry.Length != 0)
            {
                source.Add(entry);
                addedCount++;
            }
        }
    }

    // Time the save; the stream allocation is inside the timed region,
    // as it was originally.
    var timer = System.Diagnostics.Stopwatch.StartNew();
    var buffer = new MemoryStream();
    source.SaveTo(buffer);
    timer.Stop();
    var buildMs = timer.Elapsed.TotalMilliseconds;
    Console.WriteLine("build dawg elapsed time:" + buildMs + "'ms");

    // Time the load, after rewinding the stream to its start.
    timer.Restart();
    buffer.Position = 0;
    var reloaded = WordDict.LoadFrom(buffer);
    timer.Stop();
    var loadMs = timer.Elapsed.TotalMilliseconds;
    Console.WriteLine("load dawg file elapsed time:" + loadMs + "'ms");

    Assert.AreEqual(addedCount, reloaded.Count);
}
/// <summary>
/// Adds each of the given words to the dictionary, in argument order, by calling
/// <c>Add</c> once per word.
/// </summary>
/// <param name="wordUtil">The dictionary that receives the words.</param>
/// <param name="words">The words to add.</param>
public static void AddWords(this WordDict wordUtil, params string[] words)
{
    for (var index = 0; index < words.Length; index++)
    {
        wordUtil.Add(words[index]);
    }
}
/// <summary>
/// Parses word data from a space-separated string and adds it to the dictionary.
/// </summary>
/// <remarks>
/// The string is split on single spaces: field 0 is the word, field 1 is its
/// integer frequency, and every remaining field is added to the word's
/// <c>ZhuyinList</c>. At least three fields are required. If the word is already
/// present, it is overwritten only when the new frequency is greater than or
/// equal to the stored one.
/// </remarks>
/// <param name="str">The line to parse.</param>
/// <returns>
/// <c>true</c> if the word was added or overwritten; <c>false</c> if
/// <paramref name="str"/> is null or empty, has fewer than three fields, has a
/// frequency field that is not a valid integer, or names an existing word whose
/// stored frequency is higher.
/// </returns>
public bool AddWord(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return false;
    }

    string[] parts = str.Split(' ');

    // A malformed frequency is reported like any other invalid line instead of
    // escaping as a FormatException/OverflowException.
    int freq;
    if (parts.Length < 3 || !int.TryParse(parts[1], out freq))
    {
        Logger.Warning($"無效的字詞: {str}");
        return false;
    }

    var key = parts[0]; // The word itself.

    // If the word already exists in the table, compare frequencies: the entry is
    // overwritten only when the new frequency is greater than or equal to the
    // existing one.
    WordData word;
    if (WordDict.TryGetValue(key, out word))
    {
        if (freq < word.Frequency)
        {
            return false;
        }

        word.ZhuyinList.Clear();
    }
    else
    {
        word = new WordData();
        WordDict.Add(key, word);
    }

    word.Frequency = freq;
    for (int i = 2; i < parts.Length; i++)
    {
        word.ZhuyinList.Add(parts[i]);
    }

    Logger.Verbose("加入字詞: {Key} {@Word}", key, word);
    return true;
}
/// <summary>
/// Round-trips a word list through <c>WordDict.SaveTo</c> and
/// <c>WordDict.LoadFrom</c> using an in-memory stream, prints how long each step
/// took, and asserts that the reloaded dictionary's <c>Count</c> matches the
/// number of non-empty lines read from the file.
/// </summary>
/// <param name="file">Path of the UTF-8 text file to read, one word per line.</param>
public void TestFromTxtFile(string file)
{
    var original = new WordDict();
    var lineCount = 0;

    using (var input = new StreamReader(file, Encoding.UTF8))
    {
        while (true)
        {
            var text = input.ReadLine();
            if (text == null)
            {
                break;
            }

            if (text == string.Empty)
            {
                continue;
            }

            original.Add(text);
            lineCount++;
        }
    }

    // Measure building/saving the dawg into memory.
    var saveTimer = System.Diagnostics.Stopwatch.StartNew();
    var stream = new MemoryStream();
    original.SaveTo(stream);
    saveTimer.Stop();
    Console.WriteLine("build dawg elapsed time:" + saveTimer.Elapsed.TotalMilliseconds + "'ms");

    // Measure loading it back from the start of the same stream.
    var loadTimer = System.Diagnostics.Stopwatch.StartNew();
    stream.Position = 0;
    var roundTripped = WordDict.LoadFrom(stream);
    loadTimer.Stop();
    Console.WriteLine("load dawg file elapsed time:" + loadTimer.Elapsed.TotalMilliseconds + "'ms");

    Assert.AreEqual(lineCount, roundTripped.Count);
}