/// <summary>
/// Loads the ambiguity (correction) dictionary into <c>AmbiguityForest</c>.
/// </summary>
/// <remarks>
/// The dictionary path is read from <c>MyStaticValue.AmbiguityLibrary</c>. When the path is
/// blank or the file does not exist, a warning is logged and nothing is loaded. Any exception
/// raised while building the forest is logged and traced rather than propagated.
/// </remarks>
private static void InitAmbiguityLibrary()
{
    var ambiguityLibrary = MyStaticValue.AmbiguityLibrary;
    if (string.IsNullOrWhiteSpace(ambiguityLibrary))
    {
        MyStaticValue.Librarylog.Warn("init ambiguity warning :" + ambiguityLibrary + " because : file not found or failed to read !");
        return;
    }

    var file = new FileInfo(ambiguityLibrary);
    if (!file.Exists)
    {
        MyStaticValue.Librarylog.Warn("init ambiguity warning :" + file.FullName + " because : file not found or failed to read !");
        return;
    }

    try
    {
        AmbiguityForest = StaticLibrary.MakeForest(ambiguityLibrary);

        // Report success only when the forest was actually built; previously this message
        // was emitted after the catch block and therefore also appeared after a failure.
        MyStaticValue.Librarylog.Info("init ambiguityLibrary ok!");
    }
    catch (Exception e)
    {
        MyStaticValue.Librarylog.Warn("init ambiguity error :" + file.FullName + " because : not find that file or can not to read !");
        Trace.WriteLine(e);
    }
}
/// <summary>
/// Builds a new forest (dictionary tree) from a list of values.
/// </summary>
/// <param name="values">The values to insert; each one is inserted using its <c>ToString()</c> text.</param>
/// <returns>A newly created forest containing every inserted value.</returns>
public static Forest MakeForest(List<Value> values)
{
    var result = new Forest();
    values.ForEach(item => InsertWord(result, item.ToString()));
    return result;
}
/// <summary>
/// Walks through the dictionary features: adding and removing a user-defined word,
/// registering an ambiguity rule, and parsing with additional per-call dictionaries.
/// Each parse result is written to the debug output under a category label.
/// </summary>
public void Test1()
{
    // Add a new word (fields are separated by '\t').
    UserDefineLibrary.InsertWord("ansj中文分词", "userDefine", 1000);
    var parsed = ToAnalysis.Parse("我觉得Ansj中文分词是一个不错的系统!我是王婆!");
    foreach (var item in parsed)
    {
        Debug.WriteLine(item.RealName, "增加新词");
    }

    // Remove a word; only entries of the user-defined dictionary can be removed.
    UserDefineLibrary.RemoveWord("ansj中文分词");
    parsed = ToAnalysis.Parse("我觉得ansj中文分词是一个不错的系统!我是王婆!");
    foreach (var item in parsed)
    {
        Debug.WriteLine(item.RealName, "删除用户自定义词典例子");
    }

    // Ambiguity rule: parse once before and once after registering it.
    var ambiguityRule = new Value("济南下车", "济南", "n", "下车", "v");
    var ambiguousSentence = "我经济南下车到广州.中国经济南下势头迅猛!";
    parsed = ToAnalysis.Parse(ambiguousSentence);
    foreach (var item in parsed)
    {
        Debug.WriteLine(item.RealName, "歧义词1");
    }

    StaticLibrary.InsertWord(UserDefineLibrary.AmbiguityForest, ambiguityRule);
    parsed = ToAnalysis.Parse(ambiguousSentence);
    foreach (var item in parsed)
    {
        Debug.WriteLine(item.RealName, "歧义词2");
    }

    // Multiple user dictionaries: parse first without them...
    var sentence = "神探夏洛克这部电影作者.是一个dota迷";
    parsed = ToAnalysis.Parse(sentence);
    foreach (var item in parsed)
    {
        Debug.WriteLine(item.RealName, "多用户词典1");
    }

    // ...then with two extra dictionaries, one word each.
    var firstDictionary = new Forest();
    StaticLibrary.InsertWord(firstDictionary, new Value("神探夏洛克", "define", "1000"));
    var secondDictionary = new Forest();
    StaticLibrary.InsertWord(secondDictionary, new Value("dota迷", "define", "1000"));
    parsed = ToAnalysis.Parse(sentence, firstDictionary, secondDictionary);
    foreach (var item in parsed)
    {
        Debug.WriteLine(item.RealName, "多用户词典2");
    }
}
/// <summary>
/// Fills a forest from a reader by inserting every line the reader yields.
/// </summary>
/// <param name="br">The reader to consume; when <c>null</c> the forest is returned untouched. The reader is closed before returning.</param>
/// <param name="forest">The forest that receives the words.</param>
/// <returns>The same <paramref name="forest"/> instance that was passed in.</returns>
private static Forest MakeLibrary(StreamReader br, Forest forest)
{
    if (br != null)
    {
        try
        {
            for (var line = br.ReadLine(); line != null; line = br.ReadLine())
            {
                InsertWord(forest, line);
            }
        }
        catch (Exception e)
        {
            // Failures are reported on the console; whatever was inserted so far is kept.
            Console.WriteLine(e);
        }
        finally
        {
            br.Close();
        }
    }

    return forest;
}
/// <summary>
/// Resets <c>Forest</c> to an empty forest and loads the user-defined dictionary
/// configured in <c>MyStaticValue.UserLibrary</c> into it.
/// </summary>
/// <remarks>Any exception is written to the trace output and not propagated.</remarks>
private static void InitUserLibrary()
{
    try
    {
        // Start from an empty forest, then populate it from the configured path.
        Forest = new Forest();
        LoadLibrary(Forest, MyStaticValue.UserLibrary);
    }
    catch (Exception e)
    {
        Trace.WriteLine(e);
    }
}
/// <summary>
/// Walks the forest character by character along <paramref name="word"/> and returns
/// the parameters stored on the node that the walk ends on.
/// </summary>
/// <param name="forest">The forest to search.</param>
/// <param name="word">The word to look up.</param>
/// <returns>
/// The node's <c>Param</c> when the whole path exists and the node's <c>Status</c> is greater
/// than 1; otherwise <c>null</c>.
/// </returns>
public static string[] GetParams(Forest forest, string word)
{
    IWoodInterface node = forest;
    foreach (var ch in word)
    {
        node = node.Get(ch);
        if (node == null)
        {
            return null;
        }
    }

    return node.Status > 1 ? node.Param : null;
}
/// <summary>
/// Loads a dictionary into a forest from either a single file or a directory.
/// </summary>
/// <remarks>
/// The given path is resolved under the <c>Resources/</c> folder. When it names a file, that
/// file is loaded. When it names a directory, every file directly inside it whose name ends
/// with <c>.dic</c> is loaded. When it names neither, a warning is logged.
/// </remarks>
/// <param name="forest">The forest that receives the words.</param>
/// <param name="path">File or directory path relative to <c>Resources/</c>; <c>null</c> makes the call a no-op.</param>
public static void LoadLibrary(Forest forest, string path)
{
    if (path == null)
    {
        return;
    }

    path = "Resources/" + path;
    var file = new FileInfo(path);

    if (file.Exists)
    {
        LoadFile(forest, file);
        return;
    }

    if (!Directory.Exists(path))
    {
        MyStaticValue.Librarylog.Warn("init userLibrary warning :" + file.FullName + " because : file not found or failed to read !");
        return;
    }

    foreach (var candidate in new DirectoryInfo(path).GetFiles())
    {
        // Ordinal comparison: the extension is a fixed token and must not depend on the current culture.
        if (candidate.Name.Trim().EndsWith(".dic", StringComparison.Ordinal))
        {
            LoadFile(forest, candidate);
        }
    }
}
/// <summary>
/// Loads a single dictionary file into a forest.
/// </summary>
/// <remarks>
/// The file is read as UTF-8, one entry per line, with fields separated by tabs. The first
/// field is the keyword (lower-cased before insertion). A line with exactly three fields
/// supplies keyword, nature and frequency; any other field count falls back to
/// <c>DefaultNature</c> and <c>DefaultFreqStr</c>. Blank lines are ignored. Exceptions are
/// written to the trace output and not propagated.
/// </remarks>
/// <param name="forest">The forest that receives the words.</param>
/// <param name="file">The dictionary file; a warning is logged and nothing is loaded when it does not exist.</param>
public static void LoadFile(Forest forest, FileInfo file)
{
    if (!file.Exists)
    {
        MyStaticValue.Librarylog.Warn("file in path " + file.FullName + " can not to read!");
        return;
    }

    TextReader br = null;
    try
    {
        br = IOUtil.GetReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read, FileShare.Read), Encoding.UTF8);
        string temp;
        while ((temp = br.ReadLine()) != null)
        {
            // Skip blank lines. The original check had an empty body, so blank lines fell
            // through and were inserted as entries with an empty/whitespace keyword.
            if (string.IsNullOrWhiteSpace(temp))
            {
                continue;
            }

            var strs = temp.Split('\t');
            strs[0] = strs[0].ToLower();

            // When configured to do so, skip words for which DatDictionary.GetId returns a
            // positive id (per the original comment: the word already exists in the core dictionary).
            if (MyStaticValue.IsSkipUserDefine && DatDictionary.GetId(strs[0]) > 0)
            {
                continue;
            }

            Value value;
            if (strs.Length != 3)
            {
                value = new Value(strs[0], DefaultNature, DefaultFreqStr);
            }
            else
            {
                value = new Value(strs[0], strs[1], strs[2]);
            }

            StaticLibrary.InsertWord(forest, value);
        }

        MyStaticValue.Librarylog.Info("init user userLibrary ok path is : " + file.FullName);
    }
    catch (Exception e)
    {
        Trace.WriteLine(e);
    }
    finally
    {
        IOUtil.Close(br);
    }
}
/// <summary>
/// Creates a word getter over a character buffer.
/// </summary>
/// <param name="forest">The forest to search; it is also used as the initial current branch.</param>
/// <param name="chars">The characters to scan; the array is stored as-is, not copied.</param>
public GetWord(Forest forest, char[] chars)
{
    // The walk starts at the root, so the current branch is initialised to the forest itself.
    _forest = forest;
    _branch = forest;
    _chars = chars;
}
/// <summary>
/// Creates a word getter over the characters of a string.
/// </summary>
/// <param name="forest">The forest to search; it is also used as the initial current branch.</param>
/// <param name="content">The text to scan; it is converted to a character array.</param>
public GetWord(Forest forest, string content)
    : this(forest, content.ToCharArray())
{
}
/// <summary>
/// Inserts a value into a forest using the value's keyword and parameters.
/// </summary>
/// <param name="forest">The forest that receives the word.</param>
/// <param name="value">The value whose <c>Keyword</c> and <c>Paramers</c> are inserted.</param>
public static void InsertWord(Forest forest, Value value)
{
    var keyword = value.Keyword;
    var parameters = value.Paramers;
    InsertWord(forest, keyword, parameters);
}
/// <summary>
/// Removes a word from a forest.
/// </summary>
/// <remarks>
/// The forest is walked along the characters of <paramref name="word"/>. At the last
/// character a <c>Branch</c> with status <c>-1</c> and no parameters is added to the current
/// node. NOTE(review): status -1 presumably tells <c>Add</c> to mark the entry as removed;
/// confirm against the <c>Add</c> implementation. The walk stops silently as soon as a node
/// on the path is missing.
/// </remarks>
/// <param name="forest">The forest to remove the word from.</param>
/// <param name="word">The word to remove.</param>
public static void RemoveWord(Forest forest, string word)
{
    IWoodInterface node = forest;
    var lastIndex = word.Length - 1;
    for (var pos = 0; pos <= lastIndex; pos++)
    {
        if (node == null)
        {
            return;
        }

        var ch = word[pos];
        if (pos == lastIndex)
        {
            node.Add(new Branch(ch, -1, null));
        }

        node = node.Get(ch);
    }
}