/// <summary>
/// Adds a keyword by inserting it into <c>Forest</c>.
/// </summary>
/// <param name="keyword">The keyword to add.</param>
/// <param name="nature">The part of speech of the keyword.</param>
/// <param name="freq">The frequency of the keyword.</param>
public static void InsertWord(string keyword, string nature, int freq)
{
    // The entry's parameters are, in order: the nature, then the frequency rendered as text.
    var entry = new Value(keyword, new[] { nature, freq.ToString() });
    StaticLibrary.InsertWord(Forest, entry);
}
public void Test1()
{
    // Add a new word (original note: fields are separated by '\t').
    UserDefineLibrary.InsertWord("ansj中文分词", "userDefine", 1000);
    var afterInsert = ToAnalysis.Parse("我觉得Ansj中文分词是一个不错的系统!我是王婆!");
    foreach (var item in afterInsert)
    {
        Debug.WriteLine(item.RealName, "增加新词");
    }

    // Remove a word; per the original note, only user-defined dictionary words can be removed.
    UserDefineLibrary.RemoveWord("ansj中文分词");
    var afterRemove = ToAnalysis.Parse("我觉得ansj中文分词是一个不错的系统!我是王婆!");
    foreach (var item in afterRemove)
    {
        Debug.WriteLine(item.RealName, "删除用户自定义词典例子");
    }

    // Ambiguity handling: parse once before the ambiguity rule is registered...
    var ambiguityRule = new Value("济南下车", "济南", "n", "下车", "v");
    var beforeRule = ToAnalysis.Parse("我经济南下车到广州.中国经济南下势头迅猛!");
    foreach (var item in beforeRule)
    {
        Debug.WriteLine(item.RealName, "歧义词1");
    }

    // ...and once more after inserting it into the ambiguity forest.
    StaticLibrary.InsertWord(UserDefineLibrary.AmbiguityForest, ambiguityRule);
    var afterRule = ToAnalysis.Parse("我经济南下车到广州.中国经济南下势头迅猛!");
    foreach (var item in afterRule)
    {
        Debug.WriteLine(item.RealName, "歧义词2");
    }

    // Multiple user dictionaries: first parse without any extra forests.
    var sentence = "神探夏洛克这部电影作者.是一个dota迷";
    var withoutForests = ToAnalysis.Parse(sentence);
    foreach (var item in withoutForests)
    {
        Debug.WriteLine(item.RealName, "多用户词典1");
    }

    // Two words, each registered in its own forest, then both forests passed to Parse.
    var firstForest = new Forest();
    StaticLibrary.InsertWord(firstForest, new Value("神探夏洛克", "define", "1000"));
    var secondForest = new Forest();
    StaticLibrary.InsertWord(secondForest, new Value("dota迷", "define", "1000"));
    var withForests = ToAnalysis.Parse(sentence, firstForest, secondForest);
    foreach (var item in withForests)
    {
        Debug.WriteLine(item.RealName, "多用户词典2");
    }
}
/// <summary>
/// Loads a single dictionary file into the given forest.
/// </summary>
/// <remarks>
/// The file is opened through <c>IOUtil.GetReader</c> with <c>Encoding.UTF8</c> and read line by line.
/// Each non-blank line is split on tab characters; the first field is lower-cased and used as the keyword.
/// A line with exactly three fields supplies keyword, nature and frequency; any other field count
/// falls back to <c>DefaultNature</c> and <c>DefaultFreqStr</c>.
/// Blank or whitespace-only lines are skipped.
/// Any exception raised while reading is written to <c>Trace</c> and not rethrown.
/// </remarks>
/// <param name="forest">The forest that receives the entries.</param>
/// <param name="file">The dictionary file to read. If it does not exist, a warning is logged and nothing is loaded.</param>
public static void LoadFile(Forest forest, FileInfo file)
{
    if (!file.Exists)
    {
        MyStaticValue.Librarylog.Warn("file in path " + file.FullName + " can not to read!");
        return;
    }

    TextReader br = null;
    try
    {
        br = IOUtil.GetReader(new FileStream(file.FullName, FileMode.Open, FileAccess.Read, FileShare.Read), Encoding.UTF8);
        string temp;
        while ((temp = br.ReadLine()) != null)
        {
            // Skip blank lines; otherwise an empty keyword would be inserted with the default nature/frequency.
            if (string.IsNullOrWhiteSpace(temp))
            {
                continue;
            }

            var strs = temp.Split('\t');
            strs[0] = strs[0].ToLower();

            // When skipping is enabled and the core dictionary returns a positive id for the keyword, drop the entry.
            if (MyStaticValue.IsSkipUserDefine && DatDictionary.GetId(strs[0]) > 0)
            {
                continue;
            }

            Value value;
            if (strs.Length != 3)
            {
                value = new Value(strs[0], DefaultNature, DefaultFreqStr);
            }
            else
            {
                value = new Value(strs[0], strs[1], strs[2]);
            }

            StaticLibrary.InsertWord(forest, value);
        }

        MyStaticValue.Librarylog.Info("init user userLibrary ok path is : " + file.FullName);
    }
    catch (Exception e)
    {
        // Best-effort load: a failure is traced and the method returns normally.
        Trace.WriteLine(e);
    }
    finally
    {
        IOUtil.Close(br);
    }
}
/// <summary>
/// Inserts the entry described by <paramref name="value"/> into <paramref name="forest"/>
/// by forwarding its keyword and parameters to the overload that takes them separately.
/// </summary>
/// <param name="forest">The forest that receives the entry.</param>
/// <param name="value">The entry whose <c>Keyword</c> and <c>Paramers</c> are inserted.</param>
public static void InsertWord(Forest forest, Value value)
{
    var keyword = value.Keyword;
    var paramers = value.Paramers;
    InsertWord(forest, keyword, paramers);
}