/**
 * Builds an Attribute from a space-separated string of alternating
 * nature and frequency tokens ("nature1 freq1 nature2 freq2 ...").
 * Tokens are consumed in pairs; a trailing unpaired token is ignored
 * because the pair count is the token count divided by two (integer division).
 * @param natureWithFrequency the nature/frequency pairs to parse
 * @return the populated attribute, or null (after a warning is logged) if any step throws
 */
public static Attribute Create(String natureWithFrequency)
{
    try
    {
        string[] tokens = natureWithFrequency.Split(' ');
        int pairCount = tokens.Length / 2;
        Attribute result = new Attribute(pairCount);

        // slot indexes the attribute arrays; tokenIndex points at the nature token of the current pair.
        for (int slot = 0, tokenIndex = 0; slot < pairCount; ++slot, tokenIndex += 2)
        {
            result.nature[slot] = LexiconUtility.convertStringToNature(tokens[tokenIndex], null);
            result.frequency[slot] = int.Parse(tokens[tokenIndex + 1]);
            // Keep the running total in step with the per-nature frequencies.
            result.totalFrequency += result.frequency[slot];
        }

        return result;
    }
    catch (Exception e)
    {
        // Best-effort: any failure (including a null input) is reported and signalled with null.
        Predefine.logger.Warn("使用字符串" + natureWithFrequency + "创建词条属性失败!" + e.Message);
        return null;
    }
}
/**
 * Gets the frequency of this word in the HanLP lexicon.
 * @return the frequency; 0 means the word is OOV (out of vocabulary)
 */
public int getFrequency()
{
    // Delegate the lookup to the shared lexicon utility, keyed by this instance's word.
    int frequency = LexiconUtility.getFrequency(word);
    return frequency;
}
private static bool loadMainDictionary(String mainPath) { logger.info("自定义词典开始加载:" + mainPath); if (loadDat(mainPath)) { return(true); } Dictionary <String, CoreDictionary.Attribute> map = new Dictionary <string, CoreDictionary.Attribute>(); HashSet <Nature> customNatureCollector = new HashSet <Nature>(); try { foreach (String p in path) { Nature defaultNature = Nature.n; int cut = p.IndexOf(' '); if (cut > 0) { // 有默认词性 String nature = p.Substring(cut + 1); p = p.Substring(0, cut); try { defaultNature = LexiconUtility.convertStringToNature(nature, customNatureCollector); } catch (Exception e) { logger.severe("配置文件【" + p + "】写错了!" + e); continue; } } logger.info("以默认词性[" + defaultNature + "]加载自定义词典" + p + "中……"); bool success = load(p, defaultNature, map, customNatureCollector); if (!success) { logger.warning("失败:" + p); } } if (map.Count == 0) { logger.warning("没有加载到任何词条"); map.put(Predefine.TAG_OTHER, null); // 当作空白占位符 } logger.info("正在构建DoubleArrayTrie……"); dat.build(map); // 缓存成dat文件,下次加载会快很多 logger.info("正在缓存词典为dat文件……"); // 缓存值文件 List <CoreDictionary.Attribute> attributeList = new List <CoreDictionary.Attribute>(); foreach (KeyValuePair <String, CoreDictionary.Attribute> entry in map) { attributeList.Add(entry.Value); } DataOutputStream out = new DataOutputStream(new FileOutputStream(mainPath + Predefine.BIN_EXT)); // 缓存用户词性 IOUtil.writeCustomNature(out, customNatureCollector); // 缓存正文 out.writeInt(attributeList.size()); for (CoreDictionary.Attribute attribute : attributeList) { out.writeInt(attribute.totalFrequency); out.writeInt(attribute.nature.length);