Esempio n. 1
0
 /// <summary>
 /// Builds an <c>Attribute</c> from a single-space-separated string of
 /// alternating nature and frequency tokens ("nature freq nature freq ...").
 /// </summary>
 /// <param name="natureWithFrequency">The alternating nature/frequency token string.</param>
 /// <returns>
 /// The populated attribute, or <c>null</c> when any exception is raised while
 /// parsing (the failure is logged as a warning).
 /// </returns>
 public static Attribute Create(String natureWithFrequency)
 {
     try
     {
         string[] tokens = natureWithFrequency.Split(' ');

         // Tokens are consumed in pairs; a trailing unpaired token is ignored
         // because of the integer division.
         int pairCount = tokens.Length / 2;
         Attribute result = new Attribute(pairCount);

         int offset = 0;
         for (int pair = 0; pair < pairCount; pair++)
         {
             // First token of the pair is the nature name, second is its frequency.
             result.nature[pair]    = LexiconUtility.convertStringToNature(tokens[offset], null);
             result.frequency[pair] = int.Parse(tokens[offset + 1]);
             result.totalFrequency += result.frequency[pair];
             offset += 2;
         }

         return result;
     }
     catch (Exception e)
     {
         Predefine.logger.Warn("使用字符串" + natureWithFrequency + "创建词条属性失败!" + e.Message);
         return null;
     }
 }
Esempio n. 2
0
 /**
  * Gets the frequency of this word in the HanLP lexicon.
  * @return the frequency; 0 means the word is out-of-vocabulary (OOV)
  */
 public int getFrequency()
 {
     // Delegates the lookup for this instance's word to LexiconUtility.
     int frequency = LexiconUtility.getFrequency(word);
     return frequency;
 }
Esempio n. 3
0
        private static bool loadMainDictionary(String mainPath)
        {
            logger.info("自定义词典开始加载:" + mainPath);
            if (loadDat(mainPath))
            {
                return(true);
            }
            Dictionary <String, CoreDictionary.Attribute> map = new Dictionary <string, CoreDictionary.Attribute>();
            HashSet <Nature> customNatureCollector            = new HashSet <Nature>();

            try
            {
                foreach (String p in path)
                {
                    Nature defaultNature = Nature.n;
                    int    cut           = p.IndexOf(' ');
                    if (cut > 0)
                    {
                        // 有默认词性
                        String nature = p.Substring(cut + 1);
                        p = p.Substring(0, cut);
                        try
                        {
                            defaultNature = LexiconUtility.convertStringToNature(nature, customNatureCollector);
                        }
                        catch (Exception e)
                        {
                            logger.severe("配置文件【" + p + "】写错了!" + e);
                            continue;
                        }
                    }
                    logger.info("以默认词性[" + defaultNature + "]加载自定义词典" + p + "中……");
                    bool success = load(p, defaultNature, map, customNatureCollector);
                    if (!success)
                    {
                        logger.warning("失败:" + p);
                    }
                }
                if (map.Count == 0)
                {
                    logger.warning("没有加载到任何词条");
                    map.put(Predefine.TAG_OTHER, null); // 当作空白占位符
                }
                logger.info("正在构建DoubleArrayTrie……");
                dat.build(map);
                // 缓存成dat文件,下次加载会快很多
                logger.info("正在缓存词典为dat文件……");
                // 缓存值文件
                List <CoreDictionary.Attribute> attributeList = new List <CoreDictionary.Attribute>();
                foreach (KeyValuePair <String, CoreDictionary.Attribute> entry in map)
                {
                    attributeList.Add(entry.Value);
                }
                DataOutputStream out = new DataOutputStream(new FileOutputStream(mainPath + Predefine.BIN_EXT));
                // 缓存用户词性
                IOUtil.writeCustomNature(out, customNatureCollector);
                // 缓存正文
                out.writeInt(attributeList.size());
                for (CoreDictionary.Attribute attribute : attributeList)
                {
                    out.writeInt(attribute.totalFrequency);
                    out.writeInt(attribute.nature.length);