예제 #1
0
 /**
  * Looks up the attribute of a word, consulting the core dictionary first and
  * falling back to the custom (user) dictionary when the core dictionary has
  * no entry.
  *
  * @param word the word to look up
  * @return the result of CoreDictionary.get(word) when it is non-null; otherwise
  *         whatever CustomDictionary.get(word) returns
  */
 public static CoreDictionary.Attribute getAttribute(String word)
 {
     CoreDictionary.Attribute coreHit = CoreDictionary.get(word);

     // Nothing in the core dictionary: defer to the custom dictionary.
     if (coreHit == null)
     {
         return CustomDictionary.get(word);
     }

     return coreHit;
 }
예제 #2
0
 /**
  * Builds a vertex from a word alone.
  *
  * Delegates to the three-argument constructor, passing null as the first
  * argument, realWord as the second, and the CoreDictionary entry looked up
  * for realWord as the third.
  *
  * @param realWord the real (original) word; also the key for the CoreDictionary lookup
  */
 public Vertex(String realWord) : this(null, realWord, CoreDictionary.get(realWord))
 {
     // All initialisation is performed by the delegated constructor.
 }
예제 #3
0
        /**
         * Converts a word into its equivalent tag string, based on its attribute.
         *
         * When the attribute carries exactly one nature and that nature belongs to
         * one of the categories handled below, this method sets wordID to the
         * category's CoreDictionary word id, replaces this.attribute with the
         * CoreDictionary entry for that id (except for the person-name and
         * place-name categories), and returns the category's Predefine tag.
         * In every other case realWord is returned and no field is modified.
         *
         * @param realWord  the original word
         * @param attribute the attribute of the word; only its nature array is inspected
         * @return the Predefine tag of the matched category, or realWord when nothing matches
         */
        private String compileRealWord(String realWord, CoreDictionary.Attribute attribute)
        {
            // Words whose attribute does not hold exactly one nature are left as they are.
            if (attribute.nature.Length != 1)
            {
                return realWord;
            }

            switch (attribute.nature[0])
            {
                // Person names. this.attribute is not replaced for this category.
                case Nature.nr:
                case Nature.nr1:
                case Nature.nr2:
                case Nature.nrf:
                case Nature.nrj:
                    wordID = CoreDictionary.NR_WORD_ID;
                    return Predefine.TAG_PEOPLE;

                // Place names. this.attribute is not replaced: during place-name
                // recognition, words such as "河镇" should keep their own part of
                // speech rather than take the one of the generic place tag.
                case Nature.ns:
                case Nature.nsf:
                    wordID = CoreDictionary.NS_WORD_ID;
                    return Predefine.TAG_PLACE;

                // Proper nouns (Nature.nz is not mapped here).
                case Nature.nx:
                    wordID = CoreDictionary.NX_WORD_ID;
                    this.attribute = CoreDictionary.get(CoreDictionary.NX_WORD_ID);
                    return Predefine.TAG_PROPER;

                // Organisations / groups.
                case Nature.nt:
                case Nature.ntc:
                case Nature.ntcf:
                case Nature.ntcb:
                case Nature.ntch:
                case Nature.nto:
                case Nature.ntu:
                case Nature.nts:
                case Nature.nth:
                case Nature.nit:
                    wordID = CoreDictionary.NT_WORD_ID;
                    this.attribute = CoreDictionary.get(CoreDictionary.NT_WORD_ID);
                    return Predefine.TAG_GROUP;

                // Numerals.
                case Nature.m:
                case Nature.mq:
                    wordID = CoreDictionary.M_WORD_ID;
                    this.attribute = CoreDictionary.get(CoreDictionary.M_WORD_ID);
                    return Predefine.TAG_NUMBER;

                // Mapped to the cluster tag (Nature.xx and Nature.w are not mapped).
                case Nature.x:
                    wordID = CoreDictionary.X_WORD_ID;
                    this.attribute = CoreDictionary.get(CoreDictionary.X_WORD_ID);
                    return Predefine.TAG_CLUSTER;

                // Time expressions.
                case Nature.t:
                    wordID = CoreDictionary.T_WORD_ID;
                    this.attribute = CoreDictionary.get(CoreDictionary.T_WORD_ID);
                    return Predefine.TAG_TIME;

                // Any other nature: the word stands for itself.
                default:
                    return realWord;
            }
        }