/**
 * Looks up the attribute of a word in the HanLP dictionaries, consulting the
 * core dictionary first and the user (custom) dictionary second.
 *
 * @param word the word to look up
 * @return the attribute (part-of-speech and frequency information) from
 *         CoreDictionary when that lookup is non-null, otherwise the result
 *         of CustomDictionary.get(word)
 */
public static CoreDictionary.Attribute getAttribute(String word)
{
    CoreDictionary.Attribute coreEntry = CoreDictionary.get(word);

    // The custom dictionary is only queried when the core dictionary has no entry.
    return coreEntry != null ? coreEntry : CustomDictionary.get(word);
}
/**
 * Builds a vertex for the given word without requiring the caller to supply
 * an attribute: the attribute is taken from CoreDictionary.get(realWord) and
 * construction is delegated to the three-argument constructor, with null
 * passed as its first argument.
 *
 * @param realWord the word this vertex represents
 */
public Vertex(String realWord)
    : this(null, realWord, CoreDictionary.get(realWord))
{
}
/**
 * Converts a word into its equivalent tag string, based on its part of speech.
 *
 * Only attributes carrying exactly one nature are considered. When that nature
 * belongs to one of the categories below, wordID is set to the category's
 * placeholder ID and the matching Predefine tag is returned; for most
 * categories this.attribute is also replaced with the placeholder's attribute.
 * In every other case the word is returned unchanged and no field is modified.
 *
 * @param realWord  the original word
 * @param attribute the attribute of the word; its nature array is inspected
 * @return the equivalent tag string, or realWord itself when no category applies
 */
private String compileRealWord(String realWord, CoreDictionary.Attribute attribute)
{
    // Words with zero or several natures are never rewritten.
    if (attribute.nature.Length != 1)
    {
        return realWord;
    }

    switch (attribute.nature[0])
    {
        // Person names.
        case Nature.nr:
        case Nature.nr1:
        case Nature.nr2:
        case Nature.nrf:
        case Nature.nrj:
            wordID = CoreDictionary.NR_WORD_ID;
            // this.attribute is left untouched here; the assignment from
            // CoreDictionary.get(CoreDictionary.NR_WORD_ID) is disabled.
            return Predefine.TAG_PEOPLE;

        // Place names.
        case Nature.ns:
        case Nature.nsf:
            wordID = CoreDictionary.NS_WORD_ID;
            // this.attribute is deliberately not replaced: during place-name
            // recognition, words such as "河镇" should keep their own part of
            // speech rather than that of the "未##地" placeholder.
            return Predefine.TAG_PLACE;

        // Proper nouns. The case label for nz is disabled, so nz is not mapped.
        case Nature.nx:
            wordID = CoreDictionary.NX_WORD_ID;
            this.attribute = CoreDictionary.get(CoreDictionary.NX_WORD_ID);
            return Predefine.TAG_PROPER;

        // Organizations / groups.
        case Nature.nt:
        case Nature.ntc:
        case Nature.ntcf:
        case Nature.ntcb:
        case Nature.ntch:
        case Nature.nto:
        case Nature.ntu:
        case Nature.nts:
        case Nature.nth:
        case Nature.nit:
            wordID = CoreDictionary.NT_WORD_ID;
            this.attribute = CoreDictionary.get(CoreDictionary.NT_WORD_ID);
            return Predefine.TAG_GROUP;

        // Numbers.
        case Nature.m:
        case Nature.mq:
            wordID = CoreDictionary.M_WORD_ID;
            this.attribute = CoreDictionary.get(CoreDictionary.M_WORD_ID);
            return Predefine.TAG_NUMBER;

        // Clusters. The xx and w natures are not mapped (their cases are disabled).
        case Nature.x:
            wordID = CoreDictionary.X_WORD_ID;
            this.attribute = CoreDictionary.get(CoreDictionary.X_WORD_ID);
            return Predefine.TAG_CLUSTER;

        // Time expressions.
        case Nature.t:
            wordID = CoreDictionary.T_WORD_ID;
            this.attribute = CoreDictionary.get(CoreDictionary.T_WORD_ID);
            return Predefine.TAG_TIME;

        // Any other single nature: the word stands for itself.
        default:
            return realWord;
    }
}