Ejemplo n.º 1
0
 /**
  * Looks up the total frequency of a word.
  *
  * @param word the word to look up
  * @return the totalFrequency of the attribute returned by getAttribute,
  *         or 0 when getAttribute returns null
  */
 public static int getFrequency(String word)
 {
     CoreDictionary.Attribute found = getAttribute(word);

     // No attribute means the word is unknown, which is reported as frequency 0.
     return(found != null ? found.totalFrequency : 0);
 }
Ejemplo n.º 2
0
 /**
  * Fetches the attribute of a word from the HanLP lexicons, consulting the
  * core dictionary first and the custom (user) dictionary second.
  *
  * @param word the word to look up
  * @return the core dictionary's attribute when it is non-null, otherwise
  *         whatever CustomDictionary.get returns for the word
  */
 public static CoreDictionary.Attribute getAttribute(String word)
 {
     // The custom dictionary is only queried when the core lookup yields null.
     return(CoreDictionary.get(word) ?? CustomDictionary.get(word));
 }
Ejemplo n.º 3
0
        /**
         * Sets the attribute of a word from a list of natures.
         *
         * @param word    the word whose attribute is set
         * @param natures the natures to assign to the word
         * @return false when natures is null; otherwise the result of
         *         setAttribute(word, attribute) for the attribute built here
         */
        public static bool setAttribute(String word, params Nature[] natures)
        {
            if (natures != null)
            {
                // One frequency slot per nature.
                int[] counts = new int[natures.Length];
                CoreDictionary.Attribute built = new CoreDictionary.Attribute(natures, counts);

                // Arrays.fill is expected to set every frequency slot to 1.
                Arrays.fill(built.frequency, 1);

                return(setAttribute(word, built));
            }

            return(false);
        }
Ejemplo n.º 4
0
 /**
  * 从磁盘加载双数组
  *
  * @param path
  * @return
  */
 static bool loadDat(String path)
 {
     try
     {
         ByteArray byteArray = ByteArray.createByteArray(path + Predefine.BIN_EXT);
         if (byteArray == null)
         {
             return(false);
         }
         int size = byteArray.nextInt();
         CoreDictionary.Attribute[] attributes = new CoreDictionary.Attribute[size];
         final Nature[] natureIndexArray = Nature.values();
Ejemplo n.º 5
0
 /**
  * Creates a vertex from a compiled word, a real word, an attribute and a word id.
  *
  * @param word      the compiled word; when null it is derived with compileRealWord
  * @param realWord  the real word
  * @param attribute the attribute; when null, an attribute of (Nature.n, 1) is used instead
  * @param wordID    the word id
  */
 public Vertex(String word, String realWord, CoreDictionary.Attribute attribute, int wordID)
 {
     // To be safe, never keep a null attribute.
     CoreDictionary.Attribute effective = attribute ?? new CoreDictionary.Attribute(Nature.n, 1);

     // Both fields are assigned before compileRealWord is called, because
     // compileRealWord may overwrite wordID and this.attribute.
     this.wordID    = wordID;
     this.attribute = effective;

     // NOTE: realWord is not checked for emptiness here; the author's disabled
     // assertion warned that constructing a blank node causes an infinite loop.
     this.word     = word ?? compileRealWord(realWord, effective);
     this.realWord = realWord;
 }
Ejemplo n.º 6
0
        /**
         * Locks the nature of this vertex's attribute to the given nature.
         *
         * @param nature the nature to lock to
         * @return true when the attribute already consists of exactly this nature,
         *         or when getNatureFrequency reports a non-zero frequency for it;
         *         false otherwise
         */
        public bool confirmNature(Nature nature)
        {
            bool alreadyLocked = attribute.nature.Length == 1 && attribute.nature[0] == nature;
            if (alreadyLocked)
            {
                // Nothing to replace: the attribute is left as it is.
                return(true);
            }

            int  knownFrequency = attribute.getNatureFrequency(nature);
            bool wasListed      = knownFrequency != 0;

            // A frequency of 0 is treated as "nature not listed"; 1000 is used in its place.
            attribute = new CoreDictionary.Attribute(nature, wasListed ? knownFrequency : 1000);
            return(wasListed);
        }
Ejemplo n.º 7
0
        /**
         * Sets the attribute of a word.
         *
         * @param word      the word whose attribute is set
         * @param attribute the attribute to store
         * @return false when attribute is null, true in every other case
         */
        public static bool setAttribute(String word, CoreDictionary.Attribute attribute)
        {
            if (attribute == null)
            {
                return(false);
            }

            // Try the core trie first, then the custom DAT; the second call is
            // only made when the first one returns false.
            bool updatedInPlace = CoreDictionary.trie.set(word, attribute) ||
                                  CustomDictionary.dat.set(word, attribute);
            if (!updatedInPlace)
            {
                // Neither structure accepted the update, so put the entry into the custom trie.
                // NOTE(review): CustomDictionary.trie is not null-checked here — confirm it is always initialised.
                CustomDictionary.trie.put(word, attribute);
            }

            return(true);
        }
Ejemplo n.º 8
0
 /**
  * Loads the entries read from a reader into a dictionary.
  *
  * Each non-blank line is parsed as
  * "word nature1 frequency1 nature2 frequency2 ...", with the fields separated
  * by white space. An entry whose word is already present in storage is
  * overwritten. The reader is not closed by this method.
  *
  * @param br      the source reader
  * @param storage the dictionary that receives the word -> attribute entries
  * @throws IOException when reading from br fails
  */
 public static void loadDictionary(StreamReader br, Dictionary<String, CoreDictionary.Attribute> storage)
 {
     String line;
     while ((line = br.ReadLine()) != null)
     {
         // A null separator array makes Split break on any white-space character.
         // A string separator such as "\\s" is matched literally by String.Split
         // (it is not a regular expression), so it would never split a real line.
         String[] fields = line.Split((char[]) null, StringSplitOptions.RemoveEmptyEntries);
         if (fields.Length == 0)
         {
             continue;                       // blank line: nothing to load
         }

         int natureCount = (fields.Length - 1) / 2;
         // Assumes Attribute(int) allocates nature/frequency arrays of that size
         // and starts totalFrequency at 0 — TODO confirm against CoreDictionary.Attribute.
         CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(natureCount);
         for (int i = 0; i < natureCount; ++i)
         {
             attribute.nature[i]       = (Nature) Enum.Parse(typeof(Nature), fields[1 + 2 * i]);
             attribute.frequency[i]    = int.Parse(fields[2 + 2 * i]);
             attribute.totalFrequency += attribute.frequency[i];
         }
         storage[fields[0]] = attribute;
     }
 }
Ejemplo n.º 9
0
 /**
  * Creates a vertex from a compiled word, a real word and an attribute,
  * delegating to the four-argument constructor with a word id of -1.
  *
  * @param word      the compiled word
  * @param realWord  the real word
  * @param attribute the attribute
  */
 public Vertex(String word, String realWord, CoreDictionary.Attribute attribute)
     : this(word, realWord, attribute, wordID: -1)
 {
     // All initialisation happens in the delegated constructor.
 }
Ejemplo n.º 10
0
 /**
  * Creates a vertex whose real word is a single character, delegating to the
  * (String, Attribute) constructor.
  *
  * @param realWord  the character used as the real word
  * @param attribute the attribute
  */
 public Vertex(char realWord, CoreDictionary.Attribute attribute)
     : this(char.ToString(realWord), attribute)
 {
     // All initialisation happens in the delegated constructor.
 }
Ejemplo n.º 11
0
 /**
  * Creates a vertex from a real word, an attribute and a word id. The
  * compiled word is passed on as null to the four-argument constructor.
  *
  * @param realWord  the real word
  * @param attribute the attribute
  * @param wordID    the word id
  */
 public Vertex(String realWord, CoreDictionary.Attribute attribute, int wordID)
     : this(word: null, realWord: realWord, attribute: attribute, wordID: wordID)
 {
     // All initialisation happens in the delegated constructor.
 }
Ejemplo n.º 12
0
 /**
  * Constructor for the case where the real word and the compiled word are
  * the same. The compiled word is passed on as null to the
  * (word, realWord, attribute) constructor.
  *
  * @param realWord  the real word
  * @param attribute the attribute
  */
 public Vertex(String realWord, CoreDictionary.Attribute attribute)
     : this(word: null, realWord: realWord, attribute: attribute)
 {
     // All initialisation happens in the delegated constructor.
 }
Ejemplo n.º 13
0
        /**
         * Converts a real word into its equivalent (compiled) word string.
         *
         * When the attribute has exactly one nature and that nature belongs to one
         * of the groups handled below, the group's tag from Predefine is returned
         * and wordID is set to the group's word id. For every group except the
         * person-name and place-name groups, this.attribute is also replaced with
         * the core dictionary entry for that word id. In all other cases realWord
         * is returned and no field is changed.
         *
         * @param realWord  the original word
         * @param attribute the attribute whose nature decides the compiled word
         * @return the tag for the nature's group, or realWord when no group applies
         */
        private String compileRealWord(String realWord, CoreDictionary.Attribute attribute)
        {
            // Only attributes with exactly one nature are compiled to a tag.
            if (attribute.nature.Length != 1)
            {
                return(realWord);
            }

            String compiled = realWord;

            switch (attribute.nature[0])
            {
            // Person names: this.attribute is intentionally left untouched.
            case Nature.nr:
            case Nature.nr1:
            case Nature.nr2:
            case Nature.nrf:
            case Nature.nrj:
                wordID   = CoreDictionary.NR_WORD_ID;
                compiled = Predefine.TAG_PEOPLE;
                break;

            // Place names: during place-name recognition, words such as "河镇"
            // should keep their own attribute rather than take the tag's, so
            // this.attribute is left untouched here as well.
            case Nature.ns:
            case Nature.nsf:
                wordID   = CoreDictionary.NS_WORD_ID;
                compiled = Predefine.TAG_PLACE;
                break;

            // Nature.nz is deliberately not part of this group (its case is disabled).
            case Nature.nx:
                wordID         = CoreDictionary.NX_WORD_ID;
                this.attribute = CoreDictionary.get(CoreDictionary.NX_WORD_ID);
                compiled       = Predefine.TAG_PROPER;
                break;

            case Nature.nt:
            case Nature.ntc:
            case Nature.ntcf:
            case Nature.ntcb:
            case Nature.ntch:
            case Nature.nto:
            case Nature.ntu:
            case Nature.nts:
            case Nature.nth:
            case Nature.nit:
                wordID         = CoreDictionary.NT_WORD_ID;
                this.attribute = CoreDictionary.get(CoreDictionary.NT_WORD_ID);
                compiled       = Predefine.TAG_GROUP;
                break;

            case Nature.m:
            case Nature.mq:
                wordID         = CoreDictionary.M_WORD_ID;
                this.attribute = CoreDictionary.get(CoreDictionary.M_WORD_ID);
                compiled       = Predefine.TAG_NUMBER;
                break;

            case Nature.x:
                wordID         = CoreDictionary.X_WORD_ID;
                this.attribute = CoreDictionary.get(CoreDictionary.X_WORD_ID);
                compiled       = Predefine.TAG_CLUSTER;
                break;

            // Nature.xx and Nature.w are deliberately not mapped to
            // Predefine.TAG_OTHER (their cases are disabled).
            case Nature.t:
                wordID         = CoreDictionary.T_WORD_ID;
                this.attribute = CoreDictionary.get(CoreDictionary.T_WORD_ID);
                compiled       = Predefine.TAG_TIME;
                break;
            }

            return(compiled);
        }
Ejemplo n.º 14
0
 /**
  * Sets the attribute of a word from a textual description of its natures
  * and frequencies.
  *
  * @param word                the word whose attribute is set
  * @param natureWithFrequency text passed to CoreDictionary.Attribute.create
  *                            to build the attribute
  * @return the result of setAttribute(word, attribute) for the created attribute
  */
 public static bool setAttribute(String word, String natureWithFrequency)
 {
     // Build the attribute from its textual form and hand it straight to the
     // attribute-based overload.
     return(setAttribute(word, CoreDictionary.Attribute.create(natureWithFrequency)));
 }
Ejemplo n.º 15
0
        /**
         * Merges a coarse segmentation result using the user (custom) dictionary.
         *
         * The list is copied into an array, runs of adjacent vertices that match an
         * entry of CustomDictionary.trie are replaced by a single new vertex, and the
         * surviving vertices are written back into the same list instance.
         *
         * @param vertexList the coarse segmentation result; it is cleared and refilled in place
         * @return the same vertexList instance, holding the merged result
         */
        protected static LinkedList <Vertex> combineByCustomDictionary(LinkedList <Vertex> vertexList)
        {
            // Work on an array copy; merged-away slots are marked with null.
            Vertex[] wordNet = vertexList.ToArray();
            // DAT merge
            DoubleArrayTrie <CoreDictionary.Attribute> dat = CustomDictionary.dat;

            // NOTE(review): the merge logic of this pass is commented out below, so the
            // loop currently only calls dat.transition for each vertex and changes
            // nothing in wordNet; start/to/end are assigned but never read.
            for (int i = 0; i < wordNet.Length; ++i)
            {
                int state = 1;
                state = dat.transition(wordNet[i].realWord, state);
                if (state > 0)
                {
                    int start = i;
                    int to    = i + 1;
                    int end   = to;
                    //CoreDictionary.Attribute value = dat.output(state);
                    //for (; to < wordNet.Length; ++to)
                    //{
                    //    state = dat.transition(wordNet[to].realWord, state);
                    //    if (state < 0) break;
                    //    CoreDictionary.Attribute output = dat.output(state);
                    //    if (output != null)
                    //    {
                    //        value = output;
                    //        end = to + 1;
                    //    }
                    //}
                    //if (value != null)
                    //{
                    //    StringBuilder sbTerm = new StringBuilder();
                    //    for (int j = start; j < end; ++j)
                    //    {
                    //        sbTerm.Append(wordNet[j]);
                    //        wordNet[j] = null;
                    //    }
                    //    wordNet[i] = new Vertex(sbTerm.ToString(), value);
                    //    i = end - 1;
                    //}
                }
            }
            // BinTrie merge
            if (CustomDictionary.trie != null)
            {
                for (int i = 0; i < wordNet.Length; ++i)
                {
                    // Skip slots that were already merged away.
                    if (wordNet[i] == null)
                    {
                        continue;
                    }
                    // Try to start a match at vertex i.
                    BaseNode <CoreDictionary.Attribute> state = CustomDictionary.trie.transition(wordNet[i].realWord.ToCharArray(), 0);
                    if (state != null)
                    {
                        // [start, end) is the range of vertices to merge; `to` scans ahead.
                        int start = i;
                        int to    = i + 1;
                        int end   = to;
                        CoreDictionary.Attribute value = state.getValue();
                        for (; to < wordNet.Length; ++to)
                        {
                            if (wordNet[to] == null)
                            {
                                continue;
                            }
                            // Extend the match with the next vertex's real word.
                            state = state.transition(wordNet[to].realWord.ToCharArray(), 0);
                            if (state == null)
                            {
                                break;
                            }
                            // Remember the furthest position at which the node carries a value.
                            if (state.getValue() != null)
                            {
                                value = state.getValue();
                                end   = to + 1;
                            }
                        }
                        if (value != null)
                        {
                            // Concatenate the matched vertices and clear their slots.
                            StringBuilder sbTerm = new StringBuilder();
                            for (int j = start; j < end; ++j)
                            {
                                if (wordNet[j] == null)
                                {
                                    continue;
                                }
                                // Appends the Vertex object itself, i.e. its string form.
                                sbTerm.Append(wordNet[j]);
                                wordNet[j] = null;
                            }
                            // The merged vertex takes slot i; resume scanning after the merged range.
                            wordNet[i] = new Vertex(sbTerm.ToString(), value);
                            i          = end - 1;
                        }
                    }
                }
            }
            // Rebuild the list from the non-null slots, preserving order.
            vertexList.Clear();
            foreach (Vertex vertex in wordNet)
            {
                if (vertex != null)
                {
                    vertexList.AddLast(vertex);
                }
            }
            return(vertexList);
        }