예제 #1
0
        /**
         * Merges each run of numeral vertices (Nature.m), together with one directly
         * following quantifier vertex (Nature.q, Nature.qv or Nature.qt) when present,
         * into a single vertex.
         *
         * The first numeral of a run is rewritten in place: its realWord becomes the
         * concatenated text, its word becomes Predefine.TAG_NUMBER, its attribute becomes
         * Nature.mq and its wordID becomes CoreDictionary.M_WORD_ID. Every vertex absorbed
         * into it is removed from termList and handed to removeFromWordNet.
         *
         * Lists with fewer than 4 elements are left untouched.
         *
         * @param termList   vertex sequence to merge; modified in place
         * @param wordNetAll word net kept in sync with the merge (absorbed vertices are
         *                   removed from it; in index mode the numeral-only vertex is added)
         * @param config     only config.indexMode is read here
         */
        protected void mergeNumberQuantifier(LinkedList <Vertex> termList, WordNet wordNetAll, Config config)
        {
            if (termList.Count < 4)
            {
                return;
            }
            StringBuilder sbQuantifier = new StringBuilder();

            // Offset handed to wordNetAll.add / removeFromWordNet; advanced by the realWord
            // length of every vertex that has been passed.
            // NOTE(review): an initial "skip the first element" step was commented out in the
            // previous version of this method. If termList starts with a sentinel vertex whose
            // realWord is non-empty, line is shifted by that length - confirm against the caller.
            int line = 1;

            // Walk the list by node rather than with an enumerator: a LinkedList enumerator is
            // invalidated by any modification, while nodes can be removed safely as long as the
            // successor is captured first.
            LinkedListNode <Vertex> node = termList.First;
            while (node != null)
            {
                Vertex pre = node.Value;
                LinkedListNode <Vertex> next = node.Next;

                if (pre.hasNature(Nature.m))
                {
                    sbQuantifier.Append(pre.realWord);

                    // Absorb every numeral that directly follows.
                    while (next != null && next.Value.hasNature(Nature.m))
                    {
                        LinkedListNode <Vertex> absorbed = next;
                        Vertex numeral = absorbed.Value;
                        next = absorbed.Next;   // capture before Remove() detaches the node

                        sbQuantifier.Append(numeral.realWord);
                        termList.Remove(absorbed);
                        removeFromWordNet(numeral, wordNetAll, line, sbQuantifier.Length);
                    }

                    // Only a vertex that actually follows the run is examined. When the list
                    // ended inside the run there is nothing left to inspect.
                    if (next != null)
                    {
                        LinkedListNode <Vertex> follower = next;
                        Vertex cur = follower.Value;
                        next = follower.Next;   // the follower is consumed in both branches

                        if (cur.hasNature(Nature.q) || cur.hasNature(Nature.qv) || cur.hasNature(Nature.qt))
                        {
                            if (config.indexMode)
                            {
                                // Index mode also keeps the numeral-only text as its own vertex.
                                wordNetAll.add(line, new Vertex(sbQuantifier.ToString(), new CoreDictionary.Attribute(Nature.m)));
                            }
                            sbQuantifier.Append(cur.realWord);
                            termList.Remove(follower);
                            removeFromWordNet(cur, wordNetAll, line, sbQuantifier.Length);
                        }
                        else
                        {
                            // The vertex that ended the run is not a quantifier; it is skipped,
                            // so its length still has to be accounted for.
                            line += cur.realWord.Length;
                        }
                    }

                    // Something was appended beyond pre's own text: turn pre into the merged vertex.
                    if (sbQuantifier.Length != pre.realWord.Length)
                    {
                        pre.realWord  = sbQuantifier.ToString();
                        pre.word      = Predefine.TAG_NUMBER;
                        pre.attribute = new CoreDictionary.Attribute(Nature.mq);
                        pre.wordID    = CoreDictionary.M_WORD_ID;
                    }
                }

                sbQuantifier.Length = 0;
                line += pre.realWord.Length;   // uses the merged text when pre was rewritten
                node = next;
            }
        }
        /**
         * Builds the unigram word net.
         *
         * Every match reported by the core dictionary trie over the net's character array is
         * added as a vertex, then each stretch of rows that received no vertex is filled with
         * the result of quickAtomSegment so that no row on the walk is left empty.
         *
         * @param wordNetStorage word net to populate; its charArray supplies the text
         */
        protected void GenerateWordNet(WordNet wordNetStorage)
        {
            char[] text = wordNetStorage.charArray;

            // Core dictionary lookup: a match starting at character offset b goes into row b + 1.
            DoubleArrayTrie <CoreDictionary.Attribute> .Searcher hits = CoreDictionary.trie.getSearcher(text, 0);
            while (hits.next())
            {
                Vertex entry = new Vertex(new String(text, hits.begin, hits.length), hits.value, hits.index);
                wordNetStorage.add(hits.begin + 1, entry);
            }

            // Custom dictionary lookup (currently disabled):
            //        if (config.useCustomDictionary)
            //        {
            //            searcher = CustomDictionary.dat.getSearcher(charArray, 0);
            //            while (searcher.next())
            //            {
            //                wordNetStorage.add(searcher.begin + 1, new Vertex(new String(charArray, searcher.begin, searcher.length), searcher.value));
            //            }
            //        }

            // Atom segmentation for the rows the dictionary did not cover, so the graph stays connected.
            List <Vertex>[] rows = wordNetStorage.getVertexes();
            int row = 1;
            while (row < rows.Length)
            {
                List <Vertex> cell = rows[row];
                if (cell.Count != 0)
                {
                    // Covered row: jump ahead by the length of the last vertex stored in it.
                    row += cell[cell.Count - 1].realWord.Length;
                    continue;
                }

                // Empty row: find where the gap ends. The scan never tests the final row.
                int gapEnd = row + 1;
                while (gapEnd < rows.Length - 1 && rows[gapEnd].Count == 0)
                {
                    ++gapEnd;
                }

                // Rows are offset by one from character indices, hence the "- 1" on both bounds.
                wordNetStorage.add(row, quickAtomSegment(text, row - 1, gapEnd - 1));
                row = gapEnd;
            }
        }