示例#1
0
        /// <summary>
        /// Segments a single sentence into a list of terms.
        /// </summary>
        /// <remarks>
        /// Pipeline, in order:
        /// 1. build a word net over the sentence (<c>GenerateWordNet</c>);
        /// 2. run <c>viterbi</c> over it to obtain the coarse vertex path;
        /// 3. optionally merge by the custom dictionary and merge number/quantifier pairs;
        /// 4. optionally run the enabled named-entity recognizers and, if they changed
        ///    the optimum word net, run <c>viterbi</c> again over that net;
        /// 5. convert the final vertex list to terms.
        /// Which optional steps run is decided by the flags on <c>config</c>.
        /// </remarks>
        /// <param name="sentence">Characters of the sentence to segment.</param>
        /// <returns>The result of <c>convert(vertexList, config.offset)</c> for the final path.</returns>
        protected override List <Term> segSentence(char[] sentence)
        {
            WordNet wordNetAll = new WordNet(sentence);

            // Generate the full word net.
            GenerateWordNet(wordNetAll);
            if (HanLP.Config.DEBUG)
            {
                // Disabled in this port: debug dump of the coarse word net (wordNetAll).
            }

            // Coarse segmentation: decode a path through the full word net.
            LinkedList <Vertex> vertexList = viterbi(wordNetAll);

            if (config.useCustomDictionary)
            {
                combineByCustomDictionary(vertexList);
            }

            if (HanLP.Config.DEBUG)
            {
                // Disabled in this port: debug dump of the coarse result (convert(vertexList, false)).
            }

            // Number / quantifier recognition.
            if (config.numberQuantifierRecognize)
            {
                mergeNumberQuantifier(vertexList, wordNetAll, config);
            }

            // Named entity recognition.
            if (config.ner)
            {
                WordNet wordNetOptimum = new WordNet(sentence, vertexList);
                // Size snapshot used below to detect whether any recognizer added vertices.
                int     preSize        = wordNetOptimum.Size();
                if (config.nameRecognize)
                {
                    PersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.translatedNameRecognize)
                {
                    TranslatedPersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.japaneseNameRecognize)
                {
                    JapanesePersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.placeRecognize)
                {
                    PlaceRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.organizationRecognize)
                {
                    // Cascaded hidden Markov model: the output of this pass is the
                    // input of the next-level model, so re-decode and rebuild the
                    // optimum net from that path before recognizing organizations.
                    vertexList = viterbi(wordNetOptimum);
                    wordNetOptimum.clear();
                    wordNetOptimum.addAll(vertexList);
                    preSize = wordNetOptimum.Size();
                    OrganizationRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                // Re-decode only when the optimum net's size differs from the snapshot.
                if (wordNetOptimum.Size() != preSize)
                {
                    vertexList = viterbi(wordNetOptimum);
                    if (HanLP.Config.DEBUG)
                    {
                        // .NET composite formatting uses {0}; a printf-style %s would be
                        // printed literally and the argument silently dropped.
                        Console.WriteLine("细分词网:\n{0}\n", wordNetOptimum);
                    }
                }
            }

            // Index mode is meant to produce a full segmentation.
            if (config.indexMode)
            {
                // NOTE(review): disabled in this port; the intended call was
                // "return decorateResultForIndexMode(vertexList, wordNetAll);".
            }

            // Part-of-speech tagging.
            if (config.speechTagging)
            {
                // NOTE(review): disabled in this port; the intended call was
                // "speechTagging(vertexList);".
            }

            return(convert(vertexList, config.offset));
        }