Esempio n. 1
0
        /// <summary>
        /// Segments a single sentence: builds the full word net, takes the Viterbi path through it,
        /// then applies the optional post-processing steps enabled in <c>config</c>
        /// (custom-dictionary merging, number/quantifier merging, named-entity recognition).
        /// </summary>
        /// <param name="sentence">The characters of the sentence to segment.</param>
        /// <returns>The result of <c>Convert</c> applied to the final path and <c>config.offset</c>.</returns>
        public override List <Term> SegSentence(char[] sentence)
        {
            // Full word net over the sentence, populated by GenerateWordNet.
            var fullNet = new WordNet(sentence);
            GenerateWordNet(fullNet);

            // Initial segmentation: Viterbi path through the full net.
            var path = Viterbi(fullNet);

            // Custom dictionary merging; index mode additionally passes the full net.
            if (config.useCustomDict)
            {
                if (!config.indexMode)
                {
                    CombineByCustomDict(path);
                }
                else
                {
                    CombineByCustomDict(path, fullNet);
                }
            }

            // Number + quantifier merging.
            if (config.numQuantRecognize)
            {
                CombineNumQuant(path, fullNet, config);
            }

            // Without named-entity recognition the current path is already final.
            if (!config.nameEntityRecognize)
            {
                return Convert(path, config.offset);
            }

            // Word net seeded from the current path; the recognizers receive it together with the full net.
            var refinedNet = new WordNet(sentence, path);
            var sizeBefore = refinedNet.Size;

            // Chinese person names.
            if (config.chsNameRecognize)
            {
                ChsNameRecognition.Recognition(path, refinedNet, fullNet);
            }

            // Transliterated person names.
            if (config.translatedNameRecognize)
            {
                TranslatedPersonRecognition.Recognition(path, refinedNet, fullNet);
            }

            if (config.jpNameRecognize)
            {
                // Intentionally empty: no recognizer is invoked for this flag here.
            }

            if (config.placeRecognize)
            {
                // Intentionally empty: no recognizer is invoked for this flag here.
            }

            if (config.orgRecognize)
            {
                // Re-decode the refined net, reset the net to exactly that path and
                // re-baseline the size before running organization recognition on it.
                path = Viterbi(refinedNet);
                refinedNet.Clear();
                refinedNet.AddAll(path);
                sizeBefore = refinedNet.Size;
                OrgRecognition.Recognition(path, refinedNet, fullNet);
            }

            // A size change since the last baseline triggers one more Viterbi pass over the refined net.
            if (refinedNet.Size != sizeBefore)
            {
                path = Viterbi(refinedNet);
            }

            return Convert(path, config.offset);
        }
Esempio n. 2
0
        /// <summary>
        /// Segments a single sentence: builds the full word net, takes the Viterbi path through it,
        /// then optionally merges custom-dictionary words and number/quantifier pairs and runs the
        /// named-entity recognizers enabled in <c>config</c>.
        /// </summary>
        /// <param name="sentence">The characters of the sentence to segment.</param>
        /// <returns>The result of <c>convert</c> applied to the final vertex list and <c>config.offset</c>.</returns>
        protected override List <Term> segSentence(char[] sentence)
        {
            // Full word net over the sentence, populated by GenerateWordNet.
            WordNet wordNetAll = new WordNet(sentence);
            GenerateWordNet(wordNetAll);

            // Initial segmentation: Viterbi path through the full net.
            LinkedList <Vertex> vertexList = viterbi(wordNetAll);

            if (config.useCustomDictionary)
            {
                combineByCustomDictionary(vertexList);
            }

            // Number + quantifier merging.
            if (config.numberQuantifierRecognize)
            {
                mergeNumberQuantifier(vertexList, wordNetAll, config);
            }

            // Named-entity recognition.
            if (config.ner)
            {
                // Word net seeded from the current path; the recognizers receive it together with the full net.
                WordNet wordNetOptimum = new WordNet(sentence, vertexList);
                int     preSize        = wordNetOptimum.Size();
                if (config.nameRecognize)
                {
                    PersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.translatedNameRecognize)
                {
                    TranslatedPersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.japaneseNameRecognize)
                {
                    JapanesePersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.placeRecognize)
                {
                    PlaceRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.organizationRecognize)
                {
                    // Cascaded HMM: the output of this pass is the input of the next level.
                    // Re-decode, reset the net to exactly that path and re-baseline the size.
                    vertexList = viterbi(wordNetOptimum);
                    wordNetOptimum.clear();
                    wordNetOptimum.addAll(vertexList);
                    preSize = wordNetOptimum.Size();
                    OrganizationRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                // A size change since the last baseline triggers one more Viterbi pass.
                if (wordNetOptimum.Size() != preSize)
                {
                    vertexList = viterbi(wordNetOptimum);
                    if (HanLP.Config.DEBUG)
                    {
                        // .NET composite formatting needs an indexed item ({0}); a printf-style
                        // "%s" would be written literally and the word net never printed.
                        Console.WriteLine("细分词网:\n{0}\n", wordNetOptimum);
                    }
                }
            }

            if (config.indexMode)
            {
                // TODO: index-mode full segmentation (decorateResultForIndexMode) is not wired in here.
            }

            if (config.speechTagging)
            {
                // TODO: part-of-speech tagging (speechTagging) is not wired in here.
            }

            return convert(vertexList, config.offset);
        }