コード例 #1
0
        /**
         * Applies rule-based post-processing to the coarse segmentation result and
         * then adds the resulting vertices to the given word net.
         *
         * @param linkedArray    coarse segmentation result; handed to fixResultByRule
         *                       first (presumably corrected in place, since no return
         *                       value is used -- confirm against fixResultByRule)
         * @param wordNetOptimum word net that receives the vertices of linkedArray
         */
        protected static void GenerateWord(LinkedList<Vertex> linkedArray, WordNet wordNetOptimum)
        {
            // Step 1: let the rule pass adjust the coarse result.
            fixResultByRule(linkedArray);

            // Step 2: build the new word net from the adjusted result.
            wordNetOptimum.addAll(linkedArray);
        }
コード例 #2
0
ファイル: ViterbiSegment.cs プロジェクト: shibox/JShibo.NLP
        /**
         * Segments a single sentence into terms.
         *
         * Pipeline, as implemented below:
         *   1. build the full word net for the sentence (GenerateWordNet);
         *   2. pick an initial path through it (viterbi);
         *   3. optionally merge by custom dictionary and merge number/quantifier pairs;
         *   4. optionally run the enabled named-entity recognizers against a second
         *      word net seeded from the current path, re-running viterbi when the
         *      recognizers changed that net's size;
         *   5. convert the final vertex list into the returned terms.
         *
         * @param sentence characters of the sentence to segment
         * @return the result of convert(vertexList, config.offset) for the final path
         */
        protected override List<Term> segSentence(char[] sentence)
        {
            // Build the word net holding every candidate word of the sentence.
            WordNet wordNetAll = new WordNet(sentence);
            GenerateWordNet(wordNetAll);

            if (HanLP.Config.DEBUG)
            {
                // Debug dump of the coarse word net is disabled in this port.
            }

            // Initial (coarse) path through the full word net.
            LinkedList<Vertex> vertexList = viterbi(wordNetAll);

            if (config.useCustomDictionary)
            {
                combineByCustomDictionary(vertexList);
            }

            if (HanLP.Config.DEBUG)
            {
                // Debug dump of the coarse segmentation result is disabled in this port.
            }

            // Number / quantifier recognition.
            if (config.numberQuantifierRecognize)
            {
                mergeNumberQuantifier(vertexList, wordNetAll, config);
            }

            // Named-entity recognition.
            if (config.ner)
            {
                WordNet wordNetOptimum = new WordNet(sentence, vertexList);

                // Size before recognition; a different size afterwards means a
                // recognizer changed the net and the path must be recomputed.
                int preSize = wordNetOptimum.Size();

                if (config.nameRecognize)
                {
                    PersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.translatedNameRecognize)
                {
                    TranslatedPersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.japaneseNameRecognize)
                {
                    JapanesePersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.placeRecognize)
                {
                    PlaceRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }
                if (config.organizationRecognize)
                {
                    // Cascaded HMM: the output of the previous stage becomes the
                    // input of the organization stage, so the net is rebuilt from
                    // the re-decoded path and the size baseline is reset.
                    vertexList = viterbi(wordNetOptimum);
                    wordNetOptimum.clear();
                    wordNetOptimum.addAll(vertexList);
                    preSize = wordNetOptimum.Size();
                    OrganizationRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
                }

                if (wordNetOptimum.Size() != preSize)
                {
                    vertexList = viterbi(wordNetOptimum);
                    if (HanLP.Config.DEBUG)
                    {
                        // Console.WriteLine uses composite formatting ({0}), not
                        // printf-style %s; with %s the word net was never printed.
                        Console.WriteLine("细分词网:\n{0}\n", wordNetOptimum);
                    }
                }
            }

            // Index mode (full segmentation).
            if (config.indexMode)
            {
                // NOTE(review): config.indexMode currently has no effect here; the
                // call is commented out in this port:
                //return decorateResultForIndexMode(vertexList, wordNetAll);
            }

            // Part-of-speech tagging.
            if (config.speechTagging)
            {
                // NOTE(review): config.speechTagging currently has no effect here;
                // the call is commented out in this port:
                //speechTagging(vertexList);
            }

            return convert(vertexList, config.offset);
        }