/**
 * Runs the rule-based corrections (merging, splitting, and so on) over the
 * coarse segmentation result and then builds the new word net from it.
 *
 * @param linkedArray    coarse segmentation result; passed through fixResultByRule first
 * @param wordNetOptimum word net that receives all vertices of the coarse result
 */
protected static void GenerateWord(LinkedList<Vertex> linkedArray, WordNet wordNetOptimum)
{
    // The rule pass runs before the vertices are added to the net.
    fixResultByRule(linkedArray);

    // Build the new word net from the vertex list.
    wordNetOptimum.addAll(linkedArray);
}
/**
 * Segments a single sentence.
 *
 * Steps, in order:
 *   1. build a word net over the sentence and fill it via GenerateWordNet;
 *   2. take the Viterbi path through that net as the coarse result;
 *   3. optionally merge by custom dictionary (config.useCustomDictionary);
 *   4. optionally merge number + quantifier (config.numberQuantifierRecognize);
 *   5. optionally run the named-entity recognizers (config.ner) and re-run
 *      Viterbi when they changed the size of the optimum word net;
 *   6. convert the final vertex list to terms.
 *
 * @param sentence characters of the sentence to segment
 * @return the result of convert(vertexList, config.offset) for the final vertex list
 */
protected override List<Term> segSentence(char[] sentence)
{
    // Generate the word net for the whole sentence.
    WordNet wordNetAll = new WordNet(sentence);
    GenerateWordNet(wordNetAll);

    if (HanLP.Config.DEBUG)
    {
        // Debug dump of the coarse word net is currently disabled.
    }

    // Coarse segmentation: Viterbi path through the full word net.
    LinkedList<Vertex> vertexList = viterbi(wordNetAll);

    if (config.useCustomDictionary)
    {
        combineByCustomDictionary(vertexList);
    }

    if (HanLP.Config.DEBUG)
    {
        // Debug dump of the coarse segmentation result is currently disabled.
    }

    // Number / quantifier recognition.
    if (config.numberQuantifierRecognize)
    {
        mergeNumberQuantifier(vertexList, wordNetAll, config);
    }

    // Named-entity recognition.
    if (config.ner)
    {
        WordNet wordNetOptimum = new WordNet(sentence, vertexList);
        int preSize = wordNetOptimum.Size();

        if (config.nameRecognize)
        {
            PersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }
        if (config.translatedNameRecognize)
        {
            TranslatedPersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }
        if (config.japaneseNameRecognize)
        {
            JapanesePersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }
        if (config.placeRecognize)
        {
            PlaceRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }
        if (config.organizationRecognize)
        {
            // Cascaded hidden Markov model: the output of this level becomes
            // the input of the next one, so the net is rebuilt from the new path
            // and the size baseline is reset before organization recognition.
            vertexList = viterbi(wordNetOptimum);
            wordNetOptimum.clear();
            wordNetOptimum.addAll(vertexList);
            preSize = wordNetOptimum.Size();
            OrganizationRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }

        // Re-run Viterbi only when the net's size differs from the baseline.
        if (wordNetOptimum.Size() != preSize)
        {
            vertexList = viterbi(wordNetOptimum);
            if (HanLP.Config.DEBUG)
            {
                // .NET composite formatting uses {0}; a printf-style %s would be
                // printed literally and the word net argument ignored.
                Console.WriteLine("细分词网:\n{0}\n", wordNetOptimum);
            }
        }
    }

    // Index mode (full segmentation) is disabled in this method:
    if (config.indexMode)
    {
        // return decorateResultForIndexMode(vertexList, wordNetAll);
    }

    // Part-of-speech tagging is disabled in this method:
    if (config.speechTagging)
    {
        // speechTagging(vertexList);
    }

    return convert(vertexList, config.offset);
}