/**
 * Merges each run of number vertices (Nature.m), together with an optional
 * trailing quantifier (Nature.q / qv / qt), into the first vertex of the run.
 *
 * The first vertex of a run absorbs the text of the others and, if anything was
 * merged, is retagged as Nature.mq with Predefine.TAG_NUMBER / CoreDictionary.M_WORD_ID.
 * Every absorbed vertex is unlinked from termList and passed to removeFromWordNet
 * so that the list and the word net stay consistent.
 *
 * @param termList   vertex list to edit in place; nothing is done when it holds fewer than 4 vertices
 * @param wordNetAll word net that is updated alongside termList
 * @param config     when config.indexMode is set, the number part (without the quantifier)
 *                   is additionally added to wordNetAll as a Nature.m vertex
 */
protected void mergeNumberQuantifier(LinkedList<Vertex> termList, WordNet wordNetAll, Config config)
{
    if (termList.Count < 4)
    {
        return;
    }

    StringBuilder sbQuantifier = new StringBuilder();

    // Start at the second vertex so that `line` is 1 for the first vertex that is
    // actually examined and the head's realWord does not shift it.
    // NOTE(review): assumes the head of termList is the sentence-begin marker - confirm against the caller.
    LinkedListNode<Vertex> next = termList.First.Next;

    // Word-net row of `pre`: 1 plus the realWord lengths of the vertices already passed.
    int line = 1;
    while (next != null)
    {
        Vertex pre = next.Value;
        next = next.Next;

        if (pre.hasNature(Nature.m))
        {
            sbQuantifier.Append(pre.realWord);

            // Absorb every directly following number. `stopNode` ends up as the first
            // non-number vertex after the run, or stays null when the list ran out.
            LinkedListNode<Vertex> stopNode = null;
            while (next != null)
            {
                LinkedListNode<Vertex> curNode = next;
                Vertex cur = curNode.Value;
                // Advance before any Remove: a removed node no longer exposes its Next link.
                next = curNode.Next;

                if (!cur.hasNature(Nature.m))
                {
                    stopNode = curNode;
                    break;
                }

                sbQuantifier.Append(cur.realWord);
                termList.Remove(curNode);
                removeFromWordNet(cur, wordNetAll, line, sbQuantifier.Length);
            }

            if (stopNode != null)
            {
                Vertex stop = stopNode.Value;
                if (stop.hasNature(Nature.q) || stop.hasNature(Nature.qv) || stop.hasNature(Nature.qt))
                {
                    if (config.indexMode)
                    {
                        // Index mode also keeps the number on its own, before the quantifier is appended.
                        wordNetAll.add(line, new Vertex(sbQuantifier.ToString(), new CoreDictionary.Attribute(Nature.m)));
                    }
                    sbQuantifier.Append(stop.realWord);
                    termList.Remove(stopNode);
                    removeFromWordNet(stop, wordNetAll, line, sbQuantifier.Length);
                }
                else
                {
                    // The vertex that ended the run is not a quantifier. It has already been
                    // stepped over and will not be visited as `pre`, so account for its length here.
                    line += stop.realWord.Length;
                }
            }

            // A longer buffer means at least one vertex was absorbed: rewrite `pre` as the merged token.
            if (sbQuantifier.Length != pre.realWord.Length)
            {
                pre.realWord = sbQuantifier.ToString();
                pre.word = Predefine.TAG_NUMBER;
                pre.attribute = new CoreDictionary.Attribute(Nature.mq);
                pre.wordID = CoreDictionary.M_WORD_ID;
            }
        }

        sbQuantifier.Length = 0;
        // Uses the (possibly merged) realWord, so `line` moves past everything absorbed into `pre`.
        line += pre.realWord.Length;
    }
}
/**
 * Builds the unigram word net for the text held by wordNetStorage.
 *
 * Every core-dictionary match found in wordNetStorage.charArray is added at
 * row (match start + 1). Rows that are still empty afterwards are filled with
 * the result of quickAtomSegment so that no gap is left in the net.
 *
 * @param wordNetStorage word net to populate; its charArray supplies the text
 */
protected void GenerateWordNet(WordNet wordNetStorage)
{
    char[] text = wordNetStorage.charArray;

    // Core dictionary lookup: one vertex per match reported by the trie searcher.
    DoubleArrayTrie<CoreDictionary.Attribute>.Searcher hit = CoreDictionary.trie.getSearcher(text, 0);
    while (hit.next())
    {
        Vertex matched = new Vertex(new String(text, hit.begin, hit.length), hit.value, hit.index);
        wordNetStorage.add(hit.begin + 1, matched);
    }

    // User dictionary lookup (currently disabled):
    // if (config.useCustomDictionary)
    // {
    //     searcher = CustomDictionary.dat.getSearcher(charArray, 0);
    //     while (searcher.next())
    //     {
    //         wordNetStorage.add(searcher.begin + 1, new Vertex(new String(charArray, searcher.begin, searcher.length), searcher.value));
    //     }
    // }

    // Atom segmentation: fill empty rows so that the graph stays connected.
    List<Vertex>[] rows = wordNetStorage.getVertexes();
    int row = 1;
    while (row < rows.Length)
    {
        List<Vertex> bucket = rows[row];
        if (bucket.Count != 0)
        {
            // Row already has words: jump ahead by the length of its last entry.
            row += bucket[bucket.Count - 1].realWord.Length;
            continue;
        }

        // Find the end of the empty stretch; the scan never goes beyond the last row.
        int gapEnd = row + 1;
        while (gapEnd < rows.Length - 1 && rows[gapEnd].Count == 0)
        {
            ++gapEnd;
        }

        // Rows are offset by one from character positions, hence the "- 1" on both bounds.
        wordNetStorage.add(row, quickAtomSegment(text, row - 1, gapEnd - 1));
        row = gapEnd;
    }
}