/**
 * Completely removes a word from the word net.
 *
 * Every vertex stored in row (line + length) whose from link is cur has that
 * link cleared, and cur itself is unlinked from row
 * (line + length - cur.realWord.Length).
 *
 * @param cur        the word (vertex) to remove
 * @param wordNetAll the word net
 * @param line       the line currently being scanned
 * @param length     the length of the current buffer
 */
private static void removeFromWordNet(Vertex cur, WordNet wordNetAll, int line, int length)
{
    LinkedList<Vertex>[] vertexes = wordNetAll.getVertexes();

    // Clear the back-link of every vertex in the target row that points at cur.
    foreach (Vertex vertex in vertexes[line + length])
    {
        if (vertex.from == cur)
        {
            vertex.from = null;
        }
    }

    // Unlink cur from its own row. A LinkedList must not be modified while it
    // is being enumerated, so walk the nodes directly and remember the
    // successor before a node is removed.
    LinkedList<Vertex> row = vertexes[line + length - cur.realWord.Length];
    LinkedListNode<Vertex> node = row.First;
    while (node != null)
    {
        LinkedListNode<Vertex> next = node.Next;
        if (node.Value == cur)
        {
            row.Remove(node);
        }
        node = next;
    }
}
/**
 * Generates the unigram word net.
 *
 * Adds one vertex per core-dictionary match found in the character array of
 * the word net, then fills every run of empty rows with the result of
 * quickAtomSegment.
 *
 * @param wordNetStorage the word net to populate
 */
protected void GenerateWordNet(WordNet wordNetStorage)
{
    char[] text = wordNetStorage.charArray;

    // Core dictionary lookup: a match starting at offset `begin` is stored in row begin + 1.
    DoubleArrayTrie<CoreDictionary.Attribute>.Searcher hit = CoreDictionary.trie.getSearcher(text, 0);
    while (hit.next())
    {
        int targetRow = hit.begin + 1;
        String word = new String(text, hit.begin, hit.length);
        wordNetStorage.add(targetRow, new Vertex(word, hit.value, hit.index));
    }

    // User dictionary lookup (currently disabled):
    // if (config.useCustomDictionary)
    // {
    //     searcher = CustomDictionary.dat.getSearcher(charArray, 0);
    //     while (searcher.next())
    //     {
    //         wordNetStorage.add(searcher.begin + 1, new Vertex(new String(charArray, searcher.begin, searcher.length), searcher.value));
    //     }
    // }

    // Atomic segmentation, so that the graph stays connected.
    List<Vertex>[] rows = wordNetStorage.getVertexes();
    int row = 1;
    while (row < rows.Length)
    {
        List<Vertex> current = rows[row];
        if (current.Count != 0)
        {
            // Row already has words: jump ahead by the length of its last entry.
            row += current[current.Count - 1].realWord.Length;
            continue;
        }

        // Find the end of the run of empty rows; the final row is never scanned.
        int next = row + 1;
        while (next < rows.Length - 1 && rows[next].Count == 0)
        {
            ++next;
        }

        wordNetStorage.add(row, quickAtomSegment(text, row - 1, next - 1));
        row = next;
    }
}
/**
 * Walks the word net row by row, propagating predecessor links via
 * updateFrom, and returns the chain reached by following the from links
 * back from the first vertex of the last row.
 *
 * @param wordNet the word net to traverse
 * @return the vertices of that chain, ordered from its start to the first
 *         vertex of the last row
 */
private static LinkedList<Vertex> viterbi(WordNet wordNet)
{
    // Work directly on the rows of the word net to avoid allocating objects.
    LinkedList<Vertex>[] rows = wordNet.getVertexes();
    LinkedList<Vertex> bestPath = new LinkedList<Vertex>();

    // Seed: every vertex in row 1 is offered the first vertex of row 0.
    foreach (Vertex seed in rows[1])
    {
        seed.updateFrom(rows[0].First());
    }

    int lastRow = rows.Length - 1;
    for (int row = 1; row < lastRow; ++row)
    {
        LinkedList<Vertex> candidates = rows[row];
        if (candidates != null)
        {
            foreach (Vertex source in candidates)
            {
                // Vertices that have no predecessor do not propagate.
                if (source.from != null)
                {
                    // Successors live in the row reached by skipping this word's length.
                    foreach (Vertex target in rows[row + source.realWord.Length])
                    {
                        target.updateFrom(source);
                    }
                }
            }
        }
    }

    // Trace the from links backwards, prepending so the result is in forward order.
    for (Vertex step = rows[lastRow].First(); step != null; step = step.from)
    {
        bestPath.AddFirst(step);
    }

    return bestPath;
}