示例#1
0
 /**
  * Completely erases a word from the word net.
  *
  * Two things happen: every vertex in row (line + length) whose
  * predecessor is cur gets its predecessor cleared, and cur itself is
  * unlinked from the row it sits in, (line + length - cur.realWord.Length).
  *
  * @param cur        the word (vertex) to erase
  * @param wordNetAll the word net
  * @param line       the row currently being scanned
  * @param length     the current buffer length
  */
 private static void removeFromWordNet(Vertex cur, WordNet wordNetAll, int line, int length)
 {
     LinkedList<Vertex>[] vertexes = wordNetAll.getVertexes();

     // Clear the back-pointer of every vertex that still names cur as its predecessor.
     foreach (Vertex vertex in vertexes[line + length])
     {
         if (vertex.from == cur)
         {
             vertex.from = null;
         }
     }

     // Unlink cur from its own row. A C# enumerator cannot remove elements
     // while iterating, so walk the list nodes directly; the successor is
     // captured first because Remove() detaches the node from the list.
     LinkedList<Vertex> row = vertexes[line + length - cur.realWord.Length];
     LinkedListNode<Vertex> node = row.First;
     while (node != null)
     {
         LinkedListNode<Vertex> next = node.Next;
         if (node.Value == cur)
         {
             row.Remove(node);
         }
         node = next;
     }
 }
        /**
         * Generates the unigram word net.
         *
         * First adds one vertex for every core-dictionary match found in the
         * character array, then fills each run of still-empty rows with the
         * result of quickAtomSegment.
         *
         * @param wordNetStorage the word net to populate; its charArray is the text being segmented
         */
        protected void GenerateWordNet(WordNet wordNetStorage)
        {
            char[] text = wordNetStorage.charArray;

            // Core dictionary lookup: each match becomes a vertex in row (begin + 1).
            DoubleArrayTrie<CoreDictionary.Attribute>.Searcher match = CoreDictionary.trie.getSearcher(text, 0);
            while (match.next())
            {
                String word = new String(text, match.begin, match.length);
                wordNetStorage.add(match.begin + 1, new Vertex(word, match.value, match.index));
            }

            // User dictionary lookup (disabled)
            //        if (config.useCustomDictionary)
            //        {
            //            searcher = CustomDictionary.dat.getSearcher(charArray, 0);
            //            while (searcher.next())
            //            {
            //                wordNetStorage.add(searcher.begin + 1, new Vertex(new String(charArray, searcher.begin, searcher.length), searcher.value));
            //            }
            //        }

            // Atom segmentation, to keep the graph connected.
            List<Vertex>[] rows = wordNetStorage.getVertexes();
            int pos = 1;
            while (pos < rows.Length)
            {
                List<Vertex> row = rows[pos];
                if (row.Count != 0)
                {
                    // Row already has words: jump ahead by the length of its last word.
                    pos += row[row.Count - 1].realWord.Length;
                    continue;
                }

                // Find the end of this run of empty rows (the final row is never scanned).
                int end = pos + 1;
                while (end < rows.Length - 1 && rows[end].Count == 0)
                {
                    ++end;
                }

                wordNetStorage.add(pos, quickAtomSegment(text, pos - 1, end - 1));
                pos = end;
            }
        }
示例#3
0
        /**
         * Walks the word net row by row, letting every reachable vertex offer
         * itself as predecessor (via updateFrom) to the vertices that start
         * where it ends, then follows the from links back from the first
         * vertex of the last row.
         *
         * @param wordNet the word net to search
         * @return the vertices on the back-traced path, in front-to-back order
         */
        private static LinkedList<Vertex> viterbi(WordNet wordNet)
        {
            // Work directly on the net's own rows to avoid allocating objects.
            LinkedList<Vertex>[] rows = wordNet.getVertexes();
            LinkedList<Vertex> path = new LinkedList<Vertex>();
            int lastRow = rows.Length - 1;

            // Seed: every vertex in row 1 is reached from the first vertex of row 0.
            foreach (Vertex seed in rows[1])
            {
                seed.updateFrom(rows[0].First());
            }

            for (int row = 1; row < lastRow; ++row)
            {
                LinkedList<Vertex> current = rows[row];
                if (current != null)
                {
                    foreach (Vertex source in current)
                    {
                        // Vertices without a predecessor are skipped.
                        if (source.from != null)
                        {
                            foreach (Vertex target in rows[row + source.realWord.Length])
                            {
                                target.updateFrom(source);
                            }
                        }
                    }
                }
            }

            // Back-trace from the first vertex of the last row, prepending as we go.
            for (Vertex step = rows[lastRow].First(); step != null; step = step.from)
            {
                path.AddFirst(step);
            }
            return path;
        }