Exemple #1
0
        /// <summary>
        /// Stores the cost graph and the number of ranked weights to keep, then allocates
        /// the per-node parent-queue and weight tables used by the N-shortest-path search.
        /// </summary>
        /// <param name="apCost">Cost graph; its column count determines the number of nodes.</param>
        /// <param name="nValueKind">Number of weight/parent-queue slots allocated for every node.</param>
        private static void InitNShortPath(ColumnFirstDynamicArray<ChainContent> apCost, int nValueKind)
        {
            m_apCost     = apCost;
            m_nValueKind = nValueKind;

            // Number of vertices.
            // Note (by zhenyulu): the original program used
            //   m_nNode = Math.Max(apCost.ColumnCount, apCost.RowCount) + 1;
            // but apCost.ColumnCount should always be greater than apCost.RowCount,
            // so only the column count is used here.
            m_nNode = apCost.ColumnCount + 1;

            // The first node is not included, hence m_nNode - 1 entries in each table.
            m_pParent = new CQueue[m_nNode - 1][];
            m_pWeight = new double[m_nNode - 1][];

            // Give every node its own array of queues and its own array of weights.
            for (int node = 0; node < m_pParent.Length; node++)
            {
                m_pParent[node] = new CQueue[nValueKind];
                m_pWeight[node] = new double[nValueKind];

                for (int rank = 0; rank < nValueKind; rank++)
                {
                    m_pParent[node][rank] = new CQueue();
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Builds the bigram graph from the current optimum graph, runs the N-shortest-path
        /// search on it and converts every resulting path into a word sequence stored in
        /// <c>m_pWordSeg</c>.
        /// </summary>
        /// <param name="nResultCount">Value passed to <c>NShortPath.Calculate</c> as the number of path kinds.</param>
        /// <param name="dSmoothingPara">Smoothing parameter forwarded to <c>BiGraphGenerate</c>.</param>
        /// <returns>The number of word sequences stored in <c>m_pWordSeg</c>.</returns>
        public int BiOptimumSegment(int nResultCount, double dSmoothingPara)
        {
            // Generate the bi-word link net from the current optimum graph.
            ColumnFirstDynamicArray<ChainContent> biwordNet =
                BiGraphGenerate(m_graphOptimum, dSmoothingPara, biDict, coreDict);

            OnGenBiOptimumSegGraph(biwordNet);

            NShortPath.Calculate(biwordNet, nResultCount);
            List<int[]> paths = NShortPath.GetNPaths(Predefine.MAX_SEGMENT_NUM);

            // The previous optimum graph becomes the segment graph; a fresh optimum graph is
            // filled while the words are generated below.
            m_pWordSeg     = new List<WordResult[]>();
            segGraph       = m_graphOptimum;
            m_graphOptimum = new RowFirstDynamicArray<ChainContent>();

            foreach (int[] path in paths)
            {
                WordLinkedArray linked = BiPath2LinkedArray(path, segGraph, atomSegment);
                WordResult[]    words  = GenerateWord(path, linked, m_graphOptimum);

                // Paths for which no word array was produced are skipped.
                if (words != null)
                {
                    m_pWordSeg.Add(words);
                }
            }

            return m_pWordSeg.Count;
        }
Exemple #3
0
        /// <summary>
        /// Runs the first stages of segmentation (atom segmentation, word-net generation and
        /// bigram-graph generation) and returns the resulting bigram graph without applying
        /// the backward optimisation or the shortest-path search.
        /// </summary>
        /// <param name="sSentence">Sentence to segment.</param>
        /// <param name="smoothPara">Smoothing parameter forwarded to <c>BiGraphGenerate</c>.</param>
        /// <param name="nKind">Not used by this method; kept so the signature stays unchanged.</param>
        /// <returns>The bigram graph, which is also stored in <c>biGraphResult</c>.</returns>
        /// <exception cref="Exception">Thrown when <c>biDict</c> or <c>coreDict</c> is null.</exception>
        public ColumnFirstDynamicArray<ChainContent> TestSegment(string sSentence, double smoothPara, int nKind)
        {
            if (biDict == null || coreDict == null)
            {
                throw new Exception("biDict 或 coreDict 尚未初始化!");
            }

            // Step 1: atom segmentation.
            atomSegment = AtomSegment(sSentence);
            OnAtomSegment(atomSegment);

            // Step 2: look up the dictionary and store every candidate word in the linked
            // structure. No event is raised for this stage here.
            segGraph = GenerateWordNet(atomSegment, coreDict);

            // Step 3: build every possible pair of adjacent words. No event is raised for
            // this stage here, and the backward optimisation is not applied.
            biGraphResult = BiGraphGenerate(segGraph, smoothPara, biDict, coreDict);

            return biGraphResult;
        }
Exemple #4
0
        /// <summary>
        /// Rescales the weight of every item in the graph in place: each item's
        /// <c>eWeight</c> is multiplied by <c>backNum</c> raised to the power of the item's
        /// column index.
        /// </summary>
        /// <param name="test">Graph whose items are rescaled; it is modified in place.</param>
        /// <returns>The same <paramref name="test"/> instance that was passed in.</returns>
        public static ColumnFirstDynamicArray<ChainContent> BackwardOptimize(ColumnFirstDynamicArray<ChainContent> test)
        {
            // Walk the item chain from its head; the scale factor depends only on the column.
            for (ChainItem<ChainContent> item = test.GetHead(); item != null; item = item.next)
            {
                double factor = Math.Pow(backNum, item.col);
                item.Content.eWeight *= factor;
            }

            return test;
        }
Exemple #5
0
        /// <summary>
        /// Full bigram segmentation pipeline: atom segmentation, word-net generation,
        /// bigram-graph generation, backward optimisation and N-shortest-path search. Each
        /// resulting path is converted into a word sequence stored in <c>m_pWordSeg</c>, and
        /// the matching stage event method is called after every step.
        /// </summary>
        /// <param name="sSentence">Sentence to segment.</param>
        /// <param name="smoothPara">Smoothing parameter forwarded to <c>BiGraphGenerate</c>.</param>
        /// <param name="nKind">Value passed to <c>NShortPath.Calculate</c> as the number of path kinds.</param>
        /// <returns>The number of word sequences stored in <c>m_pWordSeg</c>.</returns>
        /// <exception cref="Exception">Thrown when <c>biDict</c> or <c>coreDict</c> is null.</exception>
        public int BiSegment(string sSentence, double smoothPara, int nKind)
        {
            if (biDict == null || coreDict == null)
            {
                throw new Exception("biDict 或 coreDict 尚未初始化!");
            }

            // Step 1: atom segmentation.
            atomSegment = AtomSegment(sSentence);
            OnAtomSegment(atomSegment);

            // Step 2: look up the dictionary and store every candidate word in the linked structure.
            segGraph = GenerateWordNet(atomSegment, coreDict);
            OnGenSegGraph(segGraph);

            // Step 3: build every possible pair of adjacent words.
            biGraphResult = BiGraphGenerate(segGraph, smoothPara, biDict, coreDict);
            OnGenBiSegGraph(biGraphResult);

            // Step 4: backward-matching optimisation.
            biGraphResult = BackwardOptimize(biGraphResult);
            OnBackwardOptimize(biGraphResult);

            // Step 5: the N-shortest-path search yields several candidate segmentations.
            NShortPath.Calculate(biGraphResult, nKind);
            List<int[]> paths = NShortPath.GetNPaths(Predefine.MAX_SEGMENT_NUM);

            OnNShortPath(paths, segGraph);

            m_pWordSeg     = new List<WordResult[]>();
            m_graphOptimum = new RowFirstDynamicArray<ChainContent>();

            // Step 6: turn every path into a word array, skipping paths that produce none.
            foreach (int[] path in paths)
            {
                WordLinkedArray linked = BiPath2LinkedArray(path, segGraph, atomSegment);
                WordResult[]    words  = GenerateWord(path, linked, m_graphOptimum);

                if (words != null)
                {
                    m_pWordSeg.Add(words);
                }
            }

            OnBeforeOptimize(m_pWordSeg);

            return m_pWordSeg.Count;
        }
Exemple #6
0
        /// <summary>
        /// Computes the candidate paths at every node, preparing the data that the path
        /// extraction relies on. For each node it fills up to <c>m_nValueKind</c> distinct
        /// weights in <c>m_pWeight</c> and the matching parent queues in <c>m_pParent</c>.
        /// </summary>
        /// <param name="apCost">Cost graph handed to <c>InitNShortPath</c>.</param>
        /// <param name="nValueKind">Number of weight slots kept per node.</param>
        public static void Calculate(ColumnFirstDynamicArray<ChainContent> apCost, int nValueKind)
        {
            InitNShortPath(apCost, nValueKind);

            CQueue queWork = new CQueue();

            for (int nCurNode = 1; nCurNode < m_nNode; nCurNode++)
            {
                // The tables do not include the first node, so node n lives at index n - 1.
                int slot = nCurNode - 1;

                // Push all candidate edges leading to the current node, ordered by eWeight,
                // into the work queue.
                EnQueueCurNodeEdges(ref queWork, nCurNode);

                // Reset every weight slot of the current node.
                for (int rank = 0; rank < m_nValueKind; rank++)
                {
                    m_pWeight[slot][rank] = Predefine.INFINITE_VALUE;
                }

                // Move the queue content into m_pWeight and m_pParent. Consecutive elements
                // with an equal weight share one slot; a different weight opens the next
                // slot. Processing of this node stops as soon as the queue runs dry or all
                // slots have been used.
                QueueElement current = queWork.DeQueue();

                for (int rank = 0; rank < m_nValueKind && current != null; rank++)
                {
                    double slotWeight = current.eWeight;
                    m_pWeight[slot][rank] = slotWeight;

                    do
                    {
                        m_pParent[slot][rank].EnQueue(new QueueElement(current.nParent, current.nIndex, 0));
                        current = queWork.DeQueue();
                    } while (current != null && current.eWeight == slotWeight);
                }
            }
        }
Exemple #7
0
 /// <summary>
 /// Sends a segmentation event carrying the string form of the given bigram graph.
 /// </summary>
 /// <param name="biOptGraph">Bigram graph whose <c>ToString()</c> output is reported.</param>
 private void OnGenBiOptimumSegGraph(ColumnFirstDynamicArray<ChainContent> biOptGraph)
 {
     // NOTE(review): the event is reported under SegmentStage.GenBiSegGraph although the
     // method name refers to the optimum graph - confirm this is intended.
     string graphText = biOptGraph.ToString();
     SegmentEventArgs args = new SegmentEventArgs(SegmentStage.GenBiSegGraph, graphText);
     SendEvents(args);
 }
Exemple #8
0
        /// <summary>
        /// Generates the bigram graph that links every pair of adjacent candidate words.
        /// </summary>
        /// <param name="aWord">Word graph; the words following an item are those whose row equals that item's column.</param>
        /// <param name="smoothPara">Smoothing weight "a" of the formula below (0 &lt; a &lt; 1).</param>
        /// <param name="biDict">Dictionary queried for the frequency of a word pair.</param>
        /// <param name="coreDict">Dictionary queried for the frequency of an unknown word.</param>
        /// <returns>A new graph with one element per adjacent word pair.</returns>
        public static ColumnFirstDynamicArray<ChainContent> BiGraphGenerate(
            RowFirstDynamicArray<ChainContent> aWord, double smoothPara, WordDictionary biDict, WordDictionary coreDict)
        {
            ColumnFirstDynamicArray<ChainContent> bigramNet = new ColumnFirstDynamicArray<ChainContent>();
            StringBuilder pairBuilder = new StringBuilder();

            // Record the position map of possible words.
            int[] positionMap = PreparePositionMap(aWord);

            for (ChainItem<ChainContent> first = aWord.GetHead(); first != null; first = first.next)
            {
                double firstFrequency;
                if (first.Content.nPOS < 0)
                {
                    // Unknown word: take its frequency from the core dictionary.
                    firstFrequency = coreDict.GetFrequency(first.Content.sWord, 2);
                }
                else
                {
                    // Known word: its weight is used as the frequency.
                    firstFrequency = first.Content.eWeight;
                }

                // The following words are the ones whose row equals first.col
                // (note: a rather special correspondence).
                for (ChainItem<ChainContent> second = aWord.GetFirstElementOfRow(first.col);
                     second != null && second.row == first.col;
                     second = second.next)
                {
                    // Key of the pair: first word, separator, second word.
                    pairBuilder.Length = 0;
                    pairBuilder.Append(first.Content.sWord);
                    pairBuilder.Append(Predefine.WORD_SEGMENTER);
                    pairBuilder.Append(second.Content.sWord);

                    string pairKey = pairBuilder.ToString();

                    // Frequency of the two linked words.
                    int pairFrequency = biDict.GetFrequency(pairKey, 3);

                    // Smoothing
                    double floor = 1.0 / Predefine.MAX_FREQUENCE;

                    // -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
                    double edgeValue = -Math.Log(
                        smoothPara * (1.0 + firstFrequency) / (Predefine.MAX_FREQUENCE + 80000.0)
                        + (1.0 - smoothPara) * ((1.0 - floor) * pairFrequency / (1.0 + firstFrequency) + floor));

                    // Unknown words: P(Wi|Ci); while known words: 1
                    if (first.Content.nPOS < 0)
                    {
                        edgeValue += first.Content.nPOS;
                    }

                    // Position indexes of both words in the position map table.
                    int firstIndex  = Utility.BinarySearch(first.row * Predefine.MAX_SENTENCE_LEN + first.col, positionMap);
                    int secondIndex = Utility.BinarySearch(second.row * Predefine.MAX_SENTENCE_LEN + second.col, positionMap);

                    bigramNet.SetElement(firstIndex, secondIndex, new ChainContent(edgeValue, first.Content.nPOS, pairKey));
                }
            }

            return bigramNet;
        }
Exemple #9
0
 /// <summary>
 /// Sends a segmentation event carrying the string form of the given bigram graph.
 /// </summary>
 /// <param name="biGraph">Bigram graph whose <c>ToString()</c> output is reported.</param>
 private void OnBackwardOptimize(ColumnFirstDynamicArray<ChainContent> biGraph)
 {
     // "BcakwardOptimize" is the spelling of the stage member referenced here; it is
     // declared outside this block and therefore left untouched.
     string graphText = biGraph.ToString();
     SegmentEventArgs args = new SegmentEventArgs(SegmentStage.BcakwardOptimize, graphText);
     SendEvents(args);
 }