Exemplo n.º 1
0
        //正向最长匹配
        /// <summary>
        /// Walks the segmentation graph row by row and returns the selected words
        /// joined with "/" (forward longest matching, per the original comment).
        /// </summary>
        /// <remarks>
        /// The walk starts at the item that follows element (0, 1). Within a row the
        /// cursor advances along <c>next</c> for as long as the following item has the
        /// same <c>row</c>; the last item reached in that row is emitted, and the walk
        /// resumes at the first item of the row whose index equals that item's
        /// <c>col</c>. The walk stops at an item that has no successor; that final
        /// item is not emitted.
        /// </remarks>
        /// <param name="m_segGraph">Segmentation graph to walk.</param>
        /// <returns>
        /// The emitted words separated by "/", or an empty string when nothing is emitted.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="m_segGraph"/> is null.
        /// </exception>
        public string ForwardSplitting(RowFirstDynamicArray <ChainContent> m_segGraph)
        {
            if (m_segGraph == null)
            {
                throw new System.ArgumentNullException("m_segGraph");
            }

            System.Text.StringBuilder result = new System.Text.StringBuilder();

            // NOTE(review): element (0, 1) is presumably the sentence-start marker - confirm.
            ChainItem <ChainContent> start = m_segGraph.GetElement(0, 1);
            if (start == null)
            {
                // Nothing to walk; previously this dereferenced null.
                return result.ToString();
            }

            ChainItem <ChainContent> cursor = start.next;
            bool isFirstWord = true;

            // A null cursor (empty chain, or a row lookup that yielded nothing) ends the
            // walk with what has been collected so far instead of throwing.
            while (cursor != null && cursor.next != null)
            {
                if (cursor.next.row == cursor.row)
                {
                    // Still inside the same row: keep moving to the row's later items.
                    cursor = cursor.next;
                    continue;
                }

                // cursor is the last item of its row: emit it. Only words after the
                // first one are preceded by the separator.
                if (!isFirstWord)
                {
                    result.Append("/");
                }
                result.Append(cursor.Content.sWord);
                isFirstWord = false;

                // Continue from the row that starts where the emitted word ends.
                cursor = m_segGraph.GetFirstElementOfRow(cursor.col);
            }

            return result.ToString();
        }
Exemplo n.º 2
0
        //正向最长匹配
        /// <summary>
        /// Forward longest-match walk (per the original comment) over the segmentation
        /// graph; returns the selected words joined with "/".
        /// </summary>
        /// <remarks>
        /// Beginning at the item after element (0, 1), the method skips forward while
        /// the next item shares the current item's <c>row</c>, emits the item at which
        /// the row changes, and then jumps to the first item of the row indexed by that
        /// item's <c>col</c>. An item without a successor terminates the walk and is
        /// not emitted.
        /// </remarks>
        /// <param name="m_segGraph">Segmentation graph to walk.</param>
        /// <returns>
        /// The emitted words separated by "/"; empty when no word is emitted.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="m_segGraph"/> is null.
        /// </exception>
        public string ForwardSplitting(RowFirstDynamicArray<ChainContent> m_segGraph)
        {
            if (m_segGraph == null)
            {
                throw new System.ArgumentNullException("m_segGraph");
            }

            // NOTE(review): element (0, 1) looks like the sentence-start marker - confirm.
            ChainItem<ChainContent> head = m_segGraph.GetElement(0, 1);

            // Guard the first hop: a missing head element used to cause a null dereference.
            ChainItem<ChainContent> current = null;
            if (head != null)
            {
                current = head.next;
            }

            System.Text.StringBuilder words = new System.Text.StringBuilder();
            bool needSeparator = false;

            // The loop also stops when a row lookup returns null, so a gap in the
            // graph yields the words gathered so far rather than an exception.
            while (current != null && current.next != null)
            {
                bool rowContinues = current.next.row == current.row;
                if (rowContinues)
                {
                    // Later item in the same row: prefer it over the current one.
                    current = current.next;
                }
                else
                {
                    // Row ends here: emit this word, with "/" before every word but the first.
                    if (needSeparator)
                    {
                        words.Append("/");
                    }
                    words.Append(current.Content.sWord);
                    needSeparator = true;

                    // Resume at the row whose index equals the emitted item's col.
                    current = m_segGraph.GetFirstElementOfRow(current.col);
                }
            }

            return words.ToString();
        }
Exemplo n.º 3
0
        //====================================================================
        // Generates the bigram graph (table) linking each word to the words that follow it.
        //====================================================================
        /// <summary>
        /// Builds the bigram graph: for every word in <paramref name="aWord"/> and every
        /// word whose <c>row</c> equals that word's <c>col</c>, stores a smoothed
        /// negative-log weight for the pair.
        /// </summary>
        /// <param name="aWord">Word graph whose items are paired up.</param>
        /// <param name="smoothPara">
        /// Smoothing weight <c>a</c> in <c>-log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)}</c>; the formula
        /// comment notes 0 &lt; a &lt; 1.
        /// </param>
        /// <param name="biDict">Dictionary queried (handle 3) for the frequency of the joined pair.</param>
        /// <param name="coreDict">Dictionary queried (handle 2) for the frequency of a word whose nPOS is negative.</param>
        /// <returns>
        /// A graph indexed by the two words' positions in the position map; each cell holds
        /// the pair weight, the first word's nPOS and the joined pair text.
        /// </returns>
        public static ColumnFirstDynamicArray<ChainContent> BiGraphGenerate(
         RowFirstDynamicArray<ChainContent> aWord, double smoothPara, WordDictionary biDict, WordDictionary coreDict)
        {
            ColumnFirstDynamicArray<ChainContent> biWordNet = new ColumnFirstDynamicArray<ChainContent>();
            StringBuilder pairBuilder = new StringBuilder();

            // Record the position map of possible words.
            int[] wordPosMap = PreparePositionMap(aWord);

            for (ChainItem<ChainContent> cur = aWord.GetHead(); cur != null; cur = cur.next)
            {
                double curFreq;
                if (cur.Content.nPOS >= 0)
                {
                    // Not an unknown word: use the weight stored on the item.
                    curFreq = cur.Content.eWeight;
                }
                else
                {
                    // Unknown word: look its frequency up in the core dictionary.
                    curFreq = coreDict.GetFrequency(cur.Content.sWord, 2);
                }

                double onePlusCurFreq = 1.0 + curFreq;

                // Candidate successors are the items whose row equals cur.col
                // (note the row/column correspondence).
                for (ChainItem<ChainContent> next = aWord.GetFirstElementOfRow(cur.col);
                     next != null && next.row == cur.col;
                     next = next.next)
                {
                    pairBuilder.Length = 0;
                    pairBuilder.Append(cur.Content.sWord)
                               .Append(Predefine.WORD_SEGMENTER)
                               .Append(next.Content.sWord);
                    string pairText = pairBuilder.ToString();

                    // Frequency of the two linked words.
                    int pairFreq = biDict.GetFrequency(pairText, 3);

                    // Smoothing floor.
                    double floor = 1.0 / Predefine.MAX_FREQUENCE;

                    // -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
                    double unigramTerm = smoothPara * onePlusCurFreq / (Predefine.MAX_FREQUENCE + 80000.0);
                    double bigramTerm = (1.0 - smoothPara) * ((1.0 - floor) * pairFreq / onePlusCurFreq + floor);
                    double weight = -Math.Log(unigramTerm + bigramTerm);

                    // Unknown words: P(Wi|Ci); while known words: 1
                    // NOTE(review): the comment speaks of a probability, yet the value added
                    // is nPOS itself - confirm this is intended.
                    if (cur.Content.nPOS < 0)
                    {
                        weight += cur.Content.nPOS;
                    }

                    // Locate both words in the position map; the key is row * MAX_SENTENCE_LEN + col.
                    int curIndex = Utility.BinarySearch(cur.row * Predefine.MAX_SENTENCE_LEN + cur.col, wordPosMap);
                    int nextIndex = Utility.BinarySearch(next.row * Predefine.MAX_SENTENCE_LEN + next.col, wordPosMap);

                    biWordNet.SetElement(curIndex, nextIndex, new ChainContent(weight, cur.Content.nPOS, pairText));
                }
            }

            return biWordNet;
        }