Example #1
0
        /// <summary>
        /// Judges whether a two-character string scores better as the role pair (2, 3)
        /// than as the role pair (1, 4).
        /// </summary>
        /// <remarks>
        /// Both readings are scored as sums of log terms built from add-one-smoothed counts
        /// read from m_dict and m_roleTag.m_context.
        /// NOTE(review): roles 1/2/3/4 presumably mean surname / first char of a two-char given
        /// name / second char of a two-char given name / single-char given name - confirm
        /// against the role dictionary.
        /// </remarks>
        /// <param name="sName">Candidate string; null or any length other than two is rejected.</param>
        /// <returns>
        /// true only when the (2, 3) score is strictly greater than the (1, 4) score; false otherwise.
        /// </returns>
        public bool IsGivenName(string sName)
        {
            //Only two-character candidates can be scored; a null candidate is simply "not a given name"
            if (sName == null || sName.Length != 2)
            {
                return(false);
            }

            //Index the string directly instead of copying it into a char array twice
            string sFirst  = sName[0].ToString();
            string sSecond = sName[1].ToString();

            double dGivenNamePossibility = 0, dSingleNamePossibility = 0;

            //P(Wi|Ti) for the first char in role 2 and the second char in role 3
            dGivenNamePossibility += Math.Log((double)m_dict.GetFrequency(sFirst, 2) + 1.0)
                                     - Math.Log(m_roleTag.m_context.GetFrequency(0, 2) + 1.0);
            dGivenNamePossibility += Math.Log((double)m_dict.GetFrequency(sSecond, 3) + 1.0)
                                     - Math.Log(m_roleTag.m_context.GetFrequency(0, 3) + 1.0);
            //Transition from role 2 to role 3
            dGivenNamePossibility += Math.Log(m_roleTag.m_context.GetContextPossibility(0, 2, 3) + 1.0)
                                     - Math.Log(m_roleTag.m_context.GetFrequency(0, 2) + 1.0);

            //P(Wi|Ti) for the first char in role 1 and the second char in role 4
            dSingleNamePossibility += Math.Log((double)m_dict.GetFrequency(sFirst, 1) + 1.0)
                                      - Math.Log(m_roleTag.m_context.GetFrequency(0, 1) + 1.0);
            dSingleNamePossibility += Math.Log((double)m_dict.GetFrequency(sSecond, 4) + 1.0)
                                      - Math.Log(m_roleTag.m_context.GetFrequency(0, 4) + 1.0);
            //Transition from role 1 to role 4
            dSingleNamePossibility += Math.Log(m_roleTag.m_context.GetContextPossibility(0, 1, 4) + 1.0)
                                      - Math.Log(m_roleTag.m_context.GetFrequency(0, 1) + 1.0);

            //Ties go to the (1, 4) reading, e.g. 张震. An alternative test on the ratio of the
            //first char's role-1 to role-2 frequency (>= 10) was left disabled in the original.
            return(dGivenNamePossibility > dSingleNamePossibility);
        }
Example #2
0
        /// <summary>
        /// POS tagging with a Hidden Markov Model: walks <paramref name="pWordItems"/> in
        /// segments and, depending on m_tagType, either writes the best tags back into the
        /// items or runs person / place recognition.
        /// </summary>
        /// <param name="pWordItems">Items to tag; processing stops at the first item whose sWord is null.</param>
        /// <param name="dictCore">Core dictionary.</param>
        /// <param name="dictUnknown">Unknown-word recognition dictionary.</param>
        /// <returns>Always true.</returns>
        public bool POSTagging(WordResult[] pWordItems, WordDictionary dictCore, WordDictionary dictUnknown)
        {
            int i = 0, j, nStartPos, nItem;

            Reset(false);
            while (i > -1 && i < pWordItems.Length && pWordItems[i].sWord != null)
            {
                nStartPos = i; //Start position of this segment
                i         = GetFrom(pWordItems, nStartPos, dictCore, dictUnknown);
                GetBestPOS();
                switch (m_tagType)
                {
                case TAG_TYPE.TT_NORMAL:
                    //Normal POS tagging: copy the best tags back into the items.
                    //The bound is tested BEFORE indexing m_nBestTag so that j can never
                    //read past m_nCurLength.
                    j = 1;
                    while (j < m_nCurLength && m_nBestTag[j] != -1)
                    {
                        //Tag slot j corresponds to item (nStartPos + j - 1)
                        nItem = j + nStartPos - 1;

                        //Store the best POS tag
                        pWordItems[nItem].nPOS = m_nBestTag[j];

                        //Items whose dValue is not positive are left at their value
                        //(original note: "Let 。be 0")
                        if (pWordItems[nItem].dValue > 0 && dictCore.IsExist(pWordItems[nItem].sWord, -1))
                        {
                            //The word exists: replace its value with its frequency under the chosen tag
                            pWordItems[nItem].dValue = dictCore.GetFrequency(pWordItems[nItem].sWord, m_nBestTag[j]);
                        }
                        j += 1;
                    }
                    break;

                case TAG_TYPE.TT_PERSON:
                    //Person recognition
                    PersonRecognize(dictUnknown);
                    break;

                case TAG_TYPE.TT_PLACE:
                //Place name recognition
                case TAG_TYPE.TT_TRANS_PERSON:
                    //Transliterated person names share the place-recognition routine
                    PlaceRecognize(dictCore, dictUnknown);
                    break;

                default:
                    break;
                }
                Reset();
            }
            return(true);
        }
Example #3
0
        /// <summary>
        /// Sums, over the words in [nStartPos, nStartPos + nLength), the value
        /// log(tag frequency + 1) - log(word-under-tag frequency + 1).
        /// </summary>
        /// <param name="nStartPos">Index of the first word/tag slot to score.</param>
        /// <param name="nLength">Number of consecutive slots to score.</param>
        /// <param name="dict">Dictionary queried for the frequency of each word under its best tag.</param>
        /// <returns>The accumulated score; 0 when nLength is not positive.</returns>
        private double ComputePossibility(int nStartPos, int nLength, WordDictionary dict)
        {
            double dTotal = 0;
            int    nEnd   = nStartPos + nLength;
            int    nPos   = nStartPos;

            while (nPos < nEnd)
            {
                //How often this word carries its best tag
                int nWordTagFreq = dict.GetFrequency(m_sWords[nPos], m_nBestTag[nPos]);

                //How often the tag itself occurs, then the word-under-tag count, both add-one smoothed
                double dTagTerm  = Math.Log((double)(m_context.GetFrequency(0, m_nBestTag[nPos]) + 1));
                double dWordTerm = Math.Log((double)(nWordTagFreq + 1));

                dTotal += dTagTerm - dWordTerm;
                nPos++;
            }

            //A context-transition term between adjacent tags (GetContextPossibility) is not
            //part of the score; the original kept that computation commented out.
            return(dTotal);
        }
Example #4
0
 /// <summary>
 /// POS tagging with a Hidden Markov Model: walks <paramref name="pWordItems"/> in
 /// segments and, depending on m_tagType, either writes the best tags back into the
 /// items or runs person / place recognition.
 /// </summary>
 /// <param name="pWordItems">Items to tag; processing stops at the first item whose sWord is null.</param>
 /// <param name="dictCore">Core dictionary.</param>
 /// <param name="dictUnknown">Unknown-word recognition dictionary.</param>
 /// <returns>Always true.</returns>
 public bool POSTagging(WordResult[] pWordItems, WordDictionary dictCore, WordDictionary dictUnknown)
 {
     int i = 0, j, nStartPos, nItem;
     Reset(false);
     while (i > -1 && i < pWordItems.Length && pWordItems[i].sWord != null)
     {
         nStartPos = i; //Start position of this segment
         i = GetFrom(pWordItems, nStartPos, dictCore, dictUnknown);
         GetBestPOS();
         switch (m_tagType)
         {
             case TAG_TYPE.TT_NORMAL:
                 //Normal POS tagging: copy the best tags back into the items.
                 //The bound is tested BEFORE indexing m_nBestTag so that j can never
                 //read past m_nCurLength.
                 j = 1;
                 while (j < m_nCurLength && m_nBestTag[j] != -1)
                 {
                     //Tag slot j corresponds to item (nStartPos + j - 1)
                     nItem = j + nStartPos - 1;
                     //Store the best POS tag
                     pWordItems[nItem].nPOS = m_nBestTag[j];
                     //Items whose dValue is not positive are left at their value
                     //(original note: "Let 。be 0")
                     if (pWordItems[nItem].dValue > 0 && dictCore.IsExist(pWordItems[nItem].sWord, -1))
                     {
                         //The word exists: replace its value with its frequency under the chosen tag
                         pWordItems[nItem].dValue = dictCore.GetFrequency(pWordItems[nItem].sWord, m_nBestTag[j]);
                     }
                     j += 1;
                 }
                 break;
             case TAG_TYPE.TT_PERSON:
                 //Person recognition
                 PersonRecognize(dictUnknown);
                 break;
             case TAG_TYPE.TT_PLACE:
             //Place name recognition
             case TAG_TYPE.TT_TRANS_PERSON:
                 //Transliterated person names share the place-recognition routine
                 PlaceRecognize(dictCore, dictUnknown);
                 break;
             default:
                 break;
         }
         Reset();
     }
     return true;
 }
Example #5
0
 /// <summary>
 /// Sums, over the words in [nStartPos, nStartPos + nLength), the value
 /// log(tag frequency + 1) - log(word-under-tag frequency + 1).
 /// </summary>
 /// <param name="nStartPos">Index of the first word/tag slot to score.</param>
 /// <param name="nLength">Number of consecutive slots to score.</param>
 /// <param name="dict">Dictionary queried for the frequency of each word under its best tag.</param>
 /// <returns>The accumulated score; 0 when nLength is not positive.</returns>
 private double ComputePossibility(int nStartPos, int nLength, WordDictionary dict)
 {
     double dTotal = 0;
     int nEnd = nStartPos + nLength;
     int nPos = nStartPos;
     while (nPos < nEnd)
     {
         //How often this word carries its best tag
         int nWordTagFreq = dict.GetFrequency(m_sWords[nPos], m_nBestTag[nPos]);
         //How often the tag itself occurs, then the word-under-tag count, both add-one smoothed
         double dTagTerm = Math.Log((double)(m_context.GetFrequency(0, m_nBestTag[nPos]) + 1));
         double dWordTerm = Math.Log((double)(nWordTagFreq + 1));
         dTotal += dTagTerm - dWordTerm;
         nPos++;
     }
     //A context-transition term between adjacent tags (GetContextPossibility) is not
     //part of the score; the original kept that computation commented out.
     return dTotal;
 }
Example #6
0
        /// <summary>
        /// Scans the current best role-tag sequence for person-name patterns and records every
        /// accepted match in m_nUnknownWords / m_dWordsPossibility.
        /// </summary>
        /// <remarks>
        /// Each tag value t in m_nBestTag is rendered as the character 'A' + t, so pattern
        /// letters map directly onto role tags. Patterns are tried in array order at each
        /// position; the first accepted one wins and the scan jumps past it.
        /// </remarks>
        /// <param name="personDict">Dictionary handed to ComputePossibility when scoring a match.</param>
        /// <returns>Always true.</returns>
        public bool PersonRecognize(WordDictionary personDict)
        {
            //The empty pattern with length 0 terminates the list
            string[] sPatterns = { "BBCD", "BBC", "BBE", "BBZ", "BCD", "BEE", "BE", "BG", "BXD", "BZ", "CDCD", "CD", "EE", "FB", "Y", "XD", "" };
            double[] dFactor =   { 0.003606, 0.000021, 0.001314, 0.000315, 0.656624, 0.000021, 0.146116, 0.009136, 
            0.000042, 0.038971, 0, 0.090367, 0.000273, 0.009157, 0.034324, 0.009735, 0 };
            int[] nPatternLen = { 4, 3, 3, 3, 3, 3, 2, 2, 3, 2, 4, 2, 2, 2, 1, 2, 0 };

            /*------------------------------------
            About the parameters (pattern, count, factor):

            BBCD  343      0.003606
            BBC   2        0.000021
            BBE   125      0.001314
            BBZ   30       0.000315
            BCD   62460    0.656624
            BEE   0        0.000000
            BE    13899    0.146116
            BG    869      0.009136
            BXD   4        0.000042
            BZ    3707     0.038971
            CD    8596     0.090367
            EE    26       0.000273
            FB    871      0.009157
            Y     3265     0.034324
            XD    926      0.009735

            The person recognition pattern set
            BBCD: surname + surname + given-name char 1 + given-name char 2
            BBE:  surname + surname + single-char given name
            BBZ:  surname + surname + two-char given name that forms a word
            BCD:  surname + given-name char 1 + given-name char 2
            BE:   surname + single-char given name
            BEE:  surname + single-char name + single-char name, e.g. 韩磊磊
            BG:   surname + suffix
            BXD:  surname + (surname and first given-name char forming a word) + last given-name char
            BZ:   surname + two-char given name that forms a word
            B:    surname
            CD:   given-name char 1 + given-name char 2
            EE:   single-char name + single-char name
            FB:   prefix + surname
            XD:   (surname and first given-name char forming a word) + last given-name char
            Y:    surname and single-char given name forming a word
            ------------------------------------*/

            //Render the tag sequence as a string; index 0 is a 'z' placeholder so that
            //string positions line up with tag positions
            StringBuilder sb = new StringBuilder();
            int i;
            sb.Append('z');
            for (i = 1; m_nBestTag[i] > -1; i++)
            {
                sb.Append(Convert.ToChar(m_nBestTag[i] + Convert.ToInt32('A')));
            }
            string sPOS = sb.ToString();

            int j = 1, k, nPos; //Find the proper pattern from the first POS
            string sPersonName;
            bool bMatched;
            while (j < i)
            {
                bMatched = false;
                for (k = 0; !bMatched && nPatternLen[k] > 0; k++)
                {
                    //Tags are plain codes, not linguistic text: compare ordinally.
                    //A candidate is also rejected when it touches a "·" on either side.
                    if (string.CompareOrdinal(sPatterns[k], 0, sPOS, j, nPatternLen[k]) != 0 ||
                        string.Equals(m_sWords[j - 1], "·", StringComparison.Ordinal) ||
                        string.Equals(m_sWords[j + nPatternLen[k]], "·", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    //Rule 1 for exclusion: prefix + surname followed by a given-name role
                    //(E, C or G) invalidates the (prefix + surname) pattern.
                    //The tag after the match may not exist, so guard the index.
                    if (string.Equals(sPatterns[k], "FB", StringComparison.Ordinal) && j + 2 < sPOS.Length)
                    {
                        char cFollowing = sPOS[j + 2];
                        if (cFollowing == 'E' || cFollowing == 'C' || cFollowing == 'G')
                        {
                            continue;
                        }
                    }

                    //NOTE: further exclusion rules of the original implementation (BEE/EE
                    //repeated-character check, lone-surname suffix check, all-foreign-character
                    //checks, low-frequency role check) are disabled and therefore not evaluated here.

                    if (string.Equals(sPatterns[k], "CDCD", StringComparison.Ordinal))
                    {
                        //The pattern (name1 + name2 + name1 + name2) is itself an exclusion rule,
                        //e.g. 女高音歌唱家迪里拜尔演唱.
                        //Rule 3 for exclusion: it applies when the candidate contains foreign-name
                        //characters; otherwise the exclusion does not apply, e.g. 黑妞白妞姐俩拔了头筹。
                        //The candidate text is only needed for this check, so build it here.
                        sPersonName = string.Empty;
                        for (nPos = j; nPos < j + nPatternLen[k]; nPos++)
                        {
                            sPersonName += m_sWords[nPos];
                        }
                        if (Utility.GetForeignCharCount(sPersonName) > 0)
                        {
                            j += nPatternLen[k] - 1;
                        }
                        continue;
                    }

                    //Record the match: start/end word positions and its score
                    m_nUnknownWords[m_nUnknownWordsCount, 0] = m_nWordPosition[j];
                    m_nUnknownWords[m_nUnknownWordsCount, 1] = m_nWordPosition[j + nPatternLen[k]];
                    //Multiplying by the pattern factor becomes adding -log(factor) in the log domain
                    m_dWordsPossibility[m_nUnknownWordsCount] = -Math.Log(dFactor[k]) + ComputePossibility(j, nPatternLen[k], personDict);
                    m_nUnknownWordsCount += 1;
                    j += nPatternLen[k];
                    bMatched = true;
                }
                if (!bMatched)
                {
                    //Not matched, advance by one tag
                    j += 1;
                }
            }
            return true;
        }
Example #7
0
        //====================================================================
        // Builds the bigram graph that links each word to the words following it
        //====================================================================
        /// <summary>
        /// For every word in <paramref name="aWord"/> and every word that starts where it ends,
        /// stores a smoothed bigram cost in the returned array.
        /// </summary>
        /// <param name="aWord">Word lattice; an entry's row/col are used as its start/end positions.</param>
        /// <param name="smoothPara">Smoothing weight a in -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)}, 0 &lt; a &lt; 1.</param>
        /// <param name="biDict">Dictionary queried (with handle 3) for the frequency of the joined word pair.</param>
        /// <param name="coreDict">Dictionary queried (with handle 2) for the frequency of unknown words.</param>
        /// <returns>The bigram array, indexed by the position-map indices of the two words.</returns>
        public static ColumnFirstDynamicArray<ChainContent> BiGraphGenerate(
         RowFirstDynamicArray<ChainContent> aWord, double smoothPara, WordDictionary biDict, WordDictionary coreDict)
        {
            ColumnFirstDynamicArray<ChainContent> aBiWordNet = new ColumnFirstDynamicArray<ChainContent>();

            ChainItem<ChainContent> pCur, pNextWords;
            int nTwoWordsFreq = 0, nCurWordIndex, nNextWordIndex;
            double dCurFreqency, dValue;
            string sTwoWords;
            StringBuilder sb = new StringBuilder();

            //Smoothing floor; loop-invariant, so computed once
            double dTemp = 1.0 / Predefine.MAX_FREQUENCE;

            //Record the position map of possible words
            int[] m_npWordPosMapTable = PreparePositionMap(aWord);

            pCur = aWord.GetHead();
            while (pCur != null)
            {
                if (pCur.Content.nPOS >= 0)
                {
                    //Not an unknown word: its weight serves as the frequency
                    dCurFreqency = pCur.Content.eWeight;
                }
                else
                {
                    //Unknown word: look the frequency up in the core dictionary
                    dCurFreqency = coreDict.GetFrequency(pCur.Content.sWord, 2);
                }

                //Get the next words, i.e. those whose row equals pCur.col
                //(note the unusual row/col correspondence)
                pNextWords = aWord.GetFirstElementOfRow(pCur.col);

                while (pNextWords != null && pNextWords.row == pCur.col)
                {
                    sb.Length = 0;
                    sb.Append(pCur.Content.sWord);
                    sb.Append(Predefine.WORD_SEGMENTER);
                    sb.Append(pNextWords.Content.sWord);

                    sTwoWords = sb.ToString();

                    //Frequency of the two linked words
                    nTwoWordsFreq = biDict.GetFrequency(sTwoWords, 3);

                    //-log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
                    dValue = -Math.Log(smoothPara * (1.0 + dCurFreqency) / (Predefine.MAX_FREQUENCE + 80000.0)
                      + (1.0 - smoothPara) * ((1.0 - dTemp) * nTwoWordsFreq / (1.0 + dCurFreqency) +
                      dTemp));

                    //Unknown words: P(Wi|Ci); known words: 1.
                    //The term added must be the word's own weight; the previous code added
                    //nPOS, which is a tag id rather than a probability term.
                    if (pCur.Content.nPOS < 0)
                    {
                        dValue += pCur.Content.eWeight;
                    }

                    //Get the position indices of both words in the position map table
                    nCurWordIndex = Utility.BinarySearch(pCur.row * Predefine.MAX_SENTENCE_LEN + pCur.col, m_npWordPosMapTable);
                    nNextWordIndex = Utility.BinarySearch(pNextWords.row * Predefine.MAX_SENTENCE_LEN + pNextWords.col, m_npWordPosMapTable);

                    aBiWordNet.SetElement(nCurWordIndex, nNextWordIndex, new ChainContent(dValue, pCur.Content.nPOS, sTwoWords));

                    pNextWords = pNextWords.next; //Get next word
                }
                pCur = pCur.next;
            }

            return aBiWordNet;
        }
Example #8
0
        //====================================================================
        // Builds the bigram graph that links each word to the words following it
        //====================================================================
        /// <summary>
        /// For every word in <paramref name="aWord"/> and every word that starts where it ends,
        /// stores a smoothed bigram cost in the returned array.
        /// </summary>
        /// <param name="aWord">Word lattice; an entry's row/col are used as its start/end positions.</param>
        /// <param name="smoothPara">Smoothing weight a in -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)}, 0 &lt; a &lt; 1.</param>
        /// <param name="biDict">Dictionary queried (with handle 3) for the frequency of the joined word pair.</param>
        /// <param name="coreDict">Dictionary queried (with handle 2) for the frequency of unknown words.</param>
        /// <returns>The bigram array, indexed by the position-map indices of the two words.</returns>
        public static ColumnFirstDynamicArray <ChainContent> BiGraphGenerate(
            RowFirstDynamicArray <ChainContent> aWord, double smoothPara, WordDictionary biDict, WordDictionary coreDict)
        {
            ColumnFirstDynamicArray <ChainContent> aBiWordNet = new ColumnFirstDynamicArray <ChainContent>();

            ChainItem <ChainContent> pCur, pNextWords;
            int           nTwoWordsFreq = 0, nCurWordIndex, nNextWordIndex;
            double        dCurFreqency, dValue;
            string        sTwoWords;
            StringBuilder sb = new StringBuilder();

            //Smoothing floor; loop-invariant, so computed once
            double dTemp = 1.0 / Predefine.MAX_FREQUENCE;

            //Record the position map of possible words
            int[] m_npWordPosMapTable = PreparePositionMap(aWord);

            pCur = aWord.GetHead();
            while (pCur != null)
            {
                if (pCur.Content.nPOS >= 0)
                {
                    //Not an unknown word: its weight serves as the frequency
                    dCurFreqency = pCur.Content.eWeight;
                }
                else
                {
                    //Unknown word: look the frequency up in the core dictionary
                    dCurFreqency = coreDict.GetFrequency(pCur.Content.sWord, 2);
                }

                //Get the next words, i.e. those whose row equals pCur.col
                //(note the unusual row/col correspondence)
                pNextWords = aWord.GetFirstElementOfRow(pCur.col);

                while (pNextWords != null && pNextWords.row == pCur.col)
                {
                    sb.Length = 0;
                    sb.Append(pCur.Content.sWord);
                    sb.Append(Predefine.WORD_SEGMENTER);
                    sb.Append(pNextWords.Content.sWord);

                    sTwoWords = sb.ToString();

                    //Frequency of the two linked words
                    nTwoWordsFreq = biDict.GetFrequency(sTwoWords, 3);

                    //-log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
                    dValue = -Math.Log(smoothPara * (1.0 + dCurFreqency) / (Predefine.MAX_FREQUENCE + 80000.0)
                                       + (1.0 - smoothPara) * ((1.0 - dTemp) * nTwoWordsFreq / (1.0 + dCurFreqency) +
                                                               dTemp));

                    //Unknown words: P(Wi|Ci); known words: 1.
                    //The term added must be the word's own weight; the previous code added
                    //nPOS, which is a tag id rather than a probability term.
                    if (pCur.Content.nPOS < 0)
                    {
                        dValue += pCur.Content.eWeight;
                    }

                    //Get the position indices of both words in the position map table
                    nCurWordIndex  = Utility.BinarySearch(pCur.row * Predefine.MAX_SENTENCE_LEN + pCur.col, m_npWordPosMapTable);
                    nNextWordIndex = Utility.BinarySearch(pNextWords.row * Predefine.MAX_SENTENCE_LEN + pNextWords.col, m_npWordPosMapTable);

                    aBiWordNet.SetElement(nCurWordIndex, nNextWordIndex, new ChainContent(dValue, pCur.Content.nPOS, sTwoWords));

                    pNextWords = pNextWords.next; //Get next word
                }
                pCur = pCur.next;
            }

            return(aBiWordNet);
        }
Example #9
0
        /// <summary>
        /// Scans the current best role-tag sequence for person-name patterns and records every
        /// accepted match in m_nUnknownWords / m_dWordsPossibility.
        /// </summary>
        /// <remarks>
        /// Each tag value t in m_nBestTag is rendered as the character 'A' + t, so pattern
        /// letters map directly onto role tags. Patterns are tried in array order at each
        /// position; the first accepted one wins and the scan jumps past it.
        /// </remarks>
        /// <param name="personDict">Dictionary handed to ComputePossibility when scoring a match.</param>
        /// <returns>Always true.</returns>
        public bool PersonRecognize(WordDictionary personDict)
        {
            //The empty pattern with length 0 terminates the list
            string[] sPatterns = { "BBCD", "BBC", "BBE", "BBZ", "BCD", "BEE", "BE", "BG", "BXD", "BZ", "CDCD", "CD", "EE", "FB", "Y", "XD", "" };
            double[] dFactor   = { 0.003606, 0.000021, 0.001314, 0.000315, 0.656624, 0.000021, 0.146116, 0.009136,
                                   0.000042,   0.038971,        0, 0.090367, 0.000273, 0.009157, 0.034324, 0.009735, 0 };
            int[] nPatternLen  = { 4, 3, 3, 3, 3, 3, 2, 2, 3, 2, 4, 2, 2, 2, 1, 2, 0 };

            /*------------------------------------
            *  About the parameters (pattern, count, factor):
            *
            *  BBCD  343      0.003606
            *  BBC   2        0.000021
            *  BBE   125      0.001314
            *  BBZ   30       0.000315
            *  BCD   62460    0.656624
            *  BEE   0        0.000000
            *  BE    13899    0.146116
            *  BG    869      0.009136
            *  BXD   4        0.000042
            *  BZ    3707     0.038971
            *  CD    8596     0.090367
            *  EE    26       0.000273
            *  FB    871      0.009157
            *  Y     3265     0.034324
            *  XD    926      0.009735
            *
            *  The person recognition pattern set
            *  BBCD: surname + surname + given-name char 1 + given-name char 2
            *  BBE:  surname + surname + single-char given name
            *  BBZ:  surname + surname + two-char given name that forms a word
            *  BCD:  surname + given-name char 1 + given-name char 2
            *  BE:   surname + single-char given name
            *  BEE:  surname + single-char name + single-char name, e.g. 韩磊磊
            *  BG:   surname + suffix
            *  BXD:  surname + (surname and first given-name char forming a word) + last given-name char
            *  BZ:   surname + two-char given name that forms a word
            *  B:    surname
            *  CD:   given-name char 1 + given-name char 2
            *  EE:   single-char name + single-char name
            *  FB:   prefix + surname
            *  XD:   (surname and first given-name char forming a word) + last given-name char
            *  Y:    surname and single-char given name forming a word
            *  ------------------------------------*/

            //Render the tag sequence as a string; index 0 is a 'z' placeholder so that
            //string positions line up with tag positions
            StringBuilder sb = new StringBuilder();
            int           i;

            sb.Append('z');
            for (i = 1; m_nBestTag[i] > -1; i++)
            {
                sb.Append(Convert.ToChar(m_nBestTag[i] + Convert.ToInt32('A')));
            }
            string sPOS = sb.ToString();

            int    j = 1, k, nPos; //Find the proper pattern from the first POS
            string sPersonName;
            bool   bMatched;

            while (j < i)
            {
                bMatched = false;
                for (k = 0; !bMatched && nPatternLen[k] > 0; k++)
                {
                    //Tags are plain codes, not linguistic text: compare ordinally.
                    //A candidate is also rejected when it touches a "·" on either side.
                    if (string.CompareOrdinal(sPatterns[k], 0, sPOS, j, nPatternLen[k]) != 0 ||
                        string.Equals(m_sWords[j - 1], "·", StringComparison.Ordinal) ||
                        string.Equals(m_sWords[j + nPatternLen[k]], "·", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    //Rule 1 for exclusion: prefix + surname followed by a given-name role
                    //(E, C or G) invalidates the (prefix + surname) pattern.
                    //The tag after the match may not exist, so guard the index.
                    if (string.Equals(sPatterns[k], "FB", StringComparison.Ordinal) && j + 2 < sPOS.Length)
                    {
                        char cFollowing = sPOS[j + 2];
                        if (cFollowing == 'E' || cFollowing == 'C' || cFollowing == 'G')
                        {
                            continue;
                        }
                    }

                    //NOTE: further exclusion rules of the original implementation (BEE/EE
                    //repeated-character check, lone-surname suffix check, all-foreign-character
                    //checks, low-frequency role check) are disabled and therefore not evaluated here.

                    if (string.Equals(sPatterns[k], "CDCD", StringComparison.Ordinal))
                    {
                        //The pattern (name1 + name2 + name1 + name2) is itself an exclusion rule,
                        //e.g. 女高音歌唱家迪里拜尔演唱.
                        //Rule 3 for exclusion: it applies when the candidate contains foreign-name
                        //characters; otherwise the exclusion does not apply, e.g. 黑妞白妞姐俩拔了头筹。
                        //The candidate text is only needed for this check, so build it here.
                        sPersonName = string.Empty;
                        for (nPos = j; nPos < j + nPatternLen[k]; nPos++)
                        {
                            sPersonName += m_sWords[nPos];
                        }
                        if (Utility.GetForeignCharCount(sPersonName) > 0)
                        {
                            j += nPatternLen[k] - 1;
                        }
                        continue;
                    }

                    //Record the match: start/end word positions and its score
                    m_nUnknownWords[m_nUnknownWordsCount, 0]  = m_nWordPosition[j];
                    m_nUnknownWords[m_nUnknownWordsCount, 1]  = m_nWordPosition[j + nPatternLen[k]];
                    //Multiplying by the pattern factor becomes adding -log(factor) in the log domain
                    m_dWordsPossibility[m_nUnknownWordsCount] = -Math.Log(dFactor[k]) + ComputePossibility(j, nPatternLen[k], personDict);
                    m_nUnknownWordsCount += 1;
                    j       += nPatternLen[k];
                    bMatched = true;
                }
                if (!bMatched)
                {
                    //Not matched, advance by one tag
                    j += 1;
                }
            }
            return(true);
        }