C# (CSharp) SharpICTCLAS WordDictionary.GetMaxMatch Examples

Programming Language: C# (CSharp)

Namespace/Package Name: SharpICTCLAS

Class/Type: WordDictionary

Method/Function: GetMaxMatch

Examples at hotexamples.com: 3

C# (CSharp) SharpICTCLAS WordDictionary.GetMaxMatch - 3 examples found. These are the top rated real world C# (CSharp) examples of SharpICTCLAS.WordDictionary.GetMaxMatch extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

GetFrequency(5)

GetWordInfo(2)

IsExist(2)

Load(2)

AddItem(1)

AddWord(1)

GetMaxMatch(1)

ReleaseDict(1)

Save(1)

ToWordDictionary(1)

Example #1

Show file

        //====================================================================
        // Func Name  : GenerateWordNet
        // Description: Build the segmentation word net for a sentence that
        //              has already been split into atoms
        // Parameters : atomSegment: atoms of the sentence, in order
        //              coreDict   : core dictionary used for word lookup
        // Returns    : the graph; element (i, j) holds the candidate word
        //              made of atoms i .. j-1
        //====================================================================
        public static RowFirstDynamicArray <ChainContent> GenerateWordNet(List <AtomNode> atomSegment, WordDictionary coreDict)
        {
            string sWord, sMaxMatchWord;
            int    nPOSRet, nPOS, nTotalFreq;
            double dValue;

            RowFirstDynamicArray <ChainContent> m_segGraph = new RowFirstDynamicArray <ChainContent>();

            m_segGraph.SetEmpty();

            // Pass 1: store one element (i, i + 1) for every single atom.
            for (int i = 0; i < atomSegment.Count; i++)
            {
                if (atomSegment[i].nPOS == Predefine.CT_CHINESE)
                {
                    // Chinese atoms keep their text, with value 0 and POS 0.
                    m_segGraph.SetElement(i, i + 1, new ChainContent(0, 0, atomSegment[i].sWord));
                    continue;
                }

                // Non-Chinese atoms start from the atom text and MAX_FREQUENCE;
                // the switch below overrides both for numbers and strings.
                sWord  = atomSegment[i].sWord;
                dValue = Predefine.MAX_FREQUENCE;
                switch (atomSegment[i].nPOS)
                {
                case Predefine.CT_INDEX:
                case Predefine.CT_NUM:
                    nPOS   = -27904; // -('m' * 256)
                    sWord  = "未##数"; // placeholder word stored instead of the number text
                    dValue = 0;
                    break;

                case Predefine.CT_DELIMITER:
                    nPOS = 30464; // 'w' * 256
                    break;

                case Predefine.CT_LETTER:
                    nPOS   = -28280; // -('n' * 256 + 'x')
                    sWord  = "未##串"; // placeholder word stored instead of the letter string
                    dValue = 0;
                    break;

                case Predefine.CT_SINGLE: // e.g. 12021-2129-3121
                    // An optionally signed integer or decimal is stored as a
                    // number; anything else is stored as a string.
                    if (Regex.IsMatch(atomSegment[i].sWord, @"^(-?\d+)(\.\d+)?$"))
                    {
                        nPOS  = -27904; // -('m' * 256)
                        sWord = "未##数";
                    }
                    else
                    {
                        nPOS  = -28280; // -('n' * 256 + 'x')
                        sWord = "未##串";
                    }
                    dValue = 0;
                    break;

                default:
                    nPOS = atomSegment[i].nPOS;
                    break;
                }
                m_segGraph.SetElement(i, i + 1, new ChainContent(dValue, nPOS, sWord));
            }

            // Pass 2: starting at every atom, grow a candidate by appending the
            // following atoms for as long as the dictionary reports a match.
            // NOTE(review): because of the "j < atomSegment.Count" bound, a
            // candidate that ends on the last atom is never looked up;
            // presumably the last atom is an end-of-sentence marker - confirm.
            for (int i = 0; i < atomSegment.Count; i++)
            {
                sWord = atomSegment[i].sWord;
                int j = i + 1;

                while (j < atomSegment.Count && coreDict.GetMaxMatch(sWord, out sMaxMatchWord, out nPOSRet))
                {
                    if (sMaxMatchWord == sWord) // the candidate itself is a dictionary word
                    {
                        // Do not build words such as 年内 / 年末 / 月初 right after a
                        // number ("1年内", "1999年末"): stop extending from this atom.
                        // Plain char comparisons are used so the test never depends
                        // on the current culture. It runs before the dictionary
                        // lookup because the lookup result is discarded on break.
                        if (sWord.Length == 2 && (sWord[0] == '年' || sWord[0] == '月') && i >= 1 &&
                            "末内中底前间初".IndexOf(sWord[1]) >= 0 &&
                            (Utility.IsAllNum(atomSegment[i - 1].sWord) ||
                             Utility.IsAllChineseNum(atomSegment[i - 1].sWord)))
                        {
                            break;
                        }

                        // A word may carry several parts of speech; the stored
                        // value is the sum of the frequencies of all of them.
                        WordInfo info = coreDict.GetWordInfo(sWord);
                        nTotalFreq = 0;
                        for (int k = 0; k < info.Count; k++)
                        {
                            nTotalFreq += info.Frequencies[k];
                        }

                        // Keep the POS only when it is unambiguous, otherwise record 0.
                        nPOS = (info.Count == 1) ? info.POSs[0] : 0;
                        m_segGraph.SetElement(i, j, new ChainContent(nTotalFreq, nPOS, sWord));
                    }

                    sWord += atomSegment[j++].sWord;
                }
            }
            return(m_segGraph);
        }

Example #2

Show file

File: Segment.cs Project: andylaudotnet/StockFoo

        //====================================================================
        // Func Name  : GenerateWordNet
        // Description: Build the segmentation word net for a sentence that
        //              has already been split into atoms
        // Parameters : atomSegment: atoms of the sentence, in order
        //              coreDict   : core dictionary used for word lookup
        // Returns    : the graph; element (i, j) holds the candidate word
        //              made of atoms i .. j-1
        //====================================================================
        public static RowFirstDynamicArray<ChainContent> GenerateWordNet(List<AtomNode> atomSegment, WordDictionary coreDict)
        {
            string sWord, sMaxMatchWord;
            int nPOSRet, nPOS, nTotalFreq;
            double dValue;

            RowFirstDynamicArray<ChainContent> m_segGraph = new RowFirstDynamicArray<ChainContent>();
            m_segGraph.SetEmpty();

            // Pass 1: store one element (i, i + 1) for every single atom.
            for (int i = 0; i < atomSegment.Count; i++)
            {
                if (atomSegment[i].nPOS == Predefine.CT_CHINESE)
                {
                    // Chinese atoms keep their text, with value 0 and POS 0.
                    m_segGraph.SetElement(i, i + 1, new ChainContent(0, 0, atomSegment[i].sWord));
                    continue;
                }

                // Non-Chinese atoms start from the atom text and MAX_FREQUENCE;
                // the switch below overrides both for numbers and strings.
                sWord = atomSegment[i].sWord;
                dValue = Predefine.MAX_FREQUENCE;
                switch (atomSegment[i].nPOS)
                {
                    case Predefine.CT_INDEX:
                    case Predefine.CT_NUM:
                        nPOS = -27904; // -('m' * 256)
                        sWord = "未##数"; // placeholder word stored instead of the number text
                        dValue = 0;
                        break;
                    case Predefine.CT_DELIMITER:
                        nPOS = 30464; // 'w' * 256
                        break;
                    case Predefine.CT_LETTER:
                        nPOS = -28280; // -('n' * 256 + 'x')
                        dValue = 0;
                        sWord = "未##串"; // placeholder word stored instead of the letter string
                        break;
                    case Predefine.CT_SINGLE: // e.g. 12021-2129-3121
                        // An optionally signed integer or decimal is stored as a
                        // number; anything else is stored as a string.
                        if (Regex.IsMatch(atomSegment[i].sWord, @"^(-?\d+)(\.\d+)?$"))
                        {
                            nPOS = -27904; // -('m' * 256)
                            sWord = "未##数";
                        }
                        else
                        {
                            nPOS = -28280; // -('n' * 256 + 'x')
                            sWord = "未##串";
                        }
                        dValue = 0;
                        break;
                    default:
                        nPOS = atomSegment[i].nPOS;
                        break;
                }
                m_segGraph.SetElement(i, i + 1, new ChainContent(dValue, nPOS, sWord));
            }

            // Pass 2: starting at every atom, grow a candidate by appending the
            // following atoms for as long as the dictionary reports a match.
            // NOTE(review): because of the "j < atomSegment.Count" bound, a
            // candidate that ends on the last atom is never looked up;
            // presumably the last atom is an end-of-sentence marker - confirm.
            for (int i = 0; i < atomSegment.Count; i++)
            {
                sWord = atomSegment[i].sWord;
                int j = i + 1;

                while (j < atomSegment.Count && coreDict.GetMaxMatch(sWord, out sMaxMatchWord, out nPOSRet))
                {
                    if (sMaxMatchWord == sWord) // the candidate itself is a dictionary word
                    {
                        // Do not build words such as 年内 / 年末 / 月初 right after a
                        // number ("1年内", "1999年末"): stop extending from this atom.
                        // Plain char comparisons are used so the test never depends
                        // on the current culture. It runs before the dictionary
                        // lookup because the lookup result is discarded on break.
                        if (sWord.Length == 2 && (sWord[0] == '年' || sWord[0] == '月') && i >= 1 &&
                            "末内中底前间初".IndexOf(sWord[1]) >= 0 &&
                            (Utility.IsAllNum(atomSegment[i - 1].sWord) ||
                             Utility.IsAllChineseNum(atomSegment[i - 1].sWord)))
                        {
                            break;
                        }

                        // A word may carry several parts of speech; the stored
                        // value is the sum of the frequencies of all of them.
                        WordInfo info = coreDict.GetWordInfo(sWord);
                        nTotalFreq = 0;
                        for (int k = 0; k < info.Count; k++)
                        {
                            nTotalFreq += info.Frequencies[k];
                        }

                        // Keep the POS only when it is unambiguous, otherwise record 0.
                        nPOS = (info.Count == 1) ? info.POSs[0] : 0;
                        m_segGraph.SetElement(i, j, new ChainContent(nTotalFreq, nPOS, sWord));
                    }

                    sWord += atomSegment[j++].sWord;
                }
            }
            return m_segGraph;
        }

Example #3

Show file

File: Segment.cs Project: JamalAbuDayyeh/slowandsteadyparser

      //====================================================================
      // Func Name  : GenerateWordNet
      // Description: Build the segmentation word net for a sentence that
      //              has already been split into atoms
      // Parameters : atomSegment: atoms of the sentence, in order
      //              coreDict   : core dictionary used for word lookup
      // Returns    : the graph; element (i, j) holds the candidate word
      //              made of atoms i .. j-1
      //====================================================================
      public static RowFirstDynamicArray<ChainContent> GenerateWordNet(List<AtomNode> atomSegment, WordDictionary coreDict)
      {
         string sWord, sMaxMatchWord;
         int nPOSRet, nPOS, nTotalFreq;
         double dValue;

         RowFirstDynamicArray<ChainContent> m_segGraph = new RowFirstDynamicArray<ChainContent>();
         m_segGraph.SetEmpty();

         // Pass 1: store one element (i, i + 1) for every single atom.
         for (int i = 0; i < atomSegment.Count; i++)
         {
            if (atomSegment[i].nPOS == Predefine.CT_CHINESE)
            {
               // Chinese atoms keep their text, with value 0 and POS 0.
               m_segGraph.SetElement(i, i + 1, new ChainContent(0, 0, atomSegment[i].sWord));
               continue;
            }

            // Non-Chinese atoms start from the atom text and MAX_FREQUENCE;
            // the switch below overrides both for numbers and strings.
            sWord = atomSegment[i].sWord;
            dValue = Predefine.MAX_FREQUENCE;
            switch (atomSegment[i].nPOS)
            {
               case Predefine.CT_INDEX:
               case Predefine.CT_NUM:
                  nPOS = -27904; // -('m' * 256)
                  sWord = "未##数"; // placeholder word stored instead of the number text
                  dValue = 0;
                  break;
               case Predefine.CT_DELIMITER:
                  nPOS = 30464; // 'w' * 256
                  break;
               case Predefine.CT_LETTER:
                  nPOS = -28280; // -('n' * 256 + 'x')
                  dValue = 0;
                  sWord = "未##串"; // placeholder word stored instead of the letter string
                  break;
               case Predefine.CT_SINGLE: // e.g. 12021-2129-3121
                  // An optionally signed integer or decimal is stored as a
                  // number; anything else is stored as a string.
                  if (Regex.IsMatch(atomSegment[i].sWord, @"^(-?\d+)(\.\d+)?$"))
                  {
                     nPOS = -27904; // -('m' * 256)
                     sWord = "未##数";
                  }
                  else
                  {
                     nPOS = -28280; // -('n' * 256 + 'x')
                     sWord = "未##串";
                  }
                  dValue = 0;
                  break;
               default:
                  nPOS = atomSegment[i].nPOS;
                  break;
            }
            m_segGraph.SetElement(i, i + 1, new ChainContent(dValue, nPOS, sWord));
         }

         // Pass 2: starting at every atom, grow a candidate by appending the
         // following atoms for as long as the dictionary reports a match.
         // NOTE(review): because of the "j < atomSegment.Count" bound, a
         // candidate that ends on the last atom is never looked up;
         // presumably the last atom is an end-of-sentence marker - confirm.
         for (int i = 0; i < atomSegment.Count; i++)
         {
            sWord = atomSegment[i].sWord;
            int j = i + 1;

            while (j < atomSegment.Count && coreDict.GetMaxMatch(sWord, out sMaxMatchWord, out nPOSRet))
            {
               if (sMaxMatchWord == sWord) // the candidate itself is a dictionary word
               {
                  // Do not build words such as 年内 / 年末 / 月初 right after a
                  // number ("1年内", "1999年末"): stop extending from this atom.
                  // Plain char comparisons are used so the test never depends
                  // on the current culture. It runs before the dictionary
                  // lookup because the lookup result is discarded on break.
                  if (sWord.Length == 2 && (sWord[0] == '年' || sWord[0] == '月') && i >= 1 &&
                     "末内中底前间初".IndexOf(sWord[1]) >= 0 &&
                     (Utility.IsAllNum(atomSegment[i - 1].sWord) ||
                     Utility.IsAllChineseNum(atomSegment[i - 1].sWord)))
                  {
                     break;
                  }

                  // A word may carry several parts of speech; the stored
                  // value is the sum of the frequencies of all of them.
                  WordInfo info = coreDict.GetWordInfo(sWord);
                  nTotalFreq = 0;
                  for (int k = 0; k < info.Count; k++)
                     nTotalFreq += info.Frequencies[k];

                  // Keep the POS only when it is unambiguous, otherwise record 0.
                  nPOS = (info.Count == 1) ? info.POSs[0] : 0;
                  m_segGraph.SetElement(i, j, new ChainContent(nTotalFreq, nPOS, sWord));
               }

               sWord += atomSegment[j++].sWord;
            }
         }
         return m_segGraph;
      }