Esempio n. 1
0
        /// <summary>
        /// Console smoke test: loads the core and bigram dictionaries, builds the
        /// word net for a fixed sample sentence and prints the resulting bigram graph.
        /// Returns early (after printing a message) if either dictionary fails to load.
        /// </summary>
        public static void TestBiGraphGenerate()
        {
            // Core dictionary first; nothing else is attempted if it cannot be loaded.
            WordDictionary coreDict = new WordDictionary();
            bool coreLoaded = coreDict.Load(coreDictFile);
            if (!coreLoaded)
            {
                Console.WriteLine("coreDict 字典装入错误!");
                return;
            }

            // Then the bigram dictionary.
            WordDictionary biDict = new WordDictionary();
            bool biLoaded = biDict.Load(biDictFile);
            if (!biLoaded)
            {
                Console.WriteLine("字典装入错误!");
                return;
            }

            // Wrap the sample text in the sentence begin/end markers.
            string sentence = Predefine.SENTENCE_BEGIN + @"他说的确实在理" + Predefine.SENTENCE_END;

            // Step 1: atom segmentation.
            List<AtomNode> atoms = Segment.AtomSegment(sentence);

            // Step 2: build the word net from the atoms using the core dictionary.
            RowFirstDynamicArray<ChainContent> wordNet = Segment.GenerateWordNet(atoms, coreDict);

            // Step 3: build the bigram graph (smoothing parameter 0.1) and print it.
            ColumnFirstDynamicArray<ChainContent> biGraph = Segment.BiGraphGenerate(wordNet, 0.1, biDict, coreDict);
            Console.WriteLine(biGraph.ToString());
        }
      /// <summary>
      /// Runs the bigram segmentation pipeline on a sentence: atom segmentation,
      /// word-net generation, bigram-graph generation and N-shortest-path search,
      /// then converts every returned path into a word sequence stored in m_pWordSeg.
      /// An On* notification is raised after each stage.
      /// </summary>
      /// <param name="sSentence">Sentence passed to AtomSegment.</param>
      /// <param name="smoothPara">Smoothing parameter forwarded to BiGraphGenerate.</param>
      /// <param name="nKind">Forwarded to NShortPath.Calculate as its second argument.</param>
      /// <returns>The number of word sequences stored in m_pWordSeg.</returns>
      /// <exception cref="Exception">biDict or coreDict is null.</exception>
      public int BiSegment(string sSentence, double smoothPara, int nKind)
      {
         // Both dictionaries are required before any work can be done.
         if (!(biDict != null && coreDict != null))
            throw new Exception("biDict 或 coreDict 尚未初始化!");

         // Stage 1: atom segmentation.
         atomSegment = AtomSegment(sSentence);
         OnAtomSegment(atomSegment);

         // Stage 2: word net built from the atoms and the core dictionary.
         segGraph = GenerateWordNet(atomSegment, coreDict);
         OnGenSegGraph(segGraph);

         // Stage 3: bigram graph over pairs of words from the word net.
         biGraphResult = BiGraphGenerate(segGraph, smoothPara, biDict, coreDict);
         OnGenBiSegGraph(biGraphResult);

         // Stage 4: N-shortest-path search yields the candidate segmentations.
         NShortPath.Calculate(biGraphResult, nKind);
         List<int[]> candidatePaths = NShortPath.GetNPaths(Predefine.MAX_SEGMENT_NUM);
         OnNShortPath(candidatePaths, segGraph);

         m_pWordSeg = new List<WordResult[]>();
         m_graphOptimum = new RowFirstDynamicArray<ChainContent>();

         // Turn each path into a word sequence; paths that yield null are dropped.
         foreach (int[] path in candidatePaths)
         {
            WordLinkedArray linkedWords = BiPath2LinkedArray(path, segGraph, atomSegment);
            WordResult[] words = GenerateWord(path, linkedWords, m_graphOptimum);

            if (words == null)
               continue;

            m_pWordSeg.Add(words);
         }

         OnBeforeOptimize(m_pWordSeg);

         return m_pWordSeg.Count;
      }
Esempio n. 3
0
        //====================================================================
        // Computes the candidate paths reaching every node, preparing the
        // data that later path retrieval relies on.
        //====================================================================
        public static void Calculate(ColumnFirstDynamicArray<ChainContent> apCost, int nValueKind)
        {
            InitNShortPath(apCost, nValueKind);

            CQueue queWork = new CQueue();

            for (int node = 1; node < m_nNode; node++)
            {
                // m_pWeight / m_pParent do not store the first node, hence the shift.
                int slot = node - 1;

                // Collect the edges arriving at this node into the work queue.
                EnQueueCurNodeEdges(ref queWork, node);

                // Every rank starts out as "unreachable".
                for (int k = 0; k < m_nValueKind; k++)
                    m_pWeight[slot][k] = Predefine.INFINITE_VALUE;

                // Drain the work queue into m_pWeight / m_pParent. Each rank takes the
                // weight of the next pending element together with every following
                // element of exactly the same weight. The loop stops as soon as the
                // queue is exhausted or all m_nValueKind ranks are filled.
                QueueElement pending = queWork.DeQueue();
                for (int rank = 0; pending != null && rank < m_nValueKind; rank++)
                {
                    double rankWeight = pending.eWeight;
                    m_pWeight[slot][rank] = rankWeight;

                    do
                    {
                        m_pParent[slot][rank].EnQueue(new QueueElement(pending.nParent, pending.nIndex, 0));
                        pending = queWork.DeQueue();
                    } while (pending != null && pending.eWeight == rankWeight);
                }
            }
        }
Esempio n. 4
0
      /// <summary>
      /// Stores the cost matrix and the value kind, derives the node count and
      /// allocates the per-node parent queues and weight arrays.
      /// </summary>
      /// <param name="apCost">Cost matrix; its ColumnCount determines the node count.</param>
      /// <param name="nValueKind">Number of parent queues / weight slots allocated per node.</param>
      private void InitNShortPath(ColumnFirstDynamicArray<ChainContent> apCost, int nValueKind)
      {
         m_apCost = apCost;
         m_nValueKind = nValueKind;

         // Number of vertices.
         // Note by zhenyulu: the original program used
         //   m_nNode = Math.Max(apCost.ColumnCount, apCost.RowCount) + 1;
         // but apCost.ColumnCount should always exceed apCost.RowCount, hence this form.
         int slotCount = apCost.ColumnCount;
         m_nNode = slotCount + 1;

         // The first node is not stored, so there is one slot fewer than nodes.
         m_pParent = new CQueue[slotCount][];
         m_pWeight = new double[slotCount][];

         for (int node = 0; node < slotCount; node++)
         {
            CQueue[] parentQueues = new CQueue[nValueKind];
            m_pParent[node] = parentQueues;
            m_pWeight[node] = new double[nValueKind];

            // One empty queue per value kind.
            for (int kind = 0; kind < nValueKind; kind++)
               parentQueues[kind] = new CQueue();
         }
      }
Esempio n. 5
0
        /// <summary>
        /// Resets the static N-shortest-path state: remembers the cost matrix and
        /// value kind, derives the node count from the matrix and allocates an
        /// empty parent queue and a weight slot per (node, value kind) pair.
        /// </summary>
        /// <param name="apCost">Cost matrix; its ColumnCount determines the node count.</param>
        /// <param name="nValueKind">Number of parent queues / weight slots allocated per node.</param>
        private static void InitNShortPath(ColumnFirstDynamicArray<ChainContent> apCost, int nValueKind)
        {
            m_apCost = apCost;          // cost matrix
            m_nValueKind = nValueKind;  // value kind

            // Number of vertices.
            // Note by zhenyulu: the original program used
            //   m_nNode = Math.Max(apCost.ColumnCount, apCost.RowCount) + 1;
            // but apCost.ColumnCount should always exceed apCost.RowCount, hence this form.
            m_nNode = apCost.ColumnCount + 1;

            // The first node is not included in either table.
            int rows = m_nNode - 1;
            m_pParent = new CQueue[rows][];
            m_pWeight = new double[rows][];

            int node = 0;
            while (node < rows)
            {
                m_pParent[node] = new CQueue[nValueKind];
                m_pWeight[node] = new double[nValueKind];

                // Give every value kind of this node its own empty queue.
                int kind = 0;
                while (kind < nValueKind)
                {
                    m_pParent[node][kind] = new CQueue();
                    kind++;
                }

                node++;
            }
        }
Esempio n. 6
0
 /// <summary>
 /// Sends a GenBiSegGraph stage event carrying the textual form of the bigram graph.
 /// </summary>
 /// <param name="biGraph">Bigram graph whose ToString() output is attached to the event.</param>
 private void OnGenBiSegGraph(ColumnFirstDynamicArray<ChainContent> biGraph)
 {
     string graphText = biGraph.ToString();
     SegmentEventArgs stageArgs = new SegmentEventArgs(SegmentStage.GenBiSegGraph, graphText);
     SendEvents(stageArgs);
 }
Esempio n. 7
0
        /// <summary>
        /// Runs the bigram segmentation pipeline on a sentence: atom segmentation,
        /// word-net generation, bigram-graph generation and N-shortest-path search,
        /// then converts every returned path into a word sequence stored in m_pWordSeg.
        /// An On* notification is raised after each stage.
        /// </summary>
        /// <param name="sSentence">Sentence passed to AtomSegment.</param>
        /// <param name="smoothPara">Smoothing parameter forwarded to BiGraphGenerate.</param>
        /// <param name="nKind">Forwarded to NShortPath.Calculate as its second argument.</param>
        /// <returns>The number of word sequences stored in m_pWordSeg.</returns>
        /// <exception cref="Exception">biDict or coreDict is null.</exception>
        public int BiSegment(string sSentence, double smoothPara, int nKind)
        {
            // The message literal was previously mis-encoded (unreadable replacement
            // characters); it now carries the readable text.
            if (biDict == null || coreDict == null)
                throw new Exception("biDict 或 coreDict 尚未初始化!");

            //--- Atom segmentation
            atomSegment = AtomSegment(sSentence);
            OnAtomSegment(atomSegment);

            //--- Look up the dictionary and store all candidate words in the word net
            segGraph = GenerateWordNet(atomSegment, coreDict);
            OnGenSegGraph(segGraph);

            //--- Generate all possible pairwise (bigram) combinations
            biGraphResult = BiGraphGenerate(segGraph, smoothPara, biDict, coreDict);
            OnGenBiSegGraph(biGraphResult);

            //--- N-shortest-path search produces the candidate segmentations
            NShortPath.Calculate(biGraphResult, nKind);
            List<int[]> spResult = NShortPath.GetNPaths(Predefine.MAX_SEGMENT_NUM);
            OnNShortPath(spResult, segGraph);

            m_pWordSeg = new List<WordResult[]>();
            m_graphOptimum = new RowFirstDynamicArray<ChainContent>();

            for (int i = 0; i < spResult.Count; i++)
            {
                // Locals are only meaningful per path, so they live inside the loop.
                WordLinkedArray linkedArray = BiPath2LinkedArray(spResult[i], segGraph, atomSegment);
                WordResult[] tmpResult = GenerateWord(spResult[i], linkedArray, m_graphOptimum);

                // Paths for which GenerateWord returns null are skipped.
                if (tmpResult != null)
                    m_pWordSeg.Add(tmpResult);
            }

            OnBeforeOptimize(m_pWordSeg);

            return m_pWordSeg.Count;
        }
Esempio n. 8
0
        //====================================================================
        // Builds the bigram graph: for every word in the word net and every
        // word that starts where it ends, one element is stored whose weight
        // is -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} (0<a<1, a = smoothPara).
        //====================================================================
        public static ColumnFirstDynamicArray<ChainContent> BiGraphGenerate(
         RowFirstDynamicArray<ChainContent> aWord, double smoothPara, WordDictionary biDict, WordDictionary coreDict)
        {
            ColumnFirstDynamicArray<ChainContent> biWordNet = new ColumnFirstDynamicArray<ChainContent>();
            StringBuilder pairKey = new StringBuilder();

            // Smoothing constant; independent of the words, so it is computed once.
            double dTemp = 1.0 / Predefine.MAX_FREQUENCE;

            // Position map of the possible words, used to turn (row, col) into an index.
            int[] wordPosMap = PreparePositionMap(aWord);

            for (ChainItem<ChainContent> cur = aWord.GetHead(); cur != null; cur = cur.next)
            {
                // Unknown words (nPOS < 0) take their frequency from the core
                // dictionary; all others use the weight stored on the element.
                double curFreq;
                if (cur.Content.nPOS < 0)
                    curFreq = coreDict.GetFrequency(cur.Content.sWord, 2);
                else
                    curFreq = cur.Content.eWeight;

                // Walk the words whose row equals cur.col (mind the row/column correspondence).
                for (ChainItem<ChainContent> next = aWord.GetFirstElementOfRow(cur.col);
                     next != null && next.row == cur.col;
                     next = next.next)
                {
                    // Dictionary key: current word + separator + next word.
                    pairKey.Length = 0;
                    pairKey.Append(cur.Content.sWord)
                           .Append(Predefine.WORD_SEGMENTER)
                           .Append(next.Content.sWord);
                    string sTwoWords = pairKey.ToString();

                    // Frequency of the two linked words.
                    int pairFreq = biDict.GetFrequency(sTwoWords, 3);

                    //-log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
                    double dValue = -Math.Log(smoothPara * (1.0 + curFreq) / (Predefine.MAX_FREQUENCE + 80000.0)
                      + (1.0 - smoothPara) * ((1.0 - dTemp) * pairFreq / (1.0 + curFreq) +
                      dTemp));

                    // Unknown words: P(Wi|Ci); known words: 1.
                    // NOTE(review): the value added is nPOS itself; confirm that this,
                    // rather than the element's weight, is the intended term.
                    if (cur.Content.nPOS < 0)
                        dValue += cur.Content.nPOS;

                    // Indices of both words in the position map table.
                    int curIndex = Utility.BinarySearch(cur.row * Predefine.MAX_SENTENCE_LEN + cur.col, wordPosMap);
                    int nextIndex = Utility.BinarySearch(next.row * Predefine.MAX_SENTENCE_LEN + next.col, wordPosMap);

                    biWordNet.SetElement(curIndex, nextIndex, new ChainContent(dValue, cur.Content.nPOS, sTwoWords));
                }
            }

            return biWordNet;
        }
Esempio n. 9
0
      //====================================================================
      // Computes the candidate paths reaching every node, preparing the
      // data that later path retrieval relies on.
      //====================================================================
      public void Calculate(ColumnFirstDynamicArray<ChainContent> apCost, int nValueKind)
      {
         InitNShortPath(apCost, nValueKind);

         CQueue queWork = new CQueue();

         for (int nCurNode = 1; nCurNode < m_nNode; nCurNode++)
         {
            // Tables skip the first node, so node k lives at index k - 1.
            int idx = nCurNode - 1;

            // Queue up the edges that lead to the current node
            // (per the original comment, ordered by eWeight).
            EnQueueCurNodeEdges(ref queWork, nCurNode);

            // Reset every weight slot of this node.
            for (int k = 0; k < m_nValueKind; k++)
               m_pWeight[idx][k] = Predefine.INFINITE_VALUE;

            // Move the queued edges into m_pWeight / m_pParent, one rank per
            // distinct weight, until the queue or the ranks run out.
            QueueElement head = queWork.DeQueue();
            int rank = 0;
            while (head != null && rank < m_nValueKind)
            {
               double rankWeight = head.eWeight;
               m_pWeight[idx][rank] = rankWeight;

               // All consecutive edges with exactly this weight share the rank.
               for (; ; )
               {
                  m_pParent[idx][rank].EnQueue(new QueueElement(head.nParent, head.nIndex, 0));
                  head = queWork.DeQueue();
                  if (head == null || head.eWeight != rankWeight)
                     break;
               }

               rank++;
            }
         }
      }
Esempio n. 10
0
 /// <summary>
 /// Sends a BcakwardOptimize stage event carrying the textual form of the bigram graph.
 /// </summary>
 /// <param name="biGraph">Bigram graph whose ToString() output is attached to the event.</param>
 private void OnBackwardOptimize(ColumnFirstDynamicArray<ChainContent> biGraph)
 {
     string graphText = biGraph.ToString();
     SegmentEventArgs stageArgs = new SegmentEventArgs(SegmentStage.BcakwardOptimize, graphText);
     SendEvents(stageArgs);
 }
Esempio n. 11
0
        /// <summary>
        /// Runs the first three segmentation stages (atom segmentation, word-net
        /// generation, bigram-graph generation) and returns the bigram graph.
        /// Only the atom-segmentation notification is raised; the OnGenSegGraph /
        /// OnGenBiSegGraph notifications and the BackwardOptimize pass are not invoked.
        /// </summary>
        /// <param name="sSentence">Sentence passed to AtomSegment.</param>
        /// <param name="smoothPara">Smoothing parameter forwarded to BiGraphGenerate.</param>
        /// <param name="nKind">Not used by this method.</param>
        /// <returns>The bigram graph, which is also stored in biGraphResult.</returns>
        /// <exception cref="Exception">biDict or coreDict is null.</exception>
        public ColumnFirstDynamicArray<ChainContent> TestSegment(string sSentence, double smoothPara, int nKind)
        {
            // The message literal was previously mis-encoded (unreadable replacement
            // characters); it now carries the readable text.
            if (biDict == null || coreDict == null)
                throw new Exception("biDict 或 coreDict 尚未初始化!");

            //--- Atom segmentation
            atomSegment = AtomSegment(sSentence);
            OnAtomSegment(atomSegment);

            //--- Look up the dictionary and store all candidate words in the word net
            segGraph = GenerateWordNet(atomSegment, coreDict);

            //--- Generate all possible pairwise (bigram) combinations
            biGraphResult = BiGraphGenerate(segGraph, smoothPara, biDict, coreDict);

            return biGraphResult;
        }
Esempio n. 12
0
        /// <summary>
        /// Multiplies the weight of every element reachable from the head of
        /// <paramref name="test"/> by backNum raised to the element's column index.
        /// The array is modified in place.
        /// </summary>
        /// <param name="test">Graph whose element weights are rescaled.</param>
        /// <returns>The same <paramref name="test"/> instance, after rescaling.</returns>
        public static ColumnFirstDynamicArray<ChainContent> BackwardOptimize(ColumnFirstDynamicArray<ChainContent> test)
        {
            for (ChainItem<ChainContent> pCur = test.GetHead(); pCur != null; pCur = pCur.next)
            {
                // The scale factor depends only on the column of the element.
                double multiNum = Math.Pow(backNum, pCur.col);
                pCur.Content.eWeight *= multiNum;
            }

            return test;
        }
Esempio n. 13
0
        /// <summary>
        /// Console demo of NShortPath on a real sentence: initialises the dictionaries
        /// from the "Data" directory under the current directory, builds the bigram
        /// graph for a fixed sentence, runs the N-shortest-path calculation and prints
        /// the paths per index, the best path and at most five paths.
        /// </summary>
        public static void TestNShortPath()
        {
            int pathKinds = 2;
            int maxPaths = 5;

            // Dictionary directory (section edited by SharpKey).
            string dictPath = Path.Combine(Environment.CurrentDirectory, "Data") + Path.DirectorySeparatorChar;

            Console.WriteLine("正在初始化字典库,请稍候...");
            WordSegment wordSegment = new WordSegment();
            wordSegment.InitWordSegment(dictPath);

            // Segmenter built on the bigram and core dictionaries loaded above.
            Segment segmenter = new Segment(wordSegment.m_dictBigram, wordSegment.m_dictCore);
            ColumnFirstDynamicArray<ChainContent> apCost = segmenter.TestSegment("始##始这个人的确实在末##末", 0.1, 2);
            Console.WriteLine(apCost.ToString());

            NShortPath.Calculate(apCost, pathKinds);
            NShortPath.printResultByIndex();

            //----------------------------------------------------
            // All paths, grouped by index
            //----------------------------------------------------
            Console.WriteLine("\r\n\r\n所有路径:");
            for (int kind = 0; kind < pathKinds; kind++)
            {
                foreach (int[] path in NShortPath.GetPaths(kind))
                {
                    foreach (int node in path)
                    {
                        Console.Write("{0}, ", node);
                    }

                    Console.WriteLine();
                }
                Console.WriteLine("========================");
            }

            //----------------------------------------------------
            // Best path
            //----------------------------------------------------
            Console.WriteLine("\r\n最佳路径:");
            foreach (int node in NShortPath.GetBestPath())
            {
                Console.Write("{0}, ", node);
            }

            Console.WriteLine();

            //----------------------------------------------------
            // At most maxPaths paths
            //----------------------------------------------------
            Console.WriteLine("\r\n最多 {0} 条路径:", maxPaths);
            foreach (int[] path in NShortPath.GetNPaths(maxPaths))
            {
                foreach (int node in path)
                {
                    Console.Write("{0}, ", node);
                }

                Console.WriteLine();
            }
        }
Esempio n. 14
0
        /// <summary>
        /// Console demo of NShortPath on a small hand-built graph: fills a cost matrix
        /// with ten weighted edges, runs the N-shortest-path calculation and prints
        /// the paths per index, the best path and at most five paths.
        /// </summary>
        public static void TestNShortPath()
        {
            int pathKinds = 2;
            int maxPaths = 5;

            // Each row is { row, column, weight }, inserted in this order.
            int[,] edges =
            {
                { 0, 1, 1 },
                { 1, 2, 1 },
                { 1, 3, 2 },
                { 2, 3, 1 },
                { 2, 4, 1 },
                { 3, 4, 1 },
                { 4, 5, 1 },
                { 3, 6, 2 },
                { 4, 6, 3 },
                { 5, 6, 1 },
            };

            ColumnFirstDynamicArray<ChainContent> apCost = new ColumnFirstDynamicArray<ChainContent>();
            for (int e = 0; e < edges.GetLength(0); e++)
            {
                apCost.SetElement(edges[e, 0], edges[e, 1], new ChainContent(edges[e, 2]));
            }
            Console.WriteLine(apCost.ToString());

            NShortPath.Calculate(apCost, pathKinds);
            NShortPath.printResultByIndex();

            //----------------------------------------------------
            // All paths, grouped by index
            //----------------------------------------------------
            Console.WriteLine("\r\n\r\n所有路径:");
            for (int kind = 0; kind < pathKinds; kind++)
            {
                foreach (int[] path in NShortPath.GetPaths(kind))
                {
                    foreach (int node in path)
                    {
                        Console.Write("{0}, ", node);
                    }

                    Console.WriteLine();
                }
                Console.WriteLine("========================");
            }

            //----------------------------------------------------
            // Best path
            //----------------------------------------------------
            Console.WriteLine("\r\n最佳路径:");
            foreach (int node in NShortPath.GetBestPath())
            {
                Console.Write("{0}, ", node);
            }

            Console.WriteLine();

            //----------------------------------------------------
            // At most maxPaths paths
            //----------------------------------------------------
            Console.WriteLine("\r\n最多 {0} 条路径:", maxPaths);
            foreach (int[] path in NShortPath.GetNPaths(maxPaths))
            {
                foreach (int node in path)
                {
                    Console.Write("{0}, ", node);
                }

                Console.WriteLine();
            }
        }