Пример #1
0
        /// <summary>
        /// Builds every (Uyghur, Kazak) vertex combination of <paramref name="graph"/> and gives
        /// each one a naive score derived from the in-degrees of its two words.
        /// </summary>
        /// <param name="graph">Graph whose vertices are filtered by their <c>Language</c> value.</param>
        /// <returns>
        /// One <c>WordPair</c> per (Uyghur, Kazak) combination, with <c>Prob</c> set to
        /// (inDegree(u) + inDegree(k)) / (2 * number of Chinese vertices).
        /// </returns>
        public List <WordPair> GenerateAllNaivePairs(BidirectionalGraph <Word, Edge <Word> > graph)
        {
            List <WordPair> pairs = new List <WordPair>();

            // Where() is lazy: materialize the filters once instead of re-scanning all vertices
            // for every Uyghur word (Kazak filter) and for every pair (Chinese count).
            var uWords = graph.Vertices.Where(t => t.Language == Language.Uyghur).ToList();
            var kWords = graph.Vertices.Where(t => t.Language == Language.Kazak).ToList();

            // Loop-invariant denominator. It stays an int so the division below is the same
            // float / int expression as before (a zero count still yields Infinity/NaN, not an exception).
            int denominator = 2 * graph.Vertices.Where(t => t.Language == Language.Chinese).Count();

            foreach (var uWord in uWords)
            {
                float connectedUC = (float)graph.InDegree(uWord);
                foreach (var kWord in kWords)
                {
                    float    connectedCK = (float)graph.InDegree(kWord);
                    WordPair pair        = new WordPair(uWord, kWord);
                    pair.Prob = (connectedUC + connectedCK) / denominator;
                    pairs.Add(pair);
                }
            }

            return pairs;
        }
Пример #2
0
        /// <summary>
        /// Collects the pivot paths between <paramref name="uWord"/> and <paramref name="kWord"/>
        /// in <paramref name="graph"/>, scores the pair from the cached link weights, and assigns
        /// a weight to every path link that is not an edge of the graph.
        /// </summary>
        /// <remarks>
        /// Clears and refills <c>Cache1</c>, and reads and updates <c>LinkWeightCache</c>.
        /// Weights of existing edges are read through the <c>LinkWeightCache</c> indexer, so they
        /// must already be cached when this method runs.
        /// </remarks>
        /// <param name="uWord">Word on the source side; its out-edges lead to pivots.</param>
        /// <param name="kWord">Word on the target side; its in-edges come from pivots.</param>
        /// <param name="graph">Graph that is searched for pivots and edges.</param>
        /// <returns>
        /// A <c>WordPair</c> for the two words with <c>Paths</c> set to all collected paths and
        /// <c>Prob</c> set to the product of the two directional sums.
        /// </returns>
        private WordPair createNewGreenEdges(Word uWord, Word kWord, BidirectionalGraph <Word, Edge <Word> > graph)
        {
            Cache1.Clear();
            List <SPath> paths = new List <SPath>();

            // Pass 1: one path per pivot that uWord points to.
            foreach (var item in graph.OutEdges(uWord))
            {
                SLink linkCU = new SLink(item.Target, uWord);
                linkCU.Exists = graph.ContainsEdge(uWord, item.Target);

                SLink linkCK = new SLink(item.Target, kWord);
                linkCK.Exists = graph.ContainsEdge(item.Target, kWord);

                paths.Add(new SPath(linkCU, linkCK));

                // Remember the pivot so pass 2 does not create the same path again.
                Cache1.Add(item.Target.ID, true);
            }

            // Pass 2: pivots that point to kWord and were not already reached from uWord.
            foreach (var item in graph.InEdges(kWord))
            {
                if (Cache1.ContainsKey(item.Source.ID))
                {
                    continue;
                }
                SLink linkCK = new SLink(item.Source, kWord);
                linkCK.Exists = graph.ContainsEdge(item.Source, kWord);

                SLink linkCU = new SLink(item.Source, uWord);
                linkCU.Exists = graph.ContainsEdge(uWord, item.Source);

                paths.Add(new SPath(linkCU, linkCK));
            }

            // Accumulate the two directional scores over the paths whose links both exist.
            float pUK = 0;
            float pKU = 0;

            foreach (var item in paths)
            {
                if (!item.LinkCU.Exists || !item.LinkCK.Exists) // path contains a non-existent link
                {
                    continue;
                }

                // Each term first sums the reciprocal cached weights of the edges around one end
                // of a link; the sum is inverted after the loops. Cache keys are SLink(pivot, nonPivot).
                float PrCU = 0.0f;
                float PrKC = 0.0f;
                float PrCK = 0.0f;
                float PrUC = 0.0f;
                foreach (var downEdgeCU in graph.OutEdges(item.LinkCU.WordNonPivot))                 // out-edges of the non-pivot of link CU
                {
                    PrCU += 1.0f / LinkWeightCache[new SLink(downEdgeCU.Target, downEdgeCU.Source)]; // P(C|U) = P(C&U)/P(U)
                }
                foreach (var downEdgeCK in graph.OutEdges(item.LinkCK.WordPivot))                    // out-edges of the pivot
                {
                    PrKC += 1.0f / LinkWeightCache[new SLink(downEdgeCK.Source, downEdgeCK.Target)]; // P(K|C) = P(K&C)/P(C)
                }
                foreach (var upEdgeCK in graph.InEdges(item.LinkCK.WordNonPivot))                    // in-edges of the non-pivot of link CK
                {
                    PrCK += 1.0f / LinkWeightCache[new SLink(upEdgeCK.Source, upEdgeCK.Target)];     // P(C|K) = P(C&K)/P(K)
                }
                foreach (var upEdgeCU in graph.InEdges(item.LinkCU.WordPivot))                       // in-edges of the pivot
                {
                    PrUC += 1.0f / LinkWeightCache[new SLink(upEdgeCU.Target, upEdgeCU.Source)];     // P(U|C) = P(U&C)/P(C)
                }

                PrCU = 1.0f / PrCU;
                PrKC = 1.0f / PrKC;
                PrCK = 1.0f / PrCK;
                PrUC = 1.0f / PrUC;
                pUK += PrUC * PrCK;
                pKU += PrKC * PrCU;
            }

            WordPair pair = new WordPair(uWord, kWord);

            pair.Paths = paths;
            pair.Prob  = (float)(pUK * pKU);

            // Give every missing link the best pair probability seen so far for that link.
            foreach (var item in pair.Paths)
            {
                // CU
                if (!item.LinkCU.Exists)
                {
                    float value = 0;
                    if (LinkWeightCache.TryGetValue(item.LinkCU, out value))
                    {
                        if (pair.Prob > value)
                        {
                            // This pair scores higher than the cached weight: raise it.
                            item.LinkCU.Pr = LinkWeightCache[item.LinkCU] = pair.Prob;
                        }
                        else
                        {
                            item.LinkCU.Pr = value;
                        }
                    }
                    else
                    {
                        item.LinkCU.Pr = pair.Prob;
                        LinkWeightCache.Add(item.LinkCU, pair.Prob);
                    }
                }

                // CK
                if (!item.LinkCK.Exists)
                {
                    float value = 0;
                    if (LinkWeightCache.TryGetValue(item.LinkCK, out value))
                    {
                        if (pair.Prob > value)
                        {
                            // This pair scores higher than the cached weight: raise it.
                            LinkWeightCache[item.LinkCK] = item.LinkCK.Pr = pair.Prob;
                        }
                        else
                        {
                            item.LinkCK.Pr = value;
                        }
                    }
                    else
                    {
                        item.LinkCK.Pr = pair.Prob;
                        LinkWeightCache.Add(item.LinkCK, pair.Prob);
                    }
                }
            }
            return pair;
        }
Пример #3
0
        /// <summary>
        /// Rebuilds the working graphs from <paramref name="g"/> and runs <c>symmetryCycle</c>
        /// rounds of pair generation over every Uyghur word, pivot and second-hop target.
        /// </summary>
        /// <remarks>
        /// Resets the <c>LinkCache</c>, <c>LinkWeightCache</c>, <c>graph</c>,
        /// <c>semiCompleteGraph</c> and <c>completeGraph</c> members and advances
        /// <c>currentCycle</c>. The result list is cleared at the start of every cycle, so only
        /// the pairs of the last cycle are returned (an empty list when no cycle runs).
        /// </remarks>
        /// <param name="g">Input graph; its vertices and edges are copied, it is not modified.</param>
        /// <returns>The distinct pairs produced by the last cycle, in discovery order.</returns>
        private List <WordPair> GeneratePossiblePairs(BidirectionalGraph <Word, Edge <Word> > g)
        {
            LinkCache         = new Dictionary <int, SLink>();
            LinkWeightCache   = new Dictionary <SLink, float>();
            graph             = new BidirectionalGraph <Word, Edge <Word> >(false);
            semiCompleteGraph = new BidirectionalGraph <Word, Edge <Word> >(false);
            completeGraph     = new BidirectionalGraph <Word, Edge <Word> >(false);

            #region Prepare graph
            foreach (var item in g.Vertices)
            {
                graph.AddVertex(item);
                semiCompleteGraph.AddVertex(item);
                completeGraph.AddVertex(item);
            }

            foreach (var item in g.Edges)
            {
                // Chinese -> Uyghur edges are stored reversed (Uyghur -> Chinese); every other
                // edge keeps its direction. Each graph gets its own Edge instance.
                bool reversed = item.Source.Language == Console.Language.Chinese && item.Target.Language == Console.Language.Uyghur;
                Word source   = reversed ? item.Target : item.Source;
                Word target   = reversed ? item.Source : item.Target;

                graph.AddEdge(new Edge <Word>(source, target));
                semiCompleteGraph.AddEdge(new Edge <Word>(source, target));
                completeGraph.AddEdge(new Edge <Word>(source, target));
            }
            #endregion

            List <WordPair>    ooPairs   = new List <WordPair>();
            HashSet <WordPair> seenPairs = new HashSet <WordPair>();

            // graph is never modified below, so the filter can be evaluated once for all cycles.
            var uWords = graph.Vertices.Where(t => t.Language == Language.Uyghur).ToList();

            currentCycle = 1;
            while (currentCycle <= symmetryCycle)
            {
                Debug.WriteLine(currentCycle);
                if (currentCycle > 1)
                {
                    // NOTE(review): this aliases the two members rather than copying, so edges
                    // that createNewEdges adds to completeGraph during this cycle are visible
                    // through semiCompleteGraph immediately. Confirm a snapshot was not intended.
                    semiCompleteGraph = completeGraph;
                }
                seenPairs.Clear();
                ooPairs.Clear();

                foreach (var uWord in uWords)
                {
                    foreach (var edge1 in graph.OutEdges(uWord))
                    {
                        Word cWord = edge1.Target;
                        foreach (var edge2 in graph.OutEdges(cWord))
                        {
                            Word kWord = edge2.Target;

                            // createNewEdges is called for every traversal, duplicates included,
                            // because it updates the link-weight cache and completeGraph.
                            WordPair pair = createNewEdges(uWord, kWord, semiCompleteGraph, completeGraph);

                            // Add() returns false when an equal pair is already present.
                            if (seenPairs.Add(pair))
                            {
                                ooPairs.Add(pair);
                            }
                        }
                    }
                }

                currentCycle++;
            }

            return ooPairs;
        }
Пример #4
0
        /// <summary>
        /// Collects the pivot paths between <paramref name="uWord"/> and <paramref name="kWord"/>
        /// in <paramref name="semiCompleteGraph"/>, scores the pair, and records a weight for
        /// every link of those paths in <c>LinkWeightCache</c>.
        /// </summary>
        /// <remarks>
        /// Clears and refills <c>Cache1</c> and reads the <c>currentCycle</c> and
        /// <c>languageOption</c> members. In cycle 1 the score is computed from vertex degrees;
        /// in later cycles it is computed from cached link weights read through the
        /// <c>LinkWeightCache</c> indexer, so those weights must already be cached.
        /// When <c>languageOption</c> is 2, the missing link of an incomplete path is added to
        /// <paramref name="completeGraph"/> as an edge.
        /// </remarks>
        /// <param name="uWord">Word on the source side; its out-edges lead to pivots.</param>
        /// <param name="kWord">Word on the target side; its in-edges come from pivots.</param>
        /// <param name="semiCompleteGraph">Graph that is searched for pivots, edges and degrees.</param>
        /// <param name="completeGraph">Graph that receives edges for missing links.</param>
        /// <returns>
        /// A <c>WordPair</c> with <c>Paths</c>, <c>Prob</c> (product of the two directional sums)
        /// and <c>Polysemy</c> (accumulated pivot degree term scaled by 1 - <c>Prob</c>) set.
        /// </returns>
        private WordPair createNewEdges(Word uWord, Word kWord, BidirectionalGraph <Word, Edge <Word> > semiCompleteGraph, BidirectionalGraph <Word, Edge <Word> > completeGraph)
        {
            Cache1.Clear();
            List <SPath> paths = new List <SPath>();
            WordPair     pair  = new WordPair(uWord, kWord);

            pair.Polysemy = 0f;

            // Pass 1: one path per pivot that uWord points to.
            foreach (var item in semiCompleteGraph.OutEdges(uWord))
            {
                int inDegreePivot  = (int)semiCompleteGraph.InDegree(item.Target);
                int outDegreePivot = (int)semiCompleteGraph.OutDegree(item.Target);
                int totalSenseEdge = (Math.Max(inDegreePivot, outDegreePivot) - 1) * (inDegreePivot + outDegreePivot);
                pair.Polysemy += totalSenseEdge;

                SLink linkCU = new SLink(item.Target, uWord);
                linkCU.Exists = true; // item itself is the uWord -> pivot edge

                SLink linkCK = new SLink(item.Target, kWord);
                linkCK.Exists = semiCompleteGraph.ContainsEdge(item.Target, kWord);

                paths.Add(new SPath(linkCU, linkCK));

                // Remember the pivot so pass 2 does not create the same path again.
                Cache1.Add(item.Target.ID, true);
            }

            // Pass 2: pivots that point to kWord and were not already reached from uWord.
            foreach (var item in semiCompleteGraph.InEdges(kWord))
            {
                if (Cache1.ContainsKey(item.Source.ID))
                {
                    continue;
                }
                int inDegreePivot  = (int)semiCompleteGraph.InDegree(item.Source);
                int outDegreePivot = (int)semiCompleteGraph.OutDegree(item.Source);
                int totalSenseEdge = (Math.Max(inDegreePivot, outDegreePivot) - 1) * (inDegreePivot + outDegreePivot);
                pair.Polysemy += totalSenseEdge;

                SLink linkCK = new SLink(item.Source, kWord);
                linkCK.Exists = true; // item itself is the pivot -> kWord edge

                SLink linkCU = new SLink(item.Source, uWord);
                linkCU.Exists = semiCompleteGraph.ContainsEdge(uWord, item.Source);

                paths.Add(new SPath(linkCU, linkCK));
            }

            // Accumulate the two directional scores over the complete paths.
            float pUK = 0;
            float pKU = 0;

            foreach (var item in paths)
            {
                // An incomplete path contributes nothing to the score; with languageOption 2
                // its missing link is added to completeGraph instead.
                if (!item.LinkCU.Exists)
                {
                    if (languageOption == 2)
                    {
                        completeGraph.AddEdge(new Edge <Word>(item.LinkCU.WordNonPivot, item.LinkCU.WordPivot));
                    }
                    continue;
                }
                if (!item.LinkCK.Exists)
                {
                    if (languageOption == 2)
                    {
                        completeGraph.AddEdge(new Edge <Word>(item.LinkCK.WordPivot, item.LinkCK.WordNonPivot));
                    }
                    continue;
                }

                if (currentCycle == 1)
                {
                    // First cycle: every edge counts equally, so each term is 1 / degree.
                    float PrCU = 1.0f / (float)semiCompleteGraph.OutDegree(item.LinkCU.WordNonPivot); //P(C|U) = P(C&U)/P(U)
                    float PrKC = 1.0f / (float)semiCompleteGraph.OutDegree(item.LinkCK.WordPivot);    //P(K|C) = P(K&C)/P(C)
                    float PrCK = 1.0f / (float)semiCompleteGraph.InDegree(item.LinkCK.WordNonPivot);  //P(C|K) = P(C&K)/P(K)
                    float PrUC = 1.0f / (float)semiCompleteGraph.InDegree(item.LinkCU.WordPivot);     //P(U|C) = P(U&C)/P(C)

                    pKU += PrCU * PrKC;
                    pUK += PrCK * PrUC;
                }
                else
                {
                    // Later cycles: each term sums the reciprocal cached weights of the edges
                    // around one end of a link; the sum is inverted after the loops.
                    // Cache keys are always built as SLink(pivot, nonPivot).
                    float PrCU = 0.0f;
                    float PrKC = 0.0f;
                    float PrCK = 0.0f;
                    float PrUC = 0.0f;
                    foreach (var downEdgeCU in semiCompleteGraph.OutEdges(item.LinkCU.WordNonPivot))     // out-edges of the non-pivot of link CU
                    {
                        PrCU += 1.0f / LinkWeightCache[new SLink(downEdgeCU.Target, downEdgeCU.Source)]; //P(C|U) = P(C&U)/P(U)
                    }
                    foreach (var downEdgeCK in semiCompleteGraph.OutEdges(item.LinkCK.WordPivot))        // out-edges of the pivot
                    {
                        PrKC += 1.0f / LinkWeightCache[new SLink(downEdgeCK.Source, downEdgeCK.Target)]; //P(K|C) = P(K&C)/P(C)
                    }
                    foreach (var upEdgeCK in semiCompleteGraph.InEdges(item.LinkCK.WordNonPivot))        // in-edges of the non-pivot of link CK
                    {
                        // The pivot is the Source of an edge arriving at the non-pivot. The key
                        // was previously built with the arguments reversed, unlike every other
                        // SLink(pivot, nonPivot) key in this file.
                        PrCK += 1.0f / LinkWeightCache[new SLink(upEdgeCK.Source, upEdgeCK.Target)];     //P(C|K) = P(C&K)/P(K)
                    }
                    foreach (var upEdgeCU in semiCompleteGraph.InEdges(item.LinkCU.WordPivot))           // in-edges of the pivot
                    {
                        // The pivot is the Target of an edge arriving at it; same argument-order
                        // correction as above.
                        PrUC += 1.0f / LinkWeightCache[new SLink(upEdgeCU.Target, upEdgeCU.Source)];     //P(U|C) = P(U&C)/P(C)
                    }

                    PrCU = 1.0f / PrCU;
                    PrKC = 1.0f / PrKC;
                    PrCK = 1.0f / PrCK;
                    PrUC = 1.0f / PrUC;
                    pUK += PrUC * PrCK;
                    pKU += PrKC * PrCU;
                }
            }

            pair.Paths     = paths;
            pair.Prob      = (float)(pUK * pKU);
            pair.Polysemy *= (1 - pair.Prob);

            // Record link weights: existing links weigh 1, missing links get the best pair
            // probability seen so far for that link.
            foreach (var item in pair.Paths)
            {
                // CU
                if (item.LinkCU.Exists)
                {
                    item.LinkCU.Pr = 1f;
                    if (!LinkWeightCache.ContainsKey(item.LinkCU))
                    {
                        LinkWeightCache.Add(item.LinkCU, item.LinkCU.Pr);
                    }
                }
                else
                {
                    float value = 0;
                    if (LinkWeightCache.TryGetValue(item.LinkCU, out value))
                    {
                        if (pair.Prob > value)
                        {
                            item.LinkCU.Pr = LinkWeightCache[item.LinkCU] = pair.Prob;
                        }
                        else
                        {
                            item.LinkCU.Pr = value;
                        }
                    }
                    else
                    {
                        item.LinkCU.Pr = pair.Prob;
                        LinkWeightCache.Add(item.LinkCU, pair.Prob);
                    }
                }

                // CK
                if (item.LinkCK.Exists)
                {
                    item.LinkCK.Pr = 1f;
                    if (!LinkWeightCache.ContainsKey(item.LinkCK))
                    {
                        LinkWeightCache.Add(item.LinkCK, item.LinkCK.Pr);
                    }
                }
                else
                {
                    float value = 0;
                    if (LinkWeightCache.TryGetValue(item.LinkCK, out value))
                    {
                        if (pair.Prob > value)
                        {
                            LinkWeightCache[item.LinkCK] = item.LinkCK.Pr = pair.Prob;
                        }
                        else
                        {
                            item.LinkCK.Pr = value;
                        }
                    }
                    else
                    {
                        item.LinkCK.Pr = pair.Prob;
                        LinkWeightCache.Add(item.LinkCK, pair.Prob);
                    }
                }
            }
            return pair;
        }