// Pairs every Uyghur vertex of `graph` with every Kazak vertex and gives each pair a
// naive probability based only on vertex in-degrees:
//
//     Prob = (InDegree(uWord) + InDegree(kWord)) / (2 * number of Chinese vertices)
//
// graph   : graph whose vertices expose a Language tag.
// returns : one WordPair per (Uyghur, Kazak) combination, in vertex enumeration order.
//
// Note: if the graph holds no Chinese vertices the divisor is 0 and the float division
// produces Infinity/NaN instead of throwing (same as the previous implementation).
public List <WordPair> GenerateAllNaivePairs(BidirectionalGraph <Word, Edge <Word> > graph)
{
    List <WordPair> pairs = new List <WordPair>();

    // Materialise the language filters once. They are deferred LINQ queries, so leaving
    // them lazy re-scans every vertex of the graph on each (re-)enumeration.
    List <Word> uWords = graph.Vertices.Where(t => t.Language == Language.Uyghur).ToList();
    List <Word> kWords = graph.Vertices.Where(t => t.Language == Language.Kazak).ToList();

    // Loop-invariant divisor; previously cWords.Count() was re-evaluated for every pair.
    float denominator = 2 * graph.Vertices.Count(t => t.Language == Language.Chinese);

    foreach (var uWord in uWords)
    {
        float connectedUC = (float)graph.InDegree(uWord);

        foreach (var kWord in kWords)
        {
            float connectedCK = (float)graph.InDegree(kWord);

            WordPair pair = new WordPair(uWord, kWord);
            pair.Prob = (connectedUC + connectedCK) / denominator;
            pairs.Add(pair);
        }
    }

    return pairs;
}
// Builds the WordPair for (uWord, kWord) from the two-hop paths uWord -> pivot -> kWord
// found in `graph`.
//
// Steps:
//   1. Collect one SPath per pivot: first the targets of uWord's out-edges, then the
//      sources of kWord's in-edges that were not already seen (tracked by ID in Cache1).
//      Each SLink is built as SLink(pivot, nonPivot) and flagged with whether the
//      corresponding edge is present in `graph`.
//   2. For every path whose two links both exist, accumulate pUK and pKU from the
//      weights stored in LinkWeightCache; the pair probability is pUK * pKU.
//   3. For every link that does NOT exist, keep the larger of the pair probability and
//      any weight already cached for that link, both on the link and in LinkWeightCache.
//
// Side effects: clears and refills Cache1; adds/updates entries of LinkWeightCache.
// The LinkWeightCache indexer throws if an edge visited in step 2 has no cached weight.
private WordPair createNewGreenEdges(Word uWord, Word kWord, BidirectionalGraph <Word, Edge <Word> > graph)
{
    Cache1.Clear();
    List <SPath> candidatePaths = new List <SPath>();

    // Pivots reachable through an out-edge of uWord.
    foreach (var outEdge in graph.OutEdges(uWord))
    {
        Word pivot = outEdge.Target;

        SLink toU = new SLink(pivot, uWord) { Exists = graph.ContainsEdge(uWord, pivot) };
        SLink toK = new SLink(pivot, kWord) { Exists = graph.ContainsEdge(pivot, kWord) };

        candidatePaths.Add(new SPath(toU, toK));
        Cache1.Add(pivot.ID, true);
    }

    // Pivots that reach kWord but were not already covered by the loop above.
    foreach (var inEdge in graph.InEdges(kWord))
    {
        Word pivot = inEdge.Source;
        if (!Cache1.ContainsKey(pivot.ID))
        {
            SLink toK = new SLink(pivot, kWord) { Exists = graph.ContainsEdge(pivot, kWord) };
            SLink toU = new SLink(pivot, uWord) { Exists = graph.ContainsEdge(uWord, pivot) };

            candidatePaths.Add(new SPath(toU, toK));
        }
    }

    // Accumulate the two directional scores over complete paths only.
    float sumUK = 0;
    float sumKU = 0;

    foreach (var path in candidatePaths)
    {
        if (path.LinkCU.Exists && path.LinkCK.Exists)
        {
            float invCU = 0.0f;
            float invKC = 0.0f;
            float invCK = 0.0f;
            float invUC = 0.0f;

            // Down-path, non-pivot 1 -> pivot.   P(C|U) = P(C&U)/P(U)
            foreach (var e in graph.OutEdges(path.LinkCU.WordNonPivot))
            {
                invCU += 1.0f / LinkWeightCache[new SLink(e.Target, e.Source)];
            }

            // Down-path, pivot -> non-pivot 2.   P(K|C) = P(K&C)/P(C)
            foreach (var e in graph.OutEdges(path.LinkCK.WordPivot))
            {
                invKC += 1.0f / LinkWeightCache[new SLink(e.Source, e.Target)];
            }

            // Up-path, non-pivot 2 -> pivot.     P(C|K) = P(C&K)/P(K)
            foreach (var e in graph.InEdges(path.LinkCK.WordNonPivot))
            {
                invCK += 1.0f / LinkWeightCache[new SLink(e.Source, e.Target)];
            }

            // Up-path, pivot -> non-pivot 1.     P(U|C) = P(U&C)/P(C)
            foreach (var e in graph.InEdges(path.LinkCU.WordPivot))
            {
                invUC += 1.0f / LinkWeightCache[new SLink(e.Target, e.Source)];
            }

            // Each term is the reciprocal of the summed reciprocal weights.
            float prCU = 1.0f / invCU;
            float prKC = 1.0f / invKC;
            float prCK = 1.0f / invCK;
            float prUC = 1.0f / invUC;

            sumUK += prUC * prCK;
            sumKU += prKC * prCU;
        }
    }

    WordPair pair = new WordPair(uWord, kWord) { Paths = candidatePaths, Prob = (float)(sumUK * sumKU) };

    // Propagate the pair probability onto links that are missing from the graph.
    foreach (var path in pair.Paths)
    {
        // Pivot-to-U link
        if (!path.LinkCU.Exists)
        {
            float cached;
            if (!LinkWeightCache.TryGetValue(path.LinkCU, out cached))
            {
                path.LinkCU.Pr = pair.Prob;
                LinkWeightCache.Add(path.LinkCU, pair.Prob);
            }
            else if (pair.Prob > cached)
            {
                LinkWeightCache[path.LinkCU] = pair.Prob;
                path.LinkCU.Pr = pair.Prob;
            }
            else
            {
                path.LinkCU.Pr = cached;
            }
        }

        // Pivot-to-K link
        if (!path.LinkCK.Exists)
        {
            float cached;
            if (!LinkWeightCache.TryGetValue(path.LinkCK, out cached))
            {
                path.LinkCK.Pr = pair.Prob;
                LinkWeightCache.Add(path.LinkCK, pair.Prob);
            }
            else if (pair.Prob > cached)
            {
                path.LinkCK.Pr = pair.Prob;
                LinkWeightCache[path.LinkCK] = pair.Prob;
            }
            else
            {
                path.LinkCK.Pr = cached;
            }
        }
    }

    return pair;
}
// Rebuilds the working graphs from `g` and generates the candidate Uyghur-Kazak pairs.
//
// g       : input graph. Its vertices are copied as-is; an edge whose source is Chinese
//           and whose target is Uyghur is stored reversed, every other edge keeps its
//           direction.
// returns : the distinct WordPairs produced by createNewEdges during the LAST cycle
//           (the result list and the de-duplication set are cleared at the start of
//           every cycle). Empty when symmetryCycle < 1.
//
// Side effects: replaces the LinkCache, LinkWeightCache, graph, semiCompleteGraph and
// completeGraph fields, and drives the currentCycle field from 1 up to symmetryCycle.
private List <WordPair> GeneratePossiblePairs(BidirectionalGraph <Word, Edge <Word> > g)
{
    LinkCache = new Dictionary <int, SLink>();
    LinkWeightCache = new Dictionary <SLink, float>();
    graph = new BidirectionalGraph <Word, Edge <Word> >(false);
    semiCompleteGraph = new BidirectionalGraph <Word, Edge <Word> >(false);
    completeGraph = new BidirectionalGraph <Word, Edge <Word> >(false);

    #region Prepare graph
    foreach (var item in g.Vertices)
    {
        graph.AddVertex(item);
        semiCompleteGraph.AddVertex(item);
        completeGraph.AddVertex(item);
    }

    foreach (var item in g.Edges)
    {
        // Chinese -> Uyghur edges are flipped so that they are stored as Uyghur -> Chinese.
        bool flip = item.Source.Language == Console.Language.Chinese &&
                    item.Target.Language == Console.Language.Uyghur;
        Word source = flip ? item.Target : item.Source;
        Word target = flip ? item.Source : item.Target;

        // Each graph receives its own Edge instance, as before.
        graph.AddEdge(new Edge <Word>(source, target));
        semiCompleteGraph.AddEdge(new Edge <Word>(source, target));
        completeGraph.AddEdge(new Edge <Word>(source, target));
    }
    #endregion

    List <WordPair> ooPairs = new List <WordPair>();
    // Same default equality comparer as the Dictionary<WordPair, bool> used previously.
    HashSet <WordPair> seenPairs = new HashSet <WordPair>();

    // `graph` is never modified below, so the filter can be evaluated once.
    List <Word> uWords = graph.Vertices.Where(t => t.Language == Language.Uyghur).ToList();

    currentCycle = 1;
    while (currentCycle <= symmetryCycle)
    {
        Debug.WriteLine(currentCycle);

        if (currentCycle > 1)
        {
            // NOTE(review): this aliases the two graphs rather than copying, so edges that
            // createNewEdges adds to completeGraph during this cycle are visible through
            // semiCompleteGraph immediately. Confirm that this is intended.
            semiCompleteGraph = completeGraph;
        }

        seenPairs.Clear();
        ooPairs.Clear();

        // Candidate pairs are always enumerated over the original two-hop paths of `graph`.
        foreach (var uWord in uWords)
        {
            foreach (var edge1 in graph.OutEdges(uWord))
            {
                Word cWord = edge1.Target;
                foreach (var edge2 in graph.OutEdges(cWord))
                {
                    Word kWord = edge2.Target;

                    // createNewEdges is invoked for every path (it updates LinkWeightCache
                    // and may add edges to completeGraph); only the first result per
                    // distinct pair is kept.
                    WordPair pair = createNewEdges(uWord, kWord, semiCompleteGraph, completeGraph);
                    if (seenPairs.Add(pair))
                    {
                        ooPairs.Add(pair);
                    }
                }
            }
        }

        currentCycle++;
    }

    return ooPairs;
}
// Builds the WordPair for (uWord, kWord) from the two-hop paths uWord -> pivot -> kWord
// in `semiCompleteGraph`, and records link weights in LinkWeightCache.
//
// uWord, kWord      : the two non-pivot end points of the pair.
// semiCompleteGraph : graph used for path discovery, degrees and edge-existence checks.
// completeGraph     : graph that receives an edge for every missing link when
//                     languageOption == 2.
// returns           : a WordPair carrying the collected Paths, Prob (= pUK * pKU) and
//                     Polysemy (summed per-pivot sense-edge count scaled by 1 - Prob).
//
// Side effects: clears and refills Cache1; adds/updates LinkWeightCache entries; may add
// edges to completeGraph. Reads the currentCycle and languageOption fields.
// When currentCycle > 1 the LinkWeightCache indexer throws if a traversed edge has no
// cached weight.
private WordPair createNewEdges(Word uWord, Word kWord, BidirectionalGraph <Word, Edge <Word> > semiCompleteGraph, BidirectionalGraph <Word, Edge <Word> > completeGraph)
{
    Cache1.Clear();
    List <SPath> paths = new List <SPath>();
    WordPair pair = new WordPair(uWord, kWord);
    pair.Polysemy = 0f;

    // Pivots reached through an out-edge of uWord (graph may contain edges added earlier).
    foreach (var item in semiCompleteGraph.OutEdges(uWord))
    {
        int inDegreePivot = (int)semiCompleteGraph.InDegree(item.Target);
        int outDegreePivot = (int)semiCompleteGraph.OutDegree(item.Target);
        int totalSenseEdge = (Math.Max(inDegreePivot, outDegreePivot) - 1) * (inDegreePivot + outDegreePivot);
        pair.Polysemy += totalSenseEdge;

        SLink linkCU = new SLink(item.Target, uWord);
        linkCU.Exists = true; // the pivot came from an out-edge of uWord
        SLink linkCK = new SLink(item.Target, kWord);
        linkCK.Exists = semiCompleteGraph.ContainsEdge(item.Target, kWord);

        paths.Add(new SPath(linkCU, linkCK));
        Cache1.Add(item.Target.ID, true);
    }

    // Pivots that reach kWord and were not already handled above.
    foreach (var item in semiCompleteGraph.InEdges(kWord))
    {
        if (Cache1.ContainsKey(item.Source.ID))
        {
            continue;
        }

        int inDegreePivot = (int)semiCompleteGraph.InDegree(item.Source);
        int outDegreePivot = (int)semiCompleteGraph.OutDegree(item.Source);
        int totalSenseEdge = (Math.Max(inDegreePivot, outDegreePivot) - 1) * (inDegreePivot + outDegreePivot);
        pair.Polysemy += totalSenseEdge;

        SLink linkCK = new SLink(item.Source, kWord);
        linkCK.Exists = true; // the pivot came from an in-edge of kWord
        SLink linkCU = new SLink(item.Source, uWord);
        linkCU.Exists = semiCompleteGraph.ContainsEdge(uWord, item.Source);

        paths.Add(new SPath(linkCU, linkCK));
    }

    // Calculate probability
    float pUK = 0;
    float pKU = 0;

    foreach (var item in paths)
    {
        // A path with a missing link contributes nothing to the probability; with
        // languageOption == 2 the missing edge is added to completeGraph instead.
        if (!item.LinkCU.Exists)
        {
            if (languageOption == 2)
            {
                completeGraph.AddEdge(new Edge <Word>(item.LinkCU.WordNonPivot, item.LinkCU.WordPivot));
            }
            continue;
        }

        if (!item.LinkCK.Exists)
        {
            if (languageOption == 2)
            {
                completeGraph.AddEdge(new Edge <Word>(item.LinkCK.WordPivot, item.LinkCK.WordNonPivot));
            }
            continue;
        }

        if (currentCycle == 1)
        {
            // First cycle: every edge counts equally, so each term is 1 / degree.
            float PrCU = 1.0f / (float)semiCompleteGraph.OutDegree(item.LinkCU.WordNonPivot); //P(C|U) = P(C&U)/P(U)
            float PrKC = 1.0f / (float)semiCompleteGraph.OutDegree(item.LinkCK.WordPivot);    //P(K|C) = P(K&C)/P(C)
            float PrCK = 1.0f / (float)semiCompleteGraph.InDegree(item.LinkCK.WordNonPivot);  //P(C|K) = P(C&K)/P(K)
            float PrUC = 1.0f / (float)semiCompleteGraph.InDegree(item.LinkCU.WordPivot);     //P(U|C) = P(U&C)/P(C)

            pKU += PrCU * PrKC;
            pUK += PrCK * PrUC;
        }
        else
        {
            // Later cycles: weight each edge by its cached link weight.
            // Every LinkWeightCache key is created as SLink(pivot, nonPivot), so all four
            // lookups below build their key pivot-first.
            float PrCU = 0.0f;
            float PrKC = 0.0f;
            float PrCK = 0.0f;
            float PrUC = 0.0f;

            // Down-path from nonpivot1 to pivot: edge is U -> C.   P(C|U) = P(C&U)/P(U)
            foreach (var downEdgeCU in semiCompleteGraph.OutEdges(item.LinkCU.WordNonPivot))
            {
                PrCU += 1.0f / LinkWeightCache[new SLink(downEdgeCU.Target, downEdgeCU.Source)];
            }

            // Down-path from pivot to nonpivot2: edge is C -> K.   P(K|C) = P(K&C)/P(C)
            foreach (var downEdgeCK in semiCompleteGraph.OutEdges(item.LinkCK.WordPivot))
            {
                PrKC += 1.0f / LinkWeightCache[new SLink(downEdgeCK.Source, downEdgeCK.Target)];
            }

            // Up-path from nonpivot2 to pivot: edge is C -> K.     P(C|K) = P(C&K)/P(K)
            // Fixed: previously keyed as SLink(K, C), i.e. pivot and non-pivot swapped.
            foreach (var upEdgeCK in semiCompleteGraph.InEdges(item.LinkCK.WordNonPivot))
            {
                PrCK += 1.0f / LinkWeightCache[new SLink(upEdgeCK.Source, upEdgeCK.Target)];
            }

            // Up-path from pivot to nonpivot1: edge is U -> C.     P(U|C) = P(U&C)/P(C)
            // Fixed: previously keyed as SLink(U, C), i.e. pivot and non-pivot swapped.
            foreach (var upEdgeCU in semiCompleteGraph.InEdges(item.LinkCU.WordPivot))
            {
                PrUC += 1.0f / LinkWeightCache[new SLink(upEdgeCU.Target, upEdgeCU.Source)];
            }

            PrCU = 1.0f / PrCU;
            PrKC = 1.0f / PrKC;
            PrCK = 1.0f / PrCK;
            PrUC = 1.0f / PrUC;

            pUK += PrUC * PrCK;
            pKU += PrKC * PrCU;
        }
    }

    pair.Paths = paths;
    pair.Prob = (float)(pUK * pKU);
    pair.Polysemy *= (1 - pair.Prob);

    // Set link weights: existing links weigh 1, missing links keep the best pair
    // probability seen so far for that link.
    foreach (var item in pair.Paths)
    {
        //CU
        if (item.LinkCU.Exists)
        {
            item.LinkCU.Pr = 1f;
            if (!LinkWeightCache.ContainsKey(item.LinkCU))
            {
                LinkWeightCache.Add(item.LinkCU, item.LinkCU.Pr);
            }
        }
        else
        {
            float value = 0;
            if (LinkWeightCache.TryGetValue(item.LinkCU, out value))
            {
                if (pair.Prob > value)
                {
                    item.LinkCU.Pr = LinkWeightCache[item.LinkCU] = pair.Prob;
                }
                else
                {
                    item.LinkCU.Pr = value;
                }
            }
            else
            {
                item.LinkCU.Pr = pair.Prob;
                LinkWeightCache.Add(item.LinkCU, pair.Prob);
            }
        }

        //CK
        if (item.LinkCK.Exists)
        {
            item.LinkCK.Pr = 1f;
            if (!LinkWeightCache.ContainsKey(item.LinkCK))
            {
                LinkWeightCache.Add(item.LinkCK, item.LinkCK.Pr);
            }
        }
        else
        {
            float value = 0;
            if (LinkWeightCache.TryGetValue(item.LinkCK, out value))
            {
                if (pair.Prob > value)
                {
                    LinkWeightCache[item.LinkCK] = item.LinkCK.Pr = pair.Prob;
                }
                else
                {
                    item.LinkCK.Pr = value;
                }
            }
            else
            {
                item.LinkCK.Pr = pair.Prob;
                LinkWeightCache.Add(item.LinkCK, pair.Prob);
            }
        }
    }

    return pair;
}