/// <summary>
/// Runs Viterbi decoding over <paramref name="testWords"/> in the requested direction(s).
/// </summary>
/// <remarks>
/// Every call first replaces the decoding state (UnknownWords, ForwardHistory, BackwardHistory,
/// PredictedTags and ViterbiGraph) with fresh, empty collections.
/// A <paramref name="mode"/> of "forward" runs only the forward pass, "backward" runs only the
/// backward pass, and "f+b" runs both and then calls BiDirectionalModelTrace.
/// Any other value runs no pass at all.
/// In every case <paramref name="testWords"/> is finally handed (by reference) to
/// TextPreprocessing.Cleaning.EliminateAllEndOfSentenceTags.
/// </remarks>
/// <param name="tagger">Trained model passed through to the forward and backward passes.</param>
/// <param name="testWords">Tokens to decode; passed by reference to the final clean-up step.</param>
/// <param name="modelForward">Model name given to the forward pass (default "bigram").</param>
/// <param name="modelBackward">Model name given to the backward pass (default "bigram").</param>
/// <param name="mode">Decoding direction: "forward", "backward" or "f+b".</param>
/// <param name="beam">Beam width forwarded unchanged to both passes (default 0).</param>
public void ViterbiDecoding(PartOfSpeechModel tagger, List<Tokenizer.WordTag> testWords, string modelForward = "bigram", string modelBackward = "bigram", string mode = "forward", int beam = 0)
{
    // Start every run from a clean slate.
    this.UnknownWords = new HashSet<string>();
    this.ForwardHistory = new List<ViterbiNode>();
    this.BackwardHistory = new List<ViterbiNode>();
    this.PredictedTags = new List<string>();
    this.ViterbiGraph = new List<List<ViterbiNode>>();

    // Work out which passes the requested mode needs.
    bool bothDirections = mode.Equals("f+b");
    bool runForward = bothDirections || mode.Equals("forward");
    bool runBackward = bothDirections || mode.Equals("backward");

    if (runForward)
    {
        this.ForwardAlgorithm(tagger, testWords, modelForward, beam);
    }

    if (runBackward)
    {
        this.BackwardAlgorithm(tagger, testWords, modelBackward, mode, beam);
    }

    if (bothDirections)
    {
        this.BiDirectionalModelTrace();
    }

    TextPreprocessing.Cleaning.EliminateAllEndOfSentenceTags(ref testWords);
}
/// <summary>
/// Forward (left-to-right) pass of the Viterbi decoding algorithm.
/// </summary>
/// <remarks>
/// Walks <paramref name="testWords"/> from index 0 upwards. For every token whose tag is not "."
/// one column of candidate nodes is appended to ViterbiGraph; a token tagged "." triggers
/// Backtrace(method: "forward") and makes the next word a sentence start.
/// Words missing from both emission tables (or whose only recorded tag is ".") are added to
/// UnknownWords and scored with <c>GetValueWeightForUnknownWord</c> instead of an emission value.
/// Transition scores are linear interpolations of unigram/bigram (and, in trigram mode,
/// trigram) probabilities weighted by the tagger's lambda values.
/// </remarks>
/// <param name="tagger">Trained model supplying emission, unigram, bigram and trigram tables.</param>
/// <param name="testWords">Tokens to decode; each token's <c>tag</c> is only used to spot ".".</param>
/// <param name="model">"trigram" switches to trigram interpolation from the third word of a sentence on; any other value uses bigram interpolation.</param>
/// <param name="beam">When non-zero, only the first <c>beam</c> nodes of each column (sorted by descending score) are kept.</param>
private void ForwardAlgorithm(PartOfSpeechModel tagger, List<Tokenizer.WordTag> testWords, string model, int beam)
{
    bool startPoint = true; // true while the next word opens a new sentence
    int triPoz = -1;        // offset of the current word inside its sentence (0 = first word)

    for (int i = 0; i < testWords.Count; i++)
    {
        triPoz++;

        if (testWords[i].tag == ".")
        {
            // End of sentence. Per the original author's note, Backtrace unwinds the finished
            // sentence from right to left through the PrevNode links.
            Backtrace(method: "forward");
            startPoint = true;
            continue;
        }

        var word = testWords[i].word;

        // Look the word up as written first, then fall back to its lower-cased form.
        PartOfSpeechModel.EmissionProbabilisticModel foundWord = tagger.WordCapitalizedTagsEmissionProbabilities.Find(x => x.Word == word);
        if (foundWord == null)
        {
            // Lower-case once instead of once per vocabulary entry inside the predicate.
            var loweredWord = word.ToLower();
            foundWord = tagger.WordTagsEmissionProbabilities.Find(x => x.Word == loweredWord);
        }

        // An entry whose only tag is "." offers no usable tag, so the word is handled as unknown.
        if (foundWord != null && foundWord.TagFreq.Count == 1 && foundWord.TagFreq.ContainsKey("."))
        {
            foundWord = null;
        }

        if (foundWord == null)
        {
            UnknownWords.Add(word);
        }

        List<ViterbiNode> vList = new List<ViterbiNode>();

        if (startPoint)
        {
            // First word of a sentence: the implicit previous tag is ".".
            triPoz = 0;

            if (foundWord == null)
            {
                // Unknown word: one candidate per bigram ". -> tag" (tag != "."),
                // visited in descending transition probability.
                var orderedTransitions = tagger.BigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList();
                foreach (var item in orderedTransitions)
                {
                    if (item.Key.Item1.Equals(".") && item.Key.Item2 != ".")
                    {
                        double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item2)).Value;
                        double biTrans = (double)(uniVal * tagger.BgramLambda1) + (item.Value * tagger.BgramLambda2);
                        double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item2);
                        double product = biTrans * unknownProcent;
                        vList.Add(new ViterbiNode(product, item.Key.Item2));
                    }
                }
            }
            else
            {
                // Known word: one candidate per recorded tag, emission * interpolated ". -> tag".
                foreach (var wt in foundWord.TagFreq)
                {
                    if (wt.Key == ".")
                    {
                        continue;
                    }

                    double emissionFreqValue = wt.Value;
                    Tuple<string, string> tuple = new Tuple<string, string>(".", wt.Key);
                    double biTransition = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(tuple)).Value;
                    double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(wt.Key)).Value;
                    double biTrans = (double)(uniVal * tagger.BgramLambda1) + (biTransition * tagger.BgramLambda2);
                    double product = (double)emissionFreqValue * biTrans;
                    vList.Add(new ViterbiNode(product, wt.Key));
                }
            }

            startPoint = false;
        }
        else
        {
            // Trigram scoring needs two predecessors, so it only starts at the third word.
            bool useTrigram = model == "trigram" && triPoz >= 2;
            var previousColumn = this.ViterbiGraph[this.ViterbiGraph.Count - 1];

            if (foundWord == null)
            {
                // The sorted transition tables do not depend on the predecessor node, so they
                // are built once per word rather than once per node of the previous column.
                var orderedTransitions = useTrigram
                    ? null
                    : tagger.BigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList();
                var orderedTransitionsTri = useTrigram
                    ? tagger.TrigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList()
                    : null;

                // Unknown word: each predecessor node contributes its single best continuation.
                foreach (ViterbiNode elem in previousColumn)
                {
                    ViterbiNode vGoodNode = new ViterbiNode(0.0d, "NULL");

                    if (useTrigram)
                    {
                        if (elem.PrevNode == null)
                        {
                            // No second predecessor available: this node yields no candidate.
                            continue;
                        }

                        ViterbiNode elem2 = elem.PrevNode;
                        foreach (var item in orderedTransitionsTri)
                        {
                            if (item.Key.Item1.Equals(elem2.CurrentTag) && item.Key.Item2.Equals(elem.CurrentTag) && item.Key.Item3 != ".")
                            {
                                Tuple<string, string> biTuple = new Tuple<string, string>(elem.CurrentTag, item.Key.Item3);
                                double biVal = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(biTuple)).Value;
                                double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item3)).Value;
                                double triTransition = (double)(tagger.TgramLambda3 * item.Value) + (tagger.TgramLambda2 * biVal) + (tagger.TgramLambda1 * uniVal);
                                double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item3);
                                double product = (double)elem.value * triTransition * unknownProcent;

                                // ">=" lets a later transition with an equal score replace the current best.
                                if (product >= vGoodNode.value)
                                {
                                    vGoodNode = new ViterbiNode(product, item.Key.Item3, PrevNode: elem);
                                }
                            }
                        }
                    }
                    else
                    {
                        foreach (var item in orderedTransitions)
                        {
                            if (item.Key.Item1.Equals(elem.CurrentTag) && item.Key.Item2 != ".")
                            {
                                double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item2)).Value;
                                double biTrans = (double)(uniVal * tagger.BgramLambda1) + (item.Value * tagger.BgramLambda2);
                                double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item2);
                                double product = (double)elem.value * biTrans * unknownProcent;

                                if (product >= vGoodNode.value)
                                {
                                    vGoodNode = new ViterbiNode(product, item.Key.Item2, PrevNode: elem);
                                }
                            }
                        }
                    }

                    vList.Add(vGoodNode);
                }
            }
            else
            {
                // Known word: for each recorded tag keep the best-scoring predecessor.
                foreach (var tf in foundWord.TagFreq)
                {
                    if (tf.Key == ".")
                    {
                        continue;
                    }

                    ViterbiNode vGoodNode = new ViterbiNode(0.0d, "NULL");

                    // The unigram term depends only on the candidate tag, not on the predecessor.
                    double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(tf.Key)).Value;

                    foreach (ViterbiNode vn in previousColumn)
                    {
                        if (useTrigram)
                        {
                            if (vn.PrevNode == null)
                            {
                                continue;
                            }

                            Tuple<string, string, string> triTuple = new Tuple<string, string, string>(vn.PrevNode.CurrentTag, vn.CurrentTag, tf.Key);
                            double triVal = tagger.TrigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(triTuple)).Value;
                            Tuple<string, string> biTuple = new Tuple<string, string>(vn.CurrentTag, tf.Key);
                            double biVal = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(biTuple)).Value;
                            double triTransition = (double)(tagger.TgramLambda3 * triVal) + (tagger.TgramLambda2 * biVal) + (tagger.TgramLambda1 * uniVal);
                            double product = (double)vn.value * triTransition * tf.Value;

                            if (product >= vGoodNode.value)
                            {
                                vGoodNode = new ViterbiNode(product, tf.Key, PrevNode: vn);
                            }
                        }
                        else
                        {
                            Tuple<string, string> tuple = new Tuple<string, string>(vn.CurrentTag, tf.Key);
                            double biTransition = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(tuple)).Value;
                            double biTrans = (double)(uniVal * tagger.BgramLambda1) + (biTransition * tagger.BgramLambda2);
                            double product = (double)vn.value * biTrans * tf.Value;

                            if (product >= vGoodNode.value)
                            {
                                vGoodNode = new ViterbiNode(product, tf.Key, PrevNode: vn);
                            }
                        }
                    }

                    vList.Add(vGoodNode);
                }
            }
        }

        // Store the column best-first and, when a beam is set, keep only its first `beam` nodes.
        List<ViterbiNode> column = vList.OrderByDescending(x => x.value).ToList();
        if (beam != 0)
        {
            column = column.Take(beam).ToList();
        }

        this.ViterbiGraph.Add(column);
    }
}
/// <summary>
/// Backward (right-to-left) pass of the Viterbi decoding algorithm.
/// </summary>
/// <remarks>
/// Walks <paramref name="testWords"/> from index <c>Count - 2</c> down to a virtual index -1,
/// i.e. the last token is skipped (the original author's note says this is so decoding starts
/// at the first word that is not "."). For every token whose tag is not "." one column of
/// candidate nodes is appended to ViterbiGraph; a token tagged "." and the virtual index -1
/// both trigger Backtrace(method: "backward") and make the next word a sentence start.
/// Scoring mirrors the forward pass with the tag order reversed: nodes are linked through
/// NextNode and transitions are read as "candidate tag -> tag of the following word".
/// When <paramref name="mode"/> is exactly "backward", PredictedTags is rebuilt at the end from
/// BackwardHistory by following each entry's NextNode chain and skipping "." tags.
/// </remarks>
/// <param name="tagger">Trained model supplying emission, unigram, bigram and trigram tables.</param>
/// <param name="testWords">Tokens to decode; each token's <c>tag</c> is only used to spot ".".</param>
/// <param name="model">"trigram" switches to trigram interpolation from the third decoded word of a sentence on; any other value uses bigram interpolation.</param>
/// <param name="mode">Decoding mode of the caller; only "backward" makes this method fill PredictedTags.</param>
/// <param name="beam">When non-zero, only the first <c>beam</c> nodes of each column (sorted by descending score) are kept.</param>
private void BackwardAlgorithm(PartOfSpeechModel tagger, List<Tokenizer.WordTag> testWords, string model, string mode, int beam)
{
    bool startPoint = true; // true while the next word is the first one decoded for its sentence
    int triPoz = -1;        // number of words already decoded in the current sentence

    for (int i = testWords.Count - 2; i >= -1; i--)
    {
        triPoz++;

        // Index -1 (just before the first token) closes the left-most sentence; it must be
        // tested first so testWords is never indexed with -1.
        if (i == -1 || testWords[i].tag == ".")
        {
            Backtrace(method: "backward");
            startPoint = true;
            continue;
        }

        var word = testWords[i].word;

        // Look the word up as written first, then fall back to its lower-cased form.
        PartOfSpeechModel.EmissionProbabilisticModel foundWord = tagger.WordCapitalizedTagsEmissionProbabilities.Find(x => x.Word == word);
        if (foundWord == null)
        {
            // Lower-case once instead of once per vocabulary entry inside the predicate.
            var loweredWord = word.ToLower();
            foundWord = tagger.WordTagsEmissionProbabilities.Find(x => x.Word == loweredWord);
        }

        // An entry whose only tag is "." offers no usable tag, so the word is handled as unknown.
        if (foundWord != null && foundWord.TagFreq.Count == 1 && foundWord.TagFreq.ContainsKey("."))
        {
            foundWord = null;
        }

        if (foundWord == null)
        {
            UnknownWords.Add(word);
        }

        List<ViterbiNode> vList = new List<ViterbiNode>();

        if (startPoint)
        {
            // Last word of a sentence: the implicit following tag is ".".
            triPoz = 0;

            if (foundWord == null)
            {
                // Unknown word: one candidate per bigram "tag -> ." (tag != "."),
                // visited in descending transition probability.
                var orderedTransitions = tagger.BigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList();
                foreach (var item in orderedTransitions)
                {
                    if (item.Key.Item2.Equals(".") && item.Key.Item1 != ".")
                    {
                        double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item1)).Value;
                        double biTrans = (double)(uniVal * tagger.BgramLambda1) + (item.Value * tagger.BgramLambda2);
                        double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item1);
                        double product = biTrans * unknownProcent;
                        vList.Add(new ViterbiNode(product, item.Key.Item1));
                    }
                }
            }
            else
            {
                // Known word: one candidate per recorded tag, emission * interpolated "tag -> .".
                foreach (var wt in foundWord.TagFreq)
                {
                    if (wt.Key == ".")
                    {
                        continue;
                    }

                    double emissionFreqValue = wt.Value;
                    Tuple<string, string> tuple = new Tuple<string, string>(wt.Key, ".");
                    double biTransition = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(tuple)).Value;
                    double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(wt.Key)).Value;
                    double biTrans = (double)(uniVal * tagger.BgramLambda1) + (biTransition * tagger.BgramLambda2);
                    double product = (double)emissionFreqValue * biTrans;
                    vList.Add(new ViterbiNode(product, wt.Key));
                }
            }

            startPoint = false;
        }
        else
        {
            // Trigram scoring needs two successors, so it only starts at the third decoded word.
            bool useTrigram = model == "trigram" && triPoz >= 2;
            var previousColumn = this.ViterbiGraph[this.ViterbiGraph.Count - 1];

            if (foundWord == null)
            {
                // The sorted transition tables do not depend on the successor node, so they
                // are built once per word rather than once per node of the previous column.
                var orderedTransitions = useTrigram
                    ? null
                    : tagger.BigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList();
                var orderedTransitionsTri = useTrigram
                    ? tagger.TrigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList()
                    : null;

                // Unknown word: each successor node contributes its single best predecessor tag.
                foreach (ViterbiNode elem in previousColumn)
                {
                    ViterbiNode vGoodNode = new ViterbiNode(0.0d, "NULL");

                    if (useTrigram)
                    {
                        if (elem.NextNode == null)
                        {
                            // No second successor available: this node yields no candidate.
                            continue;
                        }

                        ViterbiNode elem2 = elem.NextNode;
                        foreach (var item in orderedTransitionsTri)
                        {
                            if (item.Key.Item3.Equals(elem2.CurrentTag) && item.Key.Item2.Equals(elem.CurrentTag) && item.Key.Item1 != ".")
                            {
                                Tuple<string, string> biTuple = new Tuple<string, string>(item.Key.Item1, elem.CurrentTag);
                                double biVal = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(biTuple)).Value;
                                double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item1)).Value;
                                double triTransition = (double)(tagger.TgramLambda3 * item.Value) + (tagger.TgramLambda2 * biVal) + (tagger.TgramLambda1 * uniVal);
                                double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item1);
                                double product = (double)elem.value * triTransition * unknownProcent;

                                // ">=" lets a later transition with an equal score replace the current best.
                                if (product >= vGoodNode.value)
                                {
                                    vGoodNode = new ViterbiNode(product, item.Key.Item1, NextNode: elem);
                                }
                            }
                        }
                    }
                    else
                    {
                        foreach (var item in orderedTransitions)
                        {
                            if (item.Key.Item2.Equals(elem.CurrentTag) && item.Key.Item1 != ".")
                            {
                                double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item1)).Value;
                                double biTrans = (double)(uniVal * tagger.BgramLambda1) + (item.Value * tagger.BgramLambda2);
                                double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item1);
                                double product = (double)elem.value * biTrans * unknownProcent;

                                if (product >= vGoodNode.value)
                                {
                                    vGoodNode = new ViterbiNode(product, item.Key.Item1, NextNode: elem);
                                }
                            }
                        }
                    }

                    vList.Add(vGoodNode);
                }
            }
            else
            {
                // Known word: for each recorded tag keep the best-scoring successor.
                foreach (var tf in foundWord.TagFreq)
                {
                    if (tf.Key == ".")
                    {
                        continue;
                    }

                    ViterbiNode vGoodNode = new ViterbiNode(0.0d, "NULL");

                    // The unigram term depends only on the candidate tag, not on the successor.
                    double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(tf.Key)).Value;

                    foreach (ViterbiNode vn in previousColumn)
                    {
                        if (useTrigram)
                        {
                            if (vn.NextNode == null)
                            {
                                continue;
                            }

                            Tuple<string, string, string> triTuple = new Tuple<string, string, string>(tf.Key, vn.CurrentTag, vn.NextNode.CurrentTag);
                            double triVal = tagger.TrigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(triTuple)).Value;
                            Tuple<string, string> biTuple = new Tuple<string, string>(tf.Key, vn.CurrentTag);
                            double biVal = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(biTuple)).Value;
                            double triTransition = (double)(tagger.TgramLambda3 * triVal) + (tagger.TgramLambda2 * biVal) + (tagger.TgramLambda1 * uniVal);
                            double product = (double)vn.value * triTransition * tf.Value;

                            if (product >= vGoodNode.value)
                            {
                                vGoodNode = new ViterbiNode(product, tf.Key, NextNode: vn);
                            }
                        }
                        else
                        {
                            Tuple<string, string> tuple = new Tuple<string, string>(tf.Key, vn.CurrentTag);
                            double biTransition = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(tuple)).Value;
                            double biTrans = (double)(uniVal * tagger.BgramLambda1) + (biTransition * tagger.BgramLambda2);
                            double product = (double)vn.value * biTrans * tf.Value;

                            if (product >= vGoodNode.value)
                            {
                                vGoodNode = new ViterbiNode(product, tf.Key, NextNode: vn);
                            }
                        }
                    }

                    vList.Add(vGoodNode);
                }
            }
        }

        // Store the column best-first and, when a beam is set, keep only its first `beam` nodes.
        List<ViterbiNode> column = vList.OrderByDescending(x => x.value).ToList();
        if (beam != 0)
        {
            column = column.Take(beam).ToList();
        }

        this.ViterbiGraph.Add(column);
    }

    if (mode == "backward")
    {
        // Flatten every recorded chain into the final tag sequence, leaving out "." tags.
        // A local cursor walks each chain, so BackwardHistory itself is left untouched.
        this.PredictedTags = new List<string>();
        foreach (ViterbiNode head in BackwardHistory)
        {
            ViterbiNode cursor = head;
            do
            {
                if (cursor.CurrentTag != ".")
                {
                    this.PredictedTags.Add(cursor.CurrentTag);
                }

                cursor = cursor.NextNode;
            }
            while (cursor != null);
        }
    }
}