Example #1
0
        /// <summary>
        /// Entry point of the Viterbi decoder. Resets the decoder state (<c>UnknownWords</c>,
        /// <c>ForwardHistory</c>, <c>BackwardHistory</c>, <c>PredictedTags</c>, <c>ViterbiGraph</c>),
        /// runs the forward and/or backward pass selected by <paramref name="mode"/>, merges both
        /// passes through <c>BiDirectionalModelTrace</c> when the mode is "f+b", and finally calls
        /// <c>EliminateAllEndOfSentenceTags</c> on <paramref name="testWords"/>.
        /// </summary>
        /// <param name="tagger">Trained part-of-speech model handed to both passes.</param>
        /// <param name="testWords">Tokens to decode; passed by reference to the end-of-sentence cleanup at the end.</param>
        /// <param name="modelForward">Transition model name given to the forward pass (the passes test it against "trigram").</param>
        /// <param name="modelBackward">Transition model name given to the backward pass (the passes test it against "trigram").</param>
        /// <param name="mode">"forward", "backward" or "f+b" (both passes). Any other value runs no pass at all.</param>
        /// <param name="beam">Beam width forwarded to both passes; 0 leaves the candidate lists unpruned.</param>
        public void ViterbiDecoding(PartOfSpeechModel tagger, List <Tokenizer.WordTag> testWords, string modelForward = "bigram", string modelBackward = "bigram", string mode = "forward", int beam = 0)
        {
            // Start every decoding run from a clean slate.
            UnknownWords    = new HashSet <string>();
            ForwardHistory  = new List <ViterbiNode>();
            BackwardHistory = new List <ViterbiNode>();
            PredictedTags   = new List <string>();
            ViterbiGraph    = new List <List <ViterbiNode> >();

            // "f+b" selects both directions, so it implies each single-direction pass.
            bool bothDirections = mode.Equals("f+b");
            bool wantsForward   = bothDirections || mode.Equals("forward");
            bool wantsBackward  = bothDirections || mode.Equals("backward");

            if (wantsForward)
            {
                ForwardAlgorithm(tagger, testWords, modelForward, beam);
            }

            if (wantsBackward)
            {
                BackwardAlgorithm(tagger, testWords, modelBackward, mode, beam);
            }

            if (bothDirections)
            {
                // Only meaningful once both passes have filled their histories.
                BiDirectionalModelTrace();
            }

            TextPreprocessing.Cleaning.EliminateAllEndOfSentenceTags(ref testWords);
        }
Example #2
0
        /// <summary>
        /// Forward (left-to-right) pass of the Viterbi decoder. For every token that is not tagged "."
        /// it appends one layer of candidate <c>ViterbiNode</c>s to <c>ViterbiGraph</c>, each linked to
        /// its best predecessor through <c>PrevNode</c>. Every token tagged "." triggers
        /// <c>Backtrace(method: "forward")</c> and makes the next word start a new sentence.
        /// </summary>
        /// <param name="tagger">Trained model supplying emission, unigram, bigram and trigram probabilities and the interpolation lambdas.</param>
        /// <param name="testWords">Tokens to decode; a token whose <c>tag</c> is "." marks a sentence boundary.</param>
        /// <param name="model">"trigram" switches to trigram transitions from the third word of a sentence onward; any other value uses bigram transitions only.</param>
        /// <param name="beam">Maximum number of nodes kept per layer; values of 0 or less disable pruning.</param>
        private void ForwardAlgorithm(PartOfSpeechModel tagger, List <Tokenizer.WordTag> testWords, string model, int beam)
        {
            bool startPoint = true; // true while the next word opens a new sentence
            int  triPoz     = -1;   // zero-based position of the current word inside its sentence

            // The sorted transition tables do not depend on the word being decoded, so they are built
            // once per call instead of once per candidate node. The descending order is kept because
            // the ">=" comparisons below resolve ties in favour of the entry visited last.
            var orderedTransitions    = tagger.BigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList();
            var orderedTransitionsTri = model == "trigram"
                ? tagger.TrigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList()
                : null; // never dereferenced unless model == "trigram"

            for (int i = 0; i < testWords.Count; i++) // starting from left (0 index)
            {
                triPoz++;
                if (testWords[i].tag == ".")      // we can verify word instead of tag here
                {
                    Backtrace(method: "forward"); // sentence finished: let Backtrace consume the graph built so far
                    startPoint = true;
                    continue;
                }

                string word = testWords[i].word;

                // Exact (case-sensitive) lookup first, then fall back to the lower-cased lexicon.
                PartOfSpeechModel.EmissionProbabilisticModel foundWord = tagger.WordCapitalizedTagsEmissionProbabilities.Find(x => x.Word == word);
                if (foundWord == null)
                {
                    string loweredWord = word.ToLower(); // computed once, not once per lexicon entry
                    foundWord = tagger.WordTagsEmissionProbabilities.Find(x => x.Word == loweredWord);
                }

                // An entry whose only tag is "." offers no usable emission, so treat the word as unknown.
                if (foundWord != null && foundWord.TagFreq.Count == 1 && foundWord.TagFreq.ContainsKey("."))
                {
                    foundWord = null;
                }
                if (foundWord == null)
                {
                    UnknownWords.Add(word);
                }

                List <ViterbiNode> vList = new List <ViterbiNode>();

                if (startPoint) // first node (start)
                {
                    triPoz = 0;

                    if (foundWord == null)
                    {
                        // Unknown sentence-initial word: one candidate per tag that can follow ".".
                        foreach (var item in orderedTransitions)
                        {
                            if (item.Key.Item1.Equals(".") && item.Key.Item2 != ".")
                            {
                                double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item2)).Value;

                                double biTrans = (double)(uniVal * tagger.BgramLambda1) + (item.Value * tagger.BgramLambda2);

                                double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item2);

                                double product = biTrans * unknownProcent;
                                vList.Add(new ViterbiNode(product, item.Key.Item2));
                            }
                        }
                    }
                    else
                    {
                        // Known sentence-initial word: one candidate per tag seen with this word.
                        foreach (var wt in foundWord.TagFreq)
                        {
                            if (wt.Key == ".")
                            {
                                continue;
                            }
                            double emissionFreqValue     = wt.Value;                                                                            // eg. Jane -> 0.1111 (NN)
                            Tuple <string, string> tuple = new Tuple <string, string>(".", wt.Key);
                            double biTransition          = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(tuple)).Value; // eg. NN->VB - 0.25

                            double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(wt.Key)).Value;

                            double biTrans = (double)(uniVal * tagger.BgramLambda1) + (biTransition * tagger.BgramLambda2);

                            double product = (double)emissionFreqValue * biTrans;
                            vList.Add(new ViterbiNode(product, wt.Key));
                        }
                    }
                    startPoint = false;
                }
                else
                {
                    List <ViterbiNode> previousLayer = this.ViterbiGraph[this.ViterbiGraph.Count - 1];
                    bool useTrigram = model == "trigram" && triPoz >= 2; // trigram needs two preceding tags

                    if (foundWord == null)
                    {
                        // Unknown word: for each previous node keep the single best continuation.
                        foreach (ViterbiNode elem in previousLayer)
                        {
                            ViterbiNode vGoodNode = new ViterbiNode(0.0d, "NULL");

                            if (useTrigram)
                            {
                                if (elem.PrevNode == null)
                                {
                                    continue; // no tag two positions back, nothing to condition on
                                }
                                ViterbiNode elem2 = elem.PrevNode;

                                foreach (var item in orderedTransitionsTri)
                                {
                                    if (item.Key.Item1.Equals(elem2.CurrentTag) && item.Key.Item2.Equals(elem.CurrentTag) && item.Key.Item3 != ".")
                                    {
                                        Tuple <string, string> biTuple = new Tuple <string, string>(elem.CurrentTag, item.Key.Item3);
                                        double biVal = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(biTuple)).Value;

                                        double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item3)).Value;

                                        double triTransition = (double)(tagger.TgramLambda3 * item.Value) + (tagger.TgramLambda2 * biVal) + (tagger.TgramLambda1 * uniVal);

                                        double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item3);

                                        double product = (double)elem.value * triTransition * unknownProcent;
                                        if (product >= vGoodNode.value)
                                        {
                                            vGoodNode = new ViterbiNode(product, item.Key.Item3, PrevNode: elem);
                                        }
                                    }
                                }
                            }
                            else
                            {
                                foreach (var item in orderedTransitions)
                                {
                                    if (item.Key.Item1.Equals(elem.CurrentTag) && item.Key.Item2 != ".")
                                    {
                                        double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item2)).Value;

                                        double biTrans = (double)(uniVal * tagger.BgramLambda1) + (item.Value * tagger.BgramLambda2);

                                        double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item2);

                                        double product = (double)elem.value * biTrans * unknownProcent;
                                        if (product >= vGoodNode.value)
                                        {
                                            vGoodNode = new ViterbiNode(product, item.Key.Item2, PrevNode: elem);
                                        }
                                    }
                                }
                            }
                            vList.Add(vGoodNode);
                        }
                    }
                    else
                    {
                        // Known word: for each of its tags keep the best-scoring predecessor.
                        foreach (var tf in foundWord.TagFreq)
                        {
                            if (tf.Key == ".")
                            {
                                continue;
                            }
                            ViterbiNode vGoodNode = new ViterbiNode(0.0d, "NULL");
                            foreach (ViterbiNode vn in previousLayer)
                            {
                                if (useTrigram)
                                {
                                    if (vn.PrevNode == null)
                                    {
                                        continue;
                                    }
                                    Tuple <string, string, string> triTuple = new Tuple <string, string, string>(vn.PrevNode.CurrentTag, vn.CurrentTag, tf.Key);
                                    double triVal = tagger.TrigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(triTuple)).Value;

                                    Tuple <string, string> biTuple = new Tuple <string, string>(vn.CurrentTag, tf.Key);
                                    double biVal = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(biTuple)).Value;

                                    double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(tf.Key)).Value;

                                    double triTransition = (double)(tagger.TgramLambda3 * triVal) + (tagger.TgramLambda2 * biVal) + (tagger.TgramLambda1 * uniVal);

                                    double product = (double)vn.value * triTransition * tf.Value;
                                    if (product >= vGoodNode.value)
                                    {
                                        vGoodNode = new ViterbiNode(product, tf.Key, PrevNode: vn);
                                    }
                                }
                                else
                                {
                                    Tuple <string, string> tuple = new Tuple <string, string>(vn.CurrentTag, tf.Key);
                                    double biTransition          = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(tuple)).Value; // eg. NN->VB - 0.25

                                    double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(tf.Key)).Value;

                                    double biTrans = (double)(uniVal * tagger.BgramLambda1) + (biTransition * tagger.BgramLambda2);

                                    double product = (double)vn.value * biTrans * tf.Value;
                                    if (product >= vGoodNode.value)
                                    {
                                        vGoodNode = new ViterbiNode(product, tf.Key, PrevNode: vn);
                                    }
                                }
                            }
                            vList.Add(vGoodNode);
                        }
                    }
                }

                // Store the layer best-first; with a positive beam only the top candidates survive.
                // A non-positive beam means "no pruning" (a negative count would make Take return nothing).
                vList = vList.OrderByDescending(x => x.value).ToList();
                if (beam > 0)
                {
                    vList = vList.Take(beam).ToList();
                }
                this.ViterbiGraph.Add(vList);
            }
        }
Example #3
0
        /// <summary>
        /// Backward (right-to-left) pass of the Viterbi decoder. Walks the tokens from the one before
        /// the last down to index 0, appending one layer of candidate <c>ViterbiNode</c>s to
        /// <c>ViterbiGraph</c> per token that is not tagged ".", each linked to its best successor
        /// through <c>NextNode</c>. <c>Backtrace(method: "backward")</c> is called at every token tagged
        /// "." and once more after index 0 has been processed.
        /// </summary>
        /// <param name="tagger">Trained model supplying emission, unigram, bigram and trigram probabilities and the interpolation lambdas.</param>
        /// <param name="testWords">Tokens to decode; the loop starts at <c>Count - 2</c>, i.e. it assumes the last token is a sentence terminator.</param>
        /// <param name="model">"trigram" switches to trigram transitions from the third word (counted from the sentence end) onward; any other value uses bigram transitions only.</param>
        /// <param name="mode">When equal to "backward", <c>PredictedTags</c> is rebuilt from <c>BackwardHistory</c> after the pass.</param>
        /// <param name="beam">Maximum number of nodes kept per layer; values of 0 or less disable pruning.</param>
        private void BackwardAlgorithm(PartOfSpeechModel tagger, List <Tokenizer.WordTag> testWords, string model, string mode, int beam)
        {
            bool startPoint = true; // true while the next word opens a new sentence (seen from the right)
            int  triPoz     = -1;   // zero-based position of the current word, counted from the sentence end

            // The sorted transition tables do not depend on the word being decoded, so they are built
            // once per call instead of once per candidate node. The descending order is kept because
            // the ">=" comparisons below resolve ties in favour of the entry visited last.
            var orderedTransitions    = tagger.BigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList();
            var orderedTransitionsTri = model == "trigram"
                ? tagger.TrigramTransitionProbabilities.OrderByDescending(x => x.Value).ToList()
                : null; // never dereferenced unless model == "trigram"

            for (int i = testWords.Count - 2; i >= -1; i--) // count - 2 is to start from the first word != "."
            {
                triPoz++;

                // Index -1 is a virtual boundary before the first token; it closes the left-most
                // sentence exactly like an explicit "." token does.
                if (i == -1 || testWords[i].tag == ".")
                {
                    Backtrace(method: "backward");
                    startPoint = true;
                    continue;
                }

                string word = testWords[i].word;

                // Exact (case-sensitive) lookup first, then fall back to the lower-cased lexicon.
                PartOfSpeechModel.EmissionProbabilisticModel foundWord = tagger.WordCapitalizedTagsEmissionProbabilities.Find(x => x.Word == word);
                if (foundWord == null)
                {
                    string loweredWord = word.ToLower(); // computed once, not once per lexicon entry
                    foundWord = tagger.WordTagsEmissionProbabilities.Find(x => x.Word == loweredWord);
                }

                // An entry whose only tag is "." offers no usable emission, so treat the word as unknown.
                if (foundWord != null && foundWord.TagFreq.Count == 1 && foundWord.TagFreq.ContainsKey("."))
                {
                    foundWord = null;
                }
                if (foundWord == null)
                {
                    UnknownWords.Add(word);
                }

                List <ViterbiNode> vList = new List <ViterbiNode>();

                if (startPoint)
                {
                    triPoz = 0;

                    if (foundWord == null)
                    {
                        // Unknown sentence-final word: one candidate per tag that can precede ".".
                        foreach (var item in orderedTransitions)
                        {
                            if (item.Key.Item2.Equals(".") && item.Key.Item1 != ".")
                            {
                                double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item1)).Value;

                                double biTrans = (double)(uniVal * tagger.BgramLambda1) + (item.Value * tagger.BgramLambda2);

                                double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item1);

                                double product = biTrans * unknownProcent;
                                vList.Add(new ViterbiNode(product, item.Key.Item1));
                            }
                        }
                    }
                    else
                    {
                        // Known sentence-final word: one candidate per tag seen with this word.
                        foreach (var wt in foundWord.TagFreq)
                        {
                            if (wt.Key == ".")
                            {
                                continue;
                            }
                            double emissionFreqValue     = wt.Value;                                                                            // eg. Jane -> 0.1111 (NN)
                            Tuple <string, string> tuple = new Tuple <string, string>(wt.Key, ".");
                            double biTransition          = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(tuple)).Value; // eg. NN->VB - 0.25

                            double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(wt.Key)).Value;

                            double biTrans = (double)(uniVal * tagger.BgramLambda1) + (biTransition * tagger.BgramLambda2);

                            double product = (double)emissionFreqValue * biTrans;
                            vList.Add(new ViterbiNode(product, wt.Key));
                        }
                    }
                    startPoint = false;
                }
                else
                {
                    List <ViterbiNode> previousLayer = this.ViterbiGraph[this.ViterbiGraph.Count - 1];
                    bool useTrigram = model == "trigram" && triPoz >= 2; // trigram needs two following tags

                    if (foundWord == null)
                    {
                        // Unknown word: for each already-decoded successor keep the single best predecessor tag.
                        foreach (ViterbiNode elem in previousLayer)
                        {
                            ViterbiNode vGoodNode = new ViterbiNode(0.0d, "NULL");

                            if (useTrigram)
                            {
                                if (elem.NextNode == null)
                                {
                                    continue; // no tag two positions ahead, nothing to condition on
                                }
                                ViterbiNode elem2 = elem.NextNode;

                                foreach (var item in orderedTransitionsTri)
                                {
                                    if (item.Key.Item3.Equals(elem2.CurrentTag) && item.Key.Item2.Equals(elem.CurrentTag) && item.Key.Item1 != ".")
                                    {
                                        Tuple <string, string> biTuple = new Tuple <string, string>(item.Key.Item1, elem.CurrentTag);
                                        double biVal = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(biTuple)).Value;

                                        double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item1)).Value;

                                        double triTransition = (double)(tagger.TgramLambda3 * item.Value) + (tagger.TgramLambda2 * biVal) + (tagger.TgramLambda1 * uniVal);

                                        double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item1);

                                        double product = (double)elem.value * triTransition * unknownProcent;
                                        if (product >= vGoodNode.value)
                                        {
                                            vGoodNode = new ViterbiNode(product, item.Key.Item1, NextNode: elem);
                                        }
                                    }
                                }
                            }
                            else
                            {
                                foreach (var item in orderedTransitions)
                                {
                                    if (item.Key.Item2.Equals(elem.CurrentTag) && item.Key.Item1 != ".")
                                    {
                                        double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(item.Key.Item1)).Value;

                                        double biTrans = (double)(uniVal * tagger.BgramLambda1) + (item.Value * tagger.BgramLambda2);

                                        double unknownProcent = tagger.GetValueWeightForUnknownWord(word, item.Key.Item1);

                                        double product = (double)elem.value * biTrans * unknownProcent;
                                        if (product >= vGoodNode.value)
                                        {
                                            vGoodNode = new ViterbiNode(product, item.Key.Item1, NextNode: elem);
                                        }
                                    }
                                }
                            }
                            vList.Add(vGoodNode);
                        }
                    }
                    else
                    {
                        // Known word: for each of its tags keep the best-scoring successor.
                        foreach (var tf in foundWord.TagFreq)
                        {
                            if (tf.Key == ".")
                            {
                                continue;
                            }
                            ViterbiNode vGoodNode = new ViterbiNode(0.0d, "NULL");
                            foreach (ViterbiNode vn in previousLayer)
                            {
                                if (useTrigram)
                                {
                                    if (vn.NextNode == null)
                                    {
                                        continue;
                                    }
                                    Tuple <string, string, string> triTuple = new Tuple <string, string, string>(tf.Key, vn.CurrentTag, vn.NextNode.CurrentTag);
                                    double triVal = tagger.TrigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(triTuple)).Value;

                                    Tuple <string, string> biTuple = new Tuple <string, string>(tf.Key, vn.CurrentTag);
                                    double biVal = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(biTuple)).Value;

                                    double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(tf.Key)).Value;

                                    double triTransition = (double)(tagger.TgramLambda3 * triVal) + (tagger.TgramLambda2 * biVal) + (tagger.TgramLambda1 * uniVal);

                                    double product = (double)vn.value * triTransition * tf.Value;
                                    if (product >= vGoodNode.value)
                                    {
                                        vGoodNode = new ViterbiNode(product, tf.Key, NextNode: vn);
                                    }
                                }
                                else
                                {
                                    Tuple <string, string> tuple = new Tuple <string, string>(tf.Key, vn.CurrentTag);
                                    double biTransition          = tagger.BigramTransitionProbabilities.FirstOrDefault(x => x.Key.Equals(tuple)).Value; // eg. NN->VB - 0.25

                                    double uniVal = tagger.UnigramProbabilities.FirstOrDefault(x => x.Key.Equals(tf.Key)).Value;

                                    double biTrans = (double)(uniVal * tagger.BgramLambda1) + (biTransition * tagger.BgramLambda2);

                                    double product = (double)vn.value * biTrans * tf.Value;
                                    if (product >= vGoodNode.value)
                                    {
                                        vGoodNode = new ViterbiNode(product, tf.Key, NextNode: vn);
                                    }
                                }
                            }
                            vList.Add(vGoodNode);
                        }
                    }
                }

                // Store the layer best-first; with a positive beam only the top candidates survive.
                // A non-positive beam means "no pruning" (a negative count would make Take return nothing).
                vList = vList.OrderByDescending(x => x.value).ToList();
                if (beam > 0)
                {
                    vList = vList.Take(beam).ToList();
                }
                this.ViterbiGraph.Add(vList);
            }

            if (mode == "backward")
            {
                // Backward-only decoding: flatten every stored chain into the tag sequence by
                // following NextNode links, leaving out sentence terminators.
                this.PredictedTags = new List <string>();
                foreach (ViterbiNode head in BackwardHistory)
                {
                    for (ViterbiNode cursor = head; cursor != null; cursor = cursor.NextNode)
                    {
                        if (cursor.CurrentTag != ".")
                        {
                            this.PredictedTags.Add(cursor.CurrentTag);
                        }
                    }
                }
            }
        }