Example #1
0
        /// <summary>
        /// Chooses one tag per input word with a Viterbi-style dynamic program over tag trigrams.
        /// Scores are accumulated as sums of natural logarithms of the values returned by
        /// <c>GetProbabilitySequence3Tag</c>, <c>GetProbabilitySequence2Tag</c> and
        /// <c>GetProbabilityWordGivenTag</c>; the highest-scoring path is then traced backwards.
        /// </summary>
        /// <param name="inputWordList">
        /// Words to tag, in order. The candidate tags of each word are read from
        /// <c>AssociatedTagDict.Keys</c>.
        /// </param>
        /// <param name="isSuccessful">
        /// <c>true</c> when a tag was selected for every word; <c>false</c> when the input is
        /// null or empty, a word has no candidate tags, or the best path cannot be traced back.
        /// </param>
        /// <param name="message">Empty on success; otherwise a description of the failure.</param>
        /// <returns>
        /// A list with one tag per input word (same order) on success; an empty list on failure.
        /// </returns>
        public List<Tag> GetTagList(List<Word> inputWordList, out bool isSuccessful, out string message)
        {
            // Guard: nothing to tag. Without this check, indexing inputWordList[0] below would throw.
            if (inputWordList == null || inputWordList.Count == 0)
            {
                isSuccessful = false;
                message = "Input word list is null or empty.";
                return new List<Tag>();
            }

            var wordCount = inputWordList.Count;

            // Guard: a word without candidate tags yields an empty table column, which would make
            // the max-selection and the back-tracking below fail. Report it instead of crashing.
            for (var w = 0; w < wordCount; w++)
            {
                var word = inputWordList[w];
                if (word == null || word.AssociatedTagDict == null || !word.AssociatedTagDict.Keys.Any())
                {
                    isSuccessful = false;
                    message = "Word at index " + w + " has no candidate tags.";
                    return new List<Tag>();
                }
            }

            // Viterbi table: one column (list of entries) per word position.
            var viterbiArr = new List<ViterbiObject>[wordCount];

            #region initialization
            // First position: both preceding tag slots are filled with DumpTag
            // (presumably a start-of-sequence placeholder - confirm against its declaration).
            viterbiArr[0] = inputWordList[0].AssociatedTagDict.Keys.Select(tag3 => new ViterbiObject()
            {
                Tag1 = DumpTag,
                Tag2 = DumpTag,
                Tag3 = tag3,
                // Logs are summed instead of multiplying the raw values.
                Pi = Math.Log(GetProbabilitySequence3Tag(DumpTag, DumpTag, tag3))
                    + Math.Log(GetProbabilityWordGivenTag(inputWordList[0], tag3))
            }).ToList();

            // Second position: one entry per (tag of word 0, tag of word 1) pair.
            // NOTE(review): this step scores the transition with GetProbabilitySequence2Tag(tag2, tag3)
            // rather than GetProbabilitySequence3Tag(DumpTag, tag2, tag3); kept as-is - confirm it is intended.
            if (wordCount >= 2)
            {
                viterbiArr[1] = (from tag2 in inputWordList[0].AssociatedTagDict.Keys
                                 from tag3 in inputWordList[1].AssociatedTagDict.Keys
                                 let prevViterbi = viterbiArr[0].FirstOrDefault(m => m.Tag3 == tag2)
                                 where prevViterbi != null
                                 select new ViterbiObject()
                                 {
                                     Tag1 = DumpTag,
                                     Tag2 = tag2,
                                     Tag3 = tag3,
                                     Pi = prevViterbi.Pi
                                         + Math.Log(GetProbabilitySequence2Tag(tag2, tag3))
                                         + Math.Log(GetProbabilityWordGivenTag(inputWordList[1], tag3))
                                 }).ToList();
            }
            #endregion

            #region calculate Pi and generate viterbi table
            // Positions 3..n: for every (tag2, tag3) pair keep only the best-scoring tag1.
            for (var i = 2; i < wordCount; i++)
            {
                var viterbiList = new List<ViterbiObject>();

                foreach (var tag3 in inputWordList[i].AssociatedTagDict.Keys)
                {
                    // This term depends only on (word i, tag3), so it is computed once per tag3
                    // instead of once per (tag2, tag1) combination.
                    var emissionLog = Math.Log(GetProbabilityWordGivenTag(inputWordList[i], tag3));

                    foreach (var tag2 in inputWordList[i - 1].AssociatedTagDict.Keys)
                    {
                        ViterbiObject maxValue = null;
                        foreach (var tag1 in inputWordList[i - 2].AssociatedTagDict.Keys)
                        {
                            // Entry of the previous column that ends in (tag1, tag2).
                            var prevViterbi = viterbiArr[i - 1].FirstOrDefault(m => m.Tag3 == tag2 && m.Tag2 == tag1);
                            if (prevViterbi == null) continue;

                            // Same left-to-right summation order as before, so the value is unchanged.
                            var currentPi = prevViterbi.Pi
                                            + Math.Log(GetProbabilitySequence3Tag(tag1, tag2, tag3))
                                            + emissionLog;

                            // Strict '<' keeps the first candidate when scores tie.
                            if (maxValue == null || maxValue.Pi < currentPi)
                            {
                                maxValue = new ViterbiObject()
                                {
                                    Tag1 = tag1,
                                    Tag2 = tag2,
                                    Tag3 = tag3,
                                    Pi = currentPi
                                };
                            }
                        }

                        // Never store null: later lookups dereference every entry of the column.
                        if (maxValue != null)
                        {
                            viterbiList.Add(maxValue);
                        }
                    }
                }
                viterbiArr[i] = viterbiList;
            }

            #endregion

            #region backtrack to find sequence tag

            var tagArr = new Tag[wordCount];

            var lastColumn = viterbiArr[wordCount - 1];
            if (lastColumn.Count == 0)
            {
                isSuccessful = false;
                message = "No tag path reaches the last word.";
                return new List<Tag>();
            }

            // Best final entry, selected once; it fixes up to the last three tags.
            var bestLast = lastColumn.MaxBy(m => m.Pi);

            // (n)th tag
            tagArr[wordCount - 1] = bestLast.Tag3;

            // (n-1)th tag
            if (wordCount > 1)
            {
                tagArr[wordCount - 2] = bestLast.Tag2;
            }

            // (n-2)th tag
            if (wordCount > 2)
            {
                tagArr[wordCount - 3] = bestLast.Tag1;
            }

            // Remaining tags: the entry at position i + 2 that ends in the two tags already
            // chosen for positions i + 1 and i + 2 stores the best tag for position i in Tag1.
            for (var i = wordCount - 4; i >= 0; i--)
            {
                var nextTag = tagArr[i + 1];
                var nextNextTag = tagArr[i + 2];
                var viterbiObj =
                    viterbiArr[i + 2].FirstOrDefault(m => m.Tag3 == nextNextTag && m.Tag2 == nextTag);
                if (viterbiObj == null)
                {
                    // Previously the slot was silently left at its default value.
                    isSuccessful = false;
                    message = "Back-tracking failed: no table entry found to determine the tag at index " + i + ".";
                    return new List<Tag>();
                }

                tagArr[i] = viterbiObj.Tag1;
            }

            #endregion

            isSuccessful = true;
            message = "";
            return tagArr.ToList();
        }