/// <summary>
/// Chooses one tag per word by filling a Viterbi table over the candidate tags of
/// consecutive words and then backtracking from the best-scoring final entry.
/// Scores are accumulated as sums of natural logarithms of probabilities.
/// </summary>
/// <param name="inputWordList">
/// Words to tag, in order. The keys of each word's <c>AssociatedTagDict</c> are used as that word's candidate tags.
/// </param>
/// <param name="isSuccessful">
/// <c>true</c> when a tag was selected for every word; <c>false</c> when the input is null or empty,
/// when some position has no candidate entry in the table, or when a back-pointer is missing during backtracking.
/// </param>
/// <param name="message">Empty on success; otherwise a short description of the failure.</param>
/// <returns>
/// One tag per input word, in input order. An empty list when the input is null/empty or the table
/// cannot be completed. If only a back-pointer is missing, the list still has one slot per word and
/// the unresolved slots keep the default <c>Tag</c> value.
/// </returns>
public List<Tag> GetTagList(List<Word> inputWordList, out bool isSuccessful, out string message)
{
    // Report unusable input through the out parameters instead of failing on inputWordList[0].
    if (inputWordList == null || inputWordList.Count == 0)
    {
        isSuccessful = false;
        message = "Input word list is null or empty.";
        return new List<Tag>();
    }

    var wordCount = inputWordList.Count;

    // Viterbi table: viterbiArr[i] holds one entry per reachable (Tag2, Tag3) pair ending at word i.
    // Pi is the best log-score for that pair; Tag1 is the tag two positions back that produced it.
    var viterbiArr = new List<ViterbiObject>[wordCount];

    #region initialization
    // First position: both preceding tags are the DumpTag placeholder.
    viterbiArr[0] = inputWordList[0].AssociatedTagDict.Keys
        .Select(tag3 => new ViterbiObject()
        {
            Tag1 = DumpTag,
            Tag2 = DumpTag,
            Tag3 = tag3,
            // Use logs to store probabilities so products become sums.
            Pi = Math.Log(GetProbabilitySequence3Tag(DumpTag, DumpTag, tag3))
                 + Math.Log(GetProbabilityWordGivenTag(inputWordList[0], tag3))
        })
        .ToList();

    // Second position: only one real preceding tag exists, so the 2-tag sequence probability is used.
    if (wordCount >= 2)
    {
        viterbiArr[1] = (from tag2 in inputWordList[0].AssociatedTagDict.Keys
                         from tag3 in inputWordList[1].AssociatedTagDict.Keys
                         let prevViterbi = viterbiArr[0].FirstOrDefault(m => m.Tag3 == tag2)
                         where prevViterbi != null
                         select new ViterbiObject()
                         {
                             Tag1 = DumpTag,
                             Tag2 = tag2,
                             Tag3 = tag3,
                             Pi = prevViterbi.Pi
                                  + Math.Log(GetProbabilitySequence2Tag(tag2, tag3))
                                  + Math.Log(GetProbabilityWordGivenTag(inputWordList[1], tag3))
                         }).ToList();
    }
    #endregion

    #region calculate Pi and generate viterbi table
    // Positions 3..n: for each (tag2, tag3) pair keep only the best-scoring tag1.
    for (var i = 2; i < wordCount; i++)
    {
        var previousColumn = viterbiArr[i - 1];
        var currentColumn = new List<ViterbiObject>();

        foreach (var tag3 in inputWordList[i].AssociatedTagDict.Keys)
        {
            foreach (var tag2 in inputWordList[i - 1].AssociatedTagDict.Keys)
            {
                ViterbiObject best = null;

                foreach (var tag1 in inputWordList[i - 2].AssociatedTagDict.Keys)
                {
                    var prevViterbi = previousColumn.FirstOrDefault(m => m.Tag3 == tag2 && m.Tag2 == tag1);
                    if (prevViterbi == null)
                    {
                        continue;
                    }

                    var currentPi = prevViterbi.Pi
                                    + Math.Log(GetProbabilitySequence3Tag(tag1, tag2, tag3))
                                    + Math.Log(GetProbabilityWordGivenTag(inputWordList[i], tag3));

                    // Strict comparison keeps the first tag1 on ties.
                    if (best == null || best.Pi < currentPi)
                    {
                        best = new ViterbiObject()
                        {
                            Tag1 = tag1,
                            Tag2 = tag2,
                            Tag3 = tag3,
                            Pi = currentPi
                        };
                    }
                }

                // Never store null: a (tag2, tag3) pair with no predecessor is simply unreachable.
                // A null entry would make the later lookups and MaxBy dereference null.
                if (best != null)
                {
                    currentColumn.Add(best);
                }
            }
        }

        viterbiArr[i] = currentColumn;
    }
    #endregion

    // An empty column means no tag sequence reaches that word (e.g. a word without candidate tags);
    // every later column is then empty too, so there is nothing to backtrack from.
    var firstEmptyIndex = Array.FindIndex(viterbiArr, column => column.Count == 0);
    if (firstEmptyIndex >= 0)
    {
        isSuccessful = false;
        message = "No candidate tag sequence reaches the word at index " + firstEmptyIndex + ".";
        return new List<Tag>();
    }

    #region backtrack to find sequence tag
    var tagArr = new Tag[wordCount];

    // The best final entry fixes the last three tags at once; look it up a single time.
    var bestFinal = viterbiArr[wordCount - 1].MaxBy(m => m.Pi);

    // (n)th tag
    tagArr[wordCount - 1] = bestFinal.Tag3;

    // (n-1)th tag
    if (wordCount > 1)
    {
        tagArr[wordCount - 2] = bestFinal.Tag2;
    }

    // (n-2)th tag
    if (wordCount > 2)
    {
        tagArr[wordCount - 3] = bestFinal.Tag1;
    }

    // Remaining tags: the entry at i + 2 matching the two tags already chosen points back to tag i.
    var missingBackPointer = false;
    for (var i = wordCount - 4; i >= 0; i--)
    {
        var viterbiObj = viterbiArr[i + 2].FirstOrDefault(m => m.Tag3 == tagArr[i + 2] && m.Tag2 == tagArr[i + 1]);
        if (viterbiObj != null)
        {
            tagArr[i] = viterbiObj.Tag1;
        }
        else
        {
            // Keep the slot at its default value as before, but no longer report success for it.
            missingBackPointer = true;
        }
    }
    #endregion

    isSuccessful = !missingBackPointer;
    message = missingBackPointer ? "Backtracking could not resolve a tag for every word." : "";
    return tagArr.ToList();
}