예제 #1
0
파일: Word.cs 프로젝트: bongusagi/Tagger
 /// <summary>
 /// Calculates P(tag|word): the count stored for <paramref name="tag"/> in
 /// <c>AssociatedTagDict</c> divided by <c>WordCount</c>.
 /// </summary>
 /// <param name="tag">Tag whose relative frequency for this word is requested.</param>
 /// <returns>
 /// The ratio described above, or 0 when <c>AssociatedTagDict</c> has no entry for the tag.
 /// </returns>
 public double GetProbabilityTagGivenWord(Tag tag)
 {
     // No entry for this tag: there is no evidence, so the estimate is zero.
     if (!AssociatedTagDict.ContainsKey(tag))
     {
         return 0;
     }

     // Cast before dividing so the ratio is computed in floating point.
     var occurrencesWithTag = (double)AssociatedTagDict[tag];
     return occurrencesWithTag / WordCount;
 }
예제 #2
0
파일: Word.cs 프로젝트: bongusagi/Tagger
 /// <summary>
 /// Records one more occurrence of this word under the given tag.
 /// </summary>
 /// <param name="tag">Tag observed together with this word.</param>
 public void AddTag(Tag tag)
 {
     if (!AssociatedTagDict.ContainsKey(tag))
     {
         // First time this tag is seen with the word: start its counter at one.
         AssociatedTagDict.Add(tag, 1);
     }
     else
     {
         // Tag already known for this word: bump its counter.
         AssociatedTagDict[tag]++;
     }

     // Whichever branch ran, the call counts as one occurrence of the word.
     WordCount++;
 }
예제 #3
0
파일: Tagger.cs 프로젝트: bongusagi/Tagger
        /// <summary>
        /// Creates a tagger with empty dictionaries and training lists, equal
        /// interpolation weights, and the placeholder "Dump" tag, then calls
        /// <c>MakeTagDescDictionary</c>.
        /// </summary>
        public Tagger()
        {
            // Lookup tables, all starting empty.
            WordDict = new Dictionary<string, Word>();
            TagDict = new Dictionary<string, Tag>();
            TagDescDict = new Dictionary<string, string>();
            Seq2TagDict = new Dictionary<string, Sequence2Tag>();
            Seq3TagDict = new Dictionary<string, Sequence3Tag>();

            // Training data, also empty until sentences are loaded.
            TrainingSentenceList = new List<string>();
            TrainingTaggedSentenceList = new List<string>();

            // Default interpolation weights: one third each.
            Lamda1 = 1.0 / 3;
            Lamda2 = 1.0 / 3;
            Lamda3 = 1.0 / 3;

            // Placeholder tag (used by GetTagList for positions before the first word).
            DumpTag = new Tag { Value = "Dump" };

            // Runs last so every dictionary above already exists when it is called.
            MakeTagDescDictionary();
        }
예제 #4
0
파일: Tagger.cs 프로젝트: bongusagi/Tagger
 /// <summary>
 /// Calculates P(word|tag) by delegating to the tag.
 /// </summary>
 /// <param name="word">Word whose probability under the tag is requested.</param>
 /// <param name="tag">Tag that performs the calculation.</param>
 /// <returns>The value returned by <c>tag.GetProbabilityWordGivenTag(word)</c>.</returns>
 private double GetProbabilityWordGivenTag(Word word, Tag tag)
 {
     var probability = tag.GetProbabilityWordGivenTag(word);
     return probability;
 }
예제 #5
0
파일: Tagger.cs 프로젝트: bongusagi/Tagger
 /// <summary>
 /// Calculates P(tag|word) by delegating to the word.
 /// </summary>
 /// <param name="word">Word that performs the calculation.</param>
 /// <param name="tag">Tag whose probability for the word is requested.</param>
 /// <returns>The value returned by <c>word.GetProbabilityTagGivenWord(tag)</c>.</returns>
 private double GetProbabilityTagGivenWord(Word word, Tag tag)
 {
     var probability = word.GetProbabilityTagGivenWord(tag);
     return probability;
 }
예제 #6
0
파일: Tagger.cs 프로젝트: bongusagi/Tagger
 /// <summary>
 /// Counts one occurrence of the tag trigram (tag1, tag2, tag3) in <c>Seq3TagDict</c>.
 /// </summary>
 /// <param name="tag1">First tag of the trigram.</param>
 /// <param name="tag2">Second tag of the trigram.</param>
 /// <param name="tag3">Third tag of the trigram.</param>
 private void AddSequence3Tag(Tag tag1, Tag tag2, Tag tag3)
 {
     // The dictionary key is the three tag values joined by the separator.
     var trigramKey = tag1.Value + SeparteStr + tag2.Value + SeparteStr + tag3.Value;

     if (!Seq3TagDict.ContainsKey(trigramKey))
     {
         // First sighting of this trigram: register it with a count of one.
         Seq3TagDict.Add(trigramKey, new Sequence3Tag
         {
             Tag1 = tag1,
             Tag2 = tag2,
             Tag3 = tag3,
             Count = 1
         });
         return;
     }

     // Trigram already registered: just increase its counter.
     Seq3TagDict[trigramKey].Count++;
 }
예제 #7
0
파일: Tagger.cs 프로젝트: bongusagi/Tagger
        /// <summary>
        /// Finds the highest-scoring tag sequence for a sentence with a trigram Viterbi search.
        /// Candidate tags for each word are the keys of its <c>AssociatedTagDict</c>;
        /// scores are accumulated as sums of logarithms.
        /// </summary>
        /// <param name="inputWordList">Words of the sentence, in order.</param>
        /// <param name="isSuccessful">
        /// <c>true</c> when a tag sequence was produced; <c>false</c> when the input is
        /// null or empty, or when no tag sequence could be built.
        /// </param>
        /// <param name="message">Empty on success; otherwise a short description of the failure.</param>
        /// <returns>One tag per input word on success; an empty list on failure.</returns>
        public List<Tag> GetTagList(List<Word> inputWordList, out bool isSuccessful, out string message)
        {
            // Nothing to tag: report it instead of indexing into an empty table.
            if (inputWordList == null || inputWordList.Count == 0)
            {
                isSuccessful = false;
                message = "Input word list is empty.";
                return new List<Tag>();
            }

            var wordCount = inputWordList.Count;

            // Viterbi table: one column per word, each cell describing the tags of the
            // current word (Tag3) and of the two preceding positions (Tag2, Tag1).
            var viterbiArr = new List<ViterbiObject>[wordCount];

            #region initialization
            // First word: both preceding positions are the placeholder tag.
            viterbiArr[0] = inputWordList[0].AssociatedTagDict.Keys.Select(tag3 => new ViterbiObject()
            {
                Tag1 = DumpTag,
                Tag2 = DumpTag,
                Tag3 = tag3,
                Pi = Math.Log(GetProbabilitySequence3Tag(DumpTag, DumpTag, tag3))
                    + Math.Log(GetProbabilityWordGivenTag(inputWordList[0], tag3)) // log space
            }).ToList();

            // Second word: only one real predecessor exists, so the two-tag estimate is used.
            if (wordCount >= 2)
            {
                viterbiArr[1] = (from tag2 in inputWordList[0].AssociatedTagDict.Keys
                                 from tag3 in inputWordList[1].AssociatedTagDict.Keys
                                 let prevViterbi = viterbiArr[0].FirstOrDefault(m => m.Tag3 == tag2)
                                 where prevViterbi != null
                                 select new ViterbiObject()
                                 {
                                     Tag1 = DumpTag,
                                     Tag2 = tag2,
                                     Tag3 = tag3,
                                     Pi = prevViterbi.Pi
                                         + Math.Log(GetProbabilitySequence2Tag(tag2, tag3))
                                         + Math.Log(GetProbabilityWordGivenTag(inputWordList[1], tag3))
                                 }).ToList();
            }
            #endregion

            #region calculate Pi and generate viterbi table
            // Third word onwards: for every (tag2, tag3) pair keep the best-scoring tag1.
            for (var i = 2; i < wordCount; i++)
            {
                var viterbiList = new List<ViterbiObject>();

                foreach (var tag3 in inputWordList[i].AssociatedTagDict.Keys)
                {
                    // The emission term depends only on the current word and tag3,
                    // so it is computed once per tag3 rather than once per (tag1, tag2).
                    var emissionLog = Math.Log(GetProbabilityWordGivenTag(inputWordList[i], tag3));

                    foreach (var tag2 in inputWordList[i - 1].AssociatedTagDict.Keys)
                    {
                        ViterbiObject maxValue = null;
                        foreach (var tag1 in inputWordList[i - 2].AssociatedTagDict.Keys)
                        {
                            var prevViterbi = viterbiArr[i - 1].FirstOrDefault(m => m.Tag3 == tag2 && m.Tag2 == tag1);
                            if (prevViterbi == null) continue;

                            var currentPi = prevViterbi.Pi
                                            + Math.Log(GetProbabilitySequence3Tag(tag1, tag2, tag3))
                                            + emissionLog;
                            if (maxValue == null || maxValue.Pi < currentPi)
                            {
                                maxValue = new ViterbiObject()
                                {
                                    Tag1 = tag1,
                                    Tag2 = tag2,
                                    Tag3 = tag3,
                                    Pi = currentPi
                                };
                            }
                        }

                        // Skip pairs that have no predecessor: a null cell would make the
                        // lambdas that scan this column dereference null later on.
                        if (maxValue != null)
                        {
                            viterbiList.Add(maxValue);
                        }
                    }
                }
                viterbiArr[i] = viterbiList;
            }
            #endregion

            #region backtrack to find sequence tag
            // The best cell of the last column fixes the last (up to) three tags.
            // It is looked up once; an empty column means no tag sequence exists.
            var lastColumn = viterbiArr[wordCount - 1];
            var best = lastColumn.Count > 0 ? lastColumn.MaxBy(m => m.Pi) : null;
            if (best == null)
            {
                isSuccessful = false;
                message = "No tag sequence could be found for the input words.";
                return new List<Tag>();
            }

            var tagArr = new Tag[wordCount];
            tagArr[wordCount - 1] = best.Tag3;
            if (wordCount > 1)
            {
                tagArr[wordCount - 2] = best.Tag2;
            }
            if (wordCount > 2)
            {
                tagArr[wordCount - 3] = best.Tag1;
            }

            // Remaining tags: the cell in column i + 2 whose (Tag2, Tag3) equals the two
            // tags already chosen to the right holds, in Tag1, the tag for position i.
            for (var i = wordCount - 4; i >= 0; i--)
            {
                var viterbiObj =
                    viterbiArr[i + 2].FirstOrDefault(m => m.Tag3 == tagArr[i + 2] && m.Tag2 == tagArr[i + 1]);
                if (viterbiObj != null)
                {
                    tagArr[i] = viterbiObj.Tag1;
                }
            }
            #endregion

            isSuccessful = true;
            message = "";
            return tagArr.ToList();
        }
예제 #8
0
파일: Tagger.cs 프로젝트: bongusagi/Tagger
        /// <summary>
        /// Calculates the interpolated estimate of P(tag3|tag1,tag2).
        /// Equation: P(tag3|tag1,tag2)
        /// = lamda1 * (Count(tag1,tag2,tag3)/Count(tag1,tag2))
        /// + lamda2 * (Count(tag2,tag3)/Count(tag2))
        /// + lamda3 * (Count(tag3)/Count(total words)),
        /// divided by (lamda1 + lamda2 + lamda3).
        /// When tag2 is <c>DumpTag</c>, Count(tag2) is taken as the number of training sentences.
        /// A term whose counts are missing or whose denominator is zero contributes 0.
        /// </summary>
        /// <param name="tag1">Tag two positions back; may be null, which drops the first term.</param>
        /// <param name="tag2">Previous tag; may be null, which drops the first and second terms.</param>
        /// <param name="tag3">Tag being predicted.</param>
        /// <returns>The interpolated estimate, or 0 when <paramref name="tag3"/> is null.</returns>
        public double GetProbabilitySequence3Tag(Tag tag1, Tag tag2, Tag tag3)
        {
            if (tag3 == null) return 0;

            double firstParam = 0;
            double secondParam = 0;

            if (tag2 != null)
            {
                // First term: trigram count over the count of its two-tag context.
                if (tag1 != null)
                {
                    var key12 = tag1.Value + SeparteStr + tag2.Value;
                    var key123 = key12 + SeparteStr + tag3.Value;

                    Sequence2Tag context;
                    Sequence3Tag trigram;
                    if (Seq2TagDict.TryGetValue(key12, out context)
                        && Seq3TagDict.TryGetValue(key123, out trigram)
                        && context.Count > 0)
                    {
                        firstParam = Lamda1 * trigram.Count / context.Count;
                    }
                }

                // Second term: bigram count over the count of the previous tag.
                Sequence2Tag bigram;
                if (Seq2TagDict.TryGetValue(tag2.Value + SeparteStr + tag3.Value, out bigram))
                {
                    double contextCount;
                    if (tag2 == DumpTag)
                    {
                        // The placeholder tag is normalised by the number of training sentences.
                        contextCount = TrainingSentenceList.Count;
                    }
                    else
                    {
                        contextCount = TagDict[tag2.Value].TagCount;
                    }

                    // A zero denominator would yield Infinity/NaN; leave the term at 0 instead.
                    if (contextCount > 0)
                    {
                        secondParam = Lamda2 * bigram.Count / contextCount;
                    }
                }
            }

            // Third term: count of tag3 over the total word count (0 while WordCount is zero).
            double thirdParam = 0;
            if (WordCount > 0)
            {
                thirdParam = Lamda3 * tag3.TagCount / WordCount;
            }

            return (firstParam + secondParam + thirdParam) / (Lamda1 + Lamda2 + Lamda3);
        }
예제 #9
0
파일: Tagger.cs 프로젝트: bongusagi/Tagger
        /// <summary>
        /// Calculates the interpolated estimate of P(tag2|tag1).
        /// Equation: P(tag2|tag1)
        /// = lamda2 * (Count(tag1,tag2)/Count(tag1))
        /// + lamda3 * (Count(tag2)/Count(total words)),
        /// divided by (lamda2 + lamda3).
        /// When tag1 is <c>DumpTag</c>, Count(tag1) is taken as the number of training
        /// sentences, matching GetProbabilitySequence3Tag.
        /// A term whose counts are missing or whose denominator is zero contributes 0.
        /// </summary>
        /// <param name="tag1">Previous tag; may be null, which drops the first term.</param>
        /// <param name="tag2">Tag being predicted.</param>
        /// <returns>The interpolated estimate, or 0 when <paramref name="tag2"/> is null.</returns>
        public double GetProbabilitySequence2Tag(Tag tag1, Tag tag2)
        {
            if (tag2 == null) return 0;

            // First term: bigram count over the count of the previous tag.
            double firstParam = 0;
            if (tag1 != null)
            {
                Sequence2Tag bigram;
                if (Seq2TagDict.TryGetValue(tag1.Value + SeparteStr + tag2.Value, out bigram))
                {
                    double contextCount;
                    if (tag1 == DumpTag)
                    {
                        // Same rule as the trigram estimate: the placeholder tag is
                        // normalised by the number of training sentences, not via TagDict.
                        contextCount = TrainingSentenceList.Count;
                    }
                    else
                    {
                        contextCount = TagDict[tag1.Value].TagCount;
                    }

                    // A zero denominator would yield Infinity/NaN; leave the term at 0 instead.
                    if (contextCount > 0)
                    {
                        firstParam = Lamda2 * bigram.Count / contextCount;
                    }
                }
            }

            // Second term: count of tag2 over the total word count (0 while WordCount is zero).
            double secondParam = 0;
            if (WordCount > 0)
            {
                secondParam = Lamda3 * tag2.TagCount / WordCount;
            }

            return (firstParam + secondParam) / (Lamda2 + Lamda3);
        }
예제 #10
0
파일: Tagger.cs 프로젝트: bongusagi/Tagger
 /// <summary>
 /// Returns the <c>Tag</c> registered in <c>TagDict</c> under the given value,
 /// creating and registering a new one when none exists yet.
 /// </summary>
 /// <param name="tag">Tag value used as the dictionary key.</param>
 /// <returns>The existing or newly created tag.</returns>
 public Tag AddTag(string tag)
 {
     Tag registered;
     if (!TagDict.TryGetValue(tag, out registered))
     {
         // Unknown value: create the tag and remember it for later calls.
         registered = new Tag { Value = tag };
         TagDict.Add(tag, registered);
     }

     return registered;
 }