/**
 * <summary>Train method for the Dummy pos tagger. Copies the tag list reported by the corpus into
 * the tagger's tag array.</summary>
 *
 * <param name="corpus">Training data for the tagger.</param>
 */
public void Train(PosTaggedCorpus corpus)
{
    // ToArray() already returns a freshly allocated array of the right size, so there is no
    // need to pre-allocate one that would be discarded immediately.
    _tagList = corpus.GetTagList().ToArray();
}
/**
 * <summary>Train method for the Naive pos tagger. For every word in the corpus, counts how many times
 * each tag was attached to it, then stores for each word the tag returned by the counter's Max()
 * (presumably the most frequent tag - confirm against CounterHashMap).</summary>
 *
 * <param name="corpus">Training data for the tagger.</param>
 */
public void Train(PosTaggedCorpus corpus)
{
    var tagCountsByWord = new Dictionary<string, CounterHashMap<string>>();
    for (var i = 0; i < corpus.SentenceCount(); i++)
    {
        // Fetch the sentence once and reuse it for every word it contains.
        var sentence = corpus.GetSentence(i);
        for (var j = 0; j < sentence.WordCount(); j++)
        {
            var word = (PosTaggedWord)sentence.GetWord(j);
            var name = word.GetName();

            // Single lookup: create the per-word counter only the first time the word is seen.
            CounterHashMap<string> tagCounts;
            if (!tagCountsByWord.TryGetValue(name, out tagCounts))
            {
                tagCounts = new CounterHashMap<string>();
                tagCountsByWord[name] = tagCounts;
            }

            tagCounts.Put(word.GetTag());
        }
    }

    // Collapse each word's tag counter into the single tag chosen by Max().
    _maxMap = new Dictionary<string, string>();
    foreach (var entry in tagCountsByWord)
    {
        _maxMap[entry.Key] = entry.Value.Max();
    }
}
/**
 * <summary>Train method for the Hmm pos tagger. Collects the tag sequence of every sentence in the
 * corpus and builds an Hmm1 from the corpus tag list, those tag sequences and the corpus words.</summary>
 *
 * <param name="corpus">Training data for the tagger.</param>
 */
public void Train(PosTaggedCorpus corpus)
{
    // One list of tags per sentence, in sentence order.
    var tagSequences = new List<string>[corpus.SentenceCount()];
    for (var i = 0; i < tagSequences.Length; i++)
    {
        // Look the sentence up once instead of once per word.
        var sentence = corpus.GetSentence(i);
        var wordCount = sentence.WordCount();
        var tags = new List<string>();
        for (var j = 0; j < wordCount; j++)
        {
            var word = (PosTaggedWord)sentence.GetWord(j);
            tags.Add(word.GetTag());
        }

        tagSequences[i] = tags;
    }

    _hmm = new Hmm1<string, Word>(corpus.GetTagList(), tagSequences, corpus.GetAllWordsAsArray());
}