Example #1
0
        /**
         * <summary>Train method for the Dummy pos tagger. Stores the tag list reported by the corpus as an array
         * in <c>_tagList</c>; no other statistics are collected.</summary>
         *
         * <param name="corpus">Training data for the tagger.</param>
         */
        public void Train(PosTaggedCorpus corpus)
        {
            // ToArray() already produces a correctly sized array, so no separate allocation is needed.
            _tagList = corpus.GetTagList().ToArray();
        }
Example #2
0
        /**
         * <summary>Train method for the Naive pos tagger. Walks every word of every sentence in the corpus and
         * counts, per word name, the tags it was seen with. Afterwards <c>_maxMap</c> is rebuilt so that each word
         * name maps to the value returned by <c>CounterHashMap.Max()</c> for that word's tag counts (presumably
         * the most frequent tag; confirm against CounterHashMap).</summary>
         *
         * <param name="corpus">Training data for the tagger. Every word is cast to PosTaggedWord.</param>
         */
        public void Train(PosTaggedCorpus corpus)
        {
            var tagCountsByWord = new Dictionary<string, CounterHashMap<string>>();

            for (var i = 0; i < corpus.SentenceCount(); i++)
            {
                // Fetch the sentence once and reuse it for both the word count and the word access.
                var sentence = corpus.GetSentence(i);
                for (var j = 0; j < sentence.WordCount(); j++)
                {
                    var word = (PosTaggedWord)sentence.GetWord(j);
                    var name = word.GetName();

                    // Single lookup: create the counter lazily the first time a word name is seen.
                    CounterHashMap<string> tagCounts;
                    if (!tagCountsByWord.TryGetValue(name, out tagCounts))
                    {
                        tagCounts = new CounterHashMap<string>();
                        tagCountsByWord[name] = tagCounts;
                    }

                    tagCounts.Put(word.GetTag());
                }
            }

            _maxMap = new Dictionary<string, string>();
            // Enumerate pairs directly instead of iterating keys and indexing back into the map.
            foreach (var entry in tagCountsByWord)
            {
                _maxMap[entry.Key] = entry.Value.Max();
            }
        }
Example #3
0
        /**
         * <summary>Train method for the Hmm pos tagger. Builds one tag sequence per sentence (the tag of every word,
         * in order) and constructs an Hmm1 from the corpus tag list, those tag sequences and the corpus words as
         * returned by <c>GetAllWordsAsArray()</c>. The resulting model is stored in <c>_hmm</c>.</summary>
         *
         * <param name="corpus">Training data for the tagger. Every word is cast to PosTaggedWord.</param>
         */
        public void Train(PosTaggedCorpus corpus)
        {
            var tagSequences = new List<string>[corpus.SentenceCount()];

            for (var i = 0; i < tagSequences.Length; i++)
            {
                // Fetch the sentence once per outer iteration rather than on every inner-loop step.
                var sentence = corpus.GetSentence(i);
                var tags = new List<string>();
                for (var j = 0; j < sentence.WordCount(); j++)
                {
                    var word = (PosTaggedWord)sentence.GetWord(j);
                    tags.Add(word.GetTag());
                }

                tagSequences[i] = tags;
            }

            _hmm = new Hmm1<string, Word>(corpus.GetTagList(), tagSequences, corpus.GetAllWordsAsArray());
        }