예제 #1
0
파일: NGramTagger.cs 프로젝트: Spongebob5/_
        /// <summary>
        /// Assigns a part-of-speech tag to every word of <paramref name="sentence"/> by looking up
        /// each word's n-gram context in the trained context mapping.
        /// </summary>
        /// <remarks>
        /// If no model has been built yet, one is trained first from the file
        /// "conll2000_chunking_train.txt" located in the "CoNLL" sub-directory of <c>options.CorpusDir</c>.
        /// </remarks>
        /// <param name="sentence">Sentence whose words receive a <c>Pos</c> value; modified in place.</param>
        /// <param name="options">Supplies the n-gram size, the default tag and the corpus directory.</param>
        public void Tag(Sentence sentence, TagOptions options)
        {
            // need training to generate model
            if (_contextMapping == null)
            {
                var corpus = new CoNLLReader().Read(new ReaderOptions
                {
                    DataDir = Path.Combine(options.CorpusDir, "CoNLL"),
                    FileName = "conll2000_chunking_train.txt"
                });

                Train(corpus, options);
            }

            // Temporarily prepend NGram - 1 padding tokens so the first real word has a full context.
            Fill(sentence, options);

            for (int pos = options.NGram - 1; pos < sentence.Words.Count; pos++)
            {
                // Build the context once per word; the original rebuilt it for every model entry
                // because the call lived inside the predicate.
                string context = GetContext(pos, sentence.Words, options);

                // Take the tag of the first matching entry; fall back to the default tag when
                // the context is unknown (or the matching entry carries no tag).
                sentence.Words[pos].Pos = _contextMapping.FirstOrDefault(x => x.Context == context)?.Tag
                                          ?? options.Tag;
            }

            // Remove the padding tokens again so the caller sees only the original words.
            for (int pos = 0; pos < options.NGram - 1; pos++)
            {
                sentence.Words.RemoveAt(0);
            }
        }
예제 #2
0
파일: NGramTagger.cs 프로젝트: Spongebob5/_
        /// <summary>
        /// Builds the context mapping from tagged training sentences: every (context, tag) pair
        /// observed in <paramref name="sentences"/> is counted, and the resulting entries are stored
        /// in descending order of frequency.
        /// </summary>
        /// <param name="sentences">Training sentences whose words already carry a <c>Pos</c> tag.</param>
        /// <param name="options">Supplies the n-gram size and the tag used for padding tokens.</param>
        public void Train(List<Sentence> sentences, TagOptions options)
        {
            var cache = new List<NGramFreq>();

            foreach (var sent in sentences)
            {
                // Prepend NGram - 1 padding tokens so the first real word has a full context.
                Fill(sent, options);

                try
                {
                    for (int pos = options.NGram - 1; pos < sent.Words.Count; pos++)
                    {
                        cache.Add(new NGramFreq
                        {
                            Context = GetContext(pos, sent.Words, options),
                            Tag = sent.Words[pos].Pos,
                            Count = 1
                        });
                    }
                }
                finally
                {
                    // Strip the padding again so the caller's corpus is left as it was passed in.
                    // Without this, training twice on the same sentences would stack padding
                    // tokens and feed them into the model as if they were words.
                    for (int pad = 0; pad < options.NGram - 1; pad++)
                    {
                        sent.Words.RemoveAt(0);
                    }
                }
            }

            // Collapse identical (context, tag) observations into one entry with its frequency,
            // most frequent first.
            _contextMapping = cache
                .GroupBy(c => new { c.Context, c.Tag })
                .Select(g => new NGramFreq
                {
                    Context = g.Key.Context,
                    Tag = g.Key.Tag,
                    Count = g.Count()
                })
                .OrderByDescending(x => x.Count)
                .ToList();
        }
예제 #3
0
파일: NGramTagger.cs 프로젝트: Spongebob5/_
 /// <summary>
 /// Prepends <c>options.NGram - 1</c> padding tokens to the front of <paramref name="sent"/>.
 /// Each padding token has the text "NIL", the tag <c>options.Tag</c>, and a <c>Start</c>
 /// value that grows by 3 with every token inserted.
 /// </summary>
 /// <param name="sent">Sentence whose word list is padded in place.</param>
 /// <param name="options">Supplies the n-gram size and the tag given to padding tokens.</param>
 private void Fill(Sentence sent, TagOptions options)
 {
     int padCount = options.NGram - 1;

     for (int i = 0; i < padCount; i++)
     {
         var padding = new Token
         {
             Text = "NIL",
             Pos = options.Tag,
             Start = i * 3
         };

         // Always insert at the front, so later padding tokens end up before earlier ones.
         sent.Words.Insert(0, padding);
     }
 }
예제 #4
0
        /// <summary>
        /// Assigns a part-of-speech tag to every word of <paramref name="sentence"/> by looking up
        /// each word's n-gram context in the trained context mapping.
        /// </summary>
        /// <remarks>
        /// If no model has been built yet, one is trained first from <c>options.Corpus</c>.
        /// </remarks>
        /// <param name="sentence">Sentence whose words receive a <c>Pos</c> value; modified in place.</param>
        /// <param name="options">Supplies the n-gram size, the default tag and the training corpus.</param>
        public void Tag(Sentence sentence, TagOptions options)
        {
            // need training to generate model
            if (_contextMapping == null)
            {
                Train(options.Corpus, options);
            }

            // Temporarily prepend NGram - 1 padding tokens so the first real word has a full context.
            Fill(sentence, options);

            for (int pos = options.NGram - 1; pos < sentence.Words.Count; pos++)
            {
                // Build the context once per word; the original rebuilt it for every model entry
                // because the call lived inside the predicate.
                string context = GetContext(pos, sentence.Words, options);

                // Take the tag of the first matching entry; fall back to the default tag when
                // the context is unknown (or the matching entry carries no tag).
                sentence.Words[pos].Pos = _contextMapping.FirstOrDefault(x => x.Context == context)?.Tag
                                          ?? options.Tag;
            }

            // Remove the padding tokens again so the caller sees only the original words.
            for (int pos = 0; pos < options.NGram - 1; pos++)
            {
                sentence.Words.RemoveAt(0);
            }
        }
예제 #5
0
파일: NGramTagger.cs 프로젝트: Spongebob5/_
        /// <summary>
        /// Builds the lookup key for the word at <paramref name="pos"/>: the <c>Pos</c> values of the
        /// preceding <c>options.NGram - 1</c> words (nearest word first), each followed by a space,
        /// and finally the text of the word itself.
        /// </summary>
        /// <param name="pos">Index of the current word; at least <c>options.NGram - 1</c> words must precede it.</param>
        /// <param name="words">Word list the context is read from.</param>
        /// <param name="options">Supplies the n-gram size.</param>
        /// <returns>The context string; just the word's text when <c>options.NGram</c> is 1 or less.</returns>
        private string GetContext(int pos, List<Token> words, TagOptions options)
        {
            var context = words[pos].Text;

            // Walk from the farthest preceding word towards the nearest, prepending each tag,
            // so the nearest tag ends up first in the result.
            int distance = options.NGram - 1;
            while (distance > 0)
            {
                context = $"{words[pos - distance].Pos} {context}";
                distance--;
            }

            return context;
        }
예제 #6
0
 /// <summary>
 /// Creates a factory that remembers the given options and language and instantiates its
 /// tagger with the parameterless constructor of <c>ITag</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): <c>ITag</c> is presumably a generic type parameter with a <c>new()</c>
 /// constraint declared on the enclosing class — confirm against the class header.
 /// </remarks>
 /// <param name="options">Tagging options kept for later use.</param>
 /// <param name="lang">Language kept for later use.</param>
 public TaggerFactory(TagOptions options, SupportedLanguage lang)
 {
     _options = options;
     _lang = lang;

     // Created last, after the plain field assignments.
     _tagger = new ITag();
 }
예제 #7
0
 /// <summary>
 /// Training entry point. The body is empty in this implementation, so calling it does nothing.
 /// </summary>
 /// <param name="sentences">Training sentences; not used here.</param>
 /// <param name="options">Tagging options; not used here.</param>
 public void Train(List <Sentence> sentences, TagOptions options)
 {
 }
예제 #8
0
 /// <summary>
 /// Tagging entry point. The body is empty in this implementation, so the sentence is left untouched.
 /// </summary>
 /// <param name="sentence">Sentence to tag; not used here.</param>
 /// <param name="options">Tagging options; not used here.</param>
 public void Tag(Sentence sentence, TagOptions options)
 {
 }