示例#1
0
        /// <summary>
        /// Builds the n-gram context model from a tagged corpus and stores it in <c>_contextMapping</c>.
        /// </summary>
        /// <remarks>
        /// Each sentence is temporarily left-padded via <c>Fill</c> so that the first real word has a full
        /// context. For every word a (context, tag) pair is recorded; identical pairs are then counted and the
        /// result is ordered by descending count. The padding is removed again before moving on to the next
        /// sentence, so the caller's sentences are left as they were passed in.
        /// </remarks>
        /// <param name="sentences">Tagged sentences to learn from; each word's <c>Pos</c> is used as the tag.</param>
        /// <param name="options">Tagging options; <c>NGram</c> sets the context width.</param>
        public void Train(List<Sentence> sentences, TagOptions options)
        {
            var cache = new List<NGramFreq>();

            // Number of placeholder tokens Fill puts in front of each sentence.
            int padding = options.NGram - 1;

            foreach (var sent in sentences)
            {
                Fill(sent, options);

                for (int pos = padding; pos < sent.Words.Count; pos++)
                {
                    cache.Add(new NGramFreq
                    {
                        Context = GetContext(pos, sent.Words, options),
                        Tag     = sent.Words[pos].Pos,
                        Count   = 1
                    });
                }

                // Undo the padding, mirroring what Tag does; otherwise the corpus stays
                // mutated and a later Train/Tag on the same sentence would pad it twice.
                if (padding > 0)
                {
                    sent.Words.RemoveRange(0, padding);
                }
            }

            // GroupBy keeps first-seen key order and OrderByDescending is stable, so ties
            // between equally frequent pairs resolve in corpus order.
            _contextMapping = cache
                .GroupBy(c => new { c.Context, c.Tag })
                .Select(g => new NGramFreq
                {
                    Context = g.Key.Context,
                    Tag     = g.Key.Tag,
                    Count   = g.Count()
                })
                .OrderByDescending(x => x.Count)
                .ToList();
        }
示例#2
0
        /// <summary>
        /// Assigns a part-of-speech tag to every word of <paramref name="sentence"/> using the n-gram model.
        /// </summary>
        /// <remarks>
        /// If no model exists yet (<c>_contextMapping</c> is null), the file
        /// <c>conll2000_chunking_train.txt</c> under <c>options.CorpusDir/CoNLL</c> is read and used to train
        /// one first. The sentence is left-padded via <c>Fill</c> while tagging and the padding is removed
        /// before returning.
        /// </remarks>
        /// <param name="sentence">Sentence whose words receive a <c>Pos</c> value; modified in place.</param>
        /// <param name="options">Tagging options: <c>NGram</c> (context width), <c>Tag</c> (fallback tag) and <c>CorpusDir</c>.</param>
        public void Tag(Sentence sentence, TagOptions options)
        {
            // need training to generate model
            if (_contextMapping == null)
            {
                var corpus = new CoNLLReader().Read(new ReaderOptions
                {
                    DataDir  = Path.Combine(options.CorpusDir, "CoNLL"),
                    FileName = "conll2000_chunking_train.txt"
                });

                Train(corpus, options);
            }

            Fill(sentence, options);

            // Number of placeholder tokens Fill put in front of the sentence.
            int padding = options.NGram - 1;

            for (int pos = padding; pos < sentence.Words.Count; pos++)
            {
                // Build the lookup key once per word instead of once per model entry:
                // it depends only on tags already assigned to the preceding words.
                var context = GetContext(pos, sentence.Words, options);

                // First matching entry wins; fall back to the default tag when the
                // context is unknown or the matching entry carries no tag.
                sentence.Words[pos].Pos =
                    _contextMapping.FirstOrDefault(x => x.Context == context)?.Tag ?? options.Tag;
            }

            // Strip the padding again so the caller sees only the original words.
            if (padding > 0)
            {
                sentence.Words.RemoveRange(0, padding);
            }
        }
示例#3
0
 /// <summary>
 /// Left-pads a sentence with <c>NGram - 1</c> placeholder tokens so that its first word has a full context.
 /// </summary>
 /// <param name="sent">Sentence to pad; its <c>Words</c> list is modified in place.</param>
 /// <param name="options">Supplies the n-gram width and the tag given to each placeholder.</param>
 private void Fill(Sentence sent, TagOptions options)
 {
     int index = 0;

     while (index + 1 < options.NGram)
     {
         var placeholder = new Token
         {
             Text  = "NIL",
             Pos   = options.Tag,
             Start = index * 3
         };

         // Every placeholder goes to the very front, so later ones end up before earlier ones.
         sent.Words.Insert(0, placeholder);
         index++;
     }
 }
示例#4
0
        /// <summary>
        /// Builds the model lookup key for the word at <paramref name="pos"/>.
        /// </summary>
        /// <param name="pos">Index of the current word in <paramref name="words"/>.</param>
        /// <param name="words">Words of the (already padded) sentence.</param>
        /// <param name="options">Supplies the n-gram width.</param>
        /// <returns>
        /// The <c>Pos</c> values of the <c>NGram - 1</c> preceding words, nearest first, each followed by a
        /// space, and then the current word's <c>Text</c>.
        /// </returns>
        private string GetContext(int pos, List<Token> words, TagOptions options)
        {
            string word = words[pos].Text;
            int lookBack = options.NGram - 1;

            // Without any preceding tags the key is just the word text itself.
            if (lookBack <= 0)
            {
                return word;
            }

            string tags = string.Empty;

            for (int offset = 1; offset <= lookBack; offset++)
            {
                tags += words[pos - offset].Pos + " ";
            }

            return tags + word;
        }
示例#5
0
 /// <summary>
 /// Creates a tagger factory that keeps the supplied options and language for later use.
 /// </summary>
 /// <param name="options">Tagging options, stored in <c>_options</c>.</param>
 /// <param name="lang">Language, stored in <c>_lang</c>.</param>
 public TaggerFactory(TagOptions options, SupportedLanguage lang)
 {
     this._options = options;
     this._lang = lang;
 }
示例#6
0
 /// <summary>
 /// Training entry point; currently a no-op because the body is empty.
 /// </summary>
 /// <param name="sentences">Training sentences; not used.</param>
 /// <param name="options">Tagging options; not used.</param>
 // NOTE(review): presumably a placeholder for a tagger that needs no training — confirm.
 public void Train(List <Sentence> sentences, TagOptions options)
 {
 }
示例#7
0
 /// <summary>
 /// Tagging entry point; currently a no-op because the body is empty, so the sentence is left unchanged.
 /// </summary>
 /// <param name="sentence">Sentence to tag; not used.</param>
 /// <param name="options">Tagging options; not used.</param>
 // NOTE(review): presumably an unimplemented stub — confirm.
 public void Tag(Sentence sentence, TagOptions options)
 {
 }