/// <summary>
/// Assigns a part-of-speech tag to every word of <paramref name="sentence"/> by looking
/// up each word's n-gram context in the trained context mapping.
/// </summary>
/// <remarks>
/// If no model has been trained yet, the CoNLL-2000 chunking training file is read from
/// the "CoNLL" folder under <c>options.CorpusDir</c> and used to train one first.
/// The sentence is temporarily padded at the front with <c>options.NGram - 1</c> filler
/// tokens so that the first real words have a full context; the padding is removed
/// before the method returns.
/// </remarks>
/// <param name="sentence">Sentence whose words receive a <c>Pos</c> value; modified in place.</param>
/// <param name="options">Supplies the n-gram size, the default tag and the corpus directory.</param>
public void Tag(Sentence sentence, TagOptions options)
{
    // need training to generate model
    if (_contextMapping == null)
    {
        var corpus = new CoNLLReader().Read(new ReaderOptions
        {
            DataDir = Path.Combine(options.CorpusDir, "CoNLL"),
            FileName = "conll2000_chunking_train.txt"
        });

        Train(corpus, options);
    }

    Fill(sentence, options);

    int padding = options.NGram - 1;

    for (int pos = padding; pos < sentence.Words.Count; pos++)
    {
        // Build the context once per word. It only depends on this word's text and the
        // tags of the preceding words, so rebuilding it for every mapping entry inside
        // the predicate was wasted work.
        string context = GetContext(pos, sentence.Words, options);

        // First matching entry wins; fall back to the default tag when the context is
        // unknown or the matching entry carries no tag.
        sentence.Words[pos].Pos =
            _contextMapping.FirstOrDefault(x => x.Context == context)?.Tag ?? options.Tag;
    }

    // Strip the filler tokens that Fill put in front of the real words.
    if (padding > 0)
    {
        sentence.Words.RemoveRange(0, padding);
    }
}
/// <summary>
/// Builds the context-to-tag frequency model from a tagged corpus and stores it in
/// <c>_contextMapping</c>, ordered from the most to the least frequent (context, tag) pair.
/// </summary>
/// <remarks>
/// Each sentence is padded at the front with filler tokens while its contexts are
/// collected, and the padding is removed again afterwards so the caller's corpus is
/// left as it was passed in. Without that, training repeatedly on the same sentence
/// objects would stack additional filler tokens on every run.
/// </remarks>
/// <param name="sentences">Tagged training sentences; every word's <c>Pos</c> is read as the gold tag.</param>
/// <param name="options">Supplies the n-gram size and the tag used for filler tokens.</param>
public void Train(List<Sentence> sentences, TagOptions options)
{
    var observations = new List<NGramFreq>();
    int padding = options.NGram - 1;

    foreach (var sent in sentences)
    {
        Fill(sent, options);

        // One observation per real word: its context paired with its tag.
        for (int pos = padding; pos < sent.Words.Count; pos++)
        {
            observations.Add(new NGramFreq
            {
                Context = GetContext(pos, sent.Words, options),
                Tag = sent.Words[pos].Pos,
                Count = 1
            });
        }

        // Undo the padding added by Fill so the training data is not left modified.
        if (padding > 0)
        {
            sent.Words.RemoveRange(0, padding);
        }
    }

    // Collapse identical (context, tag) observations into one entry with its frequency.
    // OrderByDescending is a stable sort, so ties keep first-seen order.
    _contextMapping = observations
        .GroupBy(c => new { c.Context, c.Tag })
        .Select(g => new NGramFreq
        {
            Context = g.Key.Context,
            Tag = g.Key.Tag,
            Count = g.Count()
        })
        .OrderByDescending(x => x.Count)
        .ToList();
}
/// <summary>
/// Pads the front of <paramref name="sent"/> with <c>options.NGram - 1</c> filler tokens
/// so that the first real word has a complete n-gram context.
/// </summary>
/// <param name="sent">Sentence to pad; its word list is modified in place.</param>
/// <param name="options">Supplies the n-gram size and the tag given to filler tokens.</param>
private void Fill(Sentence sent, TagOptions options)
{
    int fillerCount = options.NGram - 1;

    for (int i = 0; i < fillerCount; i++)
    {
        // Every filler is inserted at index 0, so the filler created last
        // (largest Start) ends up first in the list.
        sent.Words.Insert(0, new Token
        {
            Text = "NIL",
            Pos = options.Tag,
            Start = i * 3
        });
    }
}
/// <summary>
/// Assigns a part-of-speech tag to every word of <paramref name="sentence"/> by looking
/// up each word's n-gram context in the trained context mapping.
/// </summary>
/// <remarks>
/// If no model has been trained yet, one is trained from <c>options.Corpus</c> first.
/// The sentence is temporarily padded at the front with <c>options.NGram - 1</c> filler
/// tokens so that the first real words have a full context; the padding is removed
/// before the method returns.
/// </remarks>
/// <param name="sentence">Sentence whose words receive a <c>Pos</c> value; modified in place.</param>
/// <param name="options">Supplies the n-gram size, the default tag and the training corpus.</param>
public void Tag(Sentence sentence, TagOptions options)
{
    // need training to generate model
    if (_contextMapping == null)
    {
        Train(options.Corpus, options);
    }

    Fill(sentence, options);

    int padding = options.NGram - 1;

    for (int pos = padding; pos < sentence.Words.Count; pos++)
    {
        // Build the context once per word. It only depends on this word's text and the
        // tags of the preceding words, so rebuilding it for every mapping entry inside
        // the predicate was wasted work.
        string context = GetContext(pos, sentence.Words, options);

        // First matching entry wins; fall back to the default tag when the context is
        // unknown or the matching entry carries no tag.
        sentence.Words[pos].Pos =
            _contextMapping.FirstOrDefault(x => x.Context == context)?.Tag ?? options.Tag;
    }

    // Strip the filler tokens that Fill put in front of the real words.
    if (padding > 0)
    {
        sentence.Words.RemoveRange(0, padding);
    }
}
/// <summary>
/// Builds the lookup key for the word at <paramref name="pos"/>: the word's text,
/// prefixed with the tags of the <c>options.NGram - 1</c> words before it, separated
/// by single spaces.
/// </summary>
/// <param name="pos">Index of the word in <paramref name="words"/>; must have at least
/// <c>options.NGram - 1</c> words in front of it.</param>
/// <param name="words">Word list, including any leading filler tokens.</param>
/// <param name="options">Supplies the n-gram size.</param>
/// <returns>The context string; just the word's text when the n-gram size is 1.</returns>
private string GetContext(int pos, List<Token> words, TagOptions options)
{
    string key = words[pos].Text;
    int distance = options.NGram - 1;

    // Walk from the farthest preceding word towards the current one; each tag is
    // prepended, so the nearest predecessor's tag ends up leftmost in the key.
    while (distance > 0)
    {
        key = $"{words[pos - distance].Pos} {key}";
        distance--;
    }

    return key;
}
/// <summary>
/// Creates a factory for the given options and language and instantiates the tagger
/// it wraps.
/// </summary>
/// <param name="options">Tagging options kept for later use.</param>
/// <param name="lang">Language kept for later use.</param>
public TaggerFactory(TagOptions options, SupportedLanguage lang)
{
    _options = options;
    _lang = lang;

    // NOTE(review): ITag is presumably a generic type parameter with a new() constraint
    // (or a concrete type); its declaration is outside this view — confirm.
    _tagger = new ITag();
}
/// <summary>
/// Training hook that does nothing in this implementation.
/// </summary>
/// <param name="sentences">Training sentences; not used.</param>
/// <param name="options">Tagging options; not used.</param>
public void Train(List<Sentence> sentences, TagOptions options)
{
    // Intentionally empty.
}
/// <summary>
/// Tagging hook that does nothing in this implementation; the sentence is left untouched.
/// </summary>
/// <param name="sentence">Sentence to tag; not used.</param>
/// <param name="options">Tagging options; not used.</param>
public void Tag(Sentence sentence, TagOptions options)
{
    // Intentionally empty.
}