/// <summary>
/// Runs the tagger over every sentence of <paramref name="doc"/>.
/// </summary>
/// <param name="agent">Not read by this method.</param>
/// <param name="doc">Document whose sentences are tagged; each sentence's tokens are wrapped in a new <c>Sentence</c> before being handed to the tagger.</param>
/// <param name="meta">Not read by this method.</param>
/// <returns>A task whose result is always <c>true</c>.</returns>
public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
{
    // Nothing is awaited below; the work runs synchronously inside the async method.
    foreach (var docSentence in doc.Sentences)
    {
        var wrapped = new Sentence { Words = docSentence.Tokens };
        _tagger.Tag(wrapped);
    }

    return true;
}
/// <summary>
/// Tokenizes a sample English phrase with the WORD_PUNC regex pattern, tags it with an
/// <c>NGramTagger</c> configured for N = 3 (default tag "NN", corpus supplied by
/// <c>GetTaggedCorpus()</c>), and verifies the part-of-speech tags of the first four tokens.
/// </summary>
public void TriGramInCoNLL2000()
{
    // tokenization
    var tokenizer = new TokenizerFactory<RegexTokenizer>(
        new TokenizationOptions { Pattern = RegexTokenizer.WORD_PUNC },
        SupportedLanguage.English);
    var tokens = tokenizer.Tokenize("Chancellor of the Exchequer Nigel Lawson's restated commitment");

    // test tag
    var tagger = new TaggerFactory<NGramTagger>(
        new TagOptions { NGram = 3, Tag = "NN", Corpus = GetTaggedCorpus() },
        SupportedLanguage.English);
    tagger.Tag(new Sentence { Words = tokens });

    // AreEqual reports both the expected and the actual tag when a check fails,
    // unlike IsTrue(a == b) which only reports "false".
    Assert.AreEqual("NNP", tokens[0].Pos);
    Assert.AreEqual("IN", tokens[1].Pos);
    Assert.AreEqual("DT", tokens[2].Pos);
    Assert.AreEqual("NNP", tokens[3].Pos);
}
/// <summary>
/// Tokenizes a sample English phrase with the WORD_PUNC regex pattern, tags it with an
/// <c>NGramTagger</c> configured for N = 3 (default tag "NN", corpus directory read from the
/// "CherubNLP:dataDir" configuration value), and verifies the part-of-speech tags of the
/// first four tokens.
/// </summary>
public void TriGramInCoNLL2000()
{
    // tokenization
    var tokenizer = new TokenizerFactory(
        new TokenizationOptions { Pattern = RegexTokenizer.WORD_PUNC },
        SupportedLanguage.English);
    tokenizer.GetTokenizer<RegexTokenizer>();
    var tokens = tokenizer.Tokenize("Chancellor of the Exchequer Nigel Lawson's restated commitment");

    // test tag
    var tagger = new TaggerFactory(
        new TagOptions
        {
            CorpusDir = Configuration.GetValue<String>("CherubNLP:dataDir"),
            NGram = 3,
            Tag = "NN"
        },
        SupportedLanguage.English);
    tagger.GetTagger<NGramTagger>();
    tagger.Tag(new Sentence { Words = tokens });

    // AreEqual reports both the expected and the actual tag when a check fails,
    // unlike IsTrue(a == b) which only reports "false".
    Assert.AreEqual("NNP", tokens[0].Pos);
    Assert.AreEqual("IN", tokens[1].Pos);
    Assert.AreEqual("DT", tokens[2].Pos);
    Assert.AreEqual("NNP", tokens[3].Pos);
}
/// <summary>
/// Tokenizes a sample English phrase with the WORD_PUNC regex pattern, tags it with an
/// <c>NGramTagger</c> configured for N = 1 (default tag "NN", corpus directory read from the
/// "CherubNLP:dataDir" configuration value), verifies the part-of-speech tags of the first
/// four tokens, and then checks that a second <c>Tag</c> call is far faster than the first.
/// </summary>
public void UniGramInCoNLL2000()
{
    // tokenization
    var tokenizer = new TokenizerFactory(
        new TokenizationOptions { Pattern = RegexTokenizer.WORD_PUNC },
        SupportedLanguage.English);
    tokenizer.GetTokenizer<RegexTokenizer>();
    var tokens = tokenizer.Tokenize("Chancellor of the Exchequer Nigel Lawson's restated commitment");

    // test tag
    var tagger = new TaggerFactory(
        new TagOptions
        {
            CorpusDir = Configuration.GetValue<String>("CherubNLP:dataDir"),
            NGram = 1,
            Tag = "NN"
        },
        SupportedLanguage.English);
    tagger.GetTagger<NGramTagger>();

    var watch = Stopwatch.StartNew();
    tagger.Tag(new Sentence { Words = tokens });
    watch.Stop();
    var firstCallMs = watch.ElapsedMilliseconds;

    // AreEqual reports both the expected and the actual tag when a check fails,
    // unlike IsTrue(a == b) which only reports "false".
    Assert.AreEqual("NNP", tokens[0].Pos);
    Assert.AreEqual("IN", tokens[1].Pos);
    Assert.AreEqual("DT", tokens[2].Pos);
    Assert.AreEqual("NNP", tokens[3].Pos);

    // test if model is loaded repeatedly.
    watch = Stopwatch.StartNew();
    tagger.Tag(new Sentence { Words = tokens });
    watch.Stop();
    var secondCallMs = watch.ElapsedMilliseconds;

    // NOTE(review): this check depends on wall-clock timing; it also fails when both
    // measurements round down to 0 ms, because 0 > 0 is false.
    Assert.IsTrue(
        firstCallMs > secondCallMs * 100,
        "Expected the first Tag call to take more than 100x the second call; first = "
            + firstCallMs + " ms, second = " + secondCallMs + " ms.");
}
/// <summary>
/// Tokenizes "How are you doing?" with a <c>RegexTokenizer</c> built from default
/// tokenization options and passes the tokens through a <c>DefaultTagger</c> whose tag
/// option is "NN". The method contains no assertions; it only fails if a call throws.
/// </summary>
public void TagInCoNLL2000()
{
    // Tokenize with default options.
    var tokenizationOptions = new TokenizationOptions { };
    var tokenizer = new TokenizerFactory<RegexTokenizer>(tokenizationOptions, SupportedLanguage.English);
    var tokens = tokenizer.Tokenize("How are you doing?");

    // Tag the resulting tokens.
    var tagOptions = new TagOptions { Tag = "NN" };
    var tagger = new TaggerFactory<DefaultTagger>(tagOptions, SupportedLanguage.English);
    var sentence = new Sentence { Words = tokens };
    tagger.Tag(sentence);
}
/// <summary>
/// Makes sure the values for the tag <paramref name="tagName"/> are present in
/// <c>m_tags</c>, asking <c>TaggerFactory.Tag</c> to produce them when the lower-cased
/// tag name is not cached yet.
/// </summary>
/// <param name="tagName">
/// Tag name. The registration check receives it exactly as given; the cache lookup and
/// the call to <c>TaggerFactory.Tag</c> use its lower-cased form.
/// </param>
/// <returns>Always <c>true</c>; an unregistered tag is reported by exception instead.</returns>
/// <exception cref="ArgumentException">
/// <c>TaggerFactory.IsTagRegistered</c> returns false for this instance's <c>Locale</c> and
/// <paramref name="tagName"/>.
/// </exception>
public bool Tag(string tagName)
{
    if (!TaggerFactory.IsTagRegistered(this.Locale, tagName))
    {
        throw new ArgumentException("No Tagger is associated with this tag name!", "tagName");
    }

    // NOTE(review): ToLower() is culture-sensitive. It is kept as-is because other code
    // reading m_tags is not visible here and must agree on the key form; consider
    // switching the whole class to ToLowerInvariant().
    tagName = tagName.ToLower();

    // Already computed: nothing to do.
    if (m_tags.ContainsKey(tagName))
    {
        return true;
    }

    var tagsValues = TaggerFactory.Tag(tagName, this);
    if (tagsValues == null)
    {
        // The tagger produced nothing: cache an array with one (null) slot per token so
        // the tagger is not invoked again for this tag name.
        m_tags.Add(tagName, new object[m_tokens.Length]);
        return true;
    }

    // One tagger call may yield values for several tags; store each under its
    // lower-cased name. The indexer setter adds a missing key and replaces an existing one.
    foreach (var tagAndValues in tagsValues)
    {
        m_tags[tagAndValues.Key.ToLower()] = tagAndValues.Value;
    }

    return true;
}