Ejemplo n.º 1
0
        /// <summary>
        /// Passes every sentence of <paramref name="doc"/> to the tagger held in <c>_tagger</c>.
        /// </summary>
        /// <param name="agent">Not read by this method.</param>
        /// <param name="doc">Document whose <c>Sentences</c> are tagged one by one.</param>
        /// <param name="meta">Not read by this method.</param>
        /// <returns>A task whose result is always <c>true</c>.</returns>
        /// <remarks>
        /// The body contains no <c>await</c>, so all tagging runs synchronously before the task is returned.
        /// </remarks>
        public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
        {
            foreach (var sentence in doc.Sentences)
            {
                // The tagger receives a fresh Sentence that wraps this sentence's tokens.
                var wrapped = new Sentence { Words = sentence.Tokens };
                _tagger.Tag(wrapped);
            }

            return true;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Tokenizes a sample sentence with the <c>RegexTokenizer.WORD_PUNC</c> pattern, tags it with an
        /// <c>NGramTagger</c> configured for tri-grams, and verifies the part-of-speech tag of the first four tokens.
        /// </summary>
        public void TriGramInCoNLL2000()
        {
            // tokenization
            var tokenizer = new TokenizerFactory<RegexTokenizer>(new TokenizationOptions
            {
                Pattern = RegexTokenizer.WORD_PUNC
            }, SupportedLanguage.English);

            var tokens = tokenizer.Tokenize("Chancellor of the Exchequer Nigel Lawson's restated commitment");

            // test tag
            var tagger = new TaggerFactory<NGramTagger>(new TagOptions
            {
                NGram  = 3,
                Tag    = "NN", // presumably the fallback tag for unmatched tokens; confirm against TagOptions
                Corpus = GetTaggedCorpus()
            }, SupportedLanguage.English);

            // The assertions below read Pos from the same token objects that were passed in here.
            tagger.Tag(new Sentence {
                Words = tokens
            });

            // AreEqual (rather than IsTrue on ==) reports the expected and actual tag when a check fails.
            Assert.AreEqual("NNP", tokens[0].Pos);
            Assert.AreEqual("IN", tokens[1].Pos);
            Assert.AreEqual("DT", tokens[2].Pos);
            Assert.AreEqual("NNP", tokens[3].Pos);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Tokenizes a sample sentence with the <c>RegexTokenizer.WORD_PUNC</c> pattern, tags it with an
        /// <c>NGramTagger</c> configured for tri-grams over the corpus directory read from configuration,
        /// and verifies the part-of-speech tag of the first four tokens.
        /// </summary>
        public void TriGramInCoNLL2000()
        {
            // tokenization
            var tokenizer = new TokenizerFactory(new TokenizationOptions
            {
                Pattern = RegexTokenizer.WORD_PUNC
            }, SupportedLanguage.English);

            // Selects the concrete tokenizer implementation on the factory; the return value is not needed.
            tokenizer.GetTokenizer<RegexTokenizer>();

            var tokens = tokenizer.Tokenize("Chancellor of the Exchequer Nigel Lawson's restated commitment");

            // test tag
            var tagger = new TaggerFactory(new TagOptions
            {
                CorpusDir = Configuration.GetValue<String>("CherubNLP:dataDir"),
                NGram     = 3,
                Tag       = "NN" // presumably the fallback tag for unmatched tokens; confirm against TagOptions
            }, SupportedLanguage.English);

            // Selects the concrete tagger implementation on the factory; the return value is not needed.
            tagger.GetTagger<NGramTagger>();

            // The assertions below read Pos from the same token objects that were passed in here.
            tagger.Tag(new Sentence {
                Words = tokens
            });

            // AreEqual (rather than IsTrue on ==) reports the expected and actual tag when a check fails.
            Assert.AreEqual("NNP", tokens[0].Pos);
            Assert.AreEqual("IN", tokens[1].Pos);
            Assert.AreEqual("DT", tokens[2].Pos);
            Assert.AreEqual("NNP", tokens[3].Pos);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Tokenizes a sample sentence, tags it with a uni-gram <c>NGramTagger</c>, verifies the
        /// part-of-speech tag of the first four tokens, and then tags a second time to check that the
        /// second call is at least 100 times faster than the first.
        /// </summary>
        public void UniGramInCoNLL2000()
        {
            // tokenization
            var tokenizer = new TokenizerFactory(new TokenizationOptions
            {
                Pattern = RegexTokenizer.WORD_PUNC
            }, SupportedLanguage.English);

            // Selects the concrete tokenizer implementation on the factory; the return value is not needed.
            tokenizer.GetTokenizer<RegexTokenizer>();

            var tokens = tokenizer.Tokenize("Chancellor of the Exchequer Nigel Lawson's restated commitment");

            // test tag
            var tagger = new TaggerFactory(new TagOptions
            {
                CorpusDir = Configuration.GetValue<String>("CherubNLP:dataDir"),
                NGram     = 1,
                Tag       = "NN" // presumably the fallback tag for unmatched tokens; confirm against TagOptions
            }, SupportedLanguage.English);

            // Selects the concrete tagger implementation on the factory; the return value is not needed.
            tagger.GetTagger<NGramTagger>();

            // First call: timed so it can be compared with the second call below.
            var watch = Stopwatch.StartNew();

            tagger.Tag(new Sentence {
                Words = tokens
            });
            watch.Stop();
            var elapsedMs1 = watch.ElapsedMilliseconds;

            // AreEqual (rather than IsTrue on ==) reports the expected and actual tag when a check fails.
            Assert.AreEqual("NNP", tokens[0].Pos);
            Assert.AreEqual("IN", tokens[1].Pos);
            Assert.AreEqual("DT", tokens[2].Pos);
            Assert.AreEqual("NNP", tokens[3].Pos);

            // Test that the model is not loaded repeatedly: a second call should be far cheaper.
            watch = Stopwatch.StartNew();
            tagger.Tag(new Sentence {
                Words = tokens
            });
            watch.Stop();
            var elapsedMs2 = watch.ElapsedMilliseconds;

            // NOTE(review): this is a wall-clock comparison and may be flaky on very fast or noisy
            // machines (e.g. when the first call itself completes in under a millisecond).
            Assert.IsTrue(
                elapsedMs1 > elapsedMs2 * 100,
                "Expected the first Tag call (" + elapsedMs1 + " ms) to take more than 100x the second (" + elapsedMs2 + " ms).");
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Smoke test: tokenizes a short question with a <c>RegexTokenizer</c> built from default options
        /// and runs a <c>DefaultTagger</c> over the result. It makes no assertions, so it only fails if
        /// one of the calls throws.
        /// </summary>
        public void TagInCoNLL2000()
        {
            // Tokenize with default tokenization options.
            var tokenizationOptions = new TokenizationOptions();
            var tokenizer = new TokenizerFactory<RegexTokenizer>(tokenizationOptions, SupportedLanguage.English);
            var words = tokenizer.Tokenize("How are you doing?");

            // Tag with the default tagger, configured with the "NN" tag.
            var tagOptions = new TagOptions { Tag = "NN" };
            var tagger = new TaggerFactory<DefaultTagger>(tagOptions, SupportedLanguage.English);

            var sentence = new Sentence { Words = words };
            tagger.Tag(sentence);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Ensures that values for the tag <paramref name="tagName"/> are present in <c>m_tags</c>,
        /// asking <c>TaggerFactory</c> to compute them when they are not stored yet.
        /// </summary>
        /// <param name="tagName">
        /// Name of the tag to apply. It is checked against the registered taggers as given and then
        /// lower-cased before being used as a key into <c>m_tags</c>.
        /// </param>
        /// <returns>Always <c>true</c>; failure is reported by throwing.</returns>
        /// <exception cref="ArgumentException">
        /// No tagger is registered for <paramref name="tagName"/> under this instance's <c>Locale</c>.
        /// </exception>
        public bool Tag(string tagName)
        {
            if (!TaggerFactory.IsTagRegistered(this.Locale, tagName))
            {
                throw new ArgumentException("No Tagger is associated with this tag name!", "tagName");
            }

            tagName = tagName.ToLower();

            // Values for this tag are already stored: nothing to compute.
            if (m_tags.ContainsKey(tagName))
            {
                return true;
            }

            var tagsValues = TaggerFactory.Tag(tagName, this);

            if (tagsValues == null)
            {
                // The factory produced nothing: store an empty slot per token so the tag is still present.
                m_tags.Add(tagName, new object[m_tokens.Length]);
                return true;
            }

            // The result may carry several tags; store each under its lower-cased name.
            // The indexer adds the key when it is missing and overwrites it otherwise.
            foreach (var tagAndValues in tagsValues)
            {
                m_tags[tagAndValues.Key.ToLower()] = tagAndValues.Value;
            }

            return true;
        }