Beispiel #1
0
        /// <summary>
        /// Runs a tokenizer, sentence splitter, POS tagger and SUTime <see cref="TimeAnnotator"/>
        /// over a sample sentence and verifies that temporal expressions are found.
        /// </summary>
        /// <remarks>
        /// The document date is fixed to <c>2013-07-14</c> before annotating. The test asserts that
        /// at least one timex annotation is produced, that each one carries at least one token,
        /// and that each one has a non-null <see cref="TimeExpression"/>. Results are written to
        /// <c>TestContext.Out</c>.
        /// </remarks>
        public void ThreeInterestingDates()
        {
            const string sentence = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";

            // Base pipeline: tokenize, split into sentences, then POS-tag.
            var nlp = new AnnotationPipeline();
            nlp.addAnnotator(new TokenizerAnnotator(false));
            nlp.addAnnotator(new WordsToSentencesAnnotator(false));

            var posModel = new MaxentTagger(Files.CoreNlp.Models("pos-tagger/english-left3words-distsim.tagger"));
            nlp.addAnnotator(new POSTaggerAnnotator(posModel));

            // SUTime is configured with a comma-separated list of rule files.
            var ruleFiles = new[]
            {
                Files.CoreNlp.Models("sutime/defs.sutime.txt"),
                Files.CoreNlp.Models("sutime/english.sutime.txt"),
                Files.CoreNlp.Models("sutime/english.holidays.sutime.txt"),
            };
            var sutimeConfig = new Dictionary<string, string>
            {
                ["sutime.binders"] = "0",
                ["sutime.rules"]   = string.Join(",", ruleFiles),
            };
            nlp.addAnnotator(new TimeAnnotator("sutime", Java.Props(sutimeConfig)));

            // Annotate the sample text with a fixed document date.
            var document = new Annotation(sentence);
            document.set(Java.GetAnnotationClass<CoreAnnotations.DocDateAnnotation>(), "2013-07-14");
            nlp.annotate(document);

            TestContext.Out.WriteLine(document.get(Java.GetAnnotationClass<CoreAnnotations.TextAnnotation>()));

            var timexes = (java.util.ArrayList)document.get(Java.GetAnnotationClass<TimeAnnotations.TimexAnnotations>());
            Assert.Greater(timexes.size(), 0);

            // Annotation keys used for every timex entry below.
            var tokensKey = Java.GetAnnotationClass<CoreAnnotations.TokensAnnotation>();
            var timeKey   = Java.GetAnnotationClass<TimeExpression.Annotation>();

            foreach (CoreMap timex in timexes)
            {
                var span       = (java.util.List)timex.get(tokensKey);
                var tokenCount = span.size();
                Assert.Greater(tokenCount, 0);

                var head       = span.get(0);
                var tail       = span.get(tokenCount - 1);
                var expression = (TimeExpression)timex.get(timeKey);
                Assert.IsNotNull(expression, "Time expression is null");

                // NOTE(review): the label says "char offset", but the values written are the
                // first and last token objects themselves, not numeric offsets.
                TestContext.Out.WriteLine($"{timex} [from char offset '{head}' to '{tail}'] --> {expression.getTemporal()}");
            }
        }
        /// <summary>
        /// Annotates a sample sentence through a <see cref="StanfordCoreNLPClient"/> and prints
        /// every token together with its part-of-speech and named-entity tags.
        /// </summary>
        /// <remarks>
        /// The client is created for <c>http://localhost</c> on port <c>9000</c>. Output goes to
        /// the console: a <c>----</c> separator per sentence followed by one line per token.
        /// If the annotated document carries no sentence list, nothing is printed.
        /// </remarks>
        public static void CoreNlpClient()
        {
            // Request POS tagging, lemmatization, NER, parsing, and coreference resolution.
            var props = new Properties();

            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            // NOTE(review): the last argument (2) is presumably the client thread count — confirm
            // against the StanfordCoreNLPClient constructor documentation.
            StanfordCoreNLPClient pipeline = new(props, "http://localhost", 9000, 2);

            // The text to analyse.
            var text = "Kosgi Santosh sent an email to Stanford University.";
            // Create an empty Annotation holding just the given text.
            Annotation document = new(text);

            // Run all annotators on this text.
            pipeline.annotate(document);

            var sentencesAnnotationClass      = Java.GetAnnotationClass <CoreAnnotations.SentencesAnnotation>();
            var tokensAnnotationClass         = Java.GetAnnotationClass <CoreAnnotations.TokensAnnotation>();
            var textAnnotationClass           = Java.GetAnnotationClass <CoreAnnotations.TextAnnotation>();
            var partOfSpeechAnnotationClass   = Java.GetAnnotationClass <CoreAnnotations.PartOfSpeechAnnotation>();
            var namedEntityTagAnnotationClass = Java.GetAnnotationClass <CoreAnnotations.NamedEntityTagAnnotation>();

            // The sentences are the value stored on the document under SentencesAnnotation.
            // Calling getClasses() on the annotation class would instead list that class's nested
            // member classes, which has nothing to do with the document's content.
            var sentences = (AbstractList)document.get(sentencesAnnotationClass);
            if (sentences is null)
            {
                // No sentence annotation was set on the document; there is nothing to print.
                return;
            }

            foreach (CoreMap sentence in sentences)
            {
                var tokens = (AbstractList)sentence.get(tokensAnnotationClass);
                Console.WriteLine("----");
                foreach (CoreLabel token in tokens)
                {
                    var word = token.get(textAnnotationClass);
                    var pos  = token.get(partOfSpeechAnnotationClass);
                    var ner  = token.get(namedEntityTagAnnotationClass);
                    Console.WriteLine($"{word}\t[pos={pos};\tner={ner};]");
                }
            }
        }