Example #1
0
        /// <summary>Process a single file or line of standard in.</summary>
        /// <param name="pipeline">The annotation pipeline to run the lines of the input through.</param>
        /// <param name="docid">The docid of the document we are extracting.</param>
        /// <param name="document">the document to annotate.</param>
        private static void ProcessDocument(AnnotationPipeline pipeline, string docid, string document)
        {
            // Error checks
            if (document.Trim().IsEmpty())
            {
                return;
            }
            // Annotate the document
            Annotation ann = new Annotation(document);

            pipeline.Annotate(ann);
            // Get the extractions
            bool empty = true;

            lock (Output)
            {
                foreach (ICoreMap sentence in ann.Get(typeof(CoreAnnotations.SentencesAnnotation)))
                {
                    foreach (RelationTriple extraction in sentence.Get(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation)))
                    {
                        // Print the extractions
                        Output.WriteLine(TripleToString(extraction, docid, sentence));
                        empty = false;
                    }
                }
            }
            if (empty)
            {
                log.Info("No extractions in: " + ("stdin".Equals(docid) ? document : docid));
            }
        }
Example #2
0
 static SUTimeSimpleParser()
 {
     // = 0;
     // = 0;
     pipeline = MakeNumericPipeline();
     cache    = Generics.NewHashMap();
 }
Example #3
0
        /// <summary>TODO(gabor) JavaDoc</summary>
        /// <param name="sentence"/>
        /// <param name="pipeline"/>
        public static void Annotate(ICoreMap sentence, AnnotationPipeline pipeline)
        {
            Annotation ann = new Annotation(StringUtils.Join(sentence.Get(typeof(CoreAnnotations.TokensAnnotation)), " "));

            ann.Set(typeof(CoreAnnotations.TokensAnnotation), sentence.Get(typeof(CoreAnnotations.TokensAnnotation)));
            ann.Set(typeof(CoreAnnotations.SentencesAnnotation), Java.Util.Collections.SingletonList(sentence));
            pipeline.Annotate(ann);
        }
Example #4
0
        private static AnnotationPipeline MakeNumericPipeline()
        {
            AnnotationPipeline pipeline = new AnnotationPipeline();

            pipeline.AddAnnotator(new TokenizerAnnotator(false, "en"));
            pipeline.AddAnnotator(new WordsToSentencesAnnotator(false));
            pipeline.AddAnnotator(new POSTaggerAnnotator(false));
            pipeline.AddAnnotator(new TimeAnnotator(true));
            return(pipeline);
        }
Example #5
0
        public LangProcessing()
        {
            // We should change current directory, so StanfordCoreNLP could find all the model files automatically
            var curDir = Environment.CurrentDirectory;

            Directory.SetCurrentDirectory(jarRoot);
            Properties props = new Properties();

            props.setProperty("annotators", "tokenize, ssplit, pos");
            pipeline = new StanfordCoreNLP(props);
            Directory.SetCurrentDirectory(curDir);
        }
Example #6
0
        public void ThreeInterestingDates()
        {
            var pipeline = new AnnotationPipeline();

            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            var filePath = Files.CoreNlp.Models("pos-tagger/english-left3words-distsim.tagger");
            var tagger   = new MaxentTagger(filePath);

            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

            var props = Java.Props(new Dictionary <string, string>
            {
                { "sutime.binders", "0" },
                {
                    "sutime.rules", string.Join(",",
                                                Files.CoreNlp.Models("sutime/defs.sutime.txt"),
                                                Files.CoreNlp.Models("sutime/english.sutime.txt"),
                                                Files.CoreNlp.Models("sutime/english.holidays.sutime.txt"))
                }
            });

            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            var text       = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
            var annotation = new Annotation(text);

            annotation.set(Java.GetAnnotationClass <CoreAnnotations.DocDateAnnotation>(), "2013-07-14");
            pipeline.annotate(annotation);

            TestContext.Out.WriteLine(annotation.get(Java.GetAnnotationClass <CoreAnnotations.TextAnnotation>()));
            var timexAnnsAll = (java.util.ArrayList)annotation.get(Java.GetAnnotationClass <TimeAnnotations.TimexAnnotations>());

            Assert.Greater(timexAnnsAll.size(), 0);

            foreach (CoreMap cm in timexAnnsAll)
            {
                var tokens = (java.util.List)cm.get(Java.GetAnnotationClass <CoreAnnotations.TokensAnnotation>());
                Assert.Greater(tokens.size(), 0);

                var first = tokens.get(0);
                var last  = tokens.get(tokens.size() - 1);
                var time  = (TimeExpression)cm.get(Java.GetAnnotationClass <TimeExpression.Annotation>());
                Assert.IsNotNull(time, "Time expression is null");

                TestContext.Out.WriteLine($"{cm} [from char offset '{first}' to '{last}'] --> {time.getTemporal()}");
            }
        }
Example #7
0
        private static void Main()
        {
            // Path to the folder with models extracted from `stanford-corenlp-3.8.0-models.jar`
            var jarRoot         = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-corenlp-full-2017-06-09\models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Annotation pipeline configuration
            var pipeline = new AnnotationPipeline();

            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            // Loading POS Tagger and including them into pipeline
            var tagger = new MaxentTagger(modelsDirectory +
                                          @"\pos-tagger\english-left3words\english-left3words-distsim.tagger");

            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

            // SUTime configuration
            var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.sutime.txt";
            var props = new Properties();

            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            // Sample text for time expression extraction
            var text       = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
            var annotation = new Annotation(text);

            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
            pipeline.annotate(annotation);

            Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));

            var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;

            foreach (CoreMap cm in timexAnnsAll)
            {
                var tokens = cm.get(new CoreAnnotations.TokensAnnotation().getClass()) as List;
                var first  = tokens.get(0);
                var last   = tokens.get(tokens.size() - 1);
                var time   = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;
                Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}", cm, first, last, time.getTemporal());
            }
        }
        private static void Main()
        {

            // Path to the folder with models extracted from `stanford-corenlp-3.6.0-models.jar`
            var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2015-12-09\models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Annotation pipeline configuration
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            // Loading POS Tagger and including them into pipeline
            var tagger = new MaxentTagger(modelsDirectory +
                                          @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

            // SUTime configuration
            var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.sutime.txt";
            var props = new Properties();
            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            // Sample text for time expression extraction
            var text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
            var annotation = new Annotation(text);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
            pipeline.annotate(annotation);

            Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));

            var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
            foreach (CoreMap cm in timexAnnsAll)
            {
                var tokens = cm.get(new CoreAnnotations.TokensAnnotation().getClass()) as List;
                var first = tokens.get(0);
                var last = tokens.get(tokens.size() - 1);
                var time = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;
                Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}", cm, first, last, time.getTemporal());
            }
        }
Example #9
0
        public void SUTimeDefautTest()
        {
            var pipeline = new AnnotationPipeline();

            pipeline.addAnnotator(new PTBTokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            var tagger =
                new MaxentTagger(
                    Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger"));

            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

            var sutimeRules = new[] {
                Config.GetModel(@"sutime\defs.sutime.txt"),
                Config.GetModel(@"sutime\english.holidays.sutime.txt"),
                Config.GetModel(@"sutime\english.sutime.txt")
            };

            var props = new Properties();

            props.setProperty("sutime.rules", String.Join(",", sutimeRules));
            props.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            const string text       = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
            var          annotation = new Annotation(text);

            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
            pipeline.annotate(annotation);

            Console.WriteLine(annotation.get(new CoreAnnotations.TextAnnotation().getClass()) + "\n");
            var timexAnnsAll = (ArrayList)annotation.get(new TimeAnnotations.TimexAnnotations().getClass());

            foreach (CoreMap cm in timexAnnsAll)
            {
                var tokens = (java.util.List)cm.get(new CoreAnnotations.TokensAnnotation().getClass());
                var first  = tokens.get(0);
                var last   = tokens.get(tokens.size() - 1);
                var time   = (TimeExpression)cm.get(new TimeExpression.Annotation().getClass());
                Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}",
                                  cm, first, last, (time.getTemporal()));
            }
        }
        private Annotation PrepareAnnotation(string text, DateTime currentDate)
        {
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
            var tagger = new MaxentTagger(modelsDir + @"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));
            var sutimeRules = modelsDir + @"\sutime\defs.sutime.txt,"
                                       + modelsDir + @"\sutime\english.holidays.sutime.txt,"
                                       + modelsDir + @"\sutime\english.sutime.txt";
            var props = new Properties();
            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            var annotation = new Annotation(text);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), currentDate.ToString("yyyy-MM-dd"));
            pipeline.annotate(annotation);
            return annotation;
        }
Example #11
0
        public void SUTimeDefautTest()
        {
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new PTBTokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            var tagger =
                new MaxentTagger(
                    Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger"));
            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

            var sutimeRules = new[] {
                                      Config.GetModel(@"sutime\defs.sutime.txt"),
                                      Config.GetModel(@"sutime\english.holidays.sutime.txt"),
                                      Config.GetModel(@"sutime\english.sutime.txt")
                                  };

            var props = new Properties();
            props.setProperty("sutime.rules", String.Join(",", sutimeRules));
            props.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            const string text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
            var annotation = new Annotation(text);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
            pipeline.annotate(annotation);

            Console.WriteLine(annotation.get(new CoreAnnotations.TextAnnotation().getClass())+"\n");
            var timexAnnsAll = (ArrayList)annotation.get(new TimeAnnotations.TimexAnnotations().getClass());
            foreach (CoreMap cm in timexAnnsAll)
            {
                var tokens = (java.util.List)cm.get(new CoreAnnotations.TokensAnnotation().getClass());
                var first = tokens.get(0);
                var last = tokens.get(tokens.size() - 1);
                var time = (TimeExpression)cm.get(new TimeExpression.Annotation().getClass());
                Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}",
                    cm, first, last, (time.getTemporal()));
            }
        }
Example #12
0
        private Annotation PrepareAnnotation(string text, DateTime currentDate)
        {
            var pipeline = new AnnotationPipeline();

            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
            var tagger = new MaxentTagger(modelsDir + @"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");

            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));
            var sutimeRules = modelsDir + @"\sutime\defs.sutime.txt,"
                              + modelsDir + @"\sutime\english.holidays.sutime.txt,"
                              + modelsDir + @"\sutime\english.sutime.txt";
            var props = new Properties();

            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            var annotation = new Annotation(text);

            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), currentDate.ToString("yyyy-MM-dd"));
            pipeline.annotate(annotation);
            return(annotation);
        }
        public void extractTime(string text)
        {
            sentenceInput = text;
            string presentDate = "2015-10-10";
            string curr = Environment.CurrentDirectory;
            var jarRoot = curr + @"\stanford-corenlp-3.5.2-models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Annotation pipeline configuration
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            // SUTime configuration
            var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.sutime.txt";

            var props = new Properties();

            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            props.setProperty("sutime.markTimeRanges", "true");
            props.setProperty("sutime.includeRange", "true");

            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            // Sample text for time expression extraction

            var annotation = new Annotation(text);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), presentDate);
            pipeline.annotate(annotation);

            //  Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));

            var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
            foreach (CoreMap cm in timexAnnsAll)
            {

                var time = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;

                string typeTimex = time.getTemporal().getTimexType().toString();
                if (typeTimex.ToLower() == "duration")
                {
                    typeTime = "tPeriod";
                    valueTime = time.getTemporal().toISOString();
                    Console.WriteLine(valueTime);
                }

                if (typeTimex.ToLower() == "time" || typeTimex.ToLower() == "date")
                {
                    string textOftime = time.getText().ToString();

                    char[] delimiterChars = { ' ' };
                    string[] words = textOftime.Split(delimiterChars);

                    string mainword = words[0];
                    var tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");

                    var sentences = MaxentTagger.tokenizeText(new StringReader(text));
                    var first = sentences.get(0) as ArrayList;
                    int size = first.size();

                    int i = 0;
                    int index = -3;
                    while (i < size)
                    {
                        if (first.get(i).ToString() == mainword)
                            index = i;

                        i++;
                    }
                    var taggedSentence = tagger.tagSentence(first);

                    string checker = taggedSentence.get(index - 1).ToString();
                    if (checker.ToLower() == "after/in" || checker.ToLower() == "since/in")
                    {
                        typeTime = "tTrigger";
                        valueTime = "Start : " + time.getTemporal().toISOString();

                        Console.WriteLine(valueTime);

                    }

                    else if (checker.ToLower() == "before/in")
                    {
                        if (typeTimex == "TIME")
                        {
                            typeTime = "tTrigger";
                            valueTime = "End : " + time.getTemporal().toISOString();

                            Console.WriteLine(valueTime);
                        }
                        else
                        {
                            DateTime result = new DateTime();
                            DateTime current = DateTime.ParseExact(presentDate, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
                            string dt = time.getTemporal().toString();
                            char[] delimiter = { '-', '-', '-' };
                            string[] partsOfDate = time.getTemporal().toISOString().Split(delimiter);
                            int count = partsOfDate.Length;
                            if (count == 1)
                            {

                                result = Convert.ToDateTime("01-01-" + partsOfDate[0]);
                            }

                            if (count == 2)
                            {
                                result = Convert.ToDateTime("01-" + partsOfDate[1] + "-" + partsOfDate[0]);
                            }

                            //  result = DateTime.ParseExact(dt, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
                            int comp = DateTime.Compare(current, result);
                            if (comp < 0)
                            {
                                typeTime = "tTrigger";
                                valueTime = "Start now (" + presentDate + ") End :" + time.getTemporal().toString();
                                Console.WriteLine(valueTime);
                            }

                            else
                            {
                                typeTime = "tTrigger";
                                valueTime = "End : " + time.getTemporal().toString();
                                Console.WriteLine(valueTime);
                            }

                        }

                    }

                    else
                    {

                        typeTime = "tStamp";
                        valueTime = time.getTemporal().toISOString();
                        Console.WriteLine(valueTime);

                    }
                }
            }
        }