/// <summary>
/// Annotates one document (a file, or one line read from standard input) and writes every
/// relation triple found in it to <c>Output</c>.
/// </summary>
/// <param name="pipeline">The annotation pipeline the document is run through.</param>
/// <param name="docid">Identifier of the document; the literal "stdin" marks standard-input lines.</param>
/// <param name="document">The raw text to annotate.</param>
private static void ProcessDocument(AnnotationPipeline pipeline, string docid, string document)
{
    // Blank input: nothing to annotate.
    if (document.Trim().IsEmpty())
    {
        return;
    }

    // Run the pipeline over the text.
    Annotation annotated = new Annotation(document);
    pipeline.Annotate(annotated);

    // Emit the triples while holding the lock on Output.
    bool wroteAny = false;
    lock (Output)
    {
        foreach (ICoreMap sent in annotated.Get(typeof(CoreAnnotations.SentencesAnnotation)))
        {
            foreach (RelationTriple triple in sent.Get(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation)))
            {
                Output.WriteLine(TripleToString(triple, docid, sent));
                wroteAny = true;
            }
        }
    }

    if (wroteAny)
    {
        return;
    }

    // For stdin input the text itself is logged, since the docid carries no information.
    string label = "stdin".Equals(docid) ? document : docid;
    log.Info("No extractions in: " + label);
}
/// <summary>
/// Type initializer: builds the pipeline via <c>MakeNumericPipeline</c> and creates an empty cache map.
/// </summary>
static SUTimeSimpleParser()
{
    pipeline = MakeNumericPipeline();
    cache = Generics.NewHashMap();
}
/// <summary>
/// Runs <paramref name="pipeline"/> over a single sentence by wrapping it in a fresh
/// <c>Annotation</c> whose text is the sentence's tokens joined by single spaces.
/// </summary>
/// <param name="sentence">The sentence to annotate; its tokens annotation is read to build the wrapper.</param>
/// <param name="pipeline">The pipeline applied to the wrapper annotation.</param>
public static void Annotate(ICoreMap sentence, AnnotationPipeline pipeline)
{
    var tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

    // The wrapper carries the same tokens and contains exactly this one sentence.
    Annotation wrapper = new Annotation(StringUtils.Join(tokens, " "));
    wrapper.Set(typeof(CoreAnnotations.TokensAnnotation), tokens);
    wrapper.Set(typeof(CoreAnnotations.SentencesAnnotation), Java.Util.Collections.SingletonList(sentence));

    pipeline.Annotate(wrapper);
}
/// <summary>
/// Creates a pipeline with four annotators, added in this order: tokenizer ("en"),
/// sentence splitter, POS tagger and time annotator.
/// </summary>
/// <returns>The newly built pipeline.</returns>
private static AnnotationPipeline MakeNumericPipeline()
{
    AnnotationPipeline numericPipeline = new AnnotationPipeline();

    numericPipeline.AddAnnotator(new TokenizerAnnotator(false, "en"));
    numericPipeline.AddAnnotator(new WordsToSentencesAnnotator(false));
    numericPipeline.AddAnnotator(new POSTaggerAnnotator(false));
    numericPipeline.AddAnnotator(new TimeAnnotator(true));

    return numericPipeline;
}
/// <summary>
/// Creates the StanfordCoreNLP pipeline with the "tokenize, ssplit, pos" annotators.
/// </summary>
/// <remarks>
/// The process-wide current directory is temporarily switched to <c>jarRoot</c> so that
/// StanfordCoreNLP can find its model files automatically. The previous directory is restored
/// in a <c>finally</c> block, so a failure while loading the models no longer leaves the
/// process running in <c>jarRoot</c>.
/// </remarks>
public LangProcessing()
{
    var curDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot);
    try
    {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos");
        pipeline = new StanfordCoreNLP(props);
    }
    finally
    {
        // Always undo the directory switch, even when pipeline construction throws.
        Directory.SetCurrentDirectory(curDir);
    }
}
/// <summary>
/// Runs a tokenizer / sentence splitter / POS tagger / SUTime pipeline over a sample sentence
/// and asserts that at least one time expression is found, each with tokens and a non-null
/// <c>TimeExpression</c>.
/// </summary>
public void ThreeInterestingDates()
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // POS tagger
    var taggerModel = Files.CoreNlp.Models("pos-tagger/english-left3words-distsim.tagger");
    pipeline.addAnnotator(new POSTaggerAnnotator(new MaxentTagger(taggerModel)));

    // SUTime: comma-separated list of rule files
    var ruleFiles = string.Join(",",
        Files.CoreNlp.Models("sutime/defs.sutime.txt"),
        Files.CoreNlp.Models("sutime/english.sutime.txt"),
        Files.CoreNlp.Models("sutime/english.holidays.sutime.txt"));
    var sutimeSettings = new Dictionary<string, string>
    {
        { "sutime.binders", "0" },
        { "sutime.rules", ruleFiles }
    };
    pipeline.addAnnotator(new TimeAnnotator("sutime", Java.Props(sutimeSettings)));

    // Annotate the sample text relative to a fixed document date.
    var annotation = new Annotation("Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.");
    annotation.set(Java.GetAnnotationClass<CoreAnnotations.DocDateAnnotation>(), "2013-07-14");
    pipeline.annotate(annotation);

    TestContext.Out.WriteLine(annotation.get(Java.GetAnnotationClass<CoreAnnotations.TextAnnotation>()));

    var timexes = (java.util.ArrayList)annotation.get(Java.GetAnnotationClass<TimeAnnotations.TimexAnnotations>());
    Assert.Greater(timexes.size(), 0);

    foreach (CoreMap cm in timexes)
    {
        var tokens = (java.util.List)cm.get(Java.GetAnnotationClass<CoreAnnotations.TokensAnnotation>());
        Assert.Greater(tokens.size(), 0);

        var first = tokens.get(0);
        var last = tokens.get(tokens.size() - 1);

        var time = (TimeExpression)cm.get(Java.GetAnnotationClass<TimeExpression.Annotation>());
        Assert.IsNotNull(time, "Time expression is null");

        TestContext.Out.WriteLine($"{cm} [from char offset '{first}' to '{last}'] --> {time.getTemporal()}");
    }
}
/// <summary>
/// Sample program: builds a tokenizer / sentence splitter / POS tagger / SUTime pipeline,
/// annotates a fixed sentence and prints every time expression found with its first and last
/// token and its resolved temporal value.
/// </summary>
private static void Main()
{
    // Folder holding the models extracted from `stanford-corenlp-3.8.0-models.jar`
    var jarRoot = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-corenlp-full-2017-06-09\models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Tokenizer and sentence splitter
    var nlp = new AnnotationPipeline();
    nlp.addAnnotator(new TokenizerAnnotator(false));
    nlp.addAnnotator(new WordsToSentencesAnnotator(false));

    // POS tagger
    var posTagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-left3words\english-left3words-distsim.tagger");
    nlp.addAnnotator(new POSTaggerAnnotator(posTagger));

    // SUTime: rule files are passed as one comma-separated property value
    var ruleFiles = modelsDirectory + @"\sutime\defs.sutime.txt,"
                    + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                    + modelsDirectory + @"\sutime\english.sutime.txt";
    var sutimeProps = new Properties();
    sutimeProps.setProperty("sutime.rules", ruleFiles);
    sutimeProps.setProperty("sutime.binders", "0");
    nlp.addAnnotator(new TimeAnnotator("sutime", sutimeProps));

    // Annotate the sample sentence relative to a fixed document date
    var sample = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var doc = new Annotation(sample);
    doc.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    nlp.annotate(doc);

    Console.WriteLine("{0}\n", doc.get(new CoreAnnotations.TextAnnotation().getClass()));

    // Annotation keys used for every time expression
    var tokensKey = new CoreAnnotations.TokensAnnotation().getClass();
    var timeKey = new TimeExpression.Annotation().getClass();

    var timexes = doc.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
    foreach (CoreMap timex in timexes)
    {
        var timexTokens = timex.get(tokensKey) as List;
        var firstToken = timexTokens.get(0);
        var lastToken = timexTokens.get(timexTokens.size() - 1);
        var expression = timex.get(timeKey) as TimeExpression;

        Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}", timex, firstToken, lastToken, expression.getTemporal());
    }
}
/// <summary>
/// Sample program: annotates a fixed sentence with a tokenizer / sentence splitter /
/// bidirectional POS tagger / SUTime pipeline and prints each time expression found.
/// </summary>
private static void Main()
{
    // Models extracted from `stanford-corenlp-3.6.0-models.jar` live under this folder
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2015-12-09\models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Pipeline: tokenizer, sentence splitter, POS tagger
    var annotators = new AnnotationPipeline();
    annotators.addAnnotator(new TokenizerAnnotator(false));
    annotators.addAnnotator(new WordsToSentencesAnnotator(false));
    annotators.addAnnotator(
        new POSTaggerAnnotator(
            new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger")));

    // SUTime settings
    var settings = new Properties();
    settings.setProperty(
        "sutime.rules",
        modelsDirectory + @"\sutime\defs.sutime.txt,"
        + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
        + modelsDirectory + @"\sutime\english.sutime.txt");
    settings.setProperty("sutime.binders", "0");
    annotators.addAnnotator(new TimeAnnotator("sutime", settings));

    // Sample text, resolved against a fixed document date
    var input = new Annotation("Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.");
    input.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    annotators.annotate(input);

    Console.WriteLine("{0}\n", input.get(new CoreAnnotations.TextAnnotation().getClass()));

    var found = input.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
    foreach (CoreMap entry in found)
    {
        var words = entry.get(new CoreAnnotations.TokensAnnotation().getClass()) as List;
        var lastIndex = words.size() - 1;
        var head = words.get(0);
        var tail = words.get(lastIndex);
        var parsed = entry.get(new TimeExpression.Annotation().getClass()) as TimeExpression;

        Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}", entry, head, tail, parsed.getTemporal());
    }
}
/// <summary>
/// Runs SUTime with its default English rule files over a sample sentence and prints each
/// time expression with its first and last token and its temporal value.
/// </summary>
public void SUTimeDefautTest()
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new PTBTokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // POS tagger
    var taggerModel = Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    pipeline.addAnnotator(new POSTaggerAnnotator(new MaxentTagger(taggerModel)));

    // SUTime: rule files are passed as one comma-separated property value
    var ruleFiles = new[]
    {
        Config.GetModel(@"sutime\defs.sutime.txt"),
        Config.GetModel(@"sutime\english.holidays.sutime.txt"),
        Config.GetModel(@"sutime\english.sutime.txt")
    };
    var sutimeProps = new Properties();
    sutimeProps.setProperty("sutime.rules", String.Join(",", ruleFiles));
    sutimeProps.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", sutimeProps));

    // Annotate relative to a fixed document date
    const string text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var doc = new Annotation(text);
    doc.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    pipeline.annotate(doc);

    Console.WriteLine(doc.get(new CoreAnnotations.TextAnnotation().getClass()) + "\n");

    var timexes = (ArrayList)doc.get(new TimeAnnotations.TimexAnnotations().getClass());
    foreach (CoreMap timex in timexes)
    {
        var timexTokens = (java.util.List)timex.get(new CoreAnnotations.TokensAnnotation().getClass());
        var firstToken = timexTokens.get(0);
        var lastToken = timexTokens.get(timexTokens.size() - 1);
        var expression = (TimeExpression)timex.get(new TimeExpression.Annotation().getClass());

        Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}", timex, firstToken, lastToken, expression.getTemporal());
    }
}
/// <summary>
/// Builds a tokenizer / sentence splitter / POS tagger / SUTime pipeline, runs it over
/// <paramref name="text"/> and returns the resulting annotation.
/// </summary>
/// <param name="text">The text to annotate.</param>
/// <param name="currentDate">Document date set on the annotation, formatted as yyyy-MM-dd.</param>
/// <returns>The annotation after the pipeline has run.</returns>
/// <remarks>
/// NOTE(review): the pipeline and the tagger model are rebuilt on every call; consider caching
/// them if this method is called often.
/// </remarks>
private Annotation PrepareAnnotation(string text, DateTime currentDate)
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // The original joined the tagger path without a separator but the sutime paths with one,
    // so one of the two was malformed whichever way modelsDir ends. Path.Combine inserts the
    // separator only when it is missing, which makes both forms of modelsDir work.
    var taggerPath = System.IO.Path.Combine(modelsDir, @"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    var tagger = new MaxentTagger(taggerPath);
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

    // SUTime takes its rule files as one comma-separated property value.
    var sutimeRules = string.Join(",",
        System.IO.Path.Combine(modelsDir, @"sutime\defs.sutime.txt"),
        System.IO.Path.Combine(modelsDir, @"sutime\english.holidays.sutime.txt"),
        System.IO.Path.Combine(modelsDir, @"sutime\english.sutime.txt"));
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));

    // Format with the invariant culture so the year is always Gregorian, regardless of the
    // calendar of the current thread culture.
    var docDate = currentDate.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), docDate);
    pipeline.annotate(annotation);
    return annotation;
}
/// <summary>
/// Exercises SUTime with the default English rule files: annotates a sample sentence against a
/// fixed document date and writes every recognised time expression to the console.
/// </summary>
public void SUTimeDefautTest()
{
    // Tokenizer and sentence splitter
    var nlp = new AnnotationPipeline();
    nlp.addAnnotator(new PTBTokenizerAnnotator(false));
    nlp.addAnnotator(new WordsToSentencesAnnotator(false));

    // POS tagger
    var posTagger = new MaxentTagger(
        Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger"));
    nlp.addAnnotator(new POSTaggerAnnotator(posTagger));

    // SUTime configuration
    var rulePaths = new[]
    {
        Config.GetModel(@"sutime\defs.sutime.txt"),
        Config.GetModel(@"sutime\english.holidays.sutime.txt"),
        Config.GetModel(@"sutime\english.sutime.txt")
    };
    var joinedRules = String.Join(",", rulePaths);
    var settings = new Properties();
    settings.setProperty("sutime.rules", joinedRules);
    settings.setProperty("sutime.binders", "0");
    nlp.addAnnotator(new TimeAnnotator("sutime", settings));

    const string text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var input = new Annotation(text);
    input.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    nlp.annotate(input);

    Console.WriteLine(input.get(new CoreAnnotations.TextAnnotation().getClass()) + "\n");

    var found = (ArrayList)input.get(new TimeAnnotations.TimexAnnotations().getClass());
    foreach (CoreMap entry in found)
    {
        var words = (java.util.List)entry.get(new CoreAnnotations.TokensAnnotation().getClass());
        var lastIndex = words.size() - 1;
        var head = words.get(0);
        var tail = words.get(lastIndex);
        var parsed = (TimeExpression)entry.get(new TimeExpression.Annotation().getClass());

        Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}", entry, head, tail, parsed.getTemporal());
    }
}
/// <summary>
/// Annotates <paramref name="text"/> with a freshly built tokenizer / sentence splitter /
/// POS tagger / SUTime pipeline.
/// </summary>
/// <param name="text">The text to annotate.</param>
/// <param name="currentDate">Reference date stored on the annotation as a yyyy-MM-dd document date.</param>
/// <returns>The annotation after the pipeline has been applied to it.</returns>
private Annotation PrepareAnnotation(string text, DateTime currentDate)
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // Path.Combine adds a directory separator only when modelsDir does not already end with
    // one. The previous plain concatenation used no separator for the tagger and a leading
    // separator for the rule files, so it could not be right for both.
    var tagger = new MaxentTagger(
        System.IO.Path.Combine(modelsDir, @"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger"));
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

    // Rule files are handed to SUTime as a single comma-separated list.
    var ruleFiles = new[]
    {
        System.IO.Path.Combine(modelsDir, @"sutime\defs.sutime.txt"),
        System.IO.Path.Combine(modelsDir, @"sutime\english.holidays.sutime.txt"),
        System.IO.Path.Combine(modelsDir, @"sutime\english.sutime.txt")
    };
    var props = new Properties();
    props.setProperty("sutime.rules", string.Join(",", ruleFiles));
    props.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));

    var annotation = new Annotation(text);

    // Invariant culture keeps the date Gregorian; the current culture's calendar could
    // otherwise produce a different year.
    annotation.set(
        new CoreAnnotations.DocDateAnnotation().getClass(),
        currentDate.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture));

    pipeline.annotate(annotation);
    return annotation;
}
/// <summary>
/// Runs SUTime over <paramref name="text"/> and, for every time expression found, stores a
/// classification in <c>typeTime</c> and a value in <c>valueTime</c>, printing the value to
/// the console.
/// </summary>
/// <remarks>
/// <para>
/// The reference date is hard-coded to 2015-10-10 and the models are read from
/// <c>stanford-corenlp-3.5.2-models</c> under the current directory.
/// </para>
/// <para>
/// Durations are reported as <c>tPeriod</c>. Dates and times are reported as <c>tTrigger</c>
/// when the token immediately before the expression is tagged after/IN, since/IN or before/IN,
/// and as <c>tStamp</c> otherwise (including when the expression has no preceding token in the
/// first sentence). The fields are overwritten on each iteration, so after the call they hold
/// the result for the last expression processed.
/// </para>
/// </remarks>
/// <param name="text">Text to scan; also stored in <c>sentenceInput</c>.</param>
public void extractTime(string text)
{
    sentenceInput = text;
    string presentDate = "2015-10-10";
    string curr = Environment.CurrentDirectory;
    var jarRoot = curr + @"\stanford-corenlp-3.5.2-models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Annotation pipeline configuration
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // SUTime configuration
    var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.sutime.txt";
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    props.setProperty("sutime.markTimeRanges", "true");
    props.setProperty("sutime.includeRange", "true");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));

    // Annotate the text relative to the fixed reference date
    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), presentDate);
    pipeline.annotate(annotation);

    var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
    if (timexAnnsAll == null)
    {
        // No time annotations were produced; nothing to classify.
        return;
    }

    // Loaded lazily and at most once per call; the model used to be reloaded for every expression.
    MaxentTagger tagger = null;

    // Shapes of ISO date values that can be compared with the reference date.
    string[] isoDateFormats = { "yyyy-MM-dd", "yyyy-MM", "yyyy" };

    foreach (CoreMap cm in timexAnnsAll)
    {
        var time = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;
        if (time == null)
        {
            continue;
        }

        // Ordinal, case-insensitive comparisons: ToLower() is culture-sensitive and, for
        // example, turns "TIME" into "tıme" under Turkish cultures.
        string typeTimex = time.getTemporal().getTimexType().toString();

        if (string.Equals(typeTimex, "duration", StringComparison.OrdinalIgnoreCase))
        {
            typeTime = "tPeriod";
            valueTime = time.getTemporal().toISOString();
            Console.WriteLine(valueTime);
            continue;
        }

        bool isTimeOrDate = string.Equals(typeTimex, "time", StringComparison.OrdinalIgnoreCase)
                            || string.Equals(typeTimex, "date", StringComparison.OrdinalIgnoreCase);
        if (!isTimeOrDate)
        {
            continue;
        }

        // First word of the expression; used to locate it in the tokenized first sentence.
        string textOftime = time.getText().ToString();
        string mainword = textOftime.Split(' ')[0];

        // Tagged token ("word/TAG") immediately before the expression. It stays empty when the
        // word is not found in the first sentence or is its first token; those cases used to
        // index the tagged sentence at a negative position and throw.
        string checker = string.Empty;
        var sentences = MaxentTagger.tokenizeText(new StringReader(text));
        var first = sentences.size() > 0 ? sentences.get(0) as ArrayList : null;
        if (first != null)
        {
            int index = -1;
            int size = first.size();
            for (int i = 0; i < size; i++)
            {
                // No break: as before, the last matching token wins.
                if (first.get(i).ToString() == mainword)
                {
                    index = i;
                }
            }

            if (index > 0)
            {
                if (tagger == null)
                {
                    tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
                }

                var taggedSentence = tagger.tagSentence(first);
                checker = taggedSentence.get(index - 1).ToString();
            }
        }

        if (string.Equals(checker, "after/in", StringComparison.OrdinalIgnoreCase)
            || string.Equals(checker, "since/in", StringComparison.OrdinalIgnoreCase))
        {
            typeTime = "tTrigger";
            valueTime = "Start : " + time.getTemporal().toISOString();
            Console.WriteLine(valueTime);
        }
        else if (string.Equals(checker, "before/in", StringComparison.OrdinalIgnoreCase))
        {
            if (typeTimex == "TIME")
            {
                typeTime = "tTrigger";
                valueTime = "End : " + time.getTemporal().toISOString();
                Console.WriteLine(valueTime);
            }
            else
            {
                DateTime current = DateTime.ParseExact(presentDate, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

                // Parse yyyy, yyyy-MM or yyyy-MM-dd with the invariant culture. The previous
                // Convert.ToDateTime("01-MM-yyyy") depended on the current culture (en-US
                // reads it as month-day) and full dates were never parsed at all. On failure
                // TryParseExact leaves DateTime.MinValue, which selects the "End" branch.
                DateTime result;
                DateTime.TryParseExact(
                    time.getTemporal().toISOString(),
                    isoDateFormats,
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.None,
                    out result);

                typeTime = "tTrigger";
                if (DateTime.Compare(current, result) < 0)
                {
                    // The date lies after the reference date: the period runs from now until then.
                    valueTime = "Start now (" + presentDate + ") End :" + time.getTemporal().toString();
                }
                else
                {
                    valueTime = "End : " + time.getTemporal().toString();
                }

                Console.WriteLine(valueTime);
            }
        }
        else
        {
            typeTime = "tStamp";
            valueTime = time.getTemporal().toISOString();
            Console.WriteLine(valueTime);
        }
    }
}