/// <summary>
/// Runs a tokenizer, sentence splitter, POS tagger and SUTime annotator over a fixed
/// sample sentence (document date 2013-07-14), asserts that at least one time expression
/// with at least one token is found, and writes each expression to the test output.
/// </summary>
public void ThreeInterestingDates()
{
    var nlp = new AnnotationPipeline();
    nlp.addAnnotator(new TokenizerAnnotator(false));
    nlp.addAnnotator(new WordsToSentencesAnnotator(false));

    var taggerModel = Files.CoreNlp.Models("pos-tagger/english-left3words-distsim.tagger");
    nlp.addAnnotator(new POSTaggerAnnotator(new MaxentTagger(taggerModel)));

    // SUTime takes its rule files as a single comma-separated property value.
    var ruleFiles = new[]
    {
        Files.CoreNlp.Models("sutime/defs.sutime.txt"),
        Files.CoreNlp.Models("sutime/english.sutime.txt"),
        Files.CoreNlp.Models("sutime/english.holidays.sutime.txt")
    };
    var settings = new Dictionary<string, string>
    {
        { "sutime.binders", "0" },
        { "sutime.rules", string.Join(",", ruleFiles) }
    };
    nlp.addAnnotator(new TimeAnnotator("sutime", Java.Props(settings)));

    var document = new Annotation("Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.");
    // The document date is the reference against which the annotator is run.
    document.set(Java.GetAnnotationClass<CoreAnnotations.DocDateAnnotation>(), "2013-07-14");
    nlp.annotate(document);

    TestContext.Out.WriteLine(document.get(Java.GetAnnotationClass<CoreAnnotations.TextAnnotation>()));

    var timexes = (java.util.ArrayList)document.get(Java.GetAnnotationClass<TimeAnnotations.TimexAnnotations>());
    Assert.Greater(timexes.size(), 0);

    for (var i = 0; i < timexes.size(); i++)
    {
        var cm = (CoreMap)timexes.get(i);

        var tokens = (java.util.List)cm.get(Java.GetAnnotationClass<CoreAnnotations.TokensAnnotation>());
        Assert.Greater(tokens.size(), 0);
        var head = tokens.get(0);
        var tail = tokens.get(tokens.size() - 1);

        var expression = (TimeExpression)cm.get(Java.GetAnnotationClass<TimeExpression.Annotation>());
        Assert.IsNotNull(expression, "Time expression is null");

        TestContext.Out.WriteLine($"{cm} [from char offset '{head}' to '{tail}'] --> {expression.getTemporal()}");
    }
}
/// <summary>
/// Sample entry point: builds a tokenizer / sentence splitter / POS tagger / SUTime
/// pipeline, annotates a fixed sentence with document date 2013-07-14 and prints every
/// time expression found together with its first token, last token and temporal value.
/// </summary>
private static void Main()
{
    // Path to the folder with models extracted from `stanford-corenlp-3.8.0-models.jar`
    var jarRoot = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-corenlp-full-2017-06-09\models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Annotation pipeline configuration
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // Loading POS Tagger and including them into pipeline
    var tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-left3words\english-left3words-distsim.tagger");
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

    // SUTime configuration
    var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.sutime.txt";
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));

    // Sample text for time expression extraction
    var text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    pipeline.annotate(annotation);

    Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));

    // Direct casts instead of `as`: an unexpected runtime type now fails with an
    // InvalidCastException at the cast rather than a NullReferenceException later on.
    var timexAnnsAll = (ArrayList)annotation.get(new TimeAnnotations.TimexAnnotations().getClass());

    // The annotation keys do not change between iterations, so look them up once.
    var tokensKey = new CoreAnnotations.TokensAnnotation().getClass();
    var timeExpressionKey = new TimeExpression.Annotation().getClass();

    foreach (CoreMap cm in timexAnnsAll)
    {
        var tokens = (List)cm.get(tokensKey);
        var first = tokens.get(0);
        var last = tokens.get(tokens.size() - 1);
        var time = (TimeExpression)cm.get(timeExpressionKey);
        Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}", cm, first, last, time.getTemporal());
    }
}
/// <summary>
/// Sample entry point: configures a tokenizer, sentence splitter, bidirectional POS
/// tagger and SUTime annotator, runs them over a fixed sentence (document date
/// 2013-07-14) and prints each detected time expression with its first token, last
/// token and temporal value.
/// </summary>
private static void Main()
{
    // Path to the folder with models extracted from `stanford-corenlp-3.6.0-models.jar`
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2015-12-09\models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Annotation pipeline configuration
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // Loading POS Tagger and including them into pipeline
    var tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

    // SUTime configuration
    var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.sutime.txt";
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));

    // Sample text for time expression extraction
    var text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    pipeline.annotate(annotation);

    Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));

    // A direct cast reports a wrong runtime type where it happens (InvalidCastException);
    // the previous `as` cast turned it into a null that was dereferenced unchecked.
    var timexAnnsAll = (ArrayList)annotation.get(new TimeAnnotations.TimexAnnotations().getClass());

    // Loop-invariant annotation keys, resolved once instead of per expression.
    var tokensKey = new CoreAnnotations.TokensAnnotation().getClass();
    var timeExpressionKey = new TimeExpression.Annotation().getClass();

    foreach (CoreMap cm in timexAnnsAll)
    {
        var tokens = (List)cm.get(tokensKey);
        var first = tokens.get(0);
        var last = tokens.get(tokens.size() - 1);
        var time = (TimeExpression)cm.get(timeExpressionKey);
        Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}", cm, first, last, time.getTemporal());
    }
}
/// <summary>
/// Builds a PTB tokenizer / sentence splitter / POS tagger / SUTime pipeline from the
/// models resolved by Config.GetModel, annotates a fixed sentence with document date
/// 2013-07-14 and prints every time expression that was found.
/// </summary>
public void SUTimeDefautTest()
{
    var nlp = new AnnotationPipeline();
    nlp.addAnnotator(new PTBTokenizerAnnotator(false));
    nlp.addAnnotator(new WordsToSentencesAnnotator(false));

    var taggerModel = Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    nlp.addAnnotator(new POSTaggerAnnotator(new MaxentTagger(taggerModel)));

    // SUTime expects its rule files as one comma-separated property value.
    var ruleList = String.Join(
        ",",
        new[]
        {
            Config.GetModel(@"sutime\defs.sutime.txt"),
            Config.GetModel(@"sutime\english.holidays.sutime.txt"),
            Config.GetModel(@"sutime\english.sutime.txt")
        });
    var settings = new Properties();
    settings.setProperty("sutime.rules", ruleList);
    settings.setProperty("sutime.binders", "0");
    nlp.addAnnotator(new TimeAnnotator("sutime", settings));

    var document = new Annotation("Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.");
    document.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    nlp.annotate(document);

    Console.WriteLine(document.get(new CoreAnnotations.TextAnnotation().getClass()) + "\n");

    var timexes = (ArrayList)document.get(new TimeAnnotations.TimexAnnotations().getClass());
    for (var i = 0; i < timexes.size(); i++)
    {
        var cm = (CoreMap)timexes.get(i);
        var tokens = (java.util.List)cm.get(new CoreAnnotations.TokensAnnotation().getClass());
        var head = tokens.get(0);
        var tail = tokens.get(tokens.size() - 1);
        var expression = (TimeExpression)cm.get(new TimeExpression.Annotation().getClass());

        Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}", cm, head, tail, expression.getTemporal());
    }
}
/// <summary>
/// Annotates <paramref name="text"/> with a tokenizer, sentence splitter, POS tagger and
/// SUTime annotator, using <paramref name="currentDate"/> as the document date.
/// </summary>
/// <param name="text">Text to annotate.</param>
/// <param name="currentDate">Date stored (as yyyy-MM-dd) in the DocDateAnnotation before annotating.</param>
/// <returns>The annotation after the pipeline has run over it.</returns>
private Annotation PrepareAnnotation(string text, DateTime currentDate)
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // Path.Combine yields a valid path whether or not modelsDir ends with a separator.
    // The previous concatenations disagreed ("pos-tagger\..." without, "\sutime\..." with
    // a leading backslash), so one of them was malformed either way.
    var taggerPath = System.IO.Path.Combine(
        modelsDir, "pos-tagger", "english-bidirectional", "english-bidirectional-distsim.tagger");
    var tagger = new MaxentTagger(taggerPath);
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

    // SUTime takes its rule files as one comma-separated property value.
    var sutimeRules = string.Join(
        ",",
        System.IO.Path.Combine(modelsDir, "sutime", "defs.sutime.txt"),
        System.IO.Path.Combine(modelsDir, "sutime", "english.holidays.sutime.txt"),
        System.IO.Path.Combine(modelsDir, "sutime", "english.sutime.txt"));
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));

    var annotation = new Annotation(text);
    // Format with the invariant culture: under a culture whose default calendar is not
    // Gregorian, a culture-sensitive ToString would emit a different year number.
    var docDate = currentDate.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), docDate);
    pipeline.annotate(annotation);
    return annotation;
}
/// <summary>
/// Annotates a fixed sample sentence (document date 2013-07-14) with a PTB tokenizer,
/// sentence splitter, POS tagger and SUTime, then prints the text and each time
/// expression found along with its first token, last token and temporal value.
/// </summary>
public void SUTimeDefautTest()
{
    const string sample = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";

    var annotators = new AnnotationPipeline();
    annotators.addAnnotator(new PTBTokenizerAnnotator(false));
    annotators.addAnnotator(new WordsToSentencesAnnotator(false));

    var posModel = Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    var posTagger = new MaxentTagger(posModel);
    annotators.addAnnotator(new POSTaggerAnnotator(posTagger));

    // Rule files are handed to SUTime as a comma-separated list.
    var rulePaths = new[]
    {
        Config.GetModel(@"sutime\defs.sutime.txt"),
        Config.GetModel(@"sutime\english.holidays.sutime.txt"),
        Config.GetModel(@"sutime\english.sutime.txt")
    };
    var sutimeProps = new Properties();
    sutimeProps.setProperty("sutime.rules", String.Join(",", rulePaths));
    sutimeProps.setProperty("sutime.binders", "0");
    annotators.addAnnotator(new TimeAnnotator("sutime", sutimeProps));

    var doc = new Annotation(sample);
    doc.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    annotators.annotate(doc);

    var annotatedText = doc.get(new CoreAnnotations.TextAnnotation().getClass());
    Console.WriteLine(annotatedText + "\n");

    var found = (ArrayList)doc.get(new TimeAnnotations.TimexAnnotations().getClass());
    foreach (CoreMap timex in found)
    {
        var timexTokens = (java.util.List)timex.get(new CoreAnnotations.TokensAnnotation().getClass());
        var lastIndex = timexTokens.size() - 1;
        var firstToken = timexTokens.get(0);
        var lastToken = timexTokens.get(lastIndex);

        var parsed = (TimeExpression)timex.get(new TimeExpression.Annotation().getClass());
        var temporal = parsed.getTemporal();

        Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}", timex, firstToken, lastToken, temporal);
    }
}
/// <summary>
/// Runs the tokenizer, sentence splitter, POS tagger and SUTime annotators over
/// <paramref name="text"/>, with <paramref name="currentDate"/> set as the document date.
/// </summary>
/// <param name="text">Text to annotate.</param>
/// <param name="currentDate">Written to the DocDateAnnotation in yyyy-MM-dd form.</param>
/// <returns>The annotated document.</returns>
private Annotation PrepareAnnotation(string text, DateTime currentDate)
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // Build model paths with Path.Combine so the result is valid regardless of whether
    // modelsDir has a trailing separator. The old string concatenation omitted the
    // separator for the tagger but added one for the sutime files, so the two could not
    // both be well-formed.
    var taggerPath = System.IO.Path.Combine(
        modelsDir, "pos-tagger", "english-bidirectional", "english-bidirectional-distsim.tagger");
    pipeline.addAnnotator(new POSTaggerAnnotator(new MaxentTagger(taggerPath)));

    var ruleFiles = new[]
    {
        System.IO.Path.Combine(modelsDir, "sutime", "defs.sutime.txt"),
        System.IO.Path.Combine(modelsDir, "sutime", "english.holidays.sutime.txt"),
        System.IO.Path.Combine(modelsDir, "sutime", "english.sutime.txt")
    };
    var props = new Properties();
    // SUTime reads the rule files from a single comma-separated value.
    props.setProperty("sutime.rules", string.Join(",", ruleFiles));
    props.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));

    var annotation = new Annotation(text);
    // Invariant culture keeps the date Gregorian; a culture-sensitive ToString can
    // produce a different year under cultures with another default calendar.
    annotation.set(
        new CoreAnnotations.DocDateAnnotation().getClass(),
        currentDate.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture));
    pipeline.annotate(annotation);
    return annotation;
}
/// <summary>
/// Runs SUTime over <paramref name="text"/> (reference date fixed to 2015-10-10) and
/// classifies every time expression found. For each expression the typeTime and
/// valueTime members are overwritten and valueTime is written to the console, so after
/// the call the members hold the result for the last classified expression.
/// </summary>
/// <remarks>
/// Classification rules:
///   DURATION                                   -> "tPeriod", ISO value
///   DATE/TIME preceded by "after/IN","since/IN" -> "tTrigger", "Start : " + ISO value
///   TIME preceded by "before/IN"               -> "tTrigger", "End : " + ISO value
///   DATE preceded by "before/IN"               -> "tTrigger", "Start now (...) End :..." when the
///                                                 date lies after the reference date, else "End : ..."
///   any other DATE/TIME                        -> "tStamp", ISO value
/// The preceding token is looked up in the first sentence of the text only.
/// </remarks>
/// <param name="text">Sentence to analyse; also stored in sentenceInput.</param>
public void extractTime(string text)
{
    sentenceInput = text;
    string presentDate = "2015-10-10";
    string curr = Environment.CurrentDirectory;
    var jarRoot = curr + @"\stanford-corenlp-3.5.2-models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Annotation pipeline configuration
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // SUTime configuration
    var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.sutime.txt";
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    props.setProperty("sutime.markTimeRanges", "true");
    props.setProperty("sutime.includeRange", "true");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));

    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), presentDate);
    pipeline.annotate(annotation);

    // Direct cast: a wrong runtime type fails here instead of as a later null dereference.
    var timexAnnsAll = (ArrayList)annotation.get(new TimeAnnotations.TimexAnnotations().getClass());
    if (timexAnnsAll == null)
    {
        return;
    }

    var timeExpressionKey = new TimeExpression.Annotation().getClass();

    // Token text and "word/TAG" strings of the first sentence. Filled on first use so the
    // POS tagger model is loaded at most once per call, and not at all when the text
    // contains no DATE/TIME expression.
    string[] sentenceWords = null;
    string[] sentenceTagged = null;

    foreach (CoreMap cm in timexAnnsAll)
    {
        var time = (TimeExpression)cm.get(timeExpressionKey);
        if (time == null || time.getTemporal() == null)
        {
            continue;
        }

        string typeTimex = time.getTemporal().getTimexType().toString();

        // Ordinal, case-insensitive comparison: ToLower() is culture-sensitive and breaks
        // under Turkish casing rules, where 'I' does not lower-case to 'i'.
        bool isTime = string.Equals(typeTimex, "time", StringComparison.OrdinalIgnoreCase);
        bool isDate = string.Equals(typeTimex, "date", StringComparison.OrdinalIgnoreCase);

        if (string.Equals(typeTimex, "duration", StringComparison.OrdinalIgnoreCase))
        {
            typeTime = "tPeriod";
            valueTime = time.getTemporal().toISOString();
            Console.WriteLine(valueTime);
        }

        if (!isTime && !isDate)
        {
            continue;
        }

        if (sentenceWords == null)
        {
            TagFirstSentence(modelsDirectory, text, out sentenceWords, out sentenceTagged);
        }

        // First word of the expression, located at its last occurrence in the sentence.
        string mainword = time.getText().ToString().Split(' ')[0];
        int index = Array.LastIndexOf(sentenceWords, mainword);

        // No preceding token (word not found in the first sentence, or it is the first
        // token): use an empty marker so the expression is treated as a plain timestamp.
        // Previously this case indexed the tagged sentence with a negative position.
        string checker = (index >= 1 && index - 1 < sentenceTagged.Length)
            ? sentenceTagged[index - 1]
            : string.Empty;

        if (string.Equals(checker, "after/in", StringComparison.OrdinalIgnoreCase)
            || string.Equals(checker, "since/in", StringComparison.OrdinalIgnoreCase))
        {
            typeTime = "tTrigger";
            valueTime = "Start : " + time.getTemporal().toISOString();
            Console.WriteLine(valueTime);
        }
        else if (string.Equals(checker, "before/in", StringComparison.OrdinalIgnoreCase))
        {
            if (isTime)
            {
                typeTime = "tTrigger";
                valueTime = "End : " + time.getTemporal().toISOString();
                Console.WriteLine(valueTime);
            }
            else
            {
                DateTime current = DateTime.ParseExact(
                    presentDate, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

                // Accept a year, a year-month or a full date. Parsing is culture-invariant;
                // the old "01-MM-yyyy" strings were read as month-first under en-US, and a
                // full yyyy-MM-dd value was never parsed at all. Anything else (week dates,
                // partially unknown dates, ...) leaves DateTime.MinValue, which selects the
                // plain "End : " form below.
                DateTime result;
                if (!DateTime.TryParseExact(
                        time.getTemporal().toISOString(),
                        new[] { "yyyy", "yyyy-MM", "yyyy-MM-dd" },
                        System.Globalization.CultureInfo.InvariantCulture,
                        System.Globalization.DateTimeStyles.None,
                        out result))
                {
                    result = DateTime.MinValue;
                }

                typeTime = "tTrigger";
                if (DateTime.Compare(current, result) < 0)
                {
                    valueTime = "Start now (" + presentDate + ") End :" + time.getTemporal().toString();
                }
                else
                {
                    valueTime = "End : " + time.getTemporal().toString();
                }
                Console.WriteLine(valueTime);
            }
        }
        else
        {
            typeTime = "tStamp";
            valueTime = time.getTemporal().toISOString();
            Console.WriteLine(valueTime);
        }
    }
}

// Loads the POS tagger, tokenizes the text and returns the token strings and the
// "word/TAG" strings of its first sentence (empty arrays when there is no sentence).
private static void TagFirstSentence(string modelsDirectory, string text, out string[] words, out string[] tagged)
{
    var tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    var sentences = MaxentTagger.tokenizeText(new StringReader(text));
    if (sentences.size() == 0)
    {
        words = new string[0];
        tagged = new string[0];
        return;
    }

    var first = (ArrayList)sentences.get(0);
    var taggedSentence = tagger.tagSentence(first);

    words = new string[first.size()];
    for (int i = 0; i < words.Length; i++)
    {
        words[i] = first.get(i).ToString();
    }

    tagged = new string[taggedSentence.size()];
    for (int i = 0; i < tagged.Length; i++)
    {
        tagged[i] = taggedSentence.get(i).ToString();
    }
}