private void InitEnv()
{
    env = TokenSequencePattern.GetNewEnv();
    env.SetDefaultTokensAnnotationKey(typeof(CoreAnnotations.NumerizedTokensAnnotation));
    // Do case insensitive matching
    env.SetDefaultStringMatchFlags(Pattern.CaseInsensitive | Pattern.UnicodeCase);
    env.SetDefaultStringPatternFlags(Pattern.CaseInsensitive | Pattern.UnicodeCase);
    try
    {
        Units.RegisterUnits(env, options.unitsFilename);
    }
    catch (IOException ex)
    {
        throw new Exception("Error loading units from " + options.unitsFilename, ex);
    }
    try
    {
        UnitPrefix.RegisterPrefixes(env, options.prefixFilename);
    }
    catch (IOException ex)
    {
        throw new Exception("Error loading prefixes from " + options.prefixFilename, ex);
    }
    env.Bind("options", options);
    env.Bind("numcomptype", typeof(CoreAnnotations.NumericCompositeTypeAnnotation));
    env.Bind("numcompvalue", typeof(CoreAnnotations.NumericCompositeValueAnnotation));
}
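// Hedged usage sketch, not part of the original source: after InitEnv() has prepared the
// environment, the env is typically handed to CoreMapExpressionExtractor.CreateExtractorFromFiles
// (as the demo Main further below does) and the extractor is run over sentences whose tokens carry
// the NumerizedTokensAnnotation key bound above. The method name and the rulesFile parameter are
// hypothetical illustrations, not part of the library's API.
private IList<MatchedExpression> ExtractQuantitiesSketch(ICoreMap sentence, string rulesFile)
{
    // rulesFile: hypothetical path to a TokensRegex rules file that uses the bindings set up in InitEnv()
    CoreMapExpressionExtractor<MatchedExpression> extractor =
        CoreMapExpressionExtractor.CreateExtractorFromFiles(env, rulesFile);
    // Returns the matched expressions (text spans plus composed values) for one sentence
    return extractor.ExtractExpressions(sentence);
}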
public KBPTokensregexExtractor(string tokensregexDir, bool verbose)
{
    if (verbose)
    {
        logger.Log("Creating TokensRegexExtractor");
    }
    // Create extractors
    foreach (KBPRelationExtractor.RelationType rel in KBPRelationExtractor.RelationType.Values())
    {
        string relFileNameComponent = rel.canonicalName.ReplaceAll(":", "_");
        string path = tokensregexDir + File.separator + relFileNameComponent.ReplaceAll("/", "SLASH") + ".rules";
        if (IOUtils.ExistsInClasspathOrFileSystem(path))
        {
            IList<string> listFiles = new List<string>();
            listFiles.Add(tokensregexDir + File.separator + "defs.rules");
            listFiles.Add(path);
            if (verbose)
            {
                logger.Log("Rule file for relation " + rel + " is " + path);
            }
            Env env = TokenSequencePattern.GetNewEnv();
            env.Bind("collapseExtractionRules", true);
            env.Bind("verbose", verbose);
            CoreMapExpressionExtractor extr = CoreMapExpressionExtractor.CreateExtractorFromFiles(env, listFiles).KeepTemporaryTags();
            rules[rel] = extr;
        }
    }
}
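// Hedged illustration, not part of the original source: the constructor above maps each relation's
// canonical name onto a rules file by replacing ':' with '_' and '/' with the literal "SLASH", so a
// relation such as "per:title" is looked up as <tokensregexDir>/per_title.rules alongside the shared
// defs.rules. A minimal sketch of that mapping (standard string.Replace is used here in place of the
// port's ReplaceAll helper; the method name is hypothetical):
private static string RuleFileForRelationSketch(string tokensregexDir, string canonicalName)
{
    // Mirrors the substitutions applied in the constructor: ':' -> '_' and '/' -> "SLASH"
    string component = canonicalName.Replace(":", "_").Replace("/", "SLASH");
    return tokensregexDir + File.separator + component + ".rules";
}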
public TokensRegexAnnotator(string name, Properties props)
{
    string prefix = (name == null) ? string.Empty : name + '.';
    string[] files = PropertiesUtils.GetStringArray(props, prefix + "rules");
    env = TokenSequencePattern.GetNewEnv();
    env.Bind("options", options);
    if (PropertiesUtils.GetBool(props, prefix + "caseInsensitive"))
    {
        System.Console.Error.WriteLine("using case insensitive!");
        env.SetDefaultStringMatchFlags(NodePattern.CaseInsensitive | Pattern.UnicodeCase);
        env.SetDefaultStringPatternFlags(Pattern.CaseInsensitive | Pattern.UnicodeCase);
    }
    if (files.Length != 0)
    {
        extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(env, files);
    }
    else
    {
        extractor = null;
    }
    verbose = PropertiesUtils.GetBool(props, prefix + "verbose", false);
    options.setTokenOffsets = PropertiesUtils.GetBool(props, prefix + "setTokenOffsets", options.setTokenOffsets);
    options.extractWithTokens = PropertiesUtils.GetBool(props, prefix + "extractWithTokens", options.extractWithTokens);
    options.flatten = PropertiesUtils.GetBool(props, prefix + "flatten", options.flatten);
    string matchedExpressionsAnnotationKeyName = props.GetProperty(prefix + "matchedExpressionsAnnotationKey");
    if (matchedExpressionsAnnotationKeyName != null)
    {
        options.matchedExpressionsAnnotationKey = EnvLookup.LookupAnnotationKeyWithClassname(env, matchedExpressionsAnnotationKeyName);
        if (options.matchedExpressionsAnnotationKey == null)
        {
            string propName = prefix + "matchedExpressionsAnnotationKey";
            throw new Exception("Cannot determine annotation key for " + propName + '=' + matchedExpressionsAnnotationKeyName);
        }
    }
}
public TokensRegexAnnotator(params string[] files)
{
    env = TokenSequencePattern.GetNewEnv();
    extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(env, files);
    verbose = false;
}
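// Hedged configuration sketch, not part of the original source: the Properties-driven constructor
// above reads its settings under "<name>."-prefixed keys. The annotator name "tokensregexdemo" and
// the rules path below are hypothetical placeholders; PropertiesUtils.AsProperties is used the same
// way as in the demo Main below.
private static TokensRegexAnnotator BuildAnnotatorSketch()
{
    Properties props = PropertiesUtils.AsProperties(
        "tokensregexdemo.rules", "my/path/expr.rules.txt",    // hypothetical rules file
        "tokensregexdemo.caseInsensitive", "true",             // turns on the case-insensitive flags above
        "tokensregexdemo.verbose", "true");
    return new TokensRegexAnnotator("tokensregexdemo", props);
}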
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    string rules;
    if (args.Length > 0)
    {
        rules = args[0];
    }
    else
    {
        rules = "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";
    }
    PrintWriter @out;
    if (args.Length > 2)
    {
        @out = new PrintWriter(args[2]);
    }
    else
    {
        @out = new PrintWriter(System.Console.Out);
    }
    CoreMapExpressionExtractor<MatchedExpression> extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(TokenSequencePattern.GetNewEnv(), rules);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
    Annotation annotation;
    if (args.Length > 1)
    {
        annotation = new Annotation(IOUtils.SlurpFileNoExceptions(args[1]));
    }
    else
    {
        annotation = new Annotation("( ( five plus three plus four ) * 2 ) divided by three");
    }
    pipeline.Annotate(annotation);
    // An Annotation is a Map and you can get and use the various analyses individually.
    @out.Println();
    // The toString() method on an Annotation just prints the text of the Annotation
    // But you can see what is in it with other methods like toShorterString()
    @out.Println("The top level annotation");
    @out.Println(annotation.ToShorterString());
    IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
    int i = 0;
    foreach (ICoreMap sentence in sentences)
    {
        @out.Println("Sentence #" + ++i);
        foreach (CoreLabel token in sentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
        {
            @out.Println(" Token: " + "word=" + token.Get(typeof(CoreAnnotations.TextAnnotation)) + ", pos=" + token.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)) + ", ne=" + token.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation)));
        }
        IList<MatchedExpression> matchedExpressions = extractor.ExtractExpressions(sentence);
        foreach (MatchedExpression matched in matchedExpressions)
        {
            // Print out matched text and value
            @out.Println("Matched expression: " + matched.GetText() + " with value " + matched.GetValue());
            // Print out token information
            ICoreMap cm = matched.GetAnnotation();
            foreach (CoreLabel token_1 in cm.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                string word = token_1.Get(typeof(CoreAnnotations.TextAnnotation));
                string lemma = token_1.Get(typeof(CoreAnnotations.LemmaAnnotation));
                string pos = token_1.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                string ne = token_1.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                @out.Println(" Matched token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne);
            }
        }
    }
    @out.Flush();
}
private void InitEnv()
{
    env = TokenSequencePattern.GetNewEnv();
    env.SetDefaultResultsAnnotationExtractor(TimeExpression.TimeExpressionConverter);
    env.SetDefaultTokensAnnotationKey(typeof(CoreAnnotations.NumerizedTokensAnnotation));
    env.SetDefaultResultAnnotationKey(typeof(TimeExpression.Annotation));
    env.SetDefaultNestedResultsAnnotationKey(typeof(TimeExpression.ChildrenAnnotation));
    env.SetDefaultTokensAggregators(CoreMapAttributeAggregator.DefaultNumericTokensAggregators);
    env.Bind("nested", typeof(TimeExpression.ChildrenAnnotation));
    env.Bind("time", new TimeFormatter.TimePatternExtractRuleCreator());
    // Do case insensitive matching
    env.SetDefaultStringPatternFlags(Pattern.CaseInsensitive | Pattern.UnicodeCase);
    env.Bind("options", options);
    env.Bind("TIME_REF", SUTime.TimeRef);
    env.Bind("TIME_REF_UNKNOWN", SUTime.TimeRefUnknown);
    env.Bind("TIME_UNKNOWN", SUTime.TimeUnknown);
    env.Bind("TIME_NONE", SUTime.TimeNone);
    env.Bind("ERA_AD", SUTime.EraAd);
    env.Bind("ERA_BC", SUTime.EraBc);
    env.Bind("ERA_UNKNOWN", SUTime.EraUnknown);
    env.Bind("HALFDAY_AM", SUTime.HalfdayAm);
    env.Bind("HALFDAY_PM", SUTime.HalfdayPm);
    env.Bind("HALFDAY_UNKNOWN", SUTime.HalfdayUnknown);
    env.Bind("RESOLVE_TO_THIS", SUTime.ResolveToThis);
    env.Bind("RESOLVE_TO_PAST", SUTime.ResolveToPast);
    env.Bind("RESOLVE_TO_FUTURE", SUTime.ResolveToFuture);
    env.Bind("RESOLVE_TO_CLOSEST", SUTime.ResolveToClosest);
    env.Bind("numcomptype", typeof(CoreAnnotations.NumericCompositeTypeAnnotation));
    env.Bind("numcompvalue", typeof(CoreAnnotations.NumericCompositeValueAnnotation));
    env.Bind("temporal", typeof(TimeExpression.Annotation));
    // env.bind("tags", SequenceMatchRules.Tags.TagsAnnotation.class);
    env.Bind("::IS_TIMEX_DATE", new GenericTimeExpressionPatterns.TimexTypeMatchNodePattern(SUTime.TimexType.Date));
    env.Bind("::IS_TIMEX_DURATION", new GenericTimeExpressionPatterns.TimexTypeMatchNodePattern(SUTime.TimexType.Duration));
    env.Bind("::IS_TIMEX_TIME", new GenericTimeExpressionPatterns.TimexTypeMatchNodePattern(SUTime.TimexType.Time));
    env.Bind("::IS_TIMEX_SET", new GenericTimeExpressionPatterns.TimexTypeMatchNodePattern(SUTime.TimexType.Set));
    env.Bind("::IS_TIME_UNIT", new GenericTimeExpressionPatterns.MatchedExpressionValueTypeMatchNodePattern("TIMEUNIT"));
    env.Bind("::MONTH", new GenericTimeExpressionPatterns.MatchedExpressionValueTypeMatchNodePattern("MONTH_OF_YEAR"));
    env.Bind("::DAYOFWEEK", new GenericTimeExpressionPatterns.MatchedExpressionValueTypeMatchNodePattern("DAY_OF_WEEK"));
    // Bindings for parsing from rules files
    foreach (SUTime.TemporalOp t in SUTime.TemporalOp.Values())
    {
        env.Bind(t.ToString(), new Expressions.PrimitiveValue<SUTime.TemporalOp>("TemporalOp", t));
    }
    foreach (SUTime.TimeUnit t_1 in SUTime.TimeUnit.Values())
    {
        if (!t_1.Equals(SUTime.TimeUnit.Unknown))
        {
            // env.bind(t.name(), new SequenceMatchRules.PrimitiveValue<SUTime.Temporal>("DURATION", t.getDuration(), "TIMEUNIT"));
            env.Bind(t_1.ToString(), new Expressions.PrimitiveValue<SUTime.Temporal>("TIMEUNIT", t_1.GetDuration()));
        }
    }
    foreach (SUTime.StandardTemporalType t_2 in SUTime.StandardTemporalType.Values())
    {
        env.Bind(t_2.ToString(), new Expressions.PrimitiveValue<SUTime.StandardTemporalType>("TemporalType", t_2));
    }
    env.Bind("Duration", new Expressions.PrimitiveValue<IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_124("Duration")));
    // New so we get different time ids
    // TODO: Check args
    // TODO: Handle Strings...
    // TODO: This should already be in durations....
    // String durationUnitString = (durationUnitTokens != null) ? durationUnitTokens.get(0).get(CoreAnnotations.TextAnnotation.class) : null;
    // SUTime.Duration durationUnit = getDuration(durationUnitString);
    // TODO: Handle inexactness
    // Create duration range...
    // Add begin and end times
    env.Bind("DayOfWeek", new Expressions.PrimitiveValue<IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_212("DayOfWeek")));
    env.Bind("MonthOfYear", new Expressions.PrimitiveValue<IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_235("MonthOfYear")));
    env.Bind("MakePeriodicTemporalSet", new Expressions.PrimitiveValue<IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_258("MakePeriodicTemporalSet")));
    // First argument is the temporal acting as the base of the periodic set
    // Second argument is the quantifier (string)
    // Third argument is the multiple (how much to scale the natural period) /*"P1X"*/
    env.Bind("TemporalCompose", new Expressions.PrimitiveValue<IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_328("TemporalCompose")));
}