/// <summary>
/// Creates a fresh TokensRegex environment, stores it in <c>env</c>, and configures it:
/// numerized tokens as the default token annotation, case-insensitive string matching,
/// the unit and unit-prefix definitions named in <c>options</c>, and the
/// <c>options</c>, <c>numcomptype</c> and <c>numcompvalue</c> bindings.
/// </summary>
/// <exception cref="System.Exception">
/// Wraps the <see cref="System.IO.IOException"/> raised while registering the units
/// file or the prefix file; the message names the file that failed.
/// </exception>
private void InitEnv()
 {
     env = TokenSequencePattern.GetNewEnv();
     env.SetDefaultTokensAnnotationKey(typeof(CoreAnnotations.NumerizedTokensAnnotation));

     // The same case-insensitive flag combination is applied to both plain string
     // matches and string patterns.
     var ignoreCaseFlags = Pattern.CaseInsensitive | Pattern.UnicodeCase;
     env.SetDefaultStringMatchFlags(ignoreCaseFlags);
     env.SetDefaultStringPatternFlags(ignoreCaseFlags);

     // Units are registered before prefixes; each failure is reported separately so the
     // message identifies which file could not be loaded.
     try
     {
         Units.RegisterUnits(env, options.unitsFilename);
     }
     catch (IOException unitsFailure)
     {
         string unitsMessage = "Error loading units from " + options.unitsFilename;
         throw new Exception(unitsMessage, unitsFailure);
     }

     try
     {
         UnitPrefix.RegisterPrefixes(env, options.prefixFilename);
     }
     catch (IOException prefixFailure)
     {
         string prefixMessage = "Error loading prefixes from " + options.prefixFilename;
         throw new Exception(prefixMessage, prefixFailure);
     }

     // Names made available to rule files through the environment.
     env.Bind("options", options);
     env.Bind("numcomptype", typeof(CoreAnnotations.NumericCompositeTypeAnnotation));
     env.Bind("numcompvalue", typeof(CoreAnnotations.NumericCompositeValueAnnotation));
 }
コード例 #2
0
 /// <summary>
 /// Builds one TokensRegex extractor per relation type whose rule file exists under
 /// <paramref name="tokensregexDir"/> and stores it in <c>rules</c>, keyed by relation.
 /// </summary>
 /// <param name="tokensregexDir">
 /// Directory holding <c>defs.rules</c> and the per-relation <c>*.rules</c> files.
 /// </param>
 /// <param name="verbose">
 /// When true, progress is logged; the flag is also bound into each rule environment
 /// under the name <c>verbose</c>.
 /// </param>
 public KBPTokensregexExtractor(string tokensregexDir, bool verbose)
 {
     if (verbose)
     {
         logger.Log("Creating TokensRegexExtractor");
     }

     foreach (KBPRelationExtractor.RelationType relation in KBPRelationExtractor.RelationType.Values())
     {
         // The rule file name is the canonical relation name with ':' turned into '_'
         // and '/' turned into "SLASH".
         string nameWithoutColons = relation.canonicalName.ReplaceAll(":", "_");
         string relationRulesPath = tokensregexDir + File.separator + nameWithoutColons.ReplaceAll("/", "SLASH") + ".rules";

         // Relations without a rule file simply get no extractor.
         if (!IOUtils.ExistsInClasspathOrFileSystem(relationRulesPath))
         {
             continue;
         }

         // Shared definitions are listed first, followed by the relation-specific rules.
         IList<string> ruleFiles = new List<string>
         {
             tokensregexDir + File.separator + "defs.rules",
             relationRulesPath
         };

         if (verbose)
         {
             logger.Log("Rule files for relation " + relation + " is " + relationRulesPath);
         }

         // Every relation gets its own environment.
         Env relationEnv = TokenSequencePattern.GetNewEnv();
         relationEnv.Bind("collapseExtractionRules", true);
         relationEnv.Bind("verbose", verbose);

         CoreMapExpressionExtractor relationExtractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(relationEnv, ruleFiles).KeepTemporaryTags();
         rules[relation] = relationExtractor;
     }
 }
コード例 #3
0
        /// <summary>
        /// Configures the annotator from <paramref name="props"/>. Every property key is
        /// looked up as <c>name + "." + key</c>, or as the bare key when
        /// <paramref name="name"/> is null.
        /// </summary>
        /// <param name="name">Annotator name used as the property prefix; may be null.</param>
        /// <param name="props">
        /// Properties read here: <c>rules</c>, <c>caseInsensitive</c>, <c>verbose</c>,
        /// <c>setTokenOffsets</c>, <c>extractWithTokens</c>, <c>flatten</c> and
        /// <c>matchedExpressionsAnnotationKey</c>.
        /// </param>
        /// <exception cref="System.Exception">
        /// Thrown when <c>matchedExpressionsAnnotationKey</c> is set but cannot be resolved
        /// to an annotation key.
        /// </exception>
        public TokensRegexAnnotator(string name, Properties props)
        {
            string prefix = string.Empty;
            if (name != null)
            {
                prefix = name + '.';
            }

            string[] files = PropertiesUtils.GetStringArray(props, prefix + "rules");

            env = TokenSequencePattern.GetNewEnv();
            env.Bind("options", options);

            bool ignoreCase = PropertiesUtils.GetBool(props, prefix + "caseInsensitive");
            if (ignoreCase)
            {
                System.Console.Error.WriteLine("using case insensitive!");
                env.SetDefaultStringMatchFlags(NodePattern.CaseInsensitive | Pattern.UnicodeCase);
                env.SetDefaultStringPatternFlags(Pattern.CaseInsensitive | Pattern.UnicodeCase);
            }

            // With no rule files configured there is nothing to extract with.
            // Note that the extractor is created before the option values below are read.
            extractor = (files.Length != 0)
                ? CoreMapExpressionExtractor.CreateExtractorFromFiles(env, files)
                : null;

            verbose = PropertiesUtils.GetBool(props, prefix + "verbose", false);

            // Each option keeps its current value when the property is absent.
            options.setTokenOffsets = PropertiesUtils.GetBool(props, prefix + "setTokenOffsets", options.setTokenOffsets);
            options.extractWithTokens = PropertiesUtils.GetBool(props, prefix + "extractWithTokens", options.extractWithTokens);
            options.flatten = PropertiesUtils.GetBool(props, prefix + "flatten", options.flatten);

            string keyPropertyName = prefix + "matchedExpressionsAnnotationKey";
            string keyClassName = props.GetProperty(keyPropertyName);
            if (keyClassName == null)
            {
                // No custom annotation key requested.
                return;
            }

            options.matchedExpressionsAnnotationKey = EnvLookup.LookupAnnotationKeyWithClassname(env, keyClassName);
            if (options.matchedExpressionsAnnotationKey == null)
            {
                throw new Exception("Cannot determine annotation key for " + keyPropertyName + '=' + keyClassName);
            }
        }
コード例 #4
0
 /// <summary>
 /// Creates a non-verbose annotator whose extractor is built from the given rule files
 /// using a fresh TokensRegex environment.
 /// </summary>
 /// <param name="files">Rule files handed to the extractor factory.</param>
 public TokensRegexAnnotator(params string[] files)
 {
     verbose = false;

     env = TokenSequencePattern.GetNewEnv();
     extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(env, files);
 }
コード例 #5
0
        /// <summary>
        /// Demo entry point: runs a CoreNLP pipeline (tokenize, ssplit, pos, lemma, ner) over
        /// some text, then prints every token and every expression matched by the rules.
        /// </summary>
        /// <param name="args">
        /// All optional. <c>args[0]</c>: rules to load (defaults to the bundled
        /// <c>expr.rules.txt</c>); <c>args[1]</c>: file whose content is annotated (defaults
        /// to a built-in arithmetic sentence); <c>args[2]</c>: argument passed to the
        /// <c>PrintWriter</c> constructor for output (defaults to standard output).
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string rules = (args.Length > 0)
                ? args[0]
                : "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";

            PrintWriter @out = (args.Length > 2)
                ? new PrintWriter(args[2])
                : new PrintWriter(System.Console.Out);

            CoreMapExpressionExtractor <MatchedExpression> extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(TokenSequencePattern.GetNewEnv(), rules);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));

            Annotation annotation = (args.Length > 1)
                ? new Annotation(IOUtils.SlurpFileNoExceptions(args[1]))
                : new Annotation("( ( five plus three plus four ) * 2 ) divided by three");

            pipeline.Annotate(annotation);

            // Overview of the whole annotation before walking the individual sentences.
            @out.Println();
            @out.Println("The top level annotation");
            @out.Println(annotation.ToShorterString());

            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            int sentenceNumber = 0;

            foreach (ICoreMap sentence in sentences)
            {
                // Sentences are numbered starting at 1.
                sentenceNumber++;
                @out.Println("Sentence #" + sentenceNumber);

                foreach (CoreLabel token in sentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
                {
                    var tokenText = token.Get(typeof(CoreAnnotations.TextAnnotation));
                    var tokenPos = token.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                    var tokenNe = token.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                    @out.Println("  Token: " + "word=" + tokenText + ",  pos=" + tokenPos + ", ne=" + tokenNe);
                }

                IList <MatchedExpression> matches = extractor.ExtractExpressions(sentence);
                foreach (MatchedExpression match in matches)
                {
                    // Matched text together with the value computed by the rules.
                    @out.Println("Matched expression: " + match.GetText() + " with value " + match.GetValue());

                    // Per-token details for the tokens covered by this match.
                    ICoreMap matchAnnotation = match.GetAnnotation();
                    foreach (CoreLabel matchedToken in matchAnnotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
                    {
                        string matchedWord  = matchedToken.Get(typeof(CoreAnnotations.TextAnnotation));
                        string matchedLemma = matchedToken.Get(typeof(CoreAnnotations.LemmaAnnotation));
                        string matchedPos   = matchedToken.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                        string matchedNe    = matchedToken.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                        @out.Println("  Matched token: " + "word=" + matchedWord + ", lemma=" + matchedLemma + ", pos=" + matchedPos + ", ne=" + matchedNe);
                    }
                }
            }

            // Push any buffered output to the destination before returning.
            @out.Flush();
        }
 private void InitEnv()
 {
     env = TokenSequencePattern.GetNewEnv();
     env.SetDefaultResultsAnnotationExtractor(TimeExpression.TimeExpressionConverter);
     env.SetDefaultTokensAnnotationKey(typeof(CoreAnnotations.NumerizedTokensAnnotation));
     env.SetDefaultResultAnnotationKey(typeof(TimeExpression.Annotation));
     env.SetDefaultNestedResultsAnnotationKey(typeof(TimeExpression.ChildrenAnnotation));
     env.SetDefaultTokensAggregators(CoreMapAttributeAggregator.DefaultNumericTokensAggregators);
     env.Bind("nested", typeof(TimeExpression.ChildrenAnnotation));
     env.Bind("time", new TimeFormatter.TimePatternExtractRuleCreator());
     // Do case insensitive matching
     env.SetDefaultStringPatternFlags(Pattern.CaseInsensitive | Pattern.UnicodeCase);
     env.Bind("options", options);
     env.Bind("TIME_REF", SUTime.TimeRef);
     env.Bind("TIME_REF_UNKNOWN", SUTime.TimeRefUnknown);
     env.Bind("TIME_UNKNOWN", SUTime.TimeUnknown);
     env.Bind("TIME_NONE", SUTime.TimeNone);
     env.Bind("ERA_AD", SUTime.EraAd);
     env.Bind("ERA_BC", SUTime.EraBc);
     env.Bind("ERA_UNKNOWN", SUTime.EraUnknown);
     env.Bind("HALFDAY_AM", SUTime.HalfdayAm);
     env.Bind("HALFDAY_PM", SUTime.HalfdayPm);
     env.Bind("HALFDAY_UNKNOWN", SUTime.HalfdayUnknown);
     env.Bind("RESOLVE_TO_THIS", SUTime.ResolveToThis);
     env.Bind("RESOLVE_TO_PAST", SUTime.ResolveToPast);
     env.Bind("RESOLVE_TO_FUTURE", SUTime.ResolveToFuture);
     env.Bind("RESOLVE_TO_CLOSEST", SUTime.ResolveToClosest);
     env.Bind("numcomptype", typeof(CoreAnnotations.NumericCompositeTypeAnnotation));
     env.Bind("numcompvalue", typeof(CoreAnnotations.NumericCompositeValueAnnotation));
     env.Bind("temporal", typeof(TimeExpression.Annotation));
     //    env.bind("tags", SequenceMatchRules.Tags.TagsAnnotation.class);
     env.Bind("::IS_TIMEX_DATE", new GenericTimeExpressionPatterns.TimexTypeMatchNodePattern(SUTime.TimexType.Date));
     env.Bind("::IS_TIMEX_DURATION", new GenericTimeExpressionPatterns.TimexTypeMatchNodePattern(SUTime.TimexType.Duration));
     env.Bind("::IS_TIMEX_TIME", new GenericTimeExpressionPatterns.TimexTypeMatchNodePattern(SUTime.TimexType.Time));
     env.Bind("::IS_TIMEX_SET", new GenericTimeExpressionPatterns.TimexTypeMatchNodePattern(SUTime.TimexType.Set));
     env.Bind("::IS_TIME_UNIT", new GenericTimeExpressionPatterns.MatchedExpressionValueTypeMatchNodePattern("TIMEUNIT"));
     env.Bind("::MONTH", new GenericTimeExpressionPatterns.MatchedExpressionValueTypeMatchNodePattern("MONTH_OF_YEAR"));
     env.Bind("::DAYOFWEEK", new GenericTimeExpressionPatterns.MatchedExpressionValueTypeMatchNodePattern("DAY_OF_WEEK"));
     // BINDINGS for parsing from file!!!!!!!
     foreach (SUTime.TemporalOp t in SUTime.TemporalOp.Values())
     {
         env.Bind(t.ToString(), new Expressions.PrimitiveValue <SUTime.TemporalOp>("TemporalOp", t));
     }
     foreach (SUTime.TimeUnit t_1 in SUTime.TimeUnit.Values())
     {
         if (!t_1.Equals(SUTime.TimeUnit.Unknown))
         {
             //env.bind(t.name(), new SequenceMatchRules.PrimitiveValue<SUTime.Temporal>("DURATION", t.getDuration(), "TIMEUNIT"));
             env.Bind(t_1.ToString(), new Expressions.PrimitiveValue <SUTime.Temporal>("TIMEUNIT", t_1.GetDuration()));
         }
     }
     foreach (SUTime.StandardTemporalType t_2 in SUTime.StandardTemporalType.Values())
     {
         env.Bind(t_2.ToString(), new Expressions.PrimitiveValue <SUTime.StandardTemporalType>("TemporalType", t_2));
     }
     env.Bind("Duration", new Expressions.PrimitiveValue <IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_124("Duration")));
     // New so we get different time ids
     // TODO: Check args
     // TODO: Handle Strings...
     // TODO: This should already be in durations....
     //String durationUnitString = (durationUnitTokens != null)? durationUnitTokens.get(0).get(CoreAnnotations.TextAnnotation.class):null;
     //SUTime.Duration durationUnit = getDuration(durationUnitString);
     // TODO: Handle inexactness
     // Create duration range...
     // Add begin and end times
     env.Bind("DayOfWeek", new Expressions.PrimitiveValue <IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_212("DayOfWeek")));
     env.Bind("MonthOfYear", new Expressions.PrimitiveValue <IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_235("MonthOfYear")));
     env.Bind("MakePeriodicTemporalSet", new Expressions.PrimitiveValue <IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_258("MakePeriodicTemporalSet")));
     // First argument is the temporal acting as the base of the periodic set
     // Second argument is the quantifier (string)
     // Third argument is the multiple (how much to scale the natural period)
     /*"P1X"*/
     env.Bind("TemporalCompose", new Expressions.PrimitiveValue <IValueFunction>(Expressions.TypeFunction, new _NamedValueFunction_328("TemporalCompose")));
 }