/// <summary>
/// Creates one TokensRegex extractor for every relation type whose rule file is found
/// under <paramref name="tokensregexDir"/> and stores it in <c>rules</c>, keyed by relation.
/// </summary>
/// <param name="tokensregexDir">Directory holding "defs.rules" and the per-relation ".rules" files.</param>
/// <param name="verbose">When true, progress is logged; the value is also bound as "verbose" in each rule environment.</param>
public KBPTokensregexExtractor(string tokensregexDir, bool verbose)
 {
     if (verbose)
     {
         logger.Log("Creating TokensRegexExtractor");
     }
     foreach (KBPRelationExtractor.RelationType relation in KBPRelationExtractor.RelationType.Values())
     {
         // The rule file is named after the canonical relation name, with ":" and "/" replaced.
         string colonFreeName = relation.canonicalName.ReplaceAll(":", "_");
         string ruleFilePath = tokensregexDir + File.separator + colonFreeName.ReplaceAll("/", "SLASH") + ".rules";
         if (!IOUtils.ExistsInClasspathOrFileSystem(ruleFilePath))
         {
             // Relations without a rule file simply get no extractor.
             continue;
         }
         // The shared definitions file is listed ahead of the relation-specific rules.
         IList <string> ruleFiles = new List <string>
         {
             tokensregexDir + File.separator + "defs.rules",
             ruleFilePath
         };
         if (verbose)
         {
             logger.Log("Rule files for relation " + relation + " is " + ruleFilePath);
         }
         Env ruleEnv = TokenSequencePattern.GetNewEnv();
         ruleEnv.Bind("collapseExtractionRules", true);
         ruleEnv.Bind("verbose", verbose);
         CoreMapExpressionExtractor relationExtractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(ruleEnv, ruleFiles).KeepTemporaryTags();
         rules[relation] = relationExtractor;
     }
 }
        /// <summary>
        /// Runs the per-relation TokensRegex rules over the input sentence and reports the first
        /// relation whose rules produce at least one match.
        /// </summary>
        /// <remarks>
        /// Tokens in the subject and object spans are temporarily marked with the
        /// <c>Subject</c> / <c>Object</c> annotation keys (value "true"). The markers are removed in a
        /// <c>finally</c> block, so they are cleared on every exit path, including when a rule
        /// extractor throws.
        /// NOTE(review): NER tags that were "O" inside the spans are overwritten with the
        /// subject/object type name and are not restored afterwards; this matches the previous behavior.
        /// </remarks>
        /// <param name="input">Sentence plus subject/object spans and types to classify.</param>
        /// <returns>
        /// The canonical name of the first matching relation paired with the weight of its best
        /// match, or <c>KBPRelationExtractorConstants.NoRelation</c> paired with 1.0 when nothing matches.
        /// </returns>
        public virtual Pair <string, double> Classify(KBPRelationExtractor.KBPInput input)
        {
            // Annotate Sentence
            ICoreMap          sentenceAsMap = input.sentence.AsCoreMap(null);
            IList <CoreLabel> tokens        = sentenceAsMap.Get(typeof(CoreAnnotations.TokensAnnotation));

            try
            {
                // Annotate where the subject is
                foreach (int i in input.subjectSpan)
                {
                    tokens[i].Set(typeof(KBPTokensregexExtractor.Subject), "true");
                    if ("O".Equals(tokens[i].Ner()))
                    {
                        tokens[i].SetNER(input.subjectType.name);
                    }
                }
                // Annotate where the object is
                foreach (int i_1 in input.objectSpan)
                {
                    tokens[i_1].Set(typeof(KBPTokensregexExtractor.Object), "true");
                    if ("O".Equals(tokens[i_1].Ner()))
                    {
                        tokens[i_1].SetNER(input.objectType.name);
                    }
                }
                // Run Rules: only relations that have an extractor and are compatible with the
                // subject type and object type are tried; the first one that matches wins.
                foreach (KBPRelationExtractor.RelationType rel in KBPRelationExtractor.RelationType.Values())
                {
                    if (rules.Contains(rel) && rel.entityType == input.subjectType && rel.validNamedEntityLabels.Contains(input.objectType))
                    {
                        CoreMapExpressionExtractor extractor   = rules[rel];
                        IList <MatchedExpression>  extractions = extractor.ExtractExpressions(sentenceAsMap);
                        if (extractions != null && extractions.Count > 0)
                        {
                            MatchedExpression best = MatchedExpression.GetBestMatched(extractions, MatchedExpression.ExprWeightScorer);
                            return(Pair.MakePair(rel.canonicalName, best.GetWeight()));
                        }
                    }
                }
                return(Pair.MakePair(KBPRelationExtractorConstants.NoRelation, 1.0));
            }
            finally
            {
                // Un-Annotate Sentence: single cleanup point shared by the match, no-match and
                // exception paths, so stale markers never remain on the tokens.
                foreach (CoreLabel token in tokens)
                {
                    token.Remove(typeof(KBPTokensregexExtractor.Subject));
                    token.Remove(typeof(KBPTokensregexExtractor.Object));
                }
            }
        }
 /// <summary>
 /// Stores <paramref name="options"/>, applies its verbosity to <c>CoreMapExpressionExtractor</c>,
 /// falls back to <c>Options.DefaultGrammarFiles</c> when no grammar file is configured, and then
 /// builds the time-expression patterns and their extractor.
 /// </summary>
 /// <param name="options">Configuration; its <c>grammarFilename</c> is filled in with the default when it is null.</param>
 public virtual void Init(Options options)
 {
     this.options = options;
     // NumberNormalizer.setVerbose(options.verbose); // cdm 2016: Try omitting this: Don't we want to see errors?
     CoreMapExpressionExtractor.SetVerbose(options.verbose);

     bool grammarUnspecified = options.grammarFilename == null;
     if (grammarUnspecified)
     {
         // No rules configured: install the defaults on the options object itself and warn about it.
         options.grammarFilename = Options.DefaultGrammarFiles;
         logger.Warning("Time rules file is not specified: using default rules at " + options.grammarFilename);
     }
     logger.Info("Using following SUTime rules: " + options.grammarFilename);

     timexPatterns            = new GenericTimeExpressionPatterns(options);
     this.expressionExtractor = timexPatterns.CreateExtractor();
 }
        /// <summary>
        /// Configures the annotator from <paramref name="props"/>, reading every setting under the
        /// "<paramref name="name"/>." prefix (no prefix when <paramref name="name"/> is null).
        /// </summary>
        /// <remarks>
        /// Properties read: "rules", "caseInsensitive", "verbose", "setTokenOffsets",
        /// "extractWithTokens", "flatten" and "matchedExpressionsAnnotationKey".
        /// No extractor is created (it is left null) when the "rules" array is empty.
        /// </remarks>
        /// <param name="name">Annotator name used as the property prefix; may be null.</param>
        /// <param name="props">Properties to read the configuration from.</param>
        /// <exception cref="System.Exception">
        /// Thrown when "matchedExpressionsAnnotationKey" is set but cannot be resolved to an annotation key.
        /// </exception>
        public TokensRegexAnnotator(string name, Properties props)
        {
            string prefix = string.Empty;
            if (name != null)
            {
                prefix = name + '.';
            }

            string[] ruleFiles = PropertiesUtils.GetStringArray(props, prefix + "rules");
            env = TokenSequencePattern.GetNewEnv();
            env.Bind("options", options);

            bool caseInsensitive = PropertiesUtils.GetBool(props, prefix + "caseInsensitive");
            if (caseInsensitive)
            {
                System.Console.Error.WriteLine("using case insensitive!");
                env.SetDefaultStringMatchFlags(NodePattern.CaseInsensitive | Pattern.UnicodeCase);
                env.SetDefaultStringPatternFlags(Pattern.CaseInsensitive | Pattern.UnicodeCase);
            }

            if (ruleFiles.Length == 0)
            {
                extractor = null;
            }
            else
            {
                extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(env, ruleFiles);
            }

            verbose = PropertiesUtils.GetBool(props, prefix + "verbose", false);
            // Each option keeps its current value unless the corresponding property overrides it.
            options.setTokenOffsets   = PropertiesUtils.GetBool(props, prefix + "setTokenOffsets", options.setTokenOffsets);
            options.extractWithTokens = PropertiesUtils.GetBool(props, prefix + "extractWithTokens", options.extractWithTokens);
            options.flatten           = PropertiesUtils.GetBool(props, prefix + "flatten", options.flatten);

            string keyPropertyName = prefix + "matchedExpressionsAnnotationKey";
            string requestedKeyName = props.GetProperty(keyPropertyName);
            if (requestedKeyName == null)
            {
                // Property not set: leave the annotation key option untouched.
                return;
            }

            options.matchedExpressionsAnnotationKey = EnvLookup.LookupAnnotationKeyWithClassname(env, requestedKeyName);
            if (options.matchedExpressionsAnnotationKey == null)
            {
                throw new Exception("Cannot determine annotation key for " + keyPropertyName + '=' + requestedKeyName);
            }
        }
 /// <summary>
 /// Creates a non-verbose annotator whose extractor is built from the given rule files
 /// in a freshly created environment.
 /// </summary>
 /// <param name="files">Rule files handed to <c>CoreMapExpressionExtractor.CreateExtractorFromFiles</c>.</param>
 public TokensRegexAnnotator(params string[] files)
 {
     verbose = false;

     // Build the environment first; the extractor is created against that same instance.
     var freshEnv = TokenSequencePattern.GetNewEnv();
     env       = freshEnv;
     extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(freshEnv, files);
 }
        /// <summary>
        /// Demo entry point: annotates a text, runs the TokensRegex rules over every sentence and
        /// prints the tokens and the matched expressions.
        /// </summary>
        /// <param name="args">
        /// Optional arguments: <c>args[0]</c> rules file (a bundled demo rules path is used when absent),
        /// <c>args[1]</c> file whose content is annotated (a built-in arithmetic sentence is used when absent),
        /// <c>args[2]</c> output destination passed to <c>PrintWriter</c> (standard output when absent).
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string rules = args.Length > 0
                ? args[0]
                : "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";

            PrintWriter @out = args.Length > 2
                ? new PrintWriter(args[2])
                : new PrintWriter(System.Console.Out);

            CoreMapExpressionExtractor <MatchedExpression> extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(TokenSequencePattern.GetNewEnv(), rules);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));

            Annotation annotation = args.Length > 1
                ? new Annotation(IOUtils.SlurpFileNoExceptions(args[1]))
                : new Annotation("( ( five plus three plus four ) * 2 ) divided by three");
            pipeline.Annotate(annotation);

            // Top-level summary of the annotated document.
            @out.Println();
            @out.Println("The top level annotation");
            @out.Println(annotation.ToShorterString());

            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            int sentenceNumber = 0;

            foreach (ICoreMap sentence in sentences)
            {
                sentenceNumber++;
                @out.Println("Sentence #" + sentenceNumber);

                // Every token of the sentence with its text, part of speech and NER tag.
                foreach (CoreLabel token in sentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
                {
                    @out.Println("  Token: " + "word=" + token.Get(typeof(CoreAnnotations.TextAnnotation))
                                 + ",  pos=" + token.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation))
                                 + ", ne=" + token.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation)));
                }

                IList <MatchedExpression> matches = extractor.ExtractExpressions(sentence);
                foreach (MatchedExpression match in matches)
                {
                    // Matched text together with the value the rules computed for it.
                    @out.Println("Matched expression: " + match.GetText() + " with value " + match.GetValue());

                    // Tokens covered by the match.
                    ICoreMap matchAnnotation = match.GetAnnotation();
                    foreach (CoreLabel matchedToken in matchAnnotation.Get(typeof(CoreAnnotations.TokensAnnotation)))
                    {
                        string word  = matchedToken.Get(typeof(CoreAnnotations.TextAnnotation));
                        string lemma = matchedToken.Get(typeof(CoreAnnotations.LemmaAnnotation));
                        string pos   = matchedToken.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                        string ne    = matchedToken.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                        @out.Println("  Matched token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne);
                    }
                }
            }
            @out.Flush();
        }
        /// <summary>
        /// Builds the expression extractor from the rule files named in <c>options.grammarFilename</c>.
        /// </summary>
        /// <returns>The extractor created from those files in the current <c>env</c>.</returns>
        private CoreMapExpressionExtractor <MatchedExpression> CreateExtractor()
        {
            // The grammar setting is split on the pattern below (a ',' or ';' with optional surrounding whitespace).
            IList <string> ruleFiles = StringUtils.Split(options.grammarFilename, "\\s*[,;]\\s*");
            return CoreMapExpressionExtractor.CreateExtractorFromFiles(env, ruleFiles);
        }
 /// <summary>
 /// Stores the given options, calls <c>InitEnv()</c>, and then assigns the result of
 /// <c>CreateExtractor()</c> to <c>extractor</c>.
 /// </summary>
 /// <param name="options">Options saved on this instance before the environment and extractor are set up.</param>
 public virtual void Init(Options options)
 {
     // Options are stored first; presumably InitEnv() and CreateExtractor() read them - confirm before reordering.
     this.options = options;
     InitEnv();
     extractor = CreateExtractor();
 }
        /// <summary>
        /// Creates an extractor from the rule files listed in <c>options.grammarFilename</c>,
        /// using the current <c>env</c>.
        /// </summary>
        /// <returns>The extractor returned by <c>CoreMapExpressionExtractor.CreateExtractorFromFiles</c>.</returns>
        public virtual CoreMapExpressionExtractor CreateExtractor()
        {
            // Pattern used to split the grammar setting: ',' or ';' with optional whitespace around it.
            const string separatorPattern = "\\s*[,;]\\s*";

            IList <string> grammarFiles = StringUtils.Split(options.grammarFilename, separatorPattern);
            return CoreMapExpressionExtractor.CreateExtractorFromFiles(env, grammarFiles);
        }