/// <summary>
/// Builds one TokensRegex extractor for every relation type that has a rules file
/// under <paramref name="tokensregexDir"/> and stores it in <c>rules</c>.
/// </summary>
/// <param name="tokensregexDir">Directory containing <c>defs.rules</c> and the per-relation <c>.rules</c> files.</param>
/// <param name="verbose">When true, progress is logged; the flag is also bound into each extractor environment.</param>
public KBPTokensregexExtractor(string tokensregexDir, bool verbose)
{
    if (verbose)
    {
        logger.Log("Creating TokensRegexExtractor");
    }

    // Shared definitions file, listed ahead of every relation-specific rules file.
    string sharedDefsPath = tokensregexDir + File.separator + "defs.rules";

    foreach (KBPRelationExtractor.RelationType relation in KBPRelationExtractor.RelationType.Values())
    {
        // File name is the canonical relation name with ':' -> '_' and then '/' -> "SLASH".
        string fileStem = relation.canonicalName.ReplaceAll(":", "_").ReplaceAll("/", "SLASH");
        string relationRulesPath = tokensregexDir + File.separator + fileStem + ".rules";

        // Relations without a rules file get no extractor.
        if (!IOUtils.ExistsInClasspathOrFileSystem(relationRulesPath))
        {
            continue;
        }

        IList<string> ruleFiles = new List<string> { sharedDefsPath, relationRulesPath };
        if (verbose)
        {
            logger.Log("Rule files for relation " + relation + " is " + relationRulesPath);
        }

        Env ruleEnv = TokenSequencePattern.GetNewEnv();
        ruleEnv.Bind("collapseExtractionRules", true);
        ruleEnv.Bind("verbose", verbose);

        CoreMapExpressionExtractor relationExtractor =
            CoreMapExpressionExtractor.CreateExtractorFromFiles(ruleEnv, ruleFiles).KeepTemporaryTags();
        rules[relation] = relationExtractor;
    }
}
/// <summary>
/// Classifies the relation between the subject and object spans of <paramref name="input"/>
/// by running the per-relation TokensRegex extractors stored in <c>rules</c>.
/// </summary>
/// <param name="input">Sentence plus subject/object spans and entity types.</param>
/// <returns>
/// The canonical name of the first relation (in <c>RelationType.Values()</c> order) whose
/// extractor yields at least one match, paired with the weight of its best match; otherwise
/// <c>KBPRelationExtractorConstants.NoRelation</c> paired with 1.0.
/// </returns>
public virtual Pair <string, double> Classify(KBPRelationExtractor.KBPInput input)
{
    // Annotate Sentence
    ICoreMap sentenceAsMap = input.sentence.AsCoreMap(null);
    IList<CoreLabel> tokens = sentenceAsMap.Get(typeof(CoreAnnotations.TokensAnnotation));

    try
    {
        // Annotate where the subject is
        foreach (int subjectIndex in input.subjectSpan)
        {
            CoreLabel subjectToken = tokens[subjectIndex];
            subjectToken.Set(typeof(KBPTokensregexExtractor.Subject), "true");
            // NOTE(review): an NER label overwritten here is not restored afterwards.
            if ("O".Equals(subjectToken.Ner()))
            {
                subjectToken.SetNER(input.subjectType.name);
            }
        }

        // Annotate where the object is
        foreach (int objectIndex in input.objectSpan)
        {
            CoreLabel objectToken = tokens[objectIndex];
            objectToken.Set(typeof(KBPTokensregexExtractor.Object), "true");
            if ("O".Equals(objectToken.Ner()))
            {
                objectToken.SetNER(input.objectType.name);
            }
        }

        // Run Rules
        foreach (KBPRelationExtractor.RelationType rel in KBPRelationExtractor.RelationType.Values())
        {
            // Only try relations that have rules and are compatible with both entity types.
            if (!rules.Contains(rel)
                || rel.entityType != input.subjectType
                || !rel.validNamedEntityLabels.Contains(input.objectType))
            {
                continue;
            }

            CoreMapExpressionExtractor extractor = rules[rel];
            IList<MatchedExpression> extractions = extractor.ExtractExpressions(sentenceAsMap);
            if (extractions != null && extractions.Count > 0)
            {
                MatchedExpression best = MatchedExpression.GetBestMatched(extractions, MatchedExpression.ExprWeightScorer);
                return(Pair.MakePair(rel.canonicalName, best.GetWeight()));
            }
        }

        return(Pair.MakePair(KBPRelationExtractorConstants.NoRelation, 1.0));
    }
    finally
    {
        // Un-Annotate Sentence. Done in a finally block so the temporary Subject/Object
        // tags are removed on every exit path, including when span indexing or rule
        // extraction throws.
        foreach (CoreLabel token in tokens)
        {
            token.Remove(typeof(KBPTokensregexExtractor.Subject));
            token.Remove(typeof(KBPTokensregexExtractor.Object));
        }
    }
}
/// <summary>
/// Stores <paramref name="options"/>, falls back to the default grammar files when none
/// is configured, and builds the time-expression patterns and their extractor.
/// </summary>
/// <param name="options">Configuration; its <c>grammarFilename</c> is filled in when null.</param>
public virtual void Init(Options options)
{
    this.options = options;

    // Only the expression extractor's verbosity is set here; the NumberNormalizer
    // verbosity call was deliberately disabled (cdm 2016) so that its errors stay visible.
    CoreMapExpressionExtractor.SetVerbose(options.verbose);

    string grammar = options.grammarFilename;
    if (grammar == null)
    {
        grammar = Options.DefaultGrammarFiles;
        options.grammarFilename = grammar;
        logger.Warning("Time rules file is not specified: using default rules at " + grammar);
    }
    logger.Info("Using following SUTime rules: " + grammar);

    timexPatterns = new GenericTimeExpressionPatterns(options);
    this.expressionExtractor = timexPatterns.CreateExtractor();
}
/// <summary>
/// Configures the annotator from properties. Every property name is prefixed with
/// <c>name + '.'</c>, or left unprefixed when <paramref name="name"/> is null.
/// </summary>
/// <param name="name">Annotator name used to build the property prefix; may be null.</param>
/// <param name="props">
/// Properties read: <c>rules</c>, <c>caseInsensitive</c>, <c>verbose</c>, <c>setTokenOffsets</c>,
/// <c>extractWithTokens</c>, <c>flatten</c> and <c>matchedExpressionsAnnotationKey</c>.
/// </param>
/// <exception cref="System.ArgumentException">
/// The <c>matchedExpressionsAnnotationKey</c> property is set but cannot be resolved to an annotation key.
/// </exception>
public TokensRegexAnnotator(string name, Properties props)
{
    string prefix = (name == null) ? string.Empty : name + '.';
    string[] files = PropertiesUtils.GetStringArray(props, prefix + "rules");

    env = TokenSequencePattern.GetNewEnv();
    env.Bind("options", options);
    if (PropertiesUtils.GetBool(props, prefix + "caseInsensitive"))
    {
        System.Console.Error.WriteLine("using case insensitive!");
        env.SetDefaultStringMatchFlags(NodePattern.CaseInsensitive | Pattern.UnicodeCase);
        env.SetDefaultStringPatternFlags(Pattern.CaseInsensitive | Pattern.UnicodeCase);
    }

    // No rule files configured means no extractor at all.
    if (files.Length != 0)
    {
        extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(env, files);
    }
    else
    {
        extractor = null;
    }

    verbose = PropertiesUtils.GetBool(props, prefix + "verbose", false);

    // Each option keeps its current value unless the matching property overrides it.
    options.setTokenOffsets = PropertiesUtils.GetBool(props, prefix + "setTokenOffsets", options.setTokenOffsets);
    options.extractWithTokens = PropertiesUtils.GetBool(props, prefix + "extractWithTokens", options.extractWithTokens);
    options.flatten = PropertiesUtils.GetBool(props, prefix + "flatten", options.flatten);

    string matchedExpressionsAnnotationKeyName = props.GetProperty(prefix + "matchedExpressionsAnnotationKey");
    if (matchedExpressionsAnnotationKeyName != null)
    {
        options.matchedExpressionsAnnotationKey = EnvLookup.LookupAnnotationKeyWithClassname(env, matchedExpressionsAnnotationKeyName);
        if (options.matchedExpressionsAnnotationKey == null)
        {
            string propName = prefix + "matchedExpressionsAnnotationKey";
            // A bad property value is a caller error, so signal it with a specific exception type
            // rather than the base Exception. ArgumentException derives from Exception, so
            // existing catch (Exception) handlers still catch it; the message is unchanged.
            throw new System.ArgumentException("Cannot determine annotation key for " + propName + '=' + matchedExpressionsAnnotationKeyName);
        }
    }
}
/// <summary>
/// Creates a non-verbose annotator whose extractor is built from the given rule files
/// in a fresh TokensRegex environment.
/// </summary>
/// <param name="files">Rule files passed to the extractor factory.</param>
public TokensRegexAnnotator(params string[] files)
{
    // The environment must exist before the extractor is created from it.
    env = TokenSequencePattern.GetNewEnv();
    extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(env, files);

    verbose = false;
}
/// <summary>
/// Demo entry point: annotates a text with a CoreNLP pipeline, runs a TokensRegex
/// extractor over each sentence, and prints the tokens and matched expressions.
/// </summary>
/// <param name="args">
/// Optional arguments: <c>args[0]</c> rules file, <c>args[1]</c> file with the input text,
/// <c>args[2]</c> output file. Missing arguments fall back to a bundled rules path,
/// a built-in example sentence, and standard output respectively.
/// </param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    string rules = args.Length > 0
        ? args[0]
        : "edu/stanford/nlp/ling/tokensregex/demo/rules/expr.rules.txt";

    PrintWriter @out = args.Length > 2
        ? new PrintWriter(args[2])
        : new PrintWriter(System.Console.Out);

    CoreMapExpressionExtractor <MatchedExpression> extractor = CoreMapExpressionExtractor.CreateExtractorFromFiles(TokenSequencePattern.GetNewEnv(), rules);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));

    string text = args.Length > 1
        ? IOUtils.SlurpFileNoExceptions(args[1])
        : "( ( five plus three plus four ) * 2 ) divided by three";
    Annotation annotation = new Annotation(text);
    pipeline.Annotate(annotation);

    // Blank line, then the shorter string form of the whole annotation.
    @out.Println();
    @out.Println("The top level annotation");
    @out.Println(annotation.ToShorterString());

    IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
    int sentenceNumber = 0;
    foreach (ICoreMap sentence in sentences)
    {
        sentenceNumber++;
        @out.Println("Sentence #" + sentenceNumber);

        // Dump every token of the sentence with its text, POS tag and NER tag.
        foreach (CoreLabel sentenceToken in sentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
        {
            @out.Println(" Token: " + "word=" + sentenceToken.Get(typeof(CoreAnnotations.TextAnnotation)) + ", pos=" + sentenceToken.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)) + ", ne=" + sentenceToken.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation)));
        }

        IList<MatchedExpression> matches = extractor.ExtractExpressions(sentence);
        foreach (MatchedExpression match in matches)
        {
            // Matched text and the value reported for it.
            @out.Println("Matched expression: " + match.GetText() + " with value " + match.GetValue());

            // Token-level details of the matched span.
            ICoreMap matchedSpan = match.GetAnnotation();
            foreach (CoreLabel matchedToken in matchedSpan.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                string word = matchedToken.Get(typeof(CoreAnnotations.TextAnnotation));
                string lemma = matchedToken.Get(typeof(CoreAnnotations.LemmaAnnotation));
                string pos = matchedToken.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                string ne = matchedToken.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                @out.Println(" Matched token: " + "word=" + word + ", lemma=" + lemma + ", pos=" + pos + ", ne=" + ne);
            }
        }
    }

    @out.Flush();
}
/// <summary>
/// Builds an extractor from the grammar files named in <c>options.grammarFilename</c>.
/// </summary>
/// <returns>An extractor created from the listed files in the current <c>env</c>.</returns>
private CoreMapExpressionExtractor <MatchedExpression> CreateExtractor()
{
    // File names are separated by ',' or ';' with optional surrounding whitespace.
    const string separatorPattern = "\\s*[,;]\\s*";

    IList<string> grammarFiles = StringUtils.Split(options.grammarFilename, separatorPattern);
    return CoreMapExpressionExtractor.CreateExtractorFromFiles(env, grammarFiles);
}
/// <summary>
/// Stores <paramref name="options"/>, initializes the environment, and creates the extractor.
/// </summary>
/// <param name="options">Configuration kept in the <c>options</c> field.</param>
public virtual void Init(Options options)
{
    this.options = options;

    // Order matters: the options are stored first, then the environment is set up,
    // and only then is the extractor built.
    InitEnv();
    extractor = CreateExtractor();
}
/// <summary>
/// Creates an extractor from the grammar files listed in <c>options.grammarFilename</c>.
/// </summary>
/// <returns>An extractor built from those files using the current <c>env</c>.</returns>
public virtual CoreMapExpressionExtractor CreateExtractor()
{
    // The grammar setting is a list of file names delimited by ',' or ';',
    // with any whitespace around the delimiter ignored.
    const string delimiterRegex = "\\s*[,;]\\s*";

    IList<string> ruleFileNames = StringUtils.Split(options.grammarFilename, delimiterRegex);
    return CoreMapExpressionExtractor.CreateExtractorFromFiles(env, ruleFileNames);
}