/// <summary>
/// Replaces every configured stage with a single stage, registered under
/// stage id 1, that is built from the three supplied rules.
/// </summary>
/// <param name="basicExtractRule">Rule stored as the stage's basic extract rule.</param>
/// <param name="compositeExtractRule">Rule stored as the stage's composite extract rule.</param>
/// <param name="filterRule">Predicate stored as the stage's filter rule.</param>
public virtual void SetExtractRules(SequenceMatchRules.IExtractRule<ICoreMap, T> basicExtractRule, SequenceMatchRules.IExtractRule<IList<ICoreMap>, T> compositeExtractRule, IPredicate<T> filterRule)
{
    CoreMapExpressionExtractor.Stage<T> soleStage = new CoreMapExpressionExtractor.Stage<T>
    {
        basicExtractRule = basicExtractRule,
        compositeExtractRule = compositeExtractRule,
        filterRule = filterRule
    };

    // Drop whatever stages were registered before; the new one becomes stage 1.
    this.stages.Clear();
    this.stages[1] = soleStage;
}
/// <summary>
/// Runs every configured stage over <paramref name="annotation"/>, in ascending
/// stage-id order, and returns the expressions that remain after the last stage.
/// </summary>
/// <remarks>
/// For each stage: the accumulated matches are cleared if the stage asks for it,
/// the basic rule (if any) extracts and annotates new matches, which are then
/// stripped of null-valued, nested and overlapping entries; the composite rule
/// (if any) is applied to the merged token list; finally the stage's filter rule
/// is applied. After all stages the result is sorted with
/// <c>MatchedExpression.ExprTokenOffsetsNestedFirstComparator</c> and, unless
/// <c>keepTags</c> is set, <c>CleanupTags</c> is called on the annotation.
/// </remarks>
/// <param name="annotation">Annotation to extract expressions from.</param>
/// <returns>The matched expressions produced by the final stage, sorted.</returns>
public virtual IList<T> ExtractExpressions(ICoreMap annotation)
{
    // Extract potential expressions
    IList<T> matchedExpressions = new List<T>();

    // Use the concrete List<int> so that List<T>.Sort() is available;
    // the IList<int> interface itself declares no Sort method.
    List<int> stageIds = new List<int>(stages.Keys);
    stageIds.Sort();

    foreach (int stageId in stageIds)
    {
        CoreMapExpressionExtractor.Stage<T> stage = stages[stageId];
        SequenceMatchRules.IExtractRule<ICoreMap, T> basicExtractRule = stage.basicExtractRule;
        if (stage.clearMatched)
        {
            matchedExpressions.Clear();
        }

        if (basicExtractRule != null)
        {
            basicExtractRule.Extract(annotation, matchedExpressions);
            if (verbose && matchedExpressions != null)
            {
                log.Info("extractExpressions() extracting with " + basicExtractRule + " from " + annotation + " gives " + matchedExpressions);
            }
            AnnotateExpressions(annotation, matchedExpressions);
            matchedExpressions = MatchedExpression.RemoveNullValues(matchedExpressions);
            matchedExpressions = MatchedExpression.RemoveNested(matchedExpressions);
            matchedExpressions = MatchedExpression.RemoveOverlapping(matchedExpressions);
        }

        IList<ICoreMap> merged = MatchedExpression.ReplaceMergedUsingTokenOffsets(annotation.Get(tokensAnnotationKey), matchedExpressions);
        SequenceMatchRules.IExtractRule<IList<ICoreMap>, T> compositeExtractRule = stage.compositeExtractRule;
        if (compositeExtractRule != null)
        {
            Pair<IList<ICoreMap>, IList<T>> p = ApplyCompositeRule(compositeExtractRule, merged, matchedExpressions, stage.limitIters);
            merged = p.First();
            matchedExpressions = p.Second();
        }

        matchedExpressions = FilterInvalidExpressions(stage.filterRule, matchedExpressions);
    }

    // NOTE(review): IList<T> has no Sort member in the BCL, so this call presumably
    // binds to a project-provided extension method - confirm.
    matchedExpressions.Sort(MatchedExpression.ExprTokenOffsetsNestedFirstComparator);
    if (!keepTags)
    {
        CleanupTags(annotation);
    }
    return matchedExpressions;
}
/// <summary>Add specified rules to this extractor.</summary>
/// <remarks>
/// Assignment rules are evaluated against the environment immediately.
/// Annotation extract rules are attached to the stage named by their
/// <c>stage</c> field, creating that stage on first use; a new stage picks up
/// the <c>stage.clearMatched</c> and <c>stage.limitIters</c> environment
/// defaults when they are present. Inactive rules are logged and skipped.
/// Rules of any other kind are ignored.
/// </remarks>
/// <param name="rules">
/// Rules to add. When <c>collapseExtractionRules</c> is set they are first
/// passed through <c>Collapse</c>.
/// </param>
public virtual void AppendRules(IList<SequenceMatchRules.IRule> rules)
{
    if (verbose)
    {
        log.Info("Read " + rules.Count + " rules");
    }

    // Put rules into stages
    if (collapseExtractionRules)
    {
        rules = Collapse(rules);
        if (verbose)
        {
            log.Info("Collapsing into " + rules.Count + " rules");
        }
    }

    foreach (SequenceMatchRules.IRule r in rules)
    {
        if (r is SequenceMatchRules.AssignmentRule)
        {
            // Assignment rules carry no extraction logic; they are simply
            // evaluated against the environment.
            ((SequenceMatchRules.AssignmentRule)r).Evaluate(env);
        }
        else if (r is SequenceMatchRules.AnnotationExtractRule)
        {
            SequenceMatchRules.AnnotationExtractRule aer = (SequenceMatchRules.AnnotationExtractRule)r;

            // Look the stage up with TryGetValue: a dictionary indexer throws on a
            // missing key instead of returning null, which would make the
            // "create on first use" branch unreachable.
            // NOTE(review): assumes `stages` exposes IDictionary-style TryGetValue - confirm.
            CoreMapExpressionExtractor.Stage<T> stage;
            if (!stages.TryGetValue(aer.stage, out stage) || stage == null)
            {
                stage = new CoreMapExpressionExtractor.Stage<T>();
                stages[aer.stage] = stage;
                stage.stageId = aer.stage;

                // Apply environment defaults only when they are present and non-null.
                // Unboxing a missing/null entry straight to bool or int would throw,
                // and a value-typed local can never compare equal to null.
                // NOTE(review): assumes GetDefaults() returns a string -> object
                // dictionary exposing TryGetValue - confirm.
                object defaultValue;
                if (env.GetDefaults().TryGetValue("stage.clearMatched", out defaultValue) && defaultValue != null)
                {
                    stage.clearMatched = (bool)defaultValue;
                }
                if (env.GetDefaults().TryGetValue("stage.limitIters", out defaultValue) && defaultValue != null)
                {
                    stage.limitIters = (int)defaultValue;
                }
            }

            if (!aer.active)
            {
                log.Debug("Ignoring inactive rule: " + aer.name);
                continue;
            }

            if (SequenceMatchRules.FilterRuleType.Equals(aer.ruleType))
            {
                stage.AddFilterRule(aer);
            }
            else if (aer.isComposite)
            {
                stage.AddCompositeRule(aer);
            }
            else
            {
                stage.AddBasicRule(aer);
            }
        }
    }
}