/// <summary>
/// Demo entry point: builds a dependency graph from a hard-coded bracketed parse, logs it,
/// and logs every match of a Semgrex pattern against that graph.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    // Typically the tree is constructed by parsing or by reading a treebank;
    // the literal below exists purely for example purposes.
    string bracketedParse = "(ROOT (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
    Tree parse = Tree.ValueOf(bracketedParse);

    // English uncollapsed dependencies as a SemanticGraph. When creating many
    // SemanticGraphs, prefer a GrammaticalStructureFactory and generate the
    // intermediate GrammaticalStructure from it instead (shown below).
    SemanticGraph dependencies = SemanticGraphFactory.GenerateUncollapsedDependencies(parse);

    // Alternatively, this could have been the Chinese params or those of any other
    // supported language (the original note dates this remark to 2014).
    ITreebankLangParserParams languageParams = new EnglishTreebankParserParams();
    IGrammaticalStructureFactory structureFactory = languageParams.TreebankLanguagePack().GrammaticalStructureFactory(
        languageParams.TreebankLanguagePack().PunctuationWordRejectFilter(),
        languageParams.TypedDependencyHeadFinder());
    GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(parse);

    log.Info(dependencies);

    SemgrexPattern ancestorPattern = SemgrexPattern.Compile("{}=A <<nsubj {}=B");
    SemgrexMatcher matcher = ancestorPattern.Matcher(dependencies);

    // This will produce two results on the given tree: "likes" is an
    // ancestor of both "dog" and "my" via the nsubj relation.
    while (matcher.Find())
    {
        IndexedWord ancestor = matcher.GetNode("A");
        IndexedWord descendant = matcher.GetNode("B");
        log.Info(ancestor + " <<nsubj " + descendant);
    }
}
/// <summary>
/// Creates a node from <c>nodeString</c> via <c>AddDep.FromCheapString</c>, adds it to the
/// graph as a vertex, and registers it under <c>nodeName</c> via <c>AddNamedNode</c>.
/// </summary>
/// <param name="sg">Graph that receives the new vertex.</param>
/// <param name="sm">Current match; not read by this edit.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord created = AddDep.FromCheapString(nodeString);

    sg.AddVertex(created);
    AddNamedNode(created, nodeName);
}
/// <summary>
/// Executes the Ssurgeon edit script using the supplied Semgrex pattern instead of the one
/// attached to this pattern.
/// </summary>
/// <remarks>
/// One copy of <paramref name="sg"/> is created and edited per accepted match.
/// NOTE: Predicate tests are still active here, and any named nodes required for evaluation
/// must be present.
/// </remarks>
/// <param name="sg">Graph to match against; each result is built from a copy of it.</param>
/// <param name="overridePattern">Pattern used for matching in place of the attached one.</param>
/// <returns>The edited graphs, one per match that passed the predicate test.</returns>
/// <exception cref="System.Exception"/>
public virtual ICollection<SemanticGraph> Execute(SemanticGraph sg, SemgrexPattern overridePattern)
{
    SemgrexMatcher matcher = overridePattern.Matcher(sg);
    ICollection<SemanticGraph> results = new List<SemanticGraph>();

    while (matcher.Find())
    {
        // Skip matches rejected by the optional predicate test.
        if (predicateTest != null && !predicateTest.Test(matcher))
        {
            continue;
        }

        // The named node map is reset for each edit set, since these edits
        // should exist in a separate graph for each unique Semgrex match.
        nodeMap = Generics.NewHashMap();

        SemanticGraph edited = new SemanticGraph(sg);
        foreach (SsurgeonEdit step in editScript)
        {
            step.Evaluate(edited, matcher);
        }

        results.Add(edited);
    }

    return results;
}
/// <summary>
/// Removes every edge that points into the node bound to <c>nodeName</c>.
/// </summary>
/// <param name="sg">Graph to modify in place.</param>
/// <param name="sm">Current match, used to resolve the named node.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord tgtNode = GetNamedNode(nodeName, sm);

    // Snapshot the incoming edges first: removing edges from the graph while still
    // enumerating the graph's own edge iterable modifies the collection under iteration.
    System.Collections.Generic.List<SemanticGraphEdge> incoming = new System.Collections.Generic.List<SemanticGraphEdge>();
    foreach (SemanticGraphEdge edge in sg.IncomingEdgeIterable(tgtNode))
    {
        incoming.Add(edge);
    }

    foreach (SemanticGraphEdge edge in incoming)
    {
        sg.RemoveEdge(edge);
    }
}
/// <summary>
/// Resolves each name in <c>newRootNames</c> to a node and installs the resulting list as
/// the graph's roots.
/// </summary>
/// <param name="sg">Graph whose roots are replaced.</param>
/// <param name="sm">Current match, used to resolve the named nodes.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IList<IndexedWord> resolvedRoots = new List<IndexedWord>();

    foreach (string currentName in newRootNames)
    {
        IndexedWord resolved = GetNamedNode(currentName, sm);
        resolvedRoots.Add(resolved);
    }

    sg.SetRoots(resolvedRoots);
}
/// <summary>Looks up a named node for the current match.</summary>
/// <remarks>
/// The matcher is consulted first. If it has no node under that name and an owning pattern
/// exists, the owning pattern is asked instead, as the node could have been created by an
/// earlier edit.
/// </remarks>
/// <param name="nodeName">Name of the node to retrieve.</param>
/// <param name="sm">Matcher holding the nodes bound by the current match.</param>
/// <returns>
/// The matcher's node when present; otherwise the owning pattern's node, or null when there
/// is no owning pattern.
/// </returns>
public virtual IndexedWord GetNamedNode(string nodeName, SemgrexMatcher sm)
{
    IndexedWord matched = sm.GetNode(nodeName);
    if (matched != null)
    {
        return matched;
    }

    if (GetOwningPattern() == null)
    {
        return null;
    }

    return GetOwningPattern().GetNamedNode(nodeName);
}
/// <summary>
/// Returns true as soon as one of the contained predicates accepts the match; returns false
/// when none does (including when there are no predicates).
/// </summary>
/// <param name="matcher">Match handed to each contained predicate.</param>
/// <exception cref="System.Exception"/>
public virtual bool Test(SemgrexMatcher matcher)
{
    bool anyAccepted = false;

    foreach (ISsurgPred candidate in this)
    {
        if (candidate.Test(matcher))
        {
            anyAccepted = true;
            break;
        }
    }

    return anyAccepted;
}
/// <summary>
/// Removes the edge between the governor and dependent nodes whose relation is the one the
/// matcher recorded under <c>edgeName</c>. Does nothing when no such edge exists.
/// </summary>
/// <param name="sg">Graph to modify in place.</param>
/// <param name="sm">Current match, supplying the relation string and the named nodes.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    string relationName = sm.GetRelnString(edgeName);
    IndexedWord governor = GetNamedNode(govName, sm);
    IndexedWord dependent = GetNamedNode(depName, sm);

    SemanticGraphEdge target = sg.GetEdge(governor, dependent, GrammaticalRelation.ValueOf(relationName));
    if (target == null)
    {
        return;
    }

    sg.RemoveEdge(target);
}
/// <summary>
/// Copies <c>newNodePrototype</c> into a new node, attaches it to the graph, and links it
/// below the governor node with the configured relation and weight.
/// </summary>
/// <remarks>
/// TODO: figure out how to specify where in the sentence this node goes.
/// TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel.
/// TODO: bombproof if this gov, dep, and reln already exist.
/// </remarks>
/// <param name="sg">Graph to modify in place.</param>
/// <param name="sm">Current match; the governor is read directly from it.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord governor = sm.GetNode(govNodeName);
    IndexedWord inserted = new IndexedWord(newNodePrototype);

    // Cheap En-specific hack for placing a copula (beginning of the governing phrase):
    // reuse the index of the governor's left-most child vertex.
    int insertedIndex = SemanticGraphUtils.LeftMostChildVertice(governor, sg).Index();

    inserted.SetDocID(governor.DocID());
    inserted.SetIndex(insertedIndex);
    inserted.SetSentIndex(governor.SentIndex());

    sg.AddVertex(inserted);
    sg.AddEdge(governor, inserted, relation, weight, false);
}
/// <summary>
/// Removes every vertex for which the graph reports no path to a root
/// (<c>GetPathToRoot</c> returns null).
/// </summary>
/// <param name="sg">Graph to prune in place.</param>
/// <param name="sm">Current match; not read by this edit.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    // Work on a copy of the vertex set so vertices can be removed during the loop.
    IList<IndexedWord> vertexSnapshot = new List<IndexedWord>(sg.VertexSet());

    foreach (IndexedWord candidate in vertexSnapshot)
    {
        if (sg.GetPathToRoot(candidate) != null)
        {
            continue;
        }

        sg.RemoveVertex(candidate);
    }
}
/// <summary>Returns whether any of the given patterns match this graph for the given input.</summary>
/// <remarks>
/// Side effect: tokens inside the subject and object spans whose NER tag is "O" have their
/// NER tag overwritten with the subject / object type name before matching.
/// A match counts only when its "entity" node index falls in the subject span and its
/// "slot" node index falls in the object span (node index compared against Start() + 1 and
/// End()). Any exception raised while matching a pattern makes the method return false
/// immediately; the original comment attributes this to graphs without roots.
/// </remarks>
/// <param name="sentence">Sentence supplying the token list.</param>
/// <param name="rulesForRel">Patterns to try, in iteration order.</param>
/// <param name="input">Subject/object spans and types.</param>
/// <param name="graph">Dependency graph to match against; null or empty yields false.</param>
/// <returns>True when some pattern produces a match covering both spans.</returns>
private bool Matches(ICoreMap sentence, ICollection<SemgrexPattern> rulesForRel, KBPRelationExtractor.KBPInput input, SemanticGraph graph)
{
    if (graph == null || graph.IsEmpty())
    {
        return false;
    }

    IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

    foreach (int subjectPos in input.subjectSpan)
    {
        CoreLabel subjectToken = tokens[subjectPos];
        if ("O".Equals(subjectToken.Ner()))
        {
            subjectToken.SetNER(input.subjectType.name);
        }
    }

    foreach (int objectPos in input.objectSpan)
    {
        CoreLabel objectToken = tokens[objectPos];
        if ("O".Equals(objectToken.Ner()))
        {
            objectToken.SetNER(input.objectType.name);
        }
    }

    foreach (SemgrexPattern rule in rulesForRel)
    {
        try
        {
            SemgrexMatcher ruleMatcher = rule.Matcher(graph);
            while (ruleMatcher.Find())
            {
                IndexedWord entity = ruleMatcher.GetNode("entity");
                IndexedWord slot = ruleMatcher.GetNode("slot");

                // Both flags are computed before testing, as in the original statement order.
                bool entityInSubject = entity.Index() >= input.subjectSpan.Start() + 1
                    && entity.Index() <= input.subjectSpan.End();
                bool slotInObject = slot.Index() >= input.objectSpan.Start() + 1
                    && slot.Index() <= input.objectSpan.End();

                if (entityInSubject && slotInObject)
                {
                    return true;
                }
            }
        }
        catch (Exception)
        {
            // Happens when graph has no roots
            return false;
        }
    }

    return false;
}
/// <summary>
/// Removes from the graph every node returned by <c>Crawl</c> for the node bound to
/// <c>destroyNodeName</c>, then resets the graph's roots. Does nothing when the seed node
/// is not a vertex of the graph.
/// </summary>
/// <param name="sg">Graph to modify in place.</param>
/// <param name="sm">Current match, used to resolve the seed node.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord seed = GetNamedNode(destroyNodeName, sm);

    // TODO: do not execute if the seed node is not in the graph (or just error?)
    if (!sg.ContainsVertex(seed))
    {
        return;
    }

    ICollection<IndexedWord> doomed = Crawl(seed, sg);
    foreach (IndexedWord victim in doomed)
    {
        sg.RemoveVertex(victim);
    }

    // After destroying nodes the roots must be reset, since a root node may have been destroyed.
    sg.ResetRoots();
}
/// <summary>Executes the edit script against copies of the given SemanticGraph.</summary>
/// <remarks>
/// For each match of the attached Semgrex pattern that passes the predicate test, a
/// duplicate of <paramref name="sg"/> (sharing the same nodes) is edited and collected.
/// NOTE(review): when a match binds two names to the same node, the loop stops and the
/// graphs collected so far are returned. The original comment speaks of throwing out only
/// that match, so confirm which behavior is intended.
/// TODO: create variant that returns set of expansions while matcher.find() returns true
/// </remarks>
/// <param name="sg">SemanticGraph to operate over (NOT destroyed/modified).</param>
/// <returns>The edited graphs, one per accepted match; empty when nothing matched.</returns>
/// <exception cref="System.Exception"/>
public virtual ICollection<SemanticGraph> Execute(SemanticGraph sg)
{
    ICollection<SemanticGraph> results = new List<SemanticGraph>();
    SemgrexMatcher matcher = semgrexPattern.Matcher(sg);

    while (matcher.Find())
    {
        // Semgrex can match two named nodes to the same node; detect such collisions.
        ICollection<string> boundNames = matcher.GetNodeNames();
        ICollection<IndexedWord> alreadyBound = Generics.NewHashSet();
        foreach (string boundName in boundNames)
        {
            IndexedWord bound = matcher.GetNode(boundName);
            if (alreadyBound.Contains(bound))
            {
                // Equivalent to jumping past the end of the loop: stop matching altogether.
                return results;
            }

            alreadyBound.Add(bound);
        }

        // Apply the optional predicate test to the current match; skip the match on failure.
        if (predicateTest != null && !predicateTest.Test(matcher))
        {
            continue;
        }

        // Generate a new graph, since we don't want to mutilate the original graph.
        // The same nodes are kept, since the matcher operates off of those.
        SemanticGraph edited = SemanticGraphFactory.DuplicateKeepNodes(sg);
        nodeMap = Generics.NewHashMap();
        foreach (SsurgeonEdit step in editScript)
        {
            step.Evaluate(edited, matcher);
        }

        results.Add(edited);
    }

    return results;
}
/// <summary>
/// Collapses the subgraph reachable from the node bound to <c>rootName</c> into a single
/// new node whose value, word and lemma are the space-joined values, words and lemmas of
/// the subgraph's nodes in sorted order.
/// </summary>
/// <param name="sg">Graph to modify in place.</param>
/// <param name="sm">Current match, used to resolve the subtree root.</param>
/// <exception cref="System.ArgumentException">
/// Thrown when the subgraph contains a cycle leading back to the root node.
/// </exception>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord rootNode = this.GetNamedNode(rootName, sm);
    ICollection<IndexedWord> subgraphNodeSet = sg.GetSubgraphVertices(rootNode);

    if (!sg.IsDag(rootNode))
    {
        /* Check if there is a cycle going back to the root. */
        foreach (IndexedWord child in sg.GetChildren(rootNode))
        {
            ICollection<IndexedWord> reachableSet = sg.GetSubgraphVertices(child);
            if (reachableSet.Contains(rootNode))
            {
                throw new ArgumentException("Subtree cannot contain cycle leading back to root node!");
            }
        }
    }

    IList<IndexedWord> sortedSubgraphNodes = Generics.NewArrayList(subgraphNodeSet);
    sortedSubgraphNodes.Sort();

    IndexedWord newNode = new IndexedWord(rootNode.DocID(), rootNode.SentIndex(), rootNode.Index());

    // NOTE(review): this iterates the keys of the freshly built node, not those of
    // rootNode, so only keys already present on newNode are copied — confirm intent.
    foreach (Type key in newNode.BackingLabel().KeySet())
    {
        newNode.Set(key, rootNode.Get(key));
    }

    // Collect the per-node strings explicitly. The previous code passed a null mapping
    // function to Stream().Map(...), so no usable projection was ever applied.
    IList<string> values = new List<string>();
    IList<string> words = new List<string>();
    IList<string> lemmas = new List<string>();
    foreach (IndexedWord member in sortedSubgraphNodes)
    {
        values.Add(member.Value());
        words.Add(member.Word());
        // Fall back to the surface word so a missing lemma does not leave a hole in the join.
        lemmas.Add(member.Lemma() ?? member.Word());
    }

    newNode.SetValue(StringUtils.Join(values, " "));
    newNode.SetWord(StringUtils.Join(words, " "));
    newNode.SetLemma(StringUtils.Join(lemmas, " "));

    // NOTE(review): the root is removed and then the same rootNode is re-added (not
    // newNode); rootNode is removed as a vertex below — confirm this is intended.
    if (sg.GetRoots().Contains(rootNode))
    {
        sg.GetRoots().Remove(rootNode);
        sg.AddRoot(rootNode);
    }

    // Re-point every edge that entered the old root at the collapsed node.
    foreach (SemanticGraphEdge edge in sg.IncomingEdgeIterable(rootNode))
    {
        sg.AddEdge(edge.GetGovernor(), newNode, edge.GetRelation(), edge.GetWeight(), edge.IsExtra());
    }

    foreach (IndexedWord node in sortedSubgraphNodes)
    {
        sg.RemoveVertex(node);
    }
}
/// <summary>
/// Removes edges carrying <c>relation</c> between the governor and dependent nodes.
/// </summary>
/// <remarks>
/// When both nodes resolve, the single edge between them (if any) is removed. Otherwise,
/// when only the dependent resolves and the governor name is the wildcard, every matching
/// incoming edge of the dependent is removed; when only the governor resolves and the
/// dependent name is the wildcard, every matching outgoing edge of the governor is removed.
/// </remarks>
/// <param name="sg">Graph to modify in place.</param>
/// <param name="sm">Current match, used to resolve the named nodes.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    bool govWild = govName.Equals(WildcardNode);
    bool depWild = depName.Equals(WildcardNode);
    IndexedWord govNode = GetNamedNode(govName, sm);
    IndexedWord depNode = GetNamedNode(depName, sm);

    if (govNode != null && depNode != null)
    {
        SemanticGraphEdge edge = sg.GetEdge(govNode, depNode, relation);
        if (edge != null)
        {
            sg.RemoveEdge(edge);
        }
        return;
    }

    // Snapshot the candidate edges before removing anything: removing edges while still
    // enumerating the graph's own edge iterable modifies the collection under iteration.
    System.Collections.Generic.List<SemanticGraphEdge> candidates = new System.Collections.Generic.List<SemanticGraphEdge>();
    if (depNode != null && govWild)
    {
        // dep known, wildcard gov
        foreach (SemanticGraphEdge incoming in sg.IncomingEdgeIterable(depNode))
        {
            candidates.Add(incoming);
        }
    }
    else if (govNode != null && depWild)
    {
        // gov known, wildcard dep
        foreach (SemanticGraphEdge outgoing in sg.OutgoingEdgeIterable(govNode))
        {
            candidates.Add(outgoing);
        }
    }

    foreach (SemanticGraphEdge candidate in candidates)
    {
        if (candidate.GetRelation().Equals(relation) && sg.ContainsEdge(candidate))
        {
            sg.RemoveEdge(candidate);
        }
    }
}
/// <summary>
/// Runs the pattern over graph <paramref name="g"/> and, for each match that satisfies the
/// max-depth check, records the "parent" node and hands the "node" node to
/// <c>PrintSubGraph</c> together with the output collections.
/// </summary>
/// <remarks>
/// When <c>Debug</c> is greater than 3, each accepted match also appends a
/// (joined tokens, graph) pair to the list stored for the pattern in
/// <c>matchedGraphsForPattern</c>.
/// NOTE(review): the lookup uses the indexer and then tests for null; if
/// <c>matchedGraphsForPattern</c> is a standard dictionary, a missing key throws instead of
/// yielding null — confirm the map type.
/// </remarks>
/// <returns>The set of "parent" nodes of all accepted matches.</returns>
public virtual ICollection<IndexedWord> GetSemGrexPatternNodes(SemanticGraph g, IList<string> tokens, ICollection<string> outputNodes, ICollection<IntPair> outputIndices, SemgrexPattern pattern, bool findSubTrees, ICollection<ExtractedPhrase> extractedPhrases, bool lowercase, IPredicate<CoreLabel> acceptWord)
{
    ICollection<IndexedWord> matchedParents = new HashSet<IndexedWord>();
    SemgrexMatcher m = pattern.Matcher(g, lowercase);

    while (m.Find())
    {
        IndexedWord matchedNode = m.GetNode("node");
        IndexedWord parent = m.GetNode("parent");

        bool withinDepth = CheckIfSatisfiedMaxDepth(g, parent, matchedNode, new IntPair(maxDepth, 0));
        if (!withinDepth)
        {
            continue;
        }

        if (Debug > 3)
        {
            IList<Pair<string, SemanticGraph>> recorded = matchedGraphsForPattern[pattern];
            if (recorded == null)
            {
                recorded = new List<Pair<string, SemanticGraph>>();
            }

            recorded.Add(new Pair<string, SemanticGraph>(StringUtils.Join(tokens, " "), g));
            matchedGraphsForPattern[pattern] = recorded;
        }

        matchedParents.Add(parent);

        // Fresh traversal state for every match; no cut-off relations are configured here.
        List<IndexedWord> seenNodes = new List<IndexedWord>();
        IList<string> cutoffrelations = new List<string>();

        PrintSubGraph(g, matchedNode, cutoffrelations, tokens, outputNodes, outputIndices, seenNodes, new List<IndexedWord>(), findSubTrees, extractedPhrases, pattern, acceptWord);
    }

    return matchedParents;
}
/// <summary>
/// Adds an edge with the configured relation and weight from the governor node to the
/// dependent node, unless such an edge already exists. Either node is added to the graph
/// first when it is not yet a vertex.
/// </summary>
/// <param name="sg">Graph to modify in place.</param>
/// <param name="sm">Current match, used to resolve the named nodes.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord governor = GetNamedNode(govName, sm);
    IndexedWord dependent = GetNamedNode(depName, sm);

    if (sg.GetEdge(governor, dependent, relation) != null)
    {
        // Edge already present; nothing to do.
        return;
    }

    // Make sure both endpoints are in the graph before connecting them.
    if (!sg.ContainsVertex(governor))
    {
        sg.AddVertex(governor);
    }

    if (!sg.ContainsVertex(dependent))
    {
        sg.AddVertex(dependent);
    }

    sg.AddEdge(governor, dependent, relation, weight, false);
}
/// <summary>
/// Find the operators in this sentence, annotating the head word (only!) of each operator with the
/// <see cref="OperatorAnnotation"/>.
/// </summary>
/// <remarks>
/// The basic dependencies are used when present, otherwise the enhanced dependencies.
/// After all patterns have run, operator annotations are removed from every non-head token
/// inside each collected quantifier span, and a sentence-initial "no" operator followed by
/// a PRP-tagged token is dropped when the sentence has more than two tokens.
/// </remarks>
/// <param name="sentence">
/// As in
/// <see cref="DoOneSentence(Edu.Stanford.Nlp.Pipeline.Annotation, Edu.Stanford.Nlp.Util.ICoreMap)"/>
/// </param>
private void AnnotateOperators(ICoreMap sentence)
{
    SemanticGraph tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
    if (tree == null)
    {
        tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    }

    foreach (SemgrexPattern pattern in Patterns)
    {
        SemgrexMatcher matcher = pattern.Matcher(tree);
        while (matcher.Find())
        {
            // Get terms
            IndexedWord properSubject = matcher.GetNode("Subject");
            IndexedWord quantifier;
            IndexedWord subject;
            bool namedEntityQuantifier = false;
            if (properSubject != null)
            {
                quantifier = subject = properSubject;
                namedEntityQuantifier = true;
            }
            else
            {
                quantifier = matcher.GetNode("quantifier");
                subject = matcher.GetNode("subject");
            }
            IndexedWord @object = matcher.GetNode("object");

            // Validate quantifier
            Optional<Triple<Operator, int, int>> quantifierInfo;
            if (namedEntityQuantifier)
            {
                // named entities have the "all" semantics by default.
                if (!neQuantifiers)
                {
                    continue;
                }
                // note: empty quantifier span given
                quantifierInfo = Optional.Of(Triple.MakeTriple(Operator.ImplicitNamedEntity, quantifier.Index(), quantifier.Index()));
            }
            else
            {
                // find the quantifier, and return some info about it.
                quantifierInfo = ValidateQuantifierByHead(sentence, quantifier, @object == null || subject == null);
            }

            // Awful hacks to regularize the subject of things like "one of" and "there are"
            // (fix up 'there are')
            if ("be".Equals(subject == null ? null : subject.Lemma()))
            {
                bool hasExpl = false;
                IndexedWord newSubject = null;
                foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                {
                    if ("nsubj".Equals(outgoingEdge.GetRelation().ToString()))
                    {
                        newSubject = outgoingEdge.GetDependent();
                    }
                    else if ("expl".Equals(outgoingEdge.GetRelation().ToString()))
                    {
                        hasExpl = true;
                    }
                }
                if (hasExpl)
                {
                    subject = newSubject;
                }
            }

            // (fix up '$n$ of')
            if ("CD".Equals(subject == null ? null : subject.Tag()))
            {
                foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                {
                    string rel = outgoingEdge.GetRelation().ToString();
                    if (rel.StartsWith("nmod"))
                    {
                        subject = outgoingEdge.GetDependent();
                    }
                }
            }

            // Set tokens
            if (quantifierInfo.IsPresent())
            {
                // Compute span
                IndexedWord pivot = matcher.GetNode("pivot");
                if (pivot == null)
                {
                    pivot = @object;
                }
                OperatorSpec scope = ComputeScope(tree, quantifierInfo.Get().first, pivot, Pair.MakePair(quantifierInfo.Get().second, quantifierInfo.Get().third), subject, namedEntityQuantifier, @object, tokens.Count);

                // Set annotation (node indices are 1-based relative to the token list)
                CoreLabel token = sentence.Get(typeof(CoreAnnotations.TokensAnnotation))[quantifier.Index() - 1];
                OperatorSpec oldScope = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                if (oldScope == null || oldScope.QuantifierLength() < scope.QuantifierLength() || oldScope.instance != scope.instance)
                {
                    token.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), scope);
                }
                else
                {
                    token.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), OperatorSpec.Merge(oldScope, scope));
                }
            }
        }
    }

    // Ensure we didn't select overlapping quantifiers. For example, "a" and "a few" can often overlap.
    // In these cases, take the longer quantifier match.
    List<OperatorSpec> quantifiers = new List<OperatorSpec>();
    for (int i = 0; i < tokens.Count; ++i)
    {
        CoreLabel token = tokens[i];
        OperatorSpec @operator = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
        if (@operator == null)
        {
            continue;
        }

        if (i == 0 && @operator.instance == Operator.No && tokens.Count > 2 && "PRP".Equals(tokens[1].Get(typeof(CoreAnnotations.PartOfSpeechAnnotation))))
        {
            // This is pragmatically not a negation -- ignore it
            // For example, "no I don't like candy" or "no you like cats"
            token.Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
        }
        else
        {
            quantifiers.Add(@operator);
        }
    }

    // Longest quantifier first. The previous call passed a null comparer, which left the
    // ordering undefined (or failing) instead of preferring the longer match.
    quantifiers.Sort((left, right) => right.QuantifierLength().CompareTo(left.QuantifierLength()));

    foreach (OperatorSpec quantifier_1 in quantifiers)
    {
        for (int i_1 = quantifier_1.quantifierBegin; i_1 < quantifier_1.quantifierEnd; ++i_1)
        {
            if (i_1 != quantifier_1.quantifierHead)
            {
                tokens[i_1].Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
            }
        }
    }
}
/// <summary>
/// Given a matching instance (via the SemgrexMatcher), performs an in-place
/// modification on the given SemanticGraph.
/// </summary>
/// <param name="sg">The graph to modify in place.</param>
/// <param name="sm">The matcher positioned on the match that this edit should act upon.</param>
public abstract void Evaluate(SemanticGraph sg, SemgrexMatcher sm);
/// <summary>
/// Attributes a mention to each quote that does not yet have one, using the quote's
/// dependency parse (quote-removed depparses): a subject whose verb is a common speech
/// word, with both lying inside the remainder of the quote's sentence, becomes the mention.
/// </summary>
/// <param name="doc">Annotated document supplying the quotations.</param>
public virtual void DependencyParses(Annotation doc)
{
    IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

    foreach (ICoreMap quote in quotes)
    {
        // Quotes that already carry a mention are left alone.
        if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
        {
            continue;
        }

        Pair<int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
        if (range == null)
        {
            continue;
        }

        // Search for mentions in the first run
        Pair<List<string>, List<Pair<int, int>>> namesAndNameIndices = ScanForNames(range);
        List<string> names = namesAndNameIndices.first;
        List<Pair<int, int>> nameIndices = namesAndNameIndices.second;

        SemanticGraph graph = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
        SemgrexMatcher matcher = subjVerbPattern.Matcher(graph);
        IList<Pair<IndexedWord, IndexedWord>> subjVerbPairs = new List<Pair<IndexedWord, IndexedWord>>();

        // TODO: check and see if this is necessary
        while (matcher.Find())
        {
            IndexedWord subj = matcher.GetNode("SUBJ");
            IndexedWord verb = matcher.GetNode("VERB");
            subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(subj, verb));
        }

        // Also pair an nsubj child of a verb with each verb-tagged "dep" child of that verb.
        IList<IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
        foreach (IndexedWord iw in vbs)
        {
            // does it have an nsubj child?
            ICollection<IndexedWord> children = graph.GetChildren(iw);
            IList<IndexedWord> deps = Generics.NewArrayList();
            IndexedWord nsubj = null;
            foreach (IndexedWord child in children)
            {
                SemanticGraphEdge sge = graph.GetEdge(iw, child);
                if (sge.GetRelation().GetShortName().Equals("dep") && child.Tag().StartsWith("VB"))
                {
                    deps.Add(child);
                }
                else if (sge.GetRelation().GetShortName().Equals("nsubj"))
                {
                    nsubj = child;
                }
            }

            if (nsubj != null)
            {
                foreach (IndexedWord dep in deps)
                {
                    subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(nsubj, dep));
                }
            }
        }

        // Look for a speech verb
        foreach (Pair<IndexedWord, IndexedWord> svPair in subjVerbPairs)
        {
            IndexedWord verb = svPair.second;
            IndexedWord subj = svPair.first;

            // Check that both the subject and the verb fall inside the range.
            // The subject position was previously computed from the verb's label by mistake.
            int verbTokPos = TokenToLocation(verb.BackingLabel());
            int subjTokPos = TokenToLocation(subj.BackingLabel());
            if (!(InRange(range, verbTokPos) && InRange(range, subjTokPos) && commonSpeechWords.Contains(verb.Lemma())))
            {
                continue;
            }

            if (subj.Tag().Equals("NNP"))
            {
                int startChar = subj.BeginPosition();
                for (int i = 0; i < names.Count; i++)
                {
                    Pair<int, int> nameIndex = nameIndices[i];
                    // Only accept a name whose range actually contains the subject's start.
                    if (RangeContainsCharIndex(nameIndex, startChar))
                    {
                        FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                        // NOTE(review): this leaves only the name scan; later subject/verb
                        // pairs are still examined, unlike the two branches below.
                        break;
                    }
                }
            }
            else if (subj.Tag().Equals("PRP"))
            {
                int loc = TokenToLocation(subj.BackingLabel());
                FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                break;
            }
            else if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
            {
                int loc = TokenToLocation(subj.BackingLabel());
                FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                break;
            }
        }
    }
}
/// <summary>Returns all of the entailed shortened clauses (as per natural logic) from the given clause.</summary>
/// <remarks>
/// This runs the forward entailment component of the OpenIE system only.
/// It is usually chained together with the clause splitting component:
/// <see cref="ClausesInSentence(Edu.Stanford.Nlp.Util.ICoreMap)"/>.
/// The result holds the forward entailments (only when <c>entailmentsPerSentence</c> is
/// positive), then the clause itself, then any adjective-based fragments.
/// NOTE(review): the forward-entailment results are mapped with a null function
/// (<c>Map(null)</c>); the intended projection is not visible here — confirm.
/// </remarks>
/// <param name="clause">The premise clause, as a sentence fragment in itself.</param>
/// <returns>A list of entailed clauses; an empty list when the clause's parse tree is empty.</returns>
public virtual IList<SentenceFragment> EntailmentsFromClause(SentenceFragment clause)
{
    if (clause.parseTree.IsEmpty())
    {
        return Java.Util.Collections.EmptyList();
    }

    // Get the forward entailments
    IList<SentenceFragment> list = new List<SentenceFragment>();
    if (entailmentsPerSentence > 0)
    {
        Sharpen.Collections.AddAll(list, forwardEntailer.Apply(clause.parseTree, true).Search().Stream().Map(null).Collect(Collectors.ToList()));
    }
    list.Add(clause);

    // A special case for adjective entailments
    IList<SentenceFragment> adjectiveFragments = new List<SentenceFragment>();
    SemgrexMatcher matcher = adjectivePattern.Matcher(clause.parseTree);
    while (matcher.Find())
    {
        // (get nodes)
        IndexedWord subj = matcher.GetNode("subj");
        IndexedWord be = matcher.GetNode("be");
        IndexedWord adj = matcher.GetNode("adj");
        IndexedWord obj = matcher.GetNode("obj");
        IndexedWord pobj = matcher.GetNode("pobj");
        string prep = matcher.GetRelnString("prep");

        // (if the adjective, or any earlier adjective, is privative, then all bets are off)
        bool blockedByPrivative = false;
        foreach (SemanticGraphEdge modifierEdge in clause.parseTree.OutgoingEdgeIterable(obj))
        {
            if ("amod".Equals(modifierEdge.GetRelation().ToString()) && modifierEdge.GetDependent().Index() <= adj.Index() && Edu.Stanford.Nlp.Naturalli.Util.PrivativeAdjectives.Contains(modifierEdge.GetDependent().Word().ToLower()))
            {
                blockedByPrivative = true;
                break;
            }
        }
        if (blockedByPrivative)
        {
            continue;
        }

        // (create the core tree)
        SemanticGraph fragmentTree = new SemanticGraph();
        fragmentTree.AddRoot(adj);
        fragmentTree.AddVertex(subj);
        fragmentTree.AddVertex(be);
        fragmentTree.AddEdge(adj, be, GrammaticalRelation.ValueOf(Language.English, "cop"), double.NegativeInfinity, false);
        fragmentTree.AddEdge(adj, subj, GrammaticalRelation.ValueOf(Language.English, "nsubj"), double.NegativeInfinity, false);

        // (add pp attachment, if it existed)
        if (pobj != null)
        {
            System.Diagnostics.Debug.Assert(prep != null);
            fragmentTree.AddEdge(adj, pobj, GrammaticalRelation.ValueOf(Language.English, prep), double.NegativeInfinity, false);
        }

        // (check for monotonicity)
        bool adjectiveUpwards = adj.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards();
        if (adjectiveUpwards && be.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation)).IsUpwards())
        {
            // (add tree)
            adjectiveFragments.Add(new SentenceFragment(fragmentTree, clause.assumedTruth, false));
        }
    }

    Sharpen.Collections.AddAll(list, adjectiveFragments);
    return list;
}
/// <summary>
/// Annotate any unary quantifiers that weren't found in the main
/// <see cref="AnnotateOperators(Edu.Stanford.Nlp.Util.ICoreMap)"/>
/// method.
/// </summary>
/// <remarks>
/// Two passes are made: a Semgrex pass (<c>UnaryPattern</c>) that annotates quantifier
/// tokens not already covered by an operator span, and a TokensRegex pass
/// (<c>DoubtPattern</c>) that annotates every token in the "$doubt" group.
/// </remarks>
/// <param name="sentence">The sentence to annotate.</param>
private static void AnnotateUnaries(ICoreMap sentence)
{
    // Get tree and tokens (basic dependencies preferred, enhanced as fallback)
    SemanticGraph tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    if (tree == null)
    {
        tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    }
    IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

    // Mask of token positions already covered by an operator's quantifier span
    bool[] coveredByOperator = new bool[tokens.Count];
    for (int tokenPos = 0; tokenPos < coveredByOperator.Length; ++tokenPos)
    {
        OperatorSpec existing = tokens[tokenPos].Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
        if (existing == null)
        {
            continue;
        }
        for (int spanPos = existing.quantifierBegin; spanPos < existing.quantifierEnd; ++spanPos)
        {
            coveredByOperator[spanPos] = true;
        }
    }

    // Match Semgrex
    SemgrexMatcher matcher = UnaryPattern.Matcher(tree);
    while (matcher.Find())
    {
        // Get relevant nodes
        IndexedWord quantifier = matcher.GetNode("quantifier");
        string loweredWord = quantifier.Word().ToLower();

        // These are absurdly common, and uninformative, and we're just going to shoot
        // ourselves in the foot from parsing errors and idiomatic expressions.
        if (loweredWord.Equals("a") || loweredWord.Equals("an") || loweredWord.Equals("the") || "CD".Equals(quantifier.Tag()))
        {
            continue;
        }

        IndexedWord subject = matcher.GetNode("subject");

        // ... If there is already an operator there, leave it alone
        if (coveredByOperator[quantifier.Index() - 1])
        {
            continue;
        }

        // ... and only proceed if we found a quantifier span
        Optional<Triple<Operator, int, int>> quantifierInfo = ValidateQuantifierByHead(sentence, quantifier, true);
        if (!quantifierInfo.IsPresent())
        {
            continue;
        }

        // Then add the unary operator!
        OperatorSpec scope = ComputeScope(tree, quantifierInfo.Get().first, subject, Pair.MakePair(quantifierInfo.Get().second, quantifierInfo.Get().third), null, false, null, tokens.Count);
        CoreLabel quantifierToken = tokens[quantifier.Index() - 1];
        quantifierToken.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), scope);
    }

    // Match TokensRegex
    TokenSequenceMatcher tokenMatcher = DoubtPattern.Matcher(tokens);
    while (tokenMatcher.Find())
    {
        IList<CoreLabel> doubt = (IList<CoreLabel>)tokenMatcher.GroupNodes("$doubt");
        IList<CoreLabel> target = (IList<CoreLabel>)tokenMatcher.GroupNodes("$target");
        foreach (CoreLabel doubtToken in doubt)
        {
            OperatorSpec doubtSpec = new OperatorSpec(Operator.GeneralNegPolarity, doubtToken.Index() - 1, doubtToken.Index(), target[0].Index() - 1, target[target.Count - 1].Index(), 0, 0, tokens.Count);
            doubtToken.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), doubtSpec);
        }
    }
}
/// <summary>
/// Evaluates this predicate on the node that the matcher bound under <c>matchName</c>.
/// </summary>
/// <param name="matcher">Match supplying the node to test.</param>
/// <returns>The result of <c>Evaluate</c> for that node.</returns>
/// <exception cref="System.Exception"/>
public virtual bool Test(SemgrexMatcher matcher)
{
    IndexedWord boundNode = matcher.GetNode(matchName);
    return Evaluate(boundNode);
}
/// <summary>
/// Create a dataset of subject/object pairs, such that a sequence of splits that segments this
/// subject and object is a correct sequence.
/// </summary>
/// <remarks>
/// Two passes are made. The first uses <c>segmenter.VpPatterns</c>: for a verb/object match
/// with no attached subject, the subject span is taken from the trace target whose source
/// lies in the verb span widened by one on each side. The second uses
/// <c>segmenter.VerbPatterns</c> and pairs the matched subject and object chunks directly.
/// </remarks>
/// <param name="depparse">The dependency parse of the sentence.</param>
/// <param name="tokens">The tokens of the sentence; not read by this method.</param>
/// <param name="traceTargets">The set of spans corresponding to targets of traces.</param>
/// <param name="traceSources">The set of indices in a sentence corresponding to the sources of traces.</param>
/// <returns>A dataset of subject/object spans.</returns>
private static ICollection<Pair<Span, Span>> SubjectObjectPairs(SemanticGraph depparse, IList<CoreLabel> tokens, IDictionary<int, Span> traceTargets, IDictionary<int, int> traceSources)
{
    IList<Pair<Span, Span>> data = new List<Pair<Span, Span>>();

    foreach (SemgrexPattern vpPattern in segmenter.VpPatterns)
    {
        SemgrexMatcher matcher = vpPattern.Matcher(depparse);
        while (matcher.Find())
        {
            // Get the verb and object
            IndexedWord verb = matcher.GetNode("verb");
            IndexedWord @object = matcher.GetNode("object");
            if (verb == null || @object == null)
            {
                continue;
            }

            // See if there is already a subject attached
            if (HasOutgoingSubjectArc(depparse, verb) || HasOutgoingSubjectArc(depparse, @object))
            {
                continue;
            }

            // Get the spans for the verb and object
            Optional<IList<IndexedWord>> verbChunk = segmenter.GetValidChunk(depparse, verb, segmenter.ValidAdverbArcs, Optional.Empty(), true);
            Optional<IList<IndexedWord>> objectChunk = segmenter.GetValidChunk(depparse, @object, segmenter.ValidObjectArcs, Optional.Empty(), true);
            if (!verbChunk.IsPresent() || !objectChunk.IsPresent())
            {
                continue;
            }

            // Order both chunks by token index. The previous code sorted with a comparer
            // built from a null key selector, so no usable ordering was applied.
            IList<IndexedWord> sortedVerbChunk = SortedByTokenIndex(verbChunk.Get());
            IList<IndexedWord> sortedObjectChunk = SortedByTokenIndex(objectChunk.Get());

            // Find a trace whose source falls in the verb span widened by one on each side
            int traceId = -1;
            Span verbSpan = ToSpan(sortedVerbChunk);
            Span traceSpan = Span.FromValues(verbSpan.Start() - 1, verbSpan.End() + 1);
            foreach (KeyValuePair<int, int> entry in traceSources)
            {
                if (traceSpan.Contains(entry.Value))
                {
                    traceId = entry.Key;
                }
            }
            if (traceId < 0)
            {
                // No trace found for this VP; nothing to record.
                continue;
            }

            // Add the obj chunk. TryGetValue is used because the dictionary indexer throws
            // on a missing key, which would make the null check below unreachable.
            Span subjectSpan;
            traceTargets.TryGetValue(traceId, out subjectSpan);
            Span objectSpan = ToSpan(sortedObjectChunk);
            if (subjectSpan != null)
            {
                data.Add(Pair.MakePair(subjectSpan, objectSpan));
            }
        }
    }

    // Run vanilla pattern splits
    foreach (SemgrexPattern vpPattern_1 in segmenter.VerbPatterns)
    {
        SemgrexMatcher matcher = vpPattern_1.Matcher(depparse);
        while (matcher.Find())
        {
            // Get the subject and object
            IndexedWord subject = matcher.GetNode("subject");
            IndexedWord @object = matcher.GetNode("object");
            if (subject == null || @object == null)
            {
                continue;
            }

            Optional<IList<IndexedWord>> subjectChunk = segmenter.GetValidChunk(depparse, subject, segmenter.ValidSubjectArcs, Optional.Empty(), true);
            Optional<IList<IndexedWord>> objectChunk = segmenter.GetValidChunk(depparse, @object, segmenter.ValidObjectArcs, Optional.Empty(), true);
            if (subjectChunk.IsPresent() && objectChunk.IsPresent())
            {
                Span subjectSpan = ToSpan(subjectChunk.Get());
                Span objectSpan = ToSpan(objectChunk.Get());
                data.Add(Pair.MakePair(subjectSpan, objectSpan));
            }
        }
    }

    return data;
}

/// <summary>Returns true when any outgoing edge of the node has a relation whose name contains "subj".</summary>
private static bool HasOutgoingSubjectArc(SemanticGraph depparse, IndexedWord node)
{
    foreach (SemanticGraphEdge edge in depparse.OutgoingEdgeIterable(node))
    {
        if (edge.GetRelation().ToString().Contains("subj"))
        {
            return true;
        }
    }
    return false;
}

/// <summary>Returns a copy of the chunk ordered by ascending node index; the input list is not modified.</summary>
private static IList<IndexedWord> SortedByTokenIndex(IList<IndexedWord> chunk)
{
    List<IndexedWord> ordered = new List<IndexedWord>(chunk);
    ordered.Sort((left, right) => left.Index().CompareTo(right.Index()));
    return ordered;
}