/// <summary>
/// Annotate any unary quantifiers that weren't found in the main
/// <see cref="AnnotateOperators(Edu.Stanford.Nlp.Util.ICoreMap)"/>
/// method.
/// </summary>
/// <remarks>
/// Two passes are made over the sentence. First, every match of <c>UnaryPattern</c> on the
/// dependency graph whose quantifier token is not already covered by an operator span gets an
/// operator annotation on the quantifier's token. Second, every match of <c>DoubtPattern</c> on
/// the token sequence gets a <c>GeneralNegPolarity</c> operator on each token of its
/// <c>$doubt</c> group; this second pass overwrites whatever operator annotation the token had.
/// </remarks>
/// <param name="sentence">The sentence to annotate.</param>
private static void AnnotateUnaries(ICoreMap sentence)
{
    // Prefer the basic dependencies; fall back to the enhanced ones when they are absent.
    SemanticGraph tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    if (tree == null)
    {
        tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    }
    IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

    // Mask of token positions that already lie inside some operator's quantifier span.
    // The mask is a snapshot: operators added below do not update it.
    bool[] isOperator = new bool[tokens.Count];
    for (int tokenIdx = 0; tokenIdx < isOperator.Length; ++tokenIdx)
    {
        OperatorSpec existing = tokens[tokenIdx].Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
        if (existing == null)
        {
            continue;
        }
        for (int covered = existing.quantifierBegin; covered < existing.quantifierEnd; ++covered)
        {
            isOperator[covered] = true;
        }
    }

    // Pass 1: Semgrex matches on the dependency graph.
    SemgrexMatcher matcher = UnaryPattern.Matcher(tree);
    while (matcher.Find())
    {
        IndexedWord quantifier = matcher.GetNode("quantifier");

        // Articles and cardinal numbers are absurdly common, and uninformative, and we're just
        // going to shoot ourselves in the foot from parsing errors and idiomatic expressions.
        // The casing is culture-invariant: the word is only compared against fixed ASCII literals.
        string word = quantifier.Word().ToLowerInvariant();
        bool isArticle = word.Equals("a") || word.Equals("an") || word.Equals("the");
        if (isArticle || "CD".Equals(quantifier.Tag()))
        {
            continue;
        }

        IndexedWord subject = matcher.GetNode("subject");

        // IndexedWord indices are used 1-based here; the token list is 0-based.
        int quantifierTokenIdx = quantifier.Index() - 1;
        if (isOperator[quantifierTokenIdx])
        {
            // ... there is already an operator there
            continue;
        }

        Optional<Triple<Operator, int, int>> quantifierInfo = ValidateQuantifierByHead(sentence, quantifier, true);
        if (!quantifierInfo.IsPresent())
        {
            // ... no quantifier span was found
            continue;
        }

        // Then add the unary operator!
        Triple<Operator, int, int> found = quantifierInfo.Get();
        OperatorSpec scope = ComputeScope(
            tree,
            found.first,
            subject,
            Pair.MakePair(found.second, found.third),
            null,
            false,
            null,
            tokens.Count);
        tokens[quantifierTokenIdx].Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), scope);
    }

    // Pass 2: TokensRegex matches on the token sequence.
    TokenSequenceMatcher tokenMatcher = DoubtPattern.Matcher(tokens);
    while (tokenMatcher.Find())
    {
        IList<CoreLabel> doubt = (IList<CoreLabel>)tokenMatcher.GroupNodes("$doubt");
        IList<CoreLabel> target = (IList<CoreLabel>)tokenMatcher.GroupNodes("$target");
        foreach (CoreLabel doubtToken in doubt)
        {
            // NOTE(review): the argument order is presumably (operator, quantifier begin/end,
            // subject begin/end, object begin/end, sentence length), giving an empty object
            // span -- confirm against the OperatorSpec constructor.
            OperatorSpec spec = new OperatorSpec(
                Operator.GeneralNegPolarity,
                doubtToken.Index() - 1,
                doubtToken.Index(),
                target[0].Index() - 1,
                target[target.Count - 1].Index(),
                0,
                0,
                tokens.Count);
            doubtToken.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), spec);
        }
    }
}
/// <summary>Annotate every token for its polarity, based on the operators found.</summary>
/// <remarks>
/// This function sets the
/// <see cref="PolarityAnnotation"/>
/// for every token of the sentence (and a default polarity on every vertex of each dependency
/// graph attached to the sentence), then derives the <c>PolarityDirectionAnnotation</c>
/// ("up", "down" or "flat") from it.
/// </remarks>
/// <param name="sentence">
/// As in
/// <see cref="DoOneSentence(Edu.Stanford.Nlp.Pipeline.Annotation, Edu.Stanford.Nlp.Util.ICoreMap)"/>
/// </param>
private static void AnnotatePolarity(ICoreMap sentence)
{
    // Collect all the operators in this sentence
    IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
    IList<OperatorSpec> operators = new List<OperatorSpec>();
    foreach (CoreLabel candidate in tokens)
    {
        OperatorSpec specOrNull = candidate.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
        if (specOrNull != null)
        {
            operators.Add(specOrNull);
        }
    }

    // Make sure every node of the dependency tree has a polarity.
    // This is separate from the code below in case the tokens in the dependency
    // tree don't correspond to the tokens in the sentence. This happens at least
    // when the constituency parser fails on a long sentence, and the
    // dependency tree is put together haphazardly.
    SetDefaultPolarityOnVertices(sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation)));
    SetDefaultPolarityOnVertices(sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation)));
    SetDefaultPolarityOnVertices(sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation)));

    // Set polarity for each token
    for (int position = 0; position < tokens.Count; ++position)
    {
        // Get operators in scope: (argument span size, monotonicity, monotonicity type).
        // A token inside an operator's subject span takes the subject monotonicity; otherwise,
        // if it is inside the object span, it takes the object monotonicity.
        List<Triple<int, Monotonicity, MonotonicityType>> inScope = new List<Triple<int, Monotonicity, MonotonicityType>>(4);
        foreach (OperatorSpec op in operators)
        {
            if (position >= op.subjectBegin && position < op.subjectEnd)
            {
                inScope.Add(Triple.MakeTriple(op.subjectEnd - op.subjectBegin, op.instance.subjMono, op.instance.subjType));
            }
            else if (position >= op.objectBegin && position < op.objectEnd)
            {
                inScope.Add(Triple.MakeTriple(op.objectEnd - op.objectBegin, op.instance.objMono, op.instance.objType));
            }
        }

        // Sort the operators by their scope (approximated by the size of their argument span).
        // The ordering is spelled out explicitly -- widest span first -- and is stable, so
        // operators with equally sized spans keep the order in which they appear in the sentence.
        // NOTE(review): widest-first is assumed to be the order the Polarity constructor
        // expects (outermost operator first) -- confirm against Polarity.
        IEnumerable<Triple<int, Monotonicity, MonotonicityType>> widestFirst =
            System.Linq.Enumerable.OrderByDescending(inScope, term => term.first);

        // Create polarity
        IList<Pair<Monotonicity, MonotonicityType>> info = new List<Pair<Monotonicity, MonotonicityType>>(inScope.Count);
        foreach (Triple<int, Monotonicity, MonotonicityType> term in widestFirst)
        {
            info.Add(Pair.MakePair(term.second, term.third));
        }

        // Set polarity
        tokens[position].Set(typeof(NaturalLogicAnnotations.PolarityAnnotation), new Polarity(info));
    }

    // Set the PolarityDirectionAnnotation
    foreach (CoreLabel annotated in tokens)
    {
        Polarity polarity = annotated.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation));
        if (polarity == null)
        {
            continue;
        }

        string direction;
        if (polarity.IsUpwards())
        {
            direction = "up";
        }
        else if (polarity.IsDownwards())
        {
            direction = "down";
        }
        else
        {
            direction = "flat";
        }
        annotated.Set(typeof(NaturalLogicAnnotations.PolarityDirectionAnnotation), direction);
    }
}

/// <summary>
/// Set <c>Polarity.Default</c> as the <see cref="PolarityAnnotation"/> of every vertex of the
/// given dependency graph.
/// </summary>
/// <param name="graph">The graph to reset; a null graph (annotation absent) is skipped.</param>
private static void SetDefaultPolarityOnVertices(SemanticGraph graph)
{
    if (graph == null)
    {
        return;
    }
    foreach (IndexedWord vertex in graph.VertexSet())
    {
        vertex.Set(typeof(NaturalLogicAnnotations.PolarityAnnotation), Polarity.Default);
    }
}
/// <summary>
/// Find the operators in this sentence, annotating the head word (only!) of each operator with the
/// <see cref="OperatorAnnotation"/>
/// .
/// </summary>
/// <remarks>
/// Every pattern in <c>Patterns</c> is matched against the dependency graph; each validated match
/// sets (or merges into) the operator annotation on the quantifier's token. A final clean-up pass
/// drops a sentence-initial "no" that is followed by a pronoun, and removes operator annotations
/// from every non-head token that lies inside some operator's quantifier span.
/// </remarks>
/// <param name="sentence">
/// As in
/// <see cref="DoOneSentence(Edu.Stanford.Nlp.Pipeline.Annotation, Edu.Stanford.Nlp.Util.ICoreMap)"/>
/// </param>
private void AnnotateOperators(ICoreMap sentence)
{
    // Prefer the basic dependencies; fall back to the enhanced ones when they are absent.
    SemanticGraph tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    if (tree == null)
    {
        tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    }
    IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

    foreach (SemgrexPattern pattern in Patterns)
    {
        SemgrexMatcher matcher = pattern.Matcher(tree);
        while (matcher.Find())
        {
            // Get terms. A match that binds "Subject" is a named-entity match: that one node
            // acts as both quantifier and subject.
            IndexedWord properSubject = matcher.GetNode("Subject");
            bool namedEntityQuantifier = properSubject != null;
            IndexedWord quantifier;
            IndexedWord subject;
            if (namedEntityQuantifier)
            {
                quantifier = properSubject;
                subject = properSubject;
            }
            else
            {
                quantifier = matcher.GetNode("quantifier");
                subject = matcher.GetNode("subject");
            }
            IndexedWord objectNode = matcher.GetNode("object");

            // Validate quantifier
            Optional<Triple<Operator, int, int>> quantifierInfo;
            if (namedEntityQuantifier)
            {
                // named entities have the "all" semantics by default.
                if (!neQuantifiers)
                {
                    continue;
                }
                // note: empty quantifier span given
                quantifierInfo = Optional.Of(Triple.MakeTriple(Operator.ImplicitNamedEntity, quantifier.Index(), quantifier.Index()));
            }
            else
            {
                // find the quantifier, and return some info about it.
                quantifierInfo = ValidateQuantifierByHead(sentence, quantifier, objectNode == null || subject == null);
            }

            // Awful hacks to regularize the subject of things like "one of" and "there are"
            // (fix up 'there are'): when the subject is a form of "be" with an expletive
            // dependent, use its nsubj dependent instead (the last one seen, possibly none).
            if (subject != null && "be".Equals(subject.Lemma()))
            {
                bool hasExpl = false;
                IndexedWord newSubject = null;
                foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                {
                    string relation = outgoingEdge.GetRelation().ToString();
                    if ("nsubj".Equals(relation))
                    {
                        newSubject = outgoingEdge.GetDependent();
                    }
                    else if ("expl".Equals(relation))
                    {
                        hasExpl = true;
                    }
                }
                if (hasExpl)
                {
                    subject = newSubject;
                }
            }

            // (fix up '$n$ of'): a numeric subject is replaced by its nmod dependent
            // (the last one seen). The edges iterated are those of the original subject.
            if (subject != null && "CD".Equals(subject.Tag()))
            {
                foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                {
                    string rel = outgoingEdge.GetRelation().ToString();
                    // Relation names are machine identifiers: compare ordinally, not by culture.
                    if (rel.StartsWith("nmod", System.StringComparison.Ordinal))
                    {
                        subject = outgoingEdge.GetDependent();
                    }
                }
            }

            // Set tokens
            if (!quantifierInfo.IsPresent())
            {
                continue;
            }

            // Compute span; the object stands in as the pivot when the pattern binds none.
            IndexedWord pivot = matcher.GetNode("pivot");
            if (pivot == null)
            {
                pivot = objectNode;
            }
            Triple<Operator, int, int> found = quantifierInfo.Get();
            OperatorSpec scope = ComputeScope(
                tree,
                found.first,
                pivot,
                Pair.MakePair(found.second, found.third),
                subject,
                namedEntityQuantifier,
                objectNode,
                tokens.Count);

            // Set annotation: replace the old operator when there is none, when the new
            // quantifier is longer, or when it is a different operator; otherwise merge.
            CoreLabel token = tokens[quantifier.Index() - 1];
            OperatorSpec oldScope = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
            bool replace = oldScope == null
                || oldScope.QuantifierLength() < scope.QuantifierLength()
                || oldScope.instance != scope.instance;
            token.Set(
                typeof(NaturalLogicAnnotations.OperatorAnnotation),
                replace ? scope : OperatorSpec.Merge(oldScope, scope));
        }
    }

    // Ensure we didn't select overlapping quantifiers. For example, "a" and "a few" can often overlap.
    // In these cases, take the longer quantifier match.
    List<OperatorSpec> quantifiers = new List<OperatorSpec>();
    for (int i = 0; i < tokens.Count; ++i)
    {
        CoreLabel token = tokens[i];
        OperatorSpec spec = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
        if (spec == null)
        {
            continue;
        }

        bool sentenceInitialNoBeforePronoun = i == 0
            && spec.instance == Operator.No
            && tokens.Count > 2
            && "PRP".Equals(tokens[1].Get(typeof(CoreAnnotations.PartOfSpeechAnnotation)));
        if (sentenceInitialNoBeforePronoun)
        {
            // This is pragmatically not a negation -- ignore it
            // For example, "no I don't like candy" or "no you like cats"
            token.Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
        }
        else
        {
            quantifiers.Add(spec);
        }
    }

    // Longest quantifier first, stated explicitly rather than left to a default comparer.
    // The removal pass below is unconditional, so it yields the same set of removals for any
    // processing order; the ordering only mirrors the stated intent.
    quantifiers.Sort((left, right) => right.QuantifierLength().CompareTo(left.QuantifierLength()));
    foreach (OperatorSpec kept in quantifiers)
    {
        for (int covered = kept.quantifierBegin; covered < kept.quantifierEnd; ++covered)
        {
            if (covered != kept.quantifierHead)
            {
                tokens[covered].Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
            }
        }
    }
}