/// <summary>
/// Exercises how <c>CoreLabel.SetWord</c> interacts with the lemma annotation, and checks that
/// equality and hash codes of a <c>CoreLabel</c> agree with an <c>ArrayCoreMap</c> copy of it.
/// </summary>
public virtual void TestCoreLabelSetWordBehavior()
{
    System.Type lemmaKey = typeof(CoreAnnotations.LemmaAnnotation);

    CoreLabel label = new CoreLabel();
    label.Set(typeof(CoreAnnotations.TextAnnotation), "foo");
    label.Set(typeof(CoreAnnotations.PartOfSpeechAnnotation), "B");
    label.Set(lemmaKey, "fool");

    // Lemma gets removed with word
    ArrayCoreMap snapshot = new ArrayCoreMap(label);
    NUnit.Framework.Assert.AreEqual(snapshot, label);

    // Setting the same word again leaves the label equal to the snapshot.
    label.SetWord("foo");
    NUnit.Framework.Assert.AreEqual(snapshot, label);

    // Setting a different word drops the lemma, so the label no longer matches.
    label.SetWord("bar");
    NUnit.Framework.Assert.IsFalse(snapshot.Equals(label));

    // Restoring the word does not bring the lemma back.
    label.SetWord("foo");
    NUnit.Framework.Assert.IsFalse(snapshot.Equals(label));

    // Restoring the lemma by hand makes the two equal again.
    label.Set(lemmaKey, "fool");
    NUnit.Framework.Assert.AreEqual(snapshot, label);

    // Hash code is consistent
    int fullHash = label.GetHashCode();
    NUnit.Framework.Assert.AreEqual(snapshot.GetHashCode(), fullHash);
    label.SetWord("bar");
    NUnit.Framework.Assert.AreNotEqual(fullHash, label.GetHashCode());
    label.SetWord("foo");
    NUnit.Framework.Assert.AreNotEqual(fullHash, label.GetHashCode());

    // Hash code doesn't care between a value of null and the key not existing
    NUnit.Framework.Assert.IsNull(label.Lemma());
    int hashWithoutLemma = label.GetHashCode();
    label.Remove(lemmaKey);
    NUnit.Framework.Assert.AreEqual(hashWithoutLemma, label.GetHashCode());
    label.SetLemma(null);
    NUnit.Framework.Assert.AreEqual(hashWithoutLemma, label.GetHashCode());
    label.SetLemma("fool");
    NUnit.Framework.Assert.AreEqual(fullHash, label.GetHashCode());

    // Check equals
    label.SetWord("bar");
    label.SetWord("foo");
    ArrayCoreMap lemmalessSnapshot = new ArrayCoreMap(label);
    NUnit.Framework.Assert.AreEqual(lemmalessSnapshot, label);
    label.Remove(lemmaKey);
    NUnit.Framework.Assert.AreEqual(lemmalessSnapshot, label);
}
/// <summary>
/// Runs the sentiment model over every sentence of <paramref name="annotation"/> and stores the
/// results on the sentence and, when a parse tree is present, on the labels of that tree.
/// </summary>
/// <remarks>
/// For each sentence the binarized tree is transformed by <c>transformer</c>, forward-propagated
/// through a fresh <c>SentimentCostAndGradient</c>, and stored under
/// <c>SentimentCoreAnnotations.SentimentAnnotatedTree</c>; the predicted class of the root is stored
/// (as a string) under <c>SentimentCoreAnnotations.SentimentClass</c>. If the sentence also has a
/// <c>TreeCoreAnnotations.TreeAnnotation</c>, sentiment strings are copied onto the nodes of that
/// tree whose span matches a span of the sentiment tree.
/// </remarks>
/// <param name="annotation">The document annotation; must contain <c>SentencesAnnotation</c>.</param>
/// <exception cref="Exception">The annotation has no <c>SentencesAnnotation</c>.</exception>
/// <exception cref="InvalidOperationException">
/// The root label of the sentence's parse tree already carries a <c>SpanAnnotation</c>.
/// </exception>
public virtual void Annotate(Annotation annotation)
{
    if (!annotation.ContainsKey(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        throw new Exception("unable to find sentences in: " + annotation);
    }

    // TODO: parallelize
    IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
    foreach (ICoreMap sentence in sentences)
    {
        Tree binarized = sentence.Get(typeof(TreeCoreAnnotations.BinarizedTreeAnnotation));
        if (binarized == null)
        {
            throw new AssertionError("Binarized sentences not built by parser");
        }

        Tree collapsedUnary = transformer.TransformTree(binarized);
        SentimentCostAndGradient scorer = new SentimentCostAndGradient(model, null);
        scorer.ForwardPropagateTree(collapsedUnary);
        sentence.Set(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree), collapsedUnary);

        int sentiment = RNNCoreAnnotations.GetPredictedClass(collapsedUnary);
        sentence.Set(typeof(SentimentCoreAnnotations.SentimentClass), SentimentUtils.SentimentString(model, sentiment));

        Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
        if (tree == null)
        {
            continue;
        }

        collapsedUnary.SetSpans();
        // map the sentiment annotations onto the tree
        IDictionary <IntPair, string> spanSentiment = Generics.NewHashMap();
        foreach (Tree bt in collapsedUnary)
        {
            IntPair span = bt.GetSpan();
            int sen = RNNCoreAnnotations.GetPredictedClass(bt);
            string sentStr = SentimentUtils.SentimentString(model, sen);
            // we'll take the first = highest one discovered
            if (!spanSentiment.ContainsKey(span))
            {
                spanSentiment[span] = sentStr;
            }
        }

        if (((CoreLabel)tree.Label()).ContainsKey(typeof(CoreAnnotations.SpanAnnotation)))
        {
            throw new InvalidOperationException("This code assumes you don't have SpanAnnotation");
        }

        tree.SetSpans();
        foreach (Tree t in tree)
        {
            IntPair span = t.GetSpan();
            // The dictionary indexer throws on a missing key, so probe with TryGetValue:
            // parse-tree spans with no counterpart in the sentiment tree are simply skipped.
            string str;
            if (spanSentiment.TryGetValue(span, out str) && str != null)
            {
                CoreLabel cl = (CoreLabel)t.Label();
                cl.Set(typeof(SentimentCoreAnnotations.SentimentClass), str);
                cl.Remove(typeof(CoreAnnotations.SpanAnnotation));
            }
        }
    }
}
/// <summary>
/// Handles contractions like del and al, marked by the lexer
/// del => de + l => de + el
/// al => a + l => a + el
/// con[mts]igo => con + [mts]i
/// </summary>
/// <remarks>
/// The second piece is appended to <c>compoundBuffer</c>; the first piece is returned. Both are
/// produced by <c>CopyCoreLabel</c> from <paramref name="cl"/>, whose <c>ParentAnnotation</c> is
/// removed first. The contraction is matched case-insensitively using the invariant culture, so
/// the result does not depend on the current thread culture.
/// </remarks>
/// <param name="cl">The label holding the contraction; its word must not be null.</param>
/// <returns>The label for the first piece of the contraction.</returns>
/// <exception cref="ArgumentException">The word is not one of the handled contractions.</exception>
private CoreLabel ProcessContraction(CoreLabel cl)
{
    cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
    string word = cl.Word();
    string first;
    string second;
    int secondOffset = 0;
    int secondLength = 0;

    // Culture-invariant lowering: under e.g. a Turkish culture "CONMIGO".ToLower() yields a
    // dotless i and would fall through to the default case.
    string lowered = word.ToLowerInvariant();
    switch (lowered)
    {
        case "del":
        case "al":
        {
            first = Sharpen.Runtime.Substring(word, 0, lowered.Length - 1);
            // Mirror the case of the contraction's final letter in the expanded article.
            char lastChar = word[lowered.Length - 1];
            second = char.IsLower(lastChar) ? "el" : "EL";
            secondOffset = 1;
            secondLength = lowered.Length - 1;
            break;
        }

        case "conmigo":
        case "consigo":
        {
            first = Sharpen.Runtime.Substring(word, 0, 3);
            // Keeps the original m/s and appends an accented i.
            second = word[3] + "í";
            secondOffset = 3;
            secondLength = 4;
            break;
        }

        case "contigo":
        {
            first = Sharpen.Runtime.Substring(word, 0, 3);
            second = Sharpen.Runtime.Substring(word, 3, 5);
            secondOffset = 3;
            secondLength = 4;
            break;
        }

        default:
        {
            throw new ArgumentException("Invalid contraction provided to processContraction");
        }
    }

    int secondStart = cl.BeginPosition() + secondOffset;
    int secondEnd = secondStart + secondLength;
    compoundBuffer.Add(CopyCoreLabel(cl, second, secondStart, secondEnd));
    return CopyCoreLabel(cl, first, cl.BeginPosition(), secondStart);
}
/// <summary>Splits a contraction marked by the lexer.</summary>
/// <remarks>
/// Handled contractions (matched on the lower-cased word):
/// au => à + le
/// aux => à + les
/// du => de + le
/// Any other word, including "des", is rejected with an <see cref="ArgumentException"/>.
/// The second piece is appended to <c>compoundBuffer</c> and the first piece is returned; both
/// are built with <c>CopyCoreLabel</c> from <paramref name="cl"/>, whose <c>ParentAnnotation</c>
/// is removed first.
/// </remarks>
/// <param name="cl">The label holding the contraction.</param>
/// <returns>The label for the first piece of the contraction.</returns>
/// <exception cref="ArgumentException">The word is not one of the handled contractions.</exception>
private CoreLabel ProcessContraction(CoreLabel cl)
{
    cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
    string key = cl.Word().ToLower();

    string head;
    string tail;
    int tailLength;
    if (key == "au")
    {
        head = "à";
        tail = "le";
        tailLength = 1;
    }
    else if (key == "aux")
    {
        head = "à";
        tail = "les";
        tailLength = 2;
    }
    else if (key == "du")
    {
        head = "de";
        tail = "le";
        tailLength = 1;
    }
    else
    {
        throw new ArgumentException("Invalid contraction provided to processContraction");
    }

    // In every handled contraction the second piece starts one character into the token.
    int tailBegin = cl.BeginPosition() + 1;
    int tailEnd = tailBegin + tailLength;
    compoundBuffer.Add(CopyCoreLabel(cl, tail, tailBegin, tailEnd));
    return CopyCoreLabel(cl, head, cl.BeginPosition(), tailBegin);
}
/// <summary>Splits a compound marked by the lexer.</summary>
/// <remarks>
/// Every "-" in the word is padded with spaces and the result is split, so the hyphens become
/// pieces of their own. Each piece gets a copy of <paramref name="cl"/> (after its
/// <c>ParentAnnotation</c> has been removed) with word, value and original text set to the piece;
/// the copies are appended to <c>compoundBuffer</c>. Character offsets are not adjusted here.
/// NOTE(review): <c>compoundBuffer.Remove(0)</c> is presumably the Java-style "remove and return
/// the element at index 0" -- confirm against the helper in use.
/// </remarks>
/// <param name="cl">The label holding the compound word.</param>
/// <returns>The value of <c>compoundBuffer.Remove(0)</c>.</returns>
private CoreLabel ProcessCompound(CoreLabel cl)
{
    cl.Remove(typeof(CoreAnnotations.ParentAnnotation));

    string spaced = cl.Word().ReplaceAll("-", " - ");
    string[] pieces = spaced.Split("\\s+");
    for (int idx = 0; idx < pieces.Length; idx++)
    {
        string piece = pieces[idx];
        CoreLabel pieceLabel = new CoreLabel(cl);
        pieceLabel.SetWord(piece);
        pieceLabel.SetValue(piece);
        pieceLabel.Set(typeof(CoreAnnotations.OriginalTextAnnotation), piece);
        compoundBuffer.Add(pieceLabel);
    }

    return compoundBuffer.Remove(0);
}
/// <summary>Splits a compound marked by the lexer.</summary>
/// <remarks>
/// Matches of <c>pDash</c> in the word are replaced with " - " and the result is split with
/// <c>pSpace</c>. Each piece gets a copy of <paramref name="cl"/> (after its
/// <c>ParentAnnotation</c> has been removed) with word, value and original text set to the piece.
/// Begin and end positions are laid out back to back, starting at the begin position of
/// <paramref name="cl"/> and advancing by each piece's length. The copies are appended to
/// <c>compoundBuffer</c>.
/// NOTE(review): <c>compoundBuffer.Remove(0)</c> is presumably the Java-style "remove and return
/// the element at index 0" -- confirm against the helper in use.
/// </remarks>
/// <param name="cl">The label holding the compound word.</param>
/// <returns>The value of <c>compoundBuffer.Remove(0)</c>.</returns>
private CoreLabel ProcessCompound(CoreLabel cl)
{
    cl.Remove(typeof(CoreAnnotations.ParentAnnotation));

    string spaced = pDash.Matcher(cl.Word()).ReplaceAll(" - ");
    string[] pieces = pSpace.Split(spaced);

    // Number of characters already assigned to earlier pieces.
    int consumed = 0;
    foreach (string piece in pieces)
    {
        CoreLabel pieceLabel = new CoreLabel(cl);
        pieceLabel.SetWord(piece);
        pieceLabel.SetValue(piece);

        int pieceBegin = cl.BeginPosition() + consumed;
        int pieceEnd = pieceBegin + piece.Length;
        pieceLabel.SetBeginPosition(pieceBegin);
        pieceLabel.SetEndPosition(pieceEnd);

        pieceLabel.Set(typeof(CoreAnnotations.OriginalTextAnnotation), piece);
        compoundBuffer.Add(pieceLabel);
        consumed += piece.Length;
    }

    return compoundBuffer.Remove(0);
}
/// <summary>
/// Handles verbs with attached suffixes, marked by the lexer:
/// Escribamosela => Escribamo + se + la => escribamos + se + la
/// Sentaos => senta + os => sentad + os
/// Damelo => da + me + lo
/// </summary>
/// <remarks>
/// The <c>ParentAnnotation</c> of <paramref name="cl"/> is removed first. If
/// <c>verbStripper.SeparatePronouns</c> returns null the label is returned as is. Otherwise one
/// label per pronoun is appended to <c>compoundBuffer</c>, each beginning where the previous piece
/// ended, and a label for the stem is returned with its original text set to the original stem.
/// </remarks>
/// <param name="cl">The label holding the verb.</param>
/// <returns>The stem label, or <paramref name="cl"/> itself when nothing was stripped.</returns>
private CoreLabel ProcessVerb(CoreLabel cl)
{
    cl.Remove(typeof(CoreAnnotations.ParentAnnotation));

    SpanishVerbStripper.StrippedVerb pieces = verbStripper.SeparatePronouns(cl.Word());
    if (pieces == null)
    {
        return cl;
    }

    // The stem occupies the first GetOriginalStem().Length characters of the token;
    // the pronouns follow it back to back.
    int stemEnd = cl.BeginPosition() + pieces.GetOriginalStem().Length;
    int pronounBegin = stemEnd;
    foreach (string pronoun in pieces.GetPronouns())
    {
        compoundBuffer.Add(CopyCoreLabel(cl, pronoun, pronounBegin));
        pronounBegin += pronoun.Length;
    }

    CoreLabel stemLabel = CopyCoreLabel(cl, pieces.GetStem(), cl.BeginPosition(), stemEnd);
    stemLabel.SetOriginalText(pieces.GetOriginalStem());
    return stemLabel;
}
/// <summary>
/// Find the operators in this sentence, annotating the head word (only!) of each operator with the
/// <see cref="OperatorAnnotation"/>.
/// </summary>
/// <remarks>
/// Works in two passes. First, every pattern in <c>Patterns</c> is matched against the sentence's
/// dependency graph (basic dependencies, falling back to enhanced dependencies when the basic ones
/// are absent); for each match with a valid quantifier an <c>OperatorSpec</c> is computed and
/// stored on the quantifier's token. Second, the tokens are scanned to drop a sentence-initial
/// "no" that is followed by a pronoun, and the annotation is removed from every token inside a
/// quantifier's span other than its head.
/// </remarks>
/// <param name="sentence">
/// As in
/// <see cref="DoOneSentence(Edu.Stanford.Nlp.Pipeline.Annotation, Edu.Stanford.Nlp.Util.ICoreMap)"/>
/// </param>
private void AnnotateOperators(ICoreMap sentence)
{
    SemanticGraph tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
    IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
    // Fall back to the enhanced dependencies when no basic dependency graph is attached.
    // NOTE(review): if neither annotation is present, tree stays null and is passed to Matcher below.
    if (tree == null)
    {
        tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
    }
    foreach (SemgrexPattern pattern in Patterns)
    {
        SemgrexMatcher matcher = pattern.Matcher(tree);
        while (matcher.Find())
        {
            // Get terms
            // A match on the "Subject" node is treated as a named-entity quantifier: the same node
            // serves as both quantifier and subject.
            IndexedWord properSubject = matcher.GetNode("Subject");
            IndexedWord quantifier;
            IndexedWord subject;
            bool namedEntityQuantifier = false;
            if (properSubject != null)
            {
                quantifier = subject = properSubject;
                namedEntityQuantifier = true;
            }
            else
            {
                quantifier = matcher.GetNode("quantifier");
                subject = matcher.GetNode("subject");
            }
            IndexedWord @object = matcher.GetNode("object");
            // Validate quantifier
            // At the end of this, quantifierInfo holds a triple whose first element is the operator
            // and whose second and third elements are passed on to ComputeScope as a pair.
            Optional <Triple <Operator, int, int> > quantifierInfo;
            if (namedEntityQuantifier)
            {
                // named entities have the "all" semantics by default.
                if (!neQuantifiers)
                {
                    continue;
                }
                // note: empty quantifier span given
                quantifierInfo = Optional.Of(Triple.MakeTriple(Operator.ImplicitNamedEntity, quantifier.Index(), quantifier.Index()));
            }
            else
            {
                // find the quantifier, and return some info about it.
                quantifierInfo = ValidateQuantifierByHead(sentence, quantifier, @object == null || subject == null);
            }
            // Awful hacks to regularize the subject of things like "one of" and "there are"
            // (fix up 'there are')
            // When the subject's lemma is "be" and it has an "expl" dependent, the subject becomes
            // the "nsubj" dependent (which may be null if no such edge was seen).
            if ("be".Equals(subject == null ? null : subject.Lemma()))
            {
                bool hasExpl = false;
                IndexedWord newSubject = null;
                foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                {
                    if ("nsubj".Equals(outgoingEdge.GetRelation().ToString()))
                    {
                        newSubject = outgoingEdge.GetDependent();
                    }
                    else
                    {
                        if ("expl".Equals(outgoingEdge.GetRelation().ToString()))
                        {
                            hasExpl = true;
                        }
                    }
                }
                if (hasExpl)
                {
                    subject = newSubject;
                }
            }
            // (fix up '$n$ of')
            // When the subject is tagged "CD", it is replaced by a dependent reached through a
            // relation whose name starts with "nmod".
            // NOTE(review): subject is reassigned while the loop iterates over the edges obtained for
            // the original subject; the last matching edge wins.
            if ("CD".Equals(subject == null ? null : subject.Tag()))
            {
                foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                {
                    string rel = outgoingEdge.GetRelation().ToString();
                    if (rel.StartsWith("nmod"))
                    {
                        subject = outgoingEdge.GetDependent();
                    }
                }
            }
            // Set tokens
            if (quantifierInfo.IsPresent())
            {
                // Compute span
                // The "pivot" node is used when the pattern names one; otherwise the object stands in.
                IndexedWord pivot = matcher.GetNode("pivot");
                if (pivot == null)
                {
                    pivot = @object;
                }
                OperatorSpec scope = ComputeScope(tree, quantifierInfo.Get().first, pivot, Pair.MakePair(quantifierInfo.Get().second, quantifierInfo.Get().third), subject, namedEntityQuantifier, @object, tokens.Count);
                // Set annotation
                // NOTE(review): Index() - 1 suggests IndexedWord indices are 1-based -- confirm.
                CoreLabel token = sentence.Get(typeof(CoreAnnotations.TokensAnnotation))[quantifier.Index() - 1];
                OperatorSpec oldScope = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                // Overwrite when there is no previous spec, when the new quantifier is longer, or when
                // the operator instance differs; otherwise merge the old and new specs.
                if (oldScope == null || oldScope.QuantifierLength() < scope.QuantifierLength() || oldScope.instance != scope.instance)
                {
                    token.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), scope);
                }
                else
                {
                    token.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), OperatorSpec.Merge(oldScope, scope));
                }
            }
        }
    }
    // Ensure we didn't select overlapping quantifiers. For example, "a" and "a few" can often overlap.
    // In these cases, take the longer quantifier match.
    IList <OperatorSpec> quantifiers = new List <OperatorSpec>();
    for (int i = 0; i < tokens.Count; ++i)
    {
        CoreLabel token = tokens[i];
        OperatorSpec @operator;
        if ((@operator = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation))) != null)
        {
            if (i == 0 && @operator.instance == Operator.No && tokens.Count > 2 && "PRP".Equals(tokens[1].Get(typeof(CoreAnnotations.PartOfSpeechAnnotation))))
            {
                // This is pragmatically not a negation -- ignore it
                // For example, "no I don't like candy" or "no you like cats"
                token.Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
            }
            else
            {
                quantifiers.Add(@operator);
            }
        }
    }
    // NOTE(review): the resulting order depends on how OperatorSpec compares, which is not visible here.
    quantifiers.Sort(null);
    // Within each quantifier's [quantifierBegin, quantifierEnd) range, strip the annotation from
    // every token except the quantifier's head.
    foreach (OperatorSpec quantifier_1 in quantifiers)
    {
        for (int i_1 = quantifier_1.quantifierBegin; i_1 < quantifier_1.quantifierEnd; ++i_1)
        {
            if (i_1 != quantifier_1.quantifierHead)
            {
                tokens[i_1].Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
            }
        }
    }
}