コード例 #1
0
        /// <summary>
        /// Exercises how <c>CoreLabel.SetWord</c> interacts with the lemma annotation,
        /// checking equality against <c>ArrayCoreMap</c> snapshots and hash-code stability.
        /// </summary>
        public virtual void TestCoreLabelSetWordBehavior()
        {
            CoreLabel label = new CoreLabel();

            label.Set(typeof(CoreAnnotations.TextAnnotation), "foo");
            label.Set(typeof(CoreAnnotations.PartOfSpeechAnnotation), "B");
            // The lemma is expected to be dropped when the word changes (asserted below).
            label.Set(typeof(CoreAnnotations.LemmaAnnotation), "fool");
            ArrayCoreMap snapshot = new ArrayCoreMap(label);

            NUnit.Framework.Assert.AreEqual(snapshot, label);

            // Setting the same word again leaves the label equal to the snapshot.
            label.SetWord("foo");
            NUnit.Framework.Assert.AreEqual(snapshot, label);

            // Setting a different word makes the label differ (lemma removed).
            label.SetWord("bar");
            NUnit.Framework.Assert.IsFalse(snapshot.Equals(label));

            // Restoring the word alone does not restore equality (lemma still removed).
            label.SetWord("foo");
            NUnit.Framework.Assert.IsFalse(snapshot.Equals(label));

            // Putting the lemma back brings the label back to the snapshot state.
            label.Set(typeof(CoreAnnotations.LemmaAnnotation), "fool");
            NUnit.Framework.Assert.AreEqual(snapshot, label);

            // Hash code is consistent with the snapshot and changes along with the content.
            int originalHash = label.GetHashCode();

            NUnit.Framework.Assert.AreEqual(snapshot.GetHashCode(), originalHash);
            label.SetWord("bar");
            NUnit.Framework.Assert.AreNotEqual(originalHash, label.GetHashCode());
            label.SetWord("foo");
            NUnit.Framework.Assert.AreNotEqual(originalHash, label.GetHashCode());

            // Hash code doesn't care between a value of null and the key not existing.
            NUnit.Framework.Assert.IsNull(label.Lemma());
            int hashWithoutLemma = label.GetHashCode();

            label.Remove(typeof(CoreAnnotations.LemmaAnnotation));
            NUnit.Framework.Assert.AreEqual(hashWithoutLemma, label.GetHashCode());
            label.SetLemma(null);
            NUnit.Framework.Assert.AreEqual(hashWithoutLemma, label.GetHashCode());
            label.SetLemma("fool");
            NUnit.Framework.Assert.AreEqual(originalHash, label.GetHashCode());

            // Equality after the lemma has been dropped: removing the lemma key explicitly
            // must still compare equal to a snapshot taken beforehand.
            label.SetWord("bar");
            label.SetWord("foo");
            ArrayCoreMap lemmalessSnapshot = new ArrayCoreMap(label);

            NUnit.Framework.Assert.AreEqual(lemmalessSnapshot, label);
            label.Remove(typeof(CoreAnnotations.LemmaAnnotation));
            NUnit.Framework.Assert.AreEqual(lemmalessSnapshot, label);
        }
コード例 #2
0
 /// <summary>
 /// Scores every sentence of <paramref name="annotation"/> with the sentiment model.
 /// For each sentence the binarized parse is transformed, forward-propagated, and stored
 /// under <c>SentimentAnnotatedTree</c>; the predicted class string is stored under
 /// <c>SentimentClass</c>. When the sentence also carries a regular parse tree
 /// (<c>TreeAnnotation</c>), the per-span sentiment strings are copied onto the labels of
 /// the tree nodes that have the same span.
 /// </summary>
 /// <param name="annotation">Annotation that must contain <c>SentencesAnnotation</c>.</param>
 /// <exception cref="Exception">The annotation has no sentences.</exception>
 /// <exception cref="InvalidOperationException">
 /// The root label of the parse tree already carries a <c>SpanAnnotation</c>.
 /// </exception>
 public virtual void Annotate(Annotation annotation)
 {
     if (!annotation.ContainsKey(typeof(CoreAnnotations.SentencesAnnotation)))
     {
         throw new Exception("unable to find sentences in: " + annotation);
     }
     // TODO: parallelize
     IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
     foreach (ICoreMap sentence in sentences)
     {
         Tree binarized = sentence.Get(typeof(TreeCoreAnnotations.BinarizedTreeAnnotation));
         if (binarized == null)
         {
             throw new AssertionError("Binarized sentences not built by parser");
         }
         Tree collapsedUnary             = transformer.TransformTree(binarized);
         SentimentCostAndGradient scorer = new SentimentCostAndGradient(model, null);
         scorer.ForwardPropagateTree(collapsedUnary);
         sentence.Set(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree), collapsedUnary);
         int sentiment = RNNCoreAnnotations.GetPredictedClass(collapsedUnary);
         sentence.Set(typeof(SentimentCoreAnnotations.SentimentClass), SentimentUtils.SentimentString(model, sentiment));
         Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
         if (tree == null)
         {
             continue;
         }
         collapsedUnary.SetSpans();
         // map the sentiment annotations onto the tree
         IDictionary <IntPair, string> spanSentiment = Generics.NewHashMap();
         foreach (Tree bt in collapsedUnary)
         {
             IntPair p       = bt.GetSpan();
             int     sen     = RNNCoreAnnotations.GetPredictedClass(bt);
             string  sentStr = SentimentUtils.SentimentString(model, sen);
             // Key lookup (not KeyValuePair lookup): keep only the first entry per span.
             if (!spanSentiment.ContainsKey(p))
             {
                 // we'll take the first = highest one discovered
                 spanSentiment[p] = sentStr;
             }
         }
         if (((CoreLabel)tree.Label()).ContainsKey(typeof(CoreAnnotations.SpanAnnotation)))
         {
             throw new InvalidOperationException("This code assumes you don't have SpanAnnotation");
         }
         tree.SetSpans();
         foreach (Tree t in tree)
         {
             IntPair p = t.GetSpan();
             string  str;
             // The dictionary indexer throws on a missing key, so spans that have no
             // counterpart in the sentiment tree must be probed with TryGetValue.
             if (spanSentiment.TryGetValue(p, out str) && str != null)
             {
                 CoreLabel cl = (CoreLabel)t.Label();
                 cl.Set(typeof(SentimentCoreAnnotations.SentimentClass), str);
                 cl.Remove(typeof(CoreAnnotations.SpanAnnotation));
             }
         }
     }
 }
コード例 #3
0
        /// <summary>
        /// Handles contractions like del and al, marked by the lexer
        /// del =&gt; de + l =&gt; de + el
        /// al =&gt; a + l =&gt; a + el
        /// con[mts]igo =&gt; con + [mts]i
        /// </summary>
        /// <remarks>
        /// The second part is appended to <c>compoundBuffer</c>; the first part is returned.
        /// The <c>ParentAnnotation</c> is removed from <paramref name="cl"/> before splitting.
        /// </remarks>
        /// <param name="cl">Label whose word is one of the supported contractions (any casing).</param>
        /// <returns>A copy of <paramref name="cl"/> carrying the first part of the contraction.</returns>
        /// <exception cref="ArgumentException">The word is not a supported contraction.</exception>
        private CoreLabel ProcessContraction(CoreLabel cl)
        {
            cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
            string word = cl.Word();
            string first;
            string second;
            int    secondOffset = 0;
            int    secondLength = 0;
            // Invariant casing: culture-sensitive lowering (e.g. Turkish dotless i) would
            // turn "CONMIGO" into a string that matches none of the cases below.
            string lowered = word.ToLowerInvariant();

            switch (lowered)
            {
            case "del":
            case "al":
            {
                // Everything except the trailing 'l' is the preposition.
                first = Sharpen.Runtime.Substring(word, 0, lowered.Length - 1);
                char lastChar = word[lowered.Length - 1];
                // Mirror the casing of the original final letter in the expanded article.
                second       = char.IsLower(lastChar) ? "el" : "EL";
                secondOffset = 1;
                secondLength = lowered.Length - 1;
                break;
            }

            case "conmigo":
            case "consigo":
            {
                first        = Sharpen.Runtime.Substring(word, 0, 3);
                second       = word[3] + "í";
                secondOffset = 3;
                secondLength = 4;
                break;
            }

            case "contigo":
            {
                first        = Sharpen.Runtime.Substring(word, 0, 3);
                second       = Sharpen.Runtime.Substring(word, 3, 5);
                secondOffset = 3;
                secondLength = 4;
                break;
            }

            default:
            {
                throw new ArgumentException("Invalid contraction provided to processContraction");
            }
            }
            int secondStart = cl.BeginPosition() + secondOffset;
            int secondEnd   = secondStart + secondLength;

            compoundBuffer.Add(CopyCoreLabel(cl, second, secondStart, secondEnd));
            return(CopyCoreLabel(cl, first, cl.BeginPosition(), secondStart));
        }
コード例 #4
0
        /// <summary>Splits a contraction marked by the lexer.</summary>
        /// <remarks>
        /// Supported contractions (matched case-insensitively):
        /// au =&gt; a + u =&gt; à + le
        /// aux =&gt; a + ux =&gt; à + les
        /// du =&gt; d + u =&gt; de + le
        /// Any other word, including "des", is rejected with an exception.
        /// The second part is appended to <c>compoundBuffer</c>; the first part is returned.
        /// The <c>ParentAnnotation</c> is removed from <paramref name="cl"/> before splitting.
        /// </remarks>
        /// <param name="cl">Label whose word is one of the supported contractions.</param>
        /// <returns>A copy of <paramref name="cl"/> carrying the first part of the contraction.</returns>
        /// <exception cref="ArgumentException">The word is not a supported contraction.</exception>
        private CoreLabel ProcessContraction(CoreLabel cl)
        {
            cl.Remove(typeof(CoreAnnotations.ParentAnnotation));
            string word = cl.Word();
            string first;
            string second;
            // In every supported contraction the second part starts one character in.
            int secondOffset = 1;
            int secondLength;
            // Invariant casing keeps the match independent of the current culture.
            string lowered = word.ToLowerInvariant();

            switch (lowered)
            {
            case "au":
            {
                first        = "à";
                second       = "le";
                secondLength = 1;
                break;
            }

            case "aux":
            {
                first        = "à";
                second       = "les";
                secondLength = 2;
                break;
            }

            case "du":
            {
                first        = "de";
                second       = "le";
                secondLength = 1;
                break;
            }

            default:
            {
                throw new ArgumentException("Invalid contraction provided to processContraction");
            }
            }
            int secondStart = cl.BeginPosition() + secondOffset;
            int secondEnd   = secondStart + secondLength;

            compoundBuffer.Add(CopyCoreLabel(cl, second, secondStart, secondEnd));
            return(CopyCoreLabel(cl, first, cl.BeginPosition(), secondStart));
        }
コード例 #5
0
 /// <summary>
 /// Splits a hyphenated compound marked by the lexer into one label per piece
 /// (each dash becomes its own piece).
 /// </summary>
 /// <remarks>
 /// Every piece is queued on <c>compoundBuffer</c> as a copy of <paramref name="cl"/> whose
 /// word, value and original text are the piece; the element at index 0 of the buffer is
 /// then removed and returned.
 /// </remarks>
 /// <param name="cl">The compound label; its <c>ParentAnnotation</c> is removed.</param>
 /// <returns>The element removed from the front of <c>compoundBuffer</c>.</returns>
 private CoreLabel ProcessCompound(CoreLabel cl)
 {
     cl.Remove(typeof(CoreAnnotations.ParentAnnotation));

     // NOTE(review): ReplaceAll / Split(string) are presumably the Java-style regex helpers
     // from the port runtime -- confirm "\\s+" is treated as a pattern, not a literal.
     string   spaced = cl.Word().ReplaceAll("-", " - ");
     string[] pieces = spaced.Split("\\s+");

     for (int idx = 0; idx < pieces.Length; idx++)
     {
         string    piece      = pieces[idx];
         CoreLabel pieceLabel = new CoreLabel(cl);
         pieceLabel.SetWord(piece);
         pieceLabel.SetValue(piece);
         pieceLabel.Set(typeof(CoreAnnotations.OriginalTextAnnotation), piece);
         compoundBuffer.Add(pieceLabel);
     }
     return compoundBuffer.Remove(0);
 }
コード例 #6
0
        /// <summary>
        /// Splits a compound marked by the lexer into one label per piece, assigning each
        /// piece consecutive begin/end positions starting at the compound's begin position.
        /// </summary>
        /// <remarks>
        /// The word is rewritten with <c>pDash</c> (matches replaced by " - ") and split with
        /// <c>pSpace</c>. Every piece is queued on <c>compoundBuffer</c>; the element at
        /// index 0 of the buffer is then removed and returned.
        /// </remarks>
        /// <param name="cl">The compound label; its <c>ParentAnnotation</c> is removed.</param>
        /// <returns>The element removed from the front of <c>compoundBuffer</c>.</returns>
        private CoreLabel ProcessCompound(CoreLabel cl)
        {
            cl.Remove(typeof(CoreAnnotations.ParentAnnotation));

            string   spaced   = pDash.Matcher(cl.Word()).ReplaceAll(" - ");
            string[] pieces   = pSpace.Split(spaced);
            int      consumed = 0;  // characters of the compound already assigned to pieces

            for (int idx = 0; idx < pieces.Length; idx++)
            {
                string    piece      = pieces[idx];
                CoreLabel pieceLabel = new CoreLabel(cl);

                pieceLabel.SetWord(piece);
                pieceLabel.SetValue(piece);
                pieceLabel.SetBeginPosition(cl.BeginPosition() + consumed);
                consumed += piece.Length;
                // consumed now points just past this piece.
                pieceLabel.SetEndPosition(cl.BeginPosition() + consumed);
                pieceLabel.Set(typeof(CoreAnnotations.OriginalTextAnnotation), piece);
                compoundBuffer.Add(pieceLabel);
            }
            return compoundBuffer.Remove(0);
        }
コード例 #7
0
        /// <summary>
        /// Handles verbs with attached suffixes, marked by the lexer:
        /// Escribamosela =&gt; Escribamo + se + la =&gt; escribamos + se + la
        /// Sentaos =&gt; senta + os =&gt; sentad + os
        /// Damelo =&gt; da + me + lo
        /// </summary>
        /// <remarks>
        /// One label per pronoun is queued on <c>compoundBuffer</c>; the stem label is returned
        /// with its original text set to the stem as it appeared in the input.
        /// </remarks>
        /// <param name="cl">The verb label; its <c>ParentAnnotation</c> is removed.</param>
        /// <returns>
        /// The stem label, or <paramref name="cl"/> itself when the verb stripper returns null.
        /// </returns>
        private CoreLabel ProcessVerb(CoreLabel cl)
        {
            cl.Remove(typeof(CoreAnnotations.ParentAnnotation));

            SpanishVerbStripper.StrippedVerb split = verbStripper.SeparatePronouns(cl.Word());
            if (split == null)
            {
                return cl;
            }

            // The stem ends where the first pronoun begins; each following pronoun starts
            // right after the previous one.
            int stemBoundary = cl.BeginPosition() + split.GetOriginalStem().Length;
            int pronounStart = stemBoundary;

            foreach (string clitic in split.GetPronouns())
            {
                compoundBuffer.Add(CopyCoreLabel(cl, clitic, pronounStart));
                pronounStart += clitic.Length;
            }

            CoreLabel stemLabel = CopyCoreLabel(cl, split.GetStem(), cl.BeginPosition(), stemBoundary);
            stemLabel.SetOriginalText(split.GetOriginalStem());
            return stemLabel;
        }
コード例 #8
0
        /// <summary>
        /// Find the operators in this sentence, annotating the head word (only!) of each operator with the
        /// <see cref="OperatorAnnotation"/>
        /// .
        /// </summary>
        /// <remarks>
        /// Works in two passes. First, every pattern in <c>Patterns</c> is matched against the
        /// dependency graph (basic dependencies, falling back to enhanced dependencies) and the
        /// computed scope is stored on the quantifier's token. Second, the annotation is removed
        /// from every non-head token inside each collected quantifier span, and a sentence-initial
        /// "no" followed by a PRP-tagged token is dropped.
        /// </remarks>
        /// <param name="sentence">
        /// As in
        /// <see cref="DoOneSentence(Edu.Stanford.Nlp.Pipeline.Annotation, Edu.Stanford.Nlp.Util.ICoreMap)"/>
        /// </param>
        private void AnnotateOperators(ICoreMap sentence)
        {
            SemanticGraph     tree   = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

            if (tree == null)
            {
                // No basic dependencies on the sentence: use the enhanced graph instead.
                tree = sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            }
            foreach (SemgrexPattern pattern in Patterns)
            {
                SemgrexMatcher matcher = pattern.Matcher(tree);
                while (matcher.Find())
                {
                    // Get terms
                    IndexedWord properSubject = matcher.GetNode("Subject");
                    IndexedWord quantifier;
                    IndexedWord subject;
                    bool        namedEntityQuantifier = false;
                    if (properSubject != null)
                    {
                        // A "Subject" node acts as both quantifier and subject.
                        quantifier            = subject = properSubject;
                        namedEntityQuantifier = true;
                    }
                    else
                    {
                        quantifier = matcher.GetNode("quantifier");
                        subject    = matcher.GetNode("subject");
                    }
                    IndexedWord @object = matcher.GetNode("object");
                    // Validate quantifier
                    Optional <Triple <Operator, int, int> > quantifierInfo;
                    if (namedEntityQuantifier)
                    {
                        // named entities have the "all" semantics by default.
                        if (!neQuantifiers)
                        {
                            continue;
                        }
                        // note: empty quantifier span given
                        quantifierInfo = Optional.Of(Triple.MakeTriple(Operator.ImplicitNamedEntity, quantifier.Index(), quantifier.Index()));
                    }
                    else
                    {
                        // find the quantifier, and return some info about it.
                        quantifierInfo = ValidateQuantifierByHead(sentence, quantifier, @object == null || subject == null);
                    }
                    // Awful hacks to regularize the subject of things like "one of" and "there are"
                    // (fix up 'there are')
                    if ("be".Equals(subject == null ? null : subject.Lemma()))
                    {
                        bool        hasExpl    = false;
                        IndexedWord newSubject = null;
                        foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                        {
                            string relation = outgoingEdge.GetRelation().ToString();
                            if ("nsubj".Equals(relation))
                            {
                                newSubject = outgoingEdge.GetDependent();
                            }
                            else if ("expl".Equals(relation))
                            {
                                hasExpl = true;
                            }
                        }
                        if (hasExpl)
                        {
                            // May be null when the expletive has no nsubj sibling.
                            subject = newSubject;
                        }
                    }
                    // (fix up '$n$ of')
                    if ("CD".Equals(subject == null ? null : subject.Tag()))
                    {
                        foreach (SemanticGraphEdge outgoingEdge in tree.OutgoingEdgeIterable(subject))
                        {
                            string rel = outgoingEdge.GetRelation().ToString();
                            // Relation names are identifiers, so compare ordinally rather than
                            // with the current culture.
                            if (rel.StartsWith("nmod", System.StringComparison.Ordinal))
                            {
                                subject = outgoingEdge.GetDependent();
                            }
                        }
                    }
                    // Set tokens
                    if (quantifierInfo.IsPresent())
                    {
                        // Compute span
                        IndexedWord pivot = matcher.GetNode("pivot");
                        if (pivot == null)
                        {
                            pivot = @object;
                        }
                        OperatorSpec scope = ComputeScope(tree, quantifierInfo.Get().first, pivot, Pair.MakePair(quantifierInfo.Get().second, quantifierInfo.Get().third), subject, namedEntityQuantifier, @object, tokens.Count);
                        // Set annotation (Index() is 1-based relative to the token list)
                        CoreLabel    token    = sentence.Get(typeof(CoreAnnotations.TokensAnnotation))[quantifier.Index() - 1];
                        OperatorSpec oldScope = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                        if (oldScope == null || oldScope.QuantifierLength() < scope.QuantifierLength() || oldScope.instance != scope.instance)
                        {
                            token.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), scope);
                        }
                        else
                        {
                            token.Set(typeof(NaturalLogicAnnotations.OperatorAnnotation), OperatorSpec.Merge(oldScope, scope));
                        }
                    }
                }
            }
            // Ensure we didn't select overlapping quantifiers. For example, "a" and "a few" can often overlap.
            // In these cases, take the longer quantifier match.
            List <OperatorSpec> quantifiers = new List <OperatorSpec>();

            for (int i = 0; i < tokens.Count; ++i)
            {
                CoreLabel    token     = tokens[i];
                OperatorSpec @operator = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                if (@operator == null)
                {
                    continue;
                }
                if (i == 0 && @operator.instance == Operator.No && tokens.Count > 2 && "PRP".Equals(tokens[1].Get(typeof(CoreAnnotations.PartOfSpeechAnnotation))))
                {
                    // This is pragmatically not a negation -- ignore it
                    // For example, "no I don't like candy" or "no you like cats"
                    token.Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                }
                else
                {
                    quantifiers.Add(@operator);
                }
            }
            // Longest quantifier first. An explicit comparison is used so the sort does not
            // depend on OperatorSpec having a default ordering.
            quantifiers.Sort((left, right) => right.QuantifierLength().CompareTo(left.QuantifierLength()));
            foreach (OperatorSpec spec in quantifiers)
            {
                // Strip the annotation from every token of the span except the head.
                for (int pos = spec.quantifierBegin; pos < spec.quantifierEnd; ++pos)
                {
                    if (pos != spec.quantifierHead)
                    {
                        tokens[pos].Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
                    }
                }
            }
        }