예제 #1
0
        /// <summary>
        /// Records a mention for a pronoun headword and, if the pronoun has conjunct
        /// children in <paramref name="dep"/>, a second mention spanning the noun phrase
        /// returned by <c>GetNPSpan</c>.
        /// </summary>
        /// <param name="headword">The pronoun node; its <c>Index()</c> is used as a 1-based token position.</param>
        /// <param name="dep">Dependency graph queried for edges and conjunct children.</param>
        /// <param name="s">Sentence map supplying the token list and the basic/enhanced dependency graphs.</param>
        /// <param name="mentions">List that newly created mentions are appended to.</param>
        /// <param name="mentionSpanSet">Spans already recorded; a new pronoun span is added here.</param>
        /// <param name="namedEntitySpanSet">Spans passed to <c>InsideNE</c> to skip pronouns inside named entities.</param>
        private void ExtractPronounForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens       = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph     basicDeps    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph     enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // Fall back to the basic dependencies when no enhanced graph is present.
            if (enhancedDeps == null)
            {
                enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            // The node index is 1-based, so as a 0-based token offset it points just past the pronoun.
            int headPosition = headword.Index();
            int spanStart    = headPosition - 1;
            int spanEnd      = headPosition;

            // Handle "you all", "they both" etc.: extend the span by one token when the
            // following word is "all"/"both" and the graph has an edge from the pronoun to it.
            bool followedByQuantifier = headPosition < tokens.Count && tokens[headPosition].Word().Matches("all|both");
            if (followedByQuantifier)
            {
                IndexedWord following = dep.GetNodeByIndex(headPosition + 1);
                if (dep.GetEdge(headword, following) != null)
                {
                    spanEnd++;
                }
            }

            IntPair pronounSpan = new IntPair(spanStart, spanEnd);
            bool    skipPronoun = mentionSpanSet.Contains(pronounSpan) || InsideNE(pronounSpan, namedEntitySpanSet);

            if (!skipPronoun)
            {
                // -1 is a placeholder mention id.
                Mention pronounMention = new Mention(-1, spanStart, spanEnd, tokens, basicDeps, enhancedDeps, new List <CoreLabel>(tokens.SubList(spanStart, spanEnd)));
                pronounMention.headIndex  = headPosition - 1;
                pronounMention.headWord   = tokens[pronounMention.headIndex];
                pronounMention.headString = pronounMention.headWord.Word().ToLower(Locale.English);
                mentions.Add(pronounMention);
                mentionSpanSet.Add(pronounSpan);
            }

            // When the pronoun is part of a conjunction (e.g., "you and I"), also add the whole NP.
            ICollection <IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
            if (conjuncts.Count <= 0)
            {
                return;
            }

            IntPair nounPhrase = GetNPSpan(headword, dep, tokens);
            spanStart = nounPhrase.Get(0);
            spanEnd   = nounPhrase.Get(1) + 1;

            // Try not to have a span that ends with ",".
            if (",".Equals(tokens[spanEnd - 1].Word()))
            {
                spanEnd--;
            }
            AddMention(spanStart, spanEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicDeps, enhancedDeps);
        }
예제 #2
0
 /// <summary>
 /// Checks whether <paramref name="word"/> has a marker child in
 /// <paramref name="sg"/> whose value is "to" (compared case-insensitively),
 /// i.e. an infinitival "to".
 /// </summary>
 /// <param name="sg">Graph whose marker children of <paramref name="word"/> are inspected.</param>
 /// <param name="word">Node to test.</param>
 /// <returns>True if at least one such marker child exists; otherwise false.</returns>
 private static bool HasTo(SemanticGraph sg, IndexedWord word)
 {
     // Nothing to inspect when the word has no marker child at all.
     if (!sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.Marker))
     {
         return false;
     }

     foreach (IndexedWord candidate in sg.GetChildrenWithReln(word, UniversalEnglishGrammaticalRelations.Marker))
     {
         bool isTo = Sharpen.Runtime.EqualsIgnoreCase(candidate.Value(), "to");
         if (isTo)
         {
             return true;
         }
     }
     return false;
 }
예제 #3
0
        /// <summary>
        /// Checks whether <paramref name="word"/> has an auxiliary child whose value
        /// matches <c>BeRegex</c> (an inflection of "be"). If it has none and the word
        /// is attached to its parent by a conjunct relation, the parent is checked
        /// in the same way.
        /// </summary>
        /// <param name="sg">Graph containing <paramref name="word"/>.</param>
        /// <param name="word">Node to test.</param>
        /// <returns>True if a matching auxiliary is found on the word or up its conjunct chain.</returns>
        private static bool HasBeAux(SemanticGraph sg, IndexedWord word)
        {
            foreach (IndexedWord auxiliary in sg.GetChildrenWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
            {
                if (auxiliary.Value().Matches(BeRegex))
                {
                    return true;
                }
            }

            // The word itself has no "be" auxiliary; if it is a conjunct, the auxiliary
            // may be attached to the head of the conjunction instead.
            IndexedWord governor = sg.GetParent(word);
            if (gov_is_missing(governor))
            {
                return false;
            }

            SemanticGraphEdge incoming   = sg.GetEdge(governor, word);
            bool              isConjunct = UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(incoming.GetRelation());
            if (!isConjunct)
            {
                return false;
            }
            return HasBeAux(sg, governor);

            // Local null test kept as a plain comparison, as in the surrounding code.
            bool gov_is_missing(IndexedWord candidate)
            {
                return candidate == null;
            }
        }
예제 #4
0
        /// <summary>
        /// Adds mentions headed by <paramref name="headword"/>. Headwords whose tag starts
        /// with "PRP" are delegated to <c>ExtractPronounForHeadword</c>. Otherwise the noun
        /// phrase span from <c>GetNPSpan</c> is added, and, when the headword has conjunct
        /// children, a shorter span ending before the first conjunct is added as well.
        /// </summary>
        /// <param name="headword">Head node of the candidate mention.</param>
        /// <param name="dep">Dependency graph used for span and conjunct lookup.</param>
        /// <param name="s">Sentence map supplying the token list and the basic/enhanced dependency graphs.</param>
        /// <param name="mentions">List passed on to <c>AddMention</c> / the pronoun handler.</param>
        /// <param name="mentionSpanSet">Set of mention spans passed on to <c>AddMention</c> / the pronoun handler.</param>
        /// <param name="namedEntitySpanSet">Set of named-entity spans passed on to <c>AddMention</c> / the pronoun handler.</param>
        private void ExtractMentionForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens       = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph     basicDeps    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph     enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // Fall back to the basic dependencies when no enhanced graph is present.
            if (enhancedDeps == null)
            {
                enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            // Pronouns have their own extraction routine.
            if (headword.Tag().StartsWith("PRP"))
            {
                ExtractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
                return;
            }

            // Add the NP mention.
            IntPair nounPhrase = GetNPSpan(headword, dep, tokens);
            int     spanStart  = nounPhrase.Get(0);
            int     spanEnd    = nounPhrase.Get(1) + 1;

            // Try not to have a span that ends with ",".
            if (",".Equals(tokens[spanEnd - 1].Word()))
            {
                spanEnd--;
            }

            // Try to remove a leading token tagged IN.
            if ("IN".Equals(tokens[spanStart].Tag()))
            {
                spanStart++;
            }
            AddMention(spanStart, spanEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicDeps, enhancedDeps);

            // Extract the first element of a conjunction: for "A and B" the span "A and B"
            // was added just above; here the span covering only "A" is added.
            ICollection <IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
            if (conjuncts.Count <= 0)
            {
                return;
            }

            // Pick the conjunct with the smallest index to make sure we find the first conjunction.
            IndexedWord firstConjunct = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
            foreach (IndexedWord candidate in conjuncts)
            {
                if (candidate.Index() < firstConjunct.Index())
                {
                    firstConjunct = candidate;
                }
            }

            // Start just before the leftmost node under the first conjunct and walk left
            // past tokens tagged "CC" or ",".
            IndexedWord leftmost      = SemanticGraphUtils.LeftMostChildVertice(firstConjunct, dep);
            int         firstElemEnd  = leftmost.Index() - 1;
            while (firstElemEnd > spanStart && tokens[firstElemEnd - 1].Tag().Matches("CC|,"))
            {
                firstElemEnd--;
            }

            // Only add the shortened span if it is non-empty and still contains the headword.
            if (firstElemEnd > spanStart && headword.Index() - 1 < firstElemEnd)
            {
                AddMention(spanStart, firstElemEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicDeps, enhancedDeps);
            }
        }