示例#1
0
        /// <summary>
        /// Older variant of the noun-phrase span computation. Produces the span covered by
        /// <paramref name="headword"/> and its outermost descendants; when the headword has a
        /// copula child, the left edge is recomputed from the child that follows the copula.
        /// </summary>
        /// <param name="headword">Head word of the candidate phrase; positions are derived as <c>Index() - 1</c>.</param>
        /// <param name="dep">Dependency graph queried for children and outermost descendants.</param>
        /// <param name="sent">Sentence tokens. Not read by this method.</param>
        /// <returns>An <c>IntPair</c> holding the begin and end positions (both <c>Index() - 1</c> values).</returns>
        private IntPair GetNPSpanOld(IndexedWord headword, SemanticGraph dep, IList <CoreLabel> sent)
        {
            IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
            Pair <IndexedWord, IndexedWord> extremes = SemanticGraphUtils.LeftRightMostChildVertices(headword, dep);

            // The headword itself may be the first or the last word of the span.
            int headPos     = headword.Index() - 1;
            int subtreeLeft = Math.Min(headPos, extremes.first.Index() - 1);
            int spanEnd     = Math.Max(headPos, extremes.second.Index() - 1);

            int spanBegin;
            if (copula == null)
            {
                // No copula relation: the whole subtree is the span.
                spanBegin = subtreeLeft;
            }
            else
            {
                // Copula present: start from the child listed right after the copula, if any;
                // otherwise the span starts at the headword.
                IList <IndexedWord> kids = dep.GetChildList(headword);
                int afterCopula = kids.IndexOf(copula) + 1;

                spanBegin = headPos;
                if (afterCopula < kids.Count)
                {
                    IndexedWord leftmost = SemanticGraphUtils.LeftMostChildVertice(kids[afterCopula], dep);
                    spanBegin = Math.Min(headPos, leftmost.Index() - 1);
                }
            }
            return new IntPair(spanBegin, spanEnd);
        }
示例#2
0
        /// <summary>Determine the person of "was".</summary>
        /// <remarks>
        /// Looks for a nominal (or, failing that, nominal passive) subject of <paramref name="word"/>.
        /// If that subject's word equals "i" ignoring case, the result is "1". Otherwise the edge from
        /// the parent is inspected: for an auxiliary, passive auxiliary or conjunct relation the answer
        /// is taken from the parent recursively; for any other relation the result is "3".
        /// </remarks>
        /// <param name="sg">Graph containing <paramref name="word"/>.</param>
        /// <param name="word">The node whose person is being determined.</param>
        /// <returns>
        /// "1", "3", or <c>null</c> when there is neither a subject nor a parent edge to consult.
        /// </returns>
        private static string WasPerson(SemanticGraph sg, IndexedWord word)
        {
            IndexedWord subject = sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalSubject);
            if (subject == null)
            {
                subject = sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalPassiveSubject);
            }

            /* "I" is the subject of "was". */
            if (subject != null && Sharpen.Runtime.EqualsIgnoreCase(subject.Word(), "i"))
            {
                return "1";
            }

            // Answer used when there is no incoming edge to inspect.
            string withoutGovernor = subject != null ? "3" : null;

            IndexedWord governor = sg.GetParent(word);
            if (governor == null)
            {
                return withoutGovernor;
            }

            SemanticGraphEdge incoming = sg.GetEdge(governor, word);
            if (incoming == null)
            {
                return withoutGovernor;
            }

            bool isAuxiliary = UniversalEnglishGrammaticalRelations.AuxModifier.Equals(incoming.GetRelation())
                               || UniversalEnglishGrammaticalRelations.AuxPassiveModifier.Equals(incoming.GetRelation());

            /* For auxiliaries and conjuncts, check whether the subject of the governor is "I". */
            bool deferToGovernor = isAuxiliary
                                   || UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(incoming.GetRelation());

            return deferToGovernor ? WasPerson(sg, governor) : "3";
        }
示例#3
0
        /// <summary>
        /// Return the leftmost and rightmost positions of the noun phrase headed by
        /// <paramref name="headword"/>, leaving out the copula part (children listed up to and
        /// including the cop child) and children attached by dep, discourse or punct.
        /// e.g., you are the person -&gt; return "the person"
        /// </summary>
        /// <param name="headword">Head word of the phrase; positions are derived as <c>Index() - 1</c>.</param>
        /// <param name="dep">Dependency graph queried for children, edges and outermost descendants.</param>
        /// <param name="sent">Sentence tokens. Not read by this method.</param>
        /// <returns>
        /// An <c>IntPair</c> of begin and end positions; both equal the headword position when no
        /// child qualifies for the phrase.
        /// </returns>
        private IntPair GetNPSpan(IndexedWord headword, SemanticGraph dep, IList <CoreLabel> sent)
        {
            int headPos = headword.Index() - 1;
            IList <IndexedWord> kids = dep.GetChildList(headword);

            // With a copula, only the children listed after it are candidates.
            IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
            int firstCandidate = 0;
            if (copula != null)
            {
                firstCandidate = kids.IndexOf(copula) + 1;
            }

            // Collect the children that belong inside the NP.
            IList <IndexedWord> members = Generics.NewArrayList();
            for (int k = firstCandidate; k < kids.Count; k++)
            {
                IndexedWord kid = kids[k];
                SemanticGraphEdge link = dep.GetEdge(headword, kid);
                bool excluded = link.GetRelation().GetShortName().Matches("dep|discourse|punct");
                if (!excluded)
                {
                    members.Add(kid);
                }
            }

            // Nothing qualifies: the headword is the only word.
            if (members.Count == 0)
            {
                return new IntPair(headPos, headPos);
            }

            Pair <IndexedWord, IndexedWord> firstExtent = SemanticGraphUtils.LeftRightMostChildVertices(members[0], dep);
            Pair <IndexedWord, IndexedWord> lastExtent  = SemanticGraphUtils.LeftRightMostChildVertices(members[members.Count - 1], dep);

            // The headword itself can be the first or the last word of the span.
            int spanBegin = Math.Min(headPos, firstExtent.first.Index() - 1);
            int spanEnd   = Math.Max(headPos, lastExtent.second.Index() - 1);
            return new IntPair(spanBegin, spanEnd);
        }
示例#4
0
        /// <summary>
        /// Extract the mention(s) headed by <paramref name="headword"/> and register them via
        /// <c>AddMention</c>. Headwords whose tag starts with "PRP" are delegated to
        /// <c>ExtractPronounForHeadword</c>. Otherwise the NP span from <c>GetNPSpan</c> is added
        /// (after trimming a trailing "," token and a leading "IN"-tagged token), and, if the
        /// headword has conjunct children, an additional shorter span ending before the first
        /// conjunct is added as well.
        /// </summary>
        /// <param name="headword">Head word of the candidate mention.</param>
        /// <param name="dep">Dependency graph used for span and conjunct lookups.</param>
        /// <param name="s">Sentence map supplying the tokens and the basic/enhanced dependency graphs.</param>
        /// <param name="mentions">Passed through to the mention-adding helpers.</param>
        /// <param name="mentionSpanSet">Passed through to the mention-adding helpers.</param>
        /// <param name="namedEntitySpanSet">Passed through to the mention-adding helpers.</param>
        private void ExtractMentionForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens        = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph     basicGraph    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph     enhancedGraph = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // Fall back to the basic dependencies when no enhanced graph is available.
            if (enhancedGraph == null)
            {
                enhancedGraph = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            // Pronouns are handled by their own extractor.
            if (headword.Tag().StartsWith("PRP"))
            {
                ExtractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
                return;
            }

            // Add the NP mention; spanEnd is one past the last position returned by GetNPSpan.
            IntPair span      = GetNPSpan(headword, dep, tokens);
            int     spanBegin = span.Get(0);
            int     spanEnd   = span.Get(1) + 1;

            // Try not to have a span that ends with ",".
            if (",".Equals(tokens[spanEnd - 1].Word()))
            {
                spanEnd--;
            }

            // Try to remove a leading IN.
            if ("IN".Equals(tokens[spanBegin].Tag()))
            {
                spanBegin++;
            }

            AddMention(spanBegin, spanEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicGraph, enhancedGraph);

            //
            // Extract the first element of a conjunction (for "A and B", "A" is extracted here;
            // the full "A and B" span was added above).
            //
            ICollection <IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
            if (conjuncts.Count <= 0)
            {
                return;
            }

            // Make sure we work with the conjunct that has the smallest index.
            IndexedWord earliest = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
            foreach (IndexedWord candidate in conjuncts)
            {
                if (candidate.Index() < earliest.Index())
                {
                    earliest = candidate;
                }
            }

            // Walk left from the start of that conjunct, skipping tokens tagged CC or ",".
            IndexedWord conjunctStart = SemanticGraphUtils.LeftMostChildVertice(earliest, dep);
            int firstElementEnd = conjunctStart.Index() - 1;
            while (firstElementEnd > spanBegin && tokens[firstElementEnd - 1].Tag().Matches("CC|,"))
            {
                firstElementEnd--;
            }

            // Only add the shorter span if it is non-empty and still contains the headword.
            if (firstElementEnd > spanBegin && headword.Index() - 1 < firstElementEnd)
            {
                AddMention(spanBegin, firstElementEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicGraph, enhancedGraph);
            }
        }