Пример #1
0
        /// <summary>
        /// Computes a (begin, end) pair of token positions for the phrase headed by
        /// <paramref name="headword"/>. Every position is an <c>Index()</c> value minus one.
        /// </summary>
        /// <remarks>
        /// Without a copula child, the span runs from the smaller of the headword / leftmost-vertex
        /// positions to the larger of the headword / rightmost-vertex positions. With a copula child,
        /// the begin position is recomputed from the child that follows the copula in the headword's
        /// child list, or is the headword's own position when the copula is the last child.
        /// </remarks>
        /// <param name="headword">Head token of the phrase.</param>
        /// <param name="dep">Dependency graph the headword's children are read from.</param>
        /// <param name="sent">Not read by this method.</param>
        /// <returns>An <c>IntPair</c> holding the begin and end positions, in that order.</returns>
        private IntPair GetNPSpanOld(IndexedWord headword, SemanticGraph dep, IList <CoreLabel> sent)
        {
            IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
            Pair <IndexedWord, IndexedWord> extremes = SemanticGraphUtils.LeftRightMostChildVertices(headword, dep);

            // The headword itself may be the first or the last word of the phrase.
            int headPos      = headword.Index() - 1;
            int leftmostPos  = extremes.first.Index() - 1;
            int rightmostPos = extremes.second.Index() - 1;
            int spanEnd      = Math.Max(headPos, rightmostPos);

            // No copula relation: the plain leftmost/rightmost span is the answer.
            if (copula == null)
            {
                return new IntPair(Math.Min(headPos, leftmostPos), spanEnd);
            }

            // Copula relation present: start from the child listed right after the copula, if any.
            IList <IndexedWord> kids = dep.GetChildList(headword);
            int afterCopula = kids.IndexOf(copula) + 1;
            int spanBegin = afterCopula < kids.Count
                ? Math.Min(headPos, SemanticGraphUtils.LeftMostChildVertice(kids[afterCopula], dep).Index() - 1)
                : headPos;

            return new IntPair(spanBegin, spanEnd);
        }
Пример #2
0
        /// <summary>
        /// Creates a new node from the stored prototype and attaches it to the graph as a dependent
        /// of the governor node looked up from the matcher.
        /// </summary>
        /// <remarks>
        /// The new node takes its document ID and sentence index from the governor, and its token
        /// index from the vertex returned by <c>SemanticGraphUtils.LeftMostChildVertice</c> for the
        /// governor (a cheap English-specific hack for placing a copula at the beginning of the
        /// governing phrase).
        /// TODO: figure out how to specify where in the sentence this node goes.
        /// TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel.
        /// TODO: bombproof if this gov, dep, and reln already exist.
        /// </remarks>
        /// <param name="sg">Graph that receives the new vertex and the new edge.</param>
        /// <param name="sm">Matcher from which the governor node is fetched by <c>govNodeName</c>.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord governor = sm.GetNode(govNodeName);
            IndexedWord inserted = new IndexedWord(newNodePrototype);

            // Cheap En-specific hack for placing copula: reuse the index found at the
            // beginning of the governing phrase.
            IndexedWord phraseStart   = SemanticGraphUtils.LeftMostChildVertice(governor, sg);
            int         insertedIndex = phraseStart.Index();

            inserted.SetDocID(governor.DocID());
            inserted.SetIndex(insertedIndex);
            inserted.SetSentIndex(governor.SentIndex());

            sg.AddVertex(inserted);
            // NOTE(review): the trailing 'false' is presumably the "extra edge" flag — confirm against AddEdge.
            sg.AddEdge(governor, inserted, relation, weight, false);
        }
Пример #3
0
        /// <summary>
        /// Registers, via <c>AddMention</c>, the noun-phrase span headed by <paramref name="headword"/>
        /// and, when the headword has conjunct children, an additional span that stops before the
        /// first conjunct.
        /// </summary>
        /// <remarks>
        /// Headwords whose tag starts with "PRP" are delegated to <c>ExtractPronounForHeadword</c>
        /// and nothing else is done for them.
        /// </remarks>
        /// <param name="headword">Head token of the candidate mention.</param>
        /// <param name="dep">Dependency graph used to compute the span and to find conjunct children.</param>
        /// <param name="s">Sentence annotations; tokens and basic/enhanced dependencies are read from it.</param>
        /// <param name="mentions">Forwarded to <c>AddMention</c> / <c>ExtractPronounForHeadword</c>.</param>
        /// <param name="mentionSpanSet">Forwarded to <c>AddMention</c> / <c>ExtractPronounForHeadword</c>.</param>
        /// <param name="namedEntitySpanSet">Forwarded to <c>AddMention</c> / <c>ExtractPronounForHeadword</c>.</param>
        private void ExtractMentionForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> sent     = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph     basic    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph     enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // No enhanced graph on this sentence: reuse the basic graph fetched above instead of
            // looking the same annotation up a second time.
            if (enhanced == null)
            {
                enhanced = basic;
            }

            // Pronouns have their own extractor. POS tags are identifiers rather than linguistic
            // text, so compare ordinally instead of depending on the current culture.
            if (headword.Tag().StartsWith("PRP", System.StringComparison.Ordinal))
            {
                ExtractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
                return;
            }

            // Add the NP mention. The second value of the span is used as an inclusive end here,
            // so +1 turns endIdx into an exclusive bound (sent[endIdx - 1] is the last token).
            IntPair npSpan   = GetNPSpan(headword, dep, sent);
            int     beginIdx = npSpan.Get(0);
            int     endIdx   = npSpan.Get(1) + 1;

            // Try not to have a span that ends with ",".
            if (",".Equals(sent[endIdx - 1].Word()))
            {
                endIdx--;
            }

            // Try to remove a leading IN-tagged token.
            if ("IN".Equals(sent[beginIdx].Tag()))
            {
                beginIdx++;
            }

            AddMention(beginIdx, endIdx, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);

            //
            // extract the first element in conjunction (A and B -> extract A here "A and B", "B" will be extracted above)
            //
            ICollection <IndexedWord> conjChildren = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);

            if (conjChildren.Count > 0)
            {
                // Make sure we work with the first conjunct: keep the child with the smallest index.
                IndexedWord conjChild = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
                foreach (IndexedWord c in conjChildren)
                {
                    if (c.Index() < conjChild.Index())
                    {
                        conjChild = c;
                    }
                }

                // Walk left from the start of the first conjunct, skipping tokens whose tag matches
                // "CC|,"; the first position not preceded by such a token ends the first element.
                IndexedWord left = SemanticGraphUtils.LeftMostChildVertice(conjChild, dep);
                for (int endIdxFirstElement = left.Index() - 1; endIdxFirstElement > beginIdx; endIdxFirstElement--)
                {
                    if (!sent[endIdxFirstElement - 1].Tag().Matches("CC|,"))
                    {
                        // Only add the shortened span when it still contains the headword.
                        if (headword.Index() - 1 < endIdxFirstElement)
                        {
                            AddMention(beginIdx, endIdxFirstElement, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
                        }
                        break;
                    }
                }
            }
        }