コード例 #1
0
        /// <summary>
        /// Walks the basic dependency graph of sentence <paramref name="s"/>, selects every node
        /// whose part-of-speech tag matches <c>N.*|PRP.*|DT</c>, and passes each one to
        /// <c>ExtractMentionForHeadword</c> unless it is attached to its parent through a
        /// <c>det</c> or <c>compound</c> relation.
        /// </summary>
        /// <param name="s">Sentence whose <c>BasicDependenciesAnnotation</c> is read.</param>
        /// <param name="mentions">Forwarded unchanged to <c>ExtractMentionForHeadword</c>.</param>
        /// <param name="mentionSpanSet">Forwarded unchanged to <c>ExtractMentionForHeadword</c>.</param>
        /// <param name="namedEntitySpanSet">Forwarded unchanged to <c>ExtractMentionForHeadword</c>.</param>
        private void ExtractNPorPRPFromDependency(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            // DT is for "this, these, etc"
            IList <IndexedWord> nounsOrPrp = basic.GetAllNodesByPartOfSpeechPattern("N.*|PRP.*|DT");

            foreach (IndexedWord w in nounsOrPrp)
            {
                // If there is no edge from the parent to this word, it is treated as the root.
                SemanticGraphEdge edge = basic.GetEdge(basic.GetParent(w), w);
                string shortname = edge != null ? edge.GetRelation().GetShortName() : "root";

                // TODO: what to remove? remove more?
                if (shortname.Matches("det|compound"))
                {
                    continue;
                }
                ExtractMentionForHeadword(w, basic, s, mentions, mentionSpanSet, namedEntitySpanSet);
            }
        }
コード例 #2
0
        /// <summary>
        /// Attributes quotes to speakers using quote-removed dependency parses.
        /// For every quote in <paramref name="doc"/> that has no <c>MentionAnnotation</c> yet,
        /// collects subject/verb pairs from the quote's enhanced++ dependency graph and, for a
        /// pair whose subject and verb both lie in the range returned by
        /// <c>QuoteAttributionUtils.GetRemainderInSentence</c> and whose verb lemma is in
        /// <c>commonSpeechWords</c>, records the subject through <c>FillInMention</c>.
        /// </summary>
        /// <param name="doc">Annotated document providing the <c>QuotationsAnnotation</c> list.</param>
        public virtual void DependencyParses(Annotation doc)
        {
            IList <ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

            foreach (ICoreMap quote in quotes)
            {
                // Skip quotes that already carry a mention.
                if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
                {
                    continue;
                }
                Pair <int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
                if (range == null)
                {
                    continue;
                }
                //search for mentions in the first run
                Pair <List <string>, List <Pair <int, int> > > namesAndNameIndices = ScanForNames(range);
                List <string>           names       = namesAndNameIndices.first;
                List <Pair <int, int> > nameIndices = namesAndNameIndices.second;
                SemanticGraph           graph       = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
                SemgrexMatcher          matcher     = subjVerbPattern.Matcher(graph);
                IList <Pair <IndexedWord, IndexedWord> > subjVerbPairs = new List <Pair <IndexedWord, IndexedWord> >();
                //TODO: check and see if this is necessary
                while (matcher.Find())
                {
                    IndexedWord subj = matcher.GetNode("SUBJ");
                    IndexedWord verb = matcher.GetNode("VERB");
                    subjVerbPairs.Add(new Pair <IndexedWord, IndexedWord>(subj, verb));
                }
                // Additionally pair the nsubj child of each verb with that verb's "dep" children
                // that are themselves tagged as verbs.
                IList <IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
                foreach (IndexedWord iw in vbs)
                {
                    // does it have an nsubj child?
                    ICollection <IndexedWord> children = graph.GetChildren(iw);
                    IList <IndexedWord>       deps     = new List <IndexedWord>();
                    IndexedWord nsubj = null;
                    foreach (IndexedWord child in children)
                    {
                        string relationName = graph.GetEdge(iw, child).GetRelation().GetShortName();
                        if (relationName.Equals("dep") && child.Tag().StartsWith("VB"))
                        {
                            deps.Add(child);
                        }
                        else if (relationName.Equals("nsubj"))
                        {
                            nsubj = child;
                        }
                    }
                    if (nsubj != null)
                    {
                        foreach (IndexedWord dep in deps)
                        {
                            subjVerbPairs.Add(new Pair <IndexedWord, IndexedWord>(nsubj, dep));
                        }
                    }
                }
                //look for a speech verb
                foreach (Pair <IndexedWord, IndexedWord> SVPair in subjVerbPairs)
                {
                    IndexedWord verb = SVPair.second;
                    IndexedWord subj = SVPair.first;
                    //check if subj and verb outside of quote
                    int verbTokPos = TokenToLocation(verb.BackingLabel());
                    // Use the subject's own token here; previously the verb's token was looked up
                    // twice, so the subject position was never actually checked.
                    int subjTokPos = TokenToLocation(subj.BackingLabel());
                    if (!InRange(range, verbTokPos) || !InRange(range, subjTokPos) || !commonSpeechWords.Contains(verb.Lemma()))
                    {
                        continue;
                    }
                    if (subj.Tag().Equals("NNP"))
                    {
                        int startChar = subj.BeginPosition();
                        for (int i = 0; i < names.Count; i++)
                        {
                            Pair <int, int> nameIndex = nameIndices[i];
                            //avoid names that don't actually exist in
                            if (RangeContainsCharIndex(nameIndex, startChar))
                            {
                                FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                                // NOTE(review): this break only leaves the name loop; unlike the
                                // PRP/NN branches below, the scan over subject/verb pairs goes on,
                                // so a later pair may fill in the mention again - confirm intent.
                                break;
                            }
                        }
                    }
                    else if (subj.Tag().Equals("PRP"))
                    {
                        int loc = TokenToLocation(subj.BackingLabel());
                        FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                        break;
                    }
                    else if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
                    {
                        int loc = TokenToLocation(subj.BackingLabel());
                        FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                        break;
                    }
                }
            }
        }