Пример #1
0
        /// <summary>
        /// Decides whether the pronoun "you" or "it" is nominative or accusative,
        /// based on the relation that attaches it to its governor.
        /// </summary>
        /// <param name="sg">Dependency graph containing the pronoun.</param>
        /// <param name="word">The pronoun; it is re-resolved in <paramref name="sg"/> by its index.</param>
        /// <returns>
        /// "Acc" when the pronoun is an object, or when it is a nominal modifier / root that has a
        /// case-marker child; "Nom" in every other situation.
        /// </returns>
        private static string PronounCase(SemanticGraph sg, IndexedWord word)
        {
            const string Nominative = "Nom";
            const string Accusative = "Acc";

            word = sg.GetNodeByIndex(word.Index());

            IndexedWord governor = sg.GetParent(word);
            if (governor == null)
            {
                return Nominative;
            }

            SemanticGraphEdge link = sg.GetEdge(governor, word);
            if (link == null)
            {
                return Nominative;
            }

            /* The pronoun is an object. */
            if (UniversalEnglishGrammaticalRelations.Object.IsAncestor(link.GetRelation()))
            {
                return Accusative;
            }

            bool nmodOrRoot = UniversalEnglishGrammaticalRelations.NominalModifier.IsAncestor(link.GetRelation())
                              || link.GetRelation() == GrammaticalRelation.Root;

            /* The pronoun is the head of a prepositional phrase. */
            if (nmodOrRoot && sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.CaseMarker))
            {
                return Accusative;
            }
            return Nominative;
        }
Пример #2
0
        /// <summary>Extracts the <c>PronType</c> feature of relative and interrogative pronouns.</summary>
        /// <param name="sg">Dependency graph containing <paramref name="word"/>.</param>
        /// <param name="word">Token to inspect; only tokens whose tag starts with "W" produce a feature.</param>
        /// <returns>
        /// An empty map for non-"W" tags; otherwise a map with <c>PronType</c> set to "Rel" when the
        /// token's governor is attached to its own governor by a relative-clause-modifier edge,
        /// to "Dem" for a non-relative "that", and to "Int" in the remaining cases.
        /// </returns>
        private static Dictionary <string, string> GetRelAndIntPronFeatures(SemanticGraph sg, IndexedWord word)
        {
            Dictionary <string, string> features = new Dictionary <string, string>();

            // Ordinal comparison: POS tags are identifiers, not linguistic text.
            if (!word.Tag().StartsWith("W", System.StringComparison.Ordinal))
            {
                return(features);
            }

            bool        isRel        = false;
            IndexedWord parent       = sg.GetParent(word);
            IndexedWord parentParent = parent != null ? sg.GetParent(parent) : null;

            if (parentParent != null)
            {
                SemanticGraphEdge edge = sg.GetEdge(parentParent, parent);
                // Guard against a missing edge, as the sibling helpers (PronounCase, WasPerson) do.
                isRel = edge != null && edge.GetRelation().Equals(UniversalEnglishGrammaticalRelations.RelativeClauseModifier);
            }

            if (isRel)
            {
                features["PronType"] = "Rel";
            }
            else if (Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "that"))
            {
                features["PronType"] = "Dem";
            }
            else
            {
                features["PronType"] = "Int";
            }
            return(features);
        }
Пример #3
0
        /// <summary>
        /// Derives the morphological features of <paramref name="word"/> that can only be decided
        /// from its position in the dependency graph.
        /// </summary>
        /// <param name="sg">Dependency graph containing <paramref name="word"/>.</param>
        /// <param name="word">Token to inspect.</param>
        /// <returns>Map from feature name to feature value; empty when no rule applies.</returns>
        private static Dictionary <string, string> GetGraphFeatures(SemanticGraph sg, IndexedWord word)
        {
            Dictionary <string, string> result = new Dictionary <string, string>();
            string tag = word.Tag();

            /* Case of "you" / "it". */
            if (tag.Equals("PRP") && (Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "you") || Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "it")))
            {
                result["Case"] = PronounCase(sg, word);
            }

            /* Person of "was"; left unset when it cannot be determined. */
            if (tag.Equals("VBD") && Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "was"))
            {
                string wasPerson = WasPerson(sg, word);
                if (wasPerson != null)
                {
                    result["Person"] = wasPerson;
                }
            }

            /* Relative and interrogative pronouns. */
            result.PutAll(GetRelAndIntPronFeatures(sg, word));

            /* Gerund vs. present participle: a "be" auxiliary marks a participle. */
            if (tag.Equals("VBG"))
            {
                bool participle = HasBeAux(sg, word);
                result["VerbForm"] = participle ? "Part" : "Ger";
                if (participle)
                {
                    result["Tense"] = "Pres";
                }
            }

            /* A "-self" pronoun is reflexive unless it is attached as an NP adverbial modifier. */
            if (word.Value().Matches(SelfRegex) && tag.Equals("PRP"))
            {
                IndexedWord governor = sg.GetParent(word);
                if (governor != null && sg.GetEdge(governor, word).GetRelation() != UniversalEnglishGrammaticalRelations.NpAdverbialModifier)
                {
                    result["Case"]   = "Acc";
                    result["Reflex"] = "Yes";
                }
            }

            /* Voice: a past participle with a passive auxiliary child. */
            if (tag.Equals("VBN") && sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.AuxPassiveModifier))
            {
                result["Voice"] = "Pass";
            }
            return result;
        }
Пример #4
0
        /// <summary>
        /// Removes from <paramref name="sg"/> the edge between the matched governor and dependent
        /// nodes whose relation is the one bound to the named edge in the match.
        /// </summary>
        /// <param name="sg">Graph to modify.</param>
        /// <param name="sm">Matcher that supplies the named nodes and the relation string.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            string      relnName  = sm.GetRelnString(edgeName);
            IndexedWord governor  = GetNamedNode(govName, sm);
            IndexedWord dependent = GetNamedNode(depName, sm);

            SemanticGraphEdge target = sg.GetEdge(governor, dependent, GrammaticalRelation.ValueOf(relnName));
            if (target == null)
            {
                // Nothing to remove.
                return;
            }
            sg.RemoveEdge(target);
        }
Пример #5
0
        /// <summary>
        /// Adds a mention for the pronoun <paramref name="headword"/> and, when the pronoun heads a
        /// conjunction (e.g. "you and I"), an additional mention that covers the whole conjunction.
        /// </summary>
        /// <param name="headword">Pronoun node; its index minus one is used as the token position.</param>
        /// <param name="dep">Dependency graph used to look up neighbours and conjuncts.</param>
        /// <param name="s">Sentence annotation supplying tokens and dependency graphs.</param>
        /// <param name="mentions">Receives newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already covered by a mention; updated on insert.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans; a pronoun span inside one is skipped.</param>
        private void ExtractPronounForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> sent     = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph     basic    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph     enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // Fall back to the basic dependencies when no enhanced graph is attached.
            if (enhanced == null)
            {
                enhanced = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            int headIndex = headword.Index();
            int spanStart = headIndex - 1;
            int spanEnd   = headIndex;

            // handle "you all", "they both" etc: absorb the following token when it hangs off the pronoun
            bool followedByQuantifier = sent.Count > headIndex && sent[headIndex].Word().Matches("all|both");
            if (followedByQuantifier)
            {
                IndexedWord next = dep.GetNodeByIndex(headIndex + 1);
                if (dep.GetEdge(headword, next) != null)
                {
                    spanEnd++;
                }
            }

            IntPair pronounSpan = new IntPair(spanStart, spanEnd);
            bool alreadyCovered = mentionSpanSet.Contains(pronounSpan) || InsideNE(pronounSpan, namedEntitySpanSet);
            if (!alreadyCovered)
            {
                const int dummyMentionId = -1;
                Mention mention = new Mention(dummyMentionId, spanStart, spanEnd, sent, basic, enhanced, new List <CoreLabel>(sent.SubList(spanStart, spanEnd)));
                mention.headIndex  = headword.Index() - 1;
                mention.headWord   = sent[mention.headIndex];
                mention.headString = mention.headWord.Word().ToLower(Locale.English);
                mentions.Add(mention);
                mentionSpanSet.Add(pronounSpan);
            }

            // when pronoun is a part of conjunction (e.g., you and I)
            ICollection <IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
            if (conjuncts.Count <= 0)
            {
                return;
            }

            IntPair conjunctionSpan = GetNPSpan(headword, dep, sent);
            spanStart = conjunctionSpan.Get(0);
            spanEnd   = conjunctionSpan.Get(1) + 1;

            // try not to have span that ends with ,
            if (",".Equals(sent[spanEnd - 1].Word()))
            {
                spanEnd--;
            }
            AddMention(spanStart, spanEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, sent, basic, enhanced);
        }
Пример #6
0
        /// <summary>
        /// Tells whether <paramref name="word"/> has an auxiliary verb attached to it, either
        /// directly or through the governor it is linked to by a conjunct or copula relation.
        /// </summary>
        /// <param name="sg">Dependency graph containing <paramref name="word"/>.</param>
        /// <param name="word">Token to inspect.</param>
        /// <returns>True when an auxiliary child is found on the word or on such a governor.</returns>
        private static bool HasAux(SemanticGraph sg, IndexedWord word)
        {
            if (sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
            {
                return true;
            }

            IndexedWord head = sg.GetParent(word);
            if (head == null)
            {
                return false;
            }

            SemanticGraphEdge toHead = sg.GetEdge(head, word);
            bool inheritsFromHead = UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(toHead.GetRelation())
                                    || UniversalEnglishGrammaticalRelations.Copula.Equals(toHead.GetRelation());

            // Only climb to the governor when the link is a conjunct or a copula.
            return inheritsFromHead && HasAux(sg, head);
        }
Пример #7
0
        /// <summary>
        /// Removes edges carrying <c>relation</c> from <paramref name="sg"/>.
        /// </summary>
        /// <remarks>
        /// When both named nodes resolve, the single edge between them is removed. When only the
        /// dependent resolves and the governor name is the wildcard, every matching incoming edge of
        /// the dependent is removed; symmetrically for a known governor with a wildcard dependent.
        /// Matching edges are collected before any removal so the graph is never modified while one
        /// of its edge iterables is being enumerated.
        /// </remarks>
        /// <param name="sg">Graph to modify.</param>
        /// <param name="sm">Matcher that supplies the named nodes.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            bool        govWild = govName.Equals(WildcardNode);
            bool        depWild = depName.Equals(WildcardNode);
            IndexedWord govNode = GetNamedNode(govName, sm);
            IndexedWord depNode = GetNamedNode(depName, sm);

            if (govNode != null && depNode != null)
            {
                SemanticGraphEdge direct = sg.GetEdge(govNode, depNode, relation);
                if (direct != null)
                {
                    sg.RemoveEdge(direct);
                }
                return;
            }

            // Snapshot of the edges to delete; filled while iterating, consumed afterwards.
            System.Collections.Generic.List <SemanticGraphEdge> toRemove = new System.Collections.Generic.List <SemanticGraphEdge>();

            if (depNode != null && govWild)
            {
                // dep known, wildcard gov
                foreach (SemanticGraphEdge incoming in sg.IncomingEdgeIterable(depNode))
                {
                    if (incoming.GetRelation().Equals(relation))
                    {
                        toRemove.Add(incoming);
                    }
                }
            }
            else if (govNode != null && depWild)
            {
                // gov known, wildcard dep
                foreach (SemanticGraphEdge outgoing in sg.OutgoingEdgeIterable(govNode))
                {
                    if (outgoing.GetRelation().Equals(relation))
                    {
                        toRemove.Add(outgoing);
                    }
                }
            }

            foreach (SemanticGraphEdge stale in toRemove)
            {
                // Re-check membership at removal time, as the original loop did.
                if (sg.ContainsEdge(stale))
                {
                    sg.RemoveEdge(stale);
                }
            }
        }
Пример #8
0
        /// <summary>
        /// Extracts a mention for every noun, pronoun or determiner node of the sentence's basic
        /// dependency graph, except nodes attached to their governor by a "det" or "compound" relation.
        /// </summary>
        /// <param name="s">Sentence annotation supplying the basic dependency graph.</param>
        /// <param name="mentions">Receives newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already covered by a mention.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans.</param>
        private void ExtractNPorPRPFromDependency(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            // DT is for "this, these, etc"
            IList <IndexedWord> nounsOrPrp = basic.GetAllNodesByPartOfSpeechPattern("N.*|PRP.*|DT");

            foreach (IndexedWord w in nounsOrPrp)
            {
                // A node without a parent is treated as the root; do not hand a null governor to GetEdge.
                IndexedWord       parent = basic.GetParent(w);
                SemanticGraphEdge edge   = parent == null ? null : basic.GetEdge(parent, w);

                // if edge is null, it's root
                string shortname = edge == null ? "root" : edge.GetRelation().GetShortName();

                // TODO: what to remove? remove more?
                if (shortname.Matches("det|compound"))
                {
                    continue;
                }
                ExtractMentionForHeadword(w, basic, s, mentions, mentionSpanSet, namedEntitySpanSet);
            }
        }
Пример #9
0
        /// <summary>
        /// Adds an edge with <c>relation</c> and <c>weight</c> between the matched governor and
        /// dependent nodes, unless such an edge already exists.
        /// </summary>
        /// <param name="sg">Graph to modify.</param>
        /// <param name="sm">Matcher that supplies the named nodes.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord head      = GetNamedNode(govName, sm);
            IndexedWord dependent = GetNamedNode(depName, sm);

            if (sg.GetEdge(head, dependent, relation) != null)
            {
                // The edge is already present; nothing to add.
                return;
            }

            // Make sure both endpoints are vertices of the graph before linking them.
            foreach (IndexedWord endpoint in new IndexedWord[] { head, dependent })
            {
                if (!sg.ContainsVertex(endpoint))
                {
                    sg.AddVertex(endpoint);
                }
            }
            sg.AddEdge(head, dependent, relation, weight, false);
        }
Пример #10
0
        /// <summary>
        /// Tells whether <paramref name="word"/> has an inflection of "be" as an auxiliary,
        /// either directly or on the governor it is conjoined to.
        /// </summary>
        /// <param name="sg">Dependency graph containing <paramref name="word"/>.</param>
        /// <param name="word">Token to inspect.</param>
        /// <returns>True when an auxiliary child whose value matches <c>BeRegex</c> is found.</returns>
        private static bool HasBeAux(SemanticGraph sg, IndexedWord word)
        {
            foreach (IndexedWord candidate in sg.GetChildrenWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
            {
                if (candidate.Value().Matches(BeRegex))
                {
                    return true;
                }
            }

            IndexedWord head = sg.GetParent(word);
            if (head == null)
            {
                return false;
            }

            /* When the word is part of a conjunction, the auxiliary may sit on the head of the conjunction. */
            SemanticGraphEdge toHead = sg.GetEdge(head, word);
            bool isConjunct = UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(toHead.GetRelation());
            return isConjunct && HasBeAux(sg, head);
        }
Пример #11
0
        /// <summary>Determines the grammatical person of the verb form "was".</summary>
        /// <param name="sg">Dependency graph containing <paramref name="word"/>.</param>
        /// <param name="word">The "was" token, or a governor reached while recursing.</param>
        /// <returns>
        /// "1" when the (passive) nominal subject is "I"; null when there is neither a subject nor an
        /// edge to a governor; the governor's result when the word is an auxiliary or a conjunct;
        /// "3" otherwise.
        /// </returns>
        private static string WasPerson(SemanticGraph sg, IndexedWord word)
        {
            IndexedWord subject = sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalSubject);
            if (subject == null)
            {
                subject = sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalPassiveSubject);
            }

            /* "I" is the subject of "was". */
            if (subject != null && Sharpen.Runtime.EqualsIgnoreCase(subject.Word(), "i"))
            {
                return "1";
            }

            IndexedWord       head   = sg.GetParent(word);
            SemanticGraphEdge toHead = head == null ? null : sg.GetEdge(head, word);
            if (toHead == null)
            {
                // No governor to consult: third person only if some subject was found.
                if (subject != null)
                {
                    return "3";
                }
                return null;
            }

            /* Auxiliaries and conjuncts take the person from their governor (whose subject may be "I"). */
            bool deferToHead = UniversalEnglishGrammaticalRelations.AuxModifier.Equals(toHead.GetRelation())
                               || UniversalEnglishGrammaticalRelations.AuxPassiveModifier.Equals(toHead.GetRelation())
                               || UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(toHead.GetRelation());

            return deferToHead ? WasPerson(sg, head) : "3";
        }
Пример #12
0
        /// <summary>
        /// Returns the leftmost and rightmost token positions of the noun phrase headed by
        /// <paramref name="headword"/>, leaving out the copula and every child before it
        /// (nsubj &amp; cop) as well as children attached by "dep", "discourse" or "punct".
        /// e.g., you are the person -&gt; return "the person"
        /// </summary>
        /// <param name="headword">Head of the noun phrase.</param>
        /// <param name="dep">Dependency graph containing <paramref name="headword"/>.</param>
        /// <param name="sent">Tokens of the sentence (not read by this method).</param>
        /// <returns>Pair of (begin, end) positions, each computed as node index minus one.</returns>
        private IntPair GetNPSpan(IndexedWord headword, SemanticGraph dep, IList <CoreLabel> sent)
        {
            int headPos = headword.Index() - 1;
            IList <IndexedWord> kids = dep.GetChildList(headword);

            // With a copula, only the children listed after it can belong to the NP.
            IndexedWord copula   = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
            int         firstKid = 0;
            if (copula != null)
            {
                firstKid = kids.IndexOf(copula) + 1;
            }

            // children which will be inside of NP
            IList <IndexedWord> insideNP = Generics.NewArrayList();
            for (int k = firstKid; k < kids.Count; k++)
            {
                IndexedWord kid      = kids[k];
                string      relnName = dep.GetEdge(headword, kid).GetRelation().GetShortName();
                if (!relnName.Matches("dep|discourse|punct"))
                {
                    insideNP.Add(kid);
                }
            }

            // the headword is the only word
            if (insideNP.Count == 0)
            {
                return new IntPair(headPos, headPos);
            }

            Pair <IndexedWord, IndexedWord> firstKidExtent = SemanticGraphUtils.LeftRightMostChildVertices(insideNP[0], dep);
            Pair <IndexedWord, IndexedWord> lastKidExtent  = SemanticGraphUtils.LeftRightMostChildVertices(insideNP[insideNP.Count - 1], dep);

            // headword can be first or last word
            int leftmost  = Math.Min(headPos, firstKidExtent.first.Index() - 1);
            int rightmost = Math.Max(headPos, lastKidExtent.second.Index() - 1);
            return new IntPair(leftmost, rightmost);
        }
Пример #13
0
        /// <summary>
        /// Attributes quotes to a speaker by looking for a subject / speech-verb pair in the
        /// dependency graph attached to each quote (original note: "using quote-removed depparses").
        /// </summary>
        /// <remarks>
        /// Quotes that already carry a <c>QuoteAttributionAnnotator.MentionAnnotation</c>, or for which
        /// <c>QuoteAttributionUtils.GetRemainderInSentence</c> returns null, are skipped. For the
        /// remaining quotes, a mention is filled in from the first suitable pair whose subject and
        /// verb both lie inside the remainder range and whose verb lemma is in <c>commonSpeechWords</c>.
        /// </remarks>
        /// <param name="doc">Annotated document whose quotations are examined.</param>
        public virtual void DependencyParses(Annotation doc)
        {
            IList <ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

            foreach (ICoreMap quote in quotes)
            {
                if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
                {
                    continue;
                }
                Pair <int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
                if (range == null)
                {
                    continue;
                }
                //search for mentions in the first run
                Pair <List <string>, List <Pair <int, int> > > namesAndNameIndices = ScanForNames(range);
                List <string>           names       = namesAndNameIndices.first;
                List <Pair <int, int> > nameIndices = namesAndNameIndices.second;
                SemanticGraph           graph       = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
                SemgrexMatcher          matcher     = subjVerbPattern.Matcher(graph);
                IList <Pair <IndexedWord, IndexedWord> > subjVerbPairs = new List <Pair <IndexedWord, IndexedWord> >();
                //TODO: check and see if this is necessary
                while (matcher.Find())
                {
                    IndexedWord subj = matcher.GetNode("SUBJ");
                    IndexedWord verb = matcher.GetNode("VERB");
                    subjVerbPairs.Add(new Pair <IndexedWord, IndexedWord>(subj, verb));
                }
                // Also pair the nsubj of each verb with that verb's verbal "dep" children.
                IList <IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
                foreach (IndexedWord iw in vbs)
                {
                    // does it have an nsubj child?
                    ICollection <IndexedWord> children = graph.GetChildren(iw);
                    IList <IndexedWord>       deps     = Generics.NewArrayList();
                    IndexedWord nsubj = null;
                    foreach (IndexedWord child in children)
                    {
                        SemanticGraphEdge sge = graph.GetEdge(iw, child);
                        if (sge.GetRelation().GetShortName().Equals("dep") && child.Tag().StartsWith("VB"))
                        {
                            deps.Add(child);
                        }
                        else if (sge.GetRelation().GetShortName().Equals("nsubj"))
                        {
                            nsubj = child;
                        }
                    }
                    if (nsubj != null)
                    {
                        foreach (IndexedWord dep in deps)
                        {
                            // Explicit type arguments, matching the pairs added from the pattern matcher.
                            subjVerbPairs.Add(new Pair <IndexedWord, IndexedWord>(nsubj, dep));
                        }
                    }
                }
                //look for a speech verb
                foreach (Pair <IndexedWord, IndexedWord> SVPair in subjVerbPairs)
                {
                    IndexedWord verb = SVPair.second;
                    IndexedWord subj = SVPair.first;
                    // Both the verb and the subject must fall inside the remainder range.
                    // The subject position is taken from the subject's own token (it used to be
                    // computed from the verb, which made the subject check a no-op).
                    int verbTokPos = TokenToLocation(verb.BackingLabel());
                    int subjTokPos = TokenToLocation(subj.BackingLabel());
                    if (!(InRange(range, verbTokPos) && InRange(range, subjTokPos) && commonSpeechWords.Contains(verb.Lemma())))
                    {
                        continue;
                    }
                    if (subj.Tag().Equals("NNP"))
                    {
                        int startChar = subj.BeginPosition();
                        for (int i = 0; i < names.Count; i++)
                        {
                            Pair <int, int> nameIndex = nameIndices[i];
                            //avoid names that don't actually exist in
                            if (RangeContainsCharIndex(nameIndex, startChar))
                            {
                                FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                                // NOTE(review): this only leaves the name loop; unlike the PRP/NN branches
                                // below, the scan over subject/verb pairs continues. Confirm this is intended.
                                break;
                            }
                        }
                    }
                    else if (subj.Tag().Equals("PRP"))
                    {
                        int loc = TokenToLocation(subj.BackingLabel());
                        FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                        break;
                    }
                    else if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
                    {
                        int loc = TokenToLocation(subj.BackingLabel());
                        FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                        break;
                    }
                }
            }
        }
Пример #14
0
        /// <summary>
        /// Serializes <paramref name="sg"/> as tab-separated, ten-column token lines, preceded by the
        /// graph's comments and followed by a blank line.
        /// </summary>
        /// <remarks>
        /// The row templates use .NET composite-format placeholders (<c>{0}</c>, <c>{1}</c>, ...).
        /// Java-style <c>%s</c>/<c>%d</c>/<c>%n</c> specifiers are not interpreted by
        /// <c>string.Format</c> and would be written out literally. The Java <c>%n</c> is rendered
        /// with <c>System.Environment.NewLine</c>.
        /// </remarks>
        /// <param name="sg">Graph to print.</param>
        /// <param name="unescapeParenthesis">
        /// When true, matches of <c>LrbPattern</c> / <c>RrbPattern</c> in the word and lemma are
        /// replaced with "(" / ")".
        /// </param>
        /// <returns>The textual representation of the graph.</returns>
        public virtual string PrintSemanticGraph(SemanticGraph sg, bool unescapeParenthesis)
        {
            bool          isTree  = SemanticGraphUtils.IsTree(sg);
            StringBuilder sb      = new StringBuilder();
            string        lineEnd = System.Environment.NewLine;

            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            /* Print comments. */
            foreach (string comment in sg.GetComments())
            {
                sb.Append(comment).Append("\n");
            }
            foreach (IndexedWord token in sg.VertexListSorted())
            {
                /* Check for multiword tokens: emit the "first-last" range line before its first token. */
                if (token.ContainsKey(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation)))
                {
                    IntPair tokenSpan = token.Get(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation));
                    if (tokenSpan.GetSource() == token.Index())
                    {
                        string range = string.Format(invariant, "{0}-{1}", tokenSpan.GetSource(), tokenSpan.GetTarget());
                        sb.Append(string.Format(invariant, "{0}\t{1}\t_\t_\t_\t_\t_\t_\t_\t_", range, token.OriginalText())).Append(lineEnd);
                    }
                }
                /* Try to find main governor and additional dependencies. */
                string govIdx = null;
                GrammaticalRelation         reln = null;
                Dictionary <string, string> enhancedDependencies = new Dictionary <string, string>();
                foreach (IndexedWord parent in sg.GetParents(token))
                {
                    SemanticGraphEdge edge = sg.GetEdge(parent, token);
                    // The first non-extra edge provides the main governor and relation.
                    if (govIdx == null && !edge.IsExtra())
                    {
                        govIdx = parent.ToCopyIndex();
                        reln   = edge.GetRelation();
                    }
                    enhancedDependencies[parent.ToCopyIndex()] = edge.GetRelation().ToString();
                }
                string additionalDepsString = isTree ? "_" : CoNLLUUtils.ToExtraDepsString(enhancedDependencies);
                string word           = token.Word();
                string featuresString = CoNLLUUtils.ToFeatureString(token.Get(typeof(CoreAnnotations.CoNLLUFeats)));
                string pos            = token.GetString <CoreAnnotations.PartOfSpeechAnnotation>("_");
                string upos           = token.GetString <CoreAnnotations.CoarseTagAnnotation>("_");
                string misc           = token.GetString <CoreAnnotations.CoNLLUMisc>("_");
                string lemma          = token.GetString <CoreAnnotations.LemmaAnnotation>("_");
                string relnName       = reln == null ? "_" : reln.ToString();
                /* Root. */
                if (govIdx == null && sg.GetRoots().Contains(token))
                {
                    govIdx               = "0";
                    relnName             = GrammaticalRelation.Root.ToString();
                    additionalDepsString = isTree ? "_" : "0:" + relnName;
                }
                else if (govIdx == null)
                {
                    /* Neither a governor nor a root: leave both columns blank. */
                    govIdx   = "_";
                    relnName = "_";
                }
                if (unescapeParenthesis)
                {
                    word  = word.ReplaceAll(LrbPattern, "(");
                    word  = word.ReplaceAll(RrbPattern, ")");
                    lemma = lemma.ReplaceAll(LrbPattern, "(");
                    lemma = lemma.ReplaceAll(RrbPattern, ")");
                }
                sb.Append(string.Format(invariant, "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}", token.ToCopyIndex(), word, lemma, upos, pos, featuresString, govIdx, relnName, additionalDepsString, misc)).Append(lineEnd);
            }
            sb.Append("\n");
            return(sb.ToString());
        }