Beispiel #1
0
        /// <summary>
        /// Older variant of the noun-phrase span computation for <paramref name="headword"/>.
        /// </summary>
        /// <remarks>
        /// Every bound is an <c>Index()</c> value minus one. Without a copula child the span runs from the
        /// smaller of the headword position and the left-most child vertex to the larger of the headword
        /// position and the right-most child vertex. With a copula child, the start is recomputed from the
        /// child that follows the copula in the headword's child list (or is the headword itself when the
        /// copula is the last child).
        /// </remarks>
        /// <param name="headword">Head of the candidate noun phrase.</param>
        /// <param name="dep">Dependency graph containing <paramref name="headword"/>.</param>
        /// <param name="sent">Sentence tokens; not read by this method.</param>
        /// <returns>Pair of (begin, end) positions.</returns>
        private IntPair GetNPSpanOld(IndexedWord headword, SemanticGraph dep, IList <CoreLabel> sent)
        {
            IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
            Pair <IndexedWord, IndexedWord> extremes = SemanticGraphUtils.LeftRightMostChildVertices(headword, dep);

            // The headword itself may be the first or the last word of the span.
            int headPos   = headword.Index() - 1;
            int spanStart = Math.Min(headPos, extremes.first.Index() - 1);
            int spanEnd   = Math.Max(headPos, extremes.second.Index() - 1);

            if (copula != null)
            {
                // Copula present: restart the span at whatever follows the copula among the children.
                IList <IndexedWord> kids = dep.GetChildList(headword);
                int afterCopula = kids.IndexOf(copula) + 1;

                spanStart = afterCopula < kids.Count
                    ? Math.Min(headPos, SemanticGraphUtils.LeftMostChildVertice(kids[afterCopula], dep).Index() - 1)
                    : headPos;
            }
            return(new IntPair(spanStart, spanEnd));
        }
Beispiel #2
0
        /// <summary>
        /// Builds a new node from <c>newNodePrototype</c> and attaches it to the matched governor node.
        /// </summary>
        /// <remarks>
        /// The new node takes its document id and sentence index from the governor, and its index from the
        /// vertex returned by <c>SemanticGraphUtils.LeftMostChildVertice</c> for the governor. It is then
        /// added to <paramref name="sg"/> as a vertex and linked from the governor with <c>relation</c>
        /// and <c>weight</c>.
        /// TODO: figure out how to specify where in the sentence this node goes.
        /// TODO: determine if we should be copying an IndexedWord, or working just with a FeatureLabel.
        /// TODO: bombproof if this gov, dep, and reln already exist.
        /// </remarks>
        /// <param name="sg">Graph that receives the new vertex and edge.</param>
        /// <param name="sm">Matcher used to look up the governor node by <c>govNodeName</c>.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord governor = sm.GetNode(govNodeName);
            IndexedWord addition = new IndexedWord(newNodePrototype);

            // cheap En-specific hack for placing copula (beginning of governing phrase)
            addition.SetIndex(SemanticGraphUtils.LeftMostChildVertice(governor, sg).Index());

            // The remaining positional metadata is copied from the governor.
            addition.SetDocID(governor.DocID());
            addition.SetSentIndex(governor.SentIndex());

            sg.AddVertex(addition);
            sg.AddEdge(governor, addition, relation, weight, false);
        }
 /// <summary>
 /// Checks the Semgrex pattern text produced by
 /// <c>SemanticGraphUtils.SemgrexFromGraphOrderedNodes</c> for the graph "[ate subj&gt;Bill]".
 /// </summary>
 /// <remarks>
 /// Deliberately has no try/catch. NUnit signals a failed assertion by throwing, so a blanket
 /// <c>catch (Exception)</c> around the assertion swallows the failure and the test always passes.
 /// Any exception now propagates to the test runner, which reports it with its stack trace.
 /// </remarks>
 public virtual void TestCreateSemgrexPattern()
 {
     SemanticGraph graph = SemanticGraph.ValueOf("[ate subj>Bill]");
     // Typed null so the call still binds to the overload taking a node-transform function.
     Func <IndexedWord, string> transformNode = null;
     string pat = SemanticGraphUtils.SemgrexFromGraphOrderedNodes(graph, null, null, transformNode);

     NUnit.Framework.Assert.AreEqual("{word: ate; tag: null; ner: null}=ate  >subj=E1 {word: bill; tag: null; ner: null}=Bill", pat.Trim());
 }
Beispiel #4
0
        /// <summary>
        /// Exhaustive counterpart of expandFromPatterns: keeps simplifying the graph through the
        /// three-argument overload and returns the de-duplicated outcome.
        /// </summary>
        /// <remarks>
        /// The work is delegated to <c>ExhaustFromPatterns(patternList, sg, 1)</c>. When that call yields
        /// more than one graph, the collection is passed through
        /// <c>SemanticGraphUtils.RemoveDuplicates</c> together with <paramref name="sg"/>, and the sizes
        /// before and after are logged if a logger is set.
        /// TODO: ensure cycles do not occur
        /// NOTE: put in an arbitrary depth limit of 3, to prevent churning way too much (heuristic);
        /// the limit itself is not enforced in this method.
        /// </remarks>
        /// <param name="patternList">Edit patterns to apply.</param>
        /// <param name="sg">Source graph the patterns are applied to.</param>
        /// <returns>The generated graphs, de-duplicated when more than one was produced.</returns>
        /// <exception cref="System.Exception"/>
        public virtual ICollection <SemanticGraph> ExhaustFromPatterns(IList <SsurgeonPattern> patternList, SemanticGraph sg)
        {
            ICollection <SemanticGraph> results = ExhaustFromPatterns(patternList, sg, 1);

            // With at most one result the de-duplication pass is skipped entirely.
            if (results.Count <= 1)
            {
                return(results);
            }

            if (log != null)
            {
                log.Info("Before remove dupe, size=" + results.Count);
            }

            results = SemanticGraphUtils.RemoveDuplicates(results, sg);

            if (log != null)
            {
                log.Info("AFTER remove dupe, size=" + results.Count);
            }
            return(results);
        }
Beispiel #5
0
        /// <summary>
        /// Given a list of SsurgeonPattern edit scripts, and a SemanticGraph
        /// to operate over, returns a list of expansions of that graph, with
        /// the result of each edit applied against a copy of the graph.
        /// </summary>
        /// <remarks>
        /// Every graph produced by every pattern is appended to the returned list in the order produced;
        /// the returned list itself is not de-duplicated. De-duplication is applied only to the
        /// per-pattern collection that is reported to the log. The result of
        /// <c>SemanticGraphUtils.RemoveDuplicates</c> is now kept, so the logged match count and the logged
        /// rewrites reflect the de-duplicated collection instead of the raw one.
        /// </remarks>
        /// <param name="patternList">Edit patterns to execute against <paramref name="sg"/>.</param>
        /// <param name="sg">Source graph.</param>
        /// <returns>All graphs generated by the patterns.</returns>
        /// <exception cref="System.Exception"/>
        public virtual IList <SemanticGraph> ExpandFromPatterns(IList <SsurgeonPattern> patternList, SemanticGraph sg)
        {
            IList <SemanticGraph> retList = new List <SemanticGraph>();

            foreach (SsurgeonPattern pattern in patternList)
            {
                ICollection <SemanticGraph> generated = pattern.Execute(sg);
                foreach (SemanticGraph orderedGraph in generated)
                {
                    retList.Add(orderedGraph);
                    // NOTE(review): unconditional console output, kept as-is because callers may rely on it;
                    // consider routing it through the logger.
                    System.Console.Out.WriteLine("\ncompact = " + orderedGraph.ToCompactString());
                    System.Console.Out.WriteLine("regular=" + orderedGraph);
                }
                if (generated.Count == 0)
                {
                    continue;
                }
                if (log != null)
                {
                    log.Info("* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *");
                    log.Info("Pre remove duplicates, num=" + generated.Count);
                }
                // RemoveDuplicates hands back the filtered collection; keep it so that the
                // report below describes the de-duplicated matches.
                generated = SemanticGraphUtils.RemoveDuplicates(generated, sg);
                if (log != null)
                {
                    log.Info("Expand from patterns");
                    if (logPrefix != null)
                    {
                        log.Info(logPrefix);
                    }
                    log.Info("Pattern = '" + pattern.GetUID() + "' generated " + generated.Count + " matches");
                    log.Info("= = = = = = = = = =\nSrc graph:\n" + sg + "\n= = = = = = = = = =\n");
                    int index = 1;
                    foreach (SemanticGraph genSg in generated)
                    {
                        log.Info("REWRITE " + (index++));
                        log.Info(genSg.ToString());
                        log.Info(". . . . .\n");
                    }
                }
            }
            return(retList);
        }
Beispiel #6
0
        /// <summary>
        /// Returns the left-most and right-most positions of the noun phrase headed by
        /// <paramref name="headword"/>, leaving out the copula construction (nsubj &amp; cop) and children
        /// attached by dep, discourse or punct relations.
        /// e.g., you are the person -&gt; return "the person"
        /// </summary>
        /// <param name="headword">Head of the noun phrase.</param>
        /// <param name="dep">Dependency graph containing <paramref name="headword"/>.</param>
        /// <param name="sent">Sentence tokens; not read by this method.</param>
        /// <returns>Pair of (begin, end) positions, each an <c>Index()</c> value minus one.</returns>
        private IntPair GetNPSpan(IndexedWord headword, SemanticGraph dep, IList <CoreLabel> sent)
        {
            int headPos = headword.Index() - 1;
            IList <IndexedWord> kids = dep.GetChildList(headword);

            // With a copula child, only the children listed after it are candidates.
            IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
            int firstCandidate = 0;
            if (copula != null)
            {
                firstCandidate = kids.IndexOf(copula) + 1;
            }

            // Children that will be inside of the NP.
            IList <IndexedWord> npChildren = Generics.NewArrayList();
            for (int k = firstCandidate; k < kids.Count; k++)
            {
                IndexedWord kid = kids[k];
                string relationName = dep.GetEdge(headword, kid).GetRelation().GetShortName();

                // dep / discourse / punct children are skipped.
                if (!relationName.Matches("dep|discourse|punct"))
                {
                    npChildren.Add(kid);
                }
            }

            // Nothing left to include: the headword is the only word.
            if (npChildren.Count == 0)
            {
                return(new IntPair(headPos, headPos));
            }

            IndexedWord leftEdge  = SemanticGraphUtils.LeftRightMostChildVertices(npChildren[0], dep).first;
            IndexedWord rightEdge = SemanticGraphUtils.LeftRightMostChildVertices(npChildren[npChildren.Count - 1], dep).second;

            // The headword can be the first or the last word of the span.
            int spanStart = Math.Min(headPos, leftEdge.Index() - 1);
            int spanEnd   = Math.Max(headPos, rightEdge.Index() - 1);
            return(new IntPair(spanStart, spanEnd));
        }
Beispiel #7
0
        /// <summary>
        /// Serializes <paramref name="sg"/> into tab-separated, ten-column text, one row per vertex.
        /// </summary>
        /// <remarks>
        /// Output layout, in order:
        /// the graph's comments, one per line;
        /// for each vertex of <c>VertexListSorted()</c>, an optional multiword-token range row (written when
        /// the token carries a <c>CoNLLUTokenSpanAnnotation</c> whose source equals the token index)
        /// followed by the token row (index, word, lemma, coarse tag, POS tag, features, governor,
        /// relation, additional dependencies, misc);
        /// a final empty line.
        /// The governor is the first parent reached over a non-extra edge. A token with no such parent is
        /// written with governor "0" and the root relation when it is one of the graph's roots, otherwise
        /// with "_" for both. The additional-dependencies column is "_" whenever
        /// <c>SemanticGraphUtils.IsTree</c> reports a tree.
        /// Rows are assembled with .NET formatting. The previous Java printf specifiers ("%s", "%d", "%n")
        /// are not understood by <c>string.Format</c>, so the format string was emitted verbatim and no
        /// values were written.
        /// </remarks>
        /// <param name="sg">Graph to serialize.</param>
        /// <param name="unescapeParenthesis">
        /// When true, matches of <c>LrbPattern</c> / <c>RrbPattern</c> in word and lemma are replaced by
        /// "(" and ")".
        /// </param>
        /// <returns>The serialized graph.</returns>
        public virtual string PrintSemanticGraph(SemanticGraph sg, bool unescapeParenthesis)
        {
            bool          isTree = SemanticGraphUtils.IsTree(sg);
            StringBuilder sb     = new StringBuilder();
            // Row terminator standing in for Java's "%n" (platform line separator).
            string        eol    = System.Environment.NewLine;

            /* Print comments. */
            foreach (string comment in sg.GetComments())
            {
                sb.Append(comment).Append("\n");
            }
            foreach (IndexedWord token in sg.VertexListSorted())
            {
                /* Check for multiword tokens. */
                if (token.ContainsKey(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation)))
                {
                    IntPair tokenSpan = token.Get(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation));
                    // The range row is written once, in front of the first token of the span.
                    if (tokenSpan.GetSource() == token.Index())
                    {
                        string range = string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0}-{1}", tokenSpan.GetSource(), tokenSpan.GetTarget());
                        sb.Append(range).Append("\t").Append(token.OriginalText()).Append("\t_\t_\t_\t_\t_\t_\t_\t_").Append(eol);
                    }
                }
                /* Try to find main governor and additional dependencies. */
                string govIdx = null;
                GrammaticalRelation         reln = null;
                Dictionary <string, string> enhancedDependencies = new Dictionary <string, string>();
                foreach (IndexedWord parent in sg.GetParents(token))
                {
                    SemanticGraphEdge edge = sg.GetEdge(parent, token);
                    // The first non-extra edge determines the main governor and relation.
                    if (govIdx == null && !edge.IsExtra())
                    {
                        govIdx = parent.ToCopyIndex();
                        reln   = edge.GetRelation();
                    }
                    enhancedDependencies[parent.ToCopyIndex()] = edge.GetRelation().ToString();
                }
                string additionalDepsString = isTree ? "_" : CoNLLUUtils.ToExtraDepsString(enhancedDependencies);
                string word           = token.Word();
                string featuresString = CoNLLUUtils.ToFeatureString(token.Get(typeof(CoreAnnotations.CoNLLUFeats)));
                string pos            = token.GetString <CoreAnnotations.PartOfSpeechAnnotation>("_");
                string upos           = token.GetString <CoreAnnotations.CoarseTagAnnotation>("_");
                string misc           = token.GetString <CoreAnnotations.CoNLLUMisc>("_");
                string lemma          = token.GetString <CoreAnnotations.LemmaAnnotation>("_");
                string relnName       = reln == null ? "_" : reln.ToString();
                /* Root. */
                if (govIdx == null && sg.GetRoots().Contains(token))
                {
                    govIdx               = "0";
                    relnName             = GrammaticalRelation.Root.ToString();
                    additionalDepsString = isTree ? "_" : "0:" + relnName;
                }
                else
                {
                    if (govIdx == null)
                    {
                        // Neither attached nor a root: leave governor and relation unspecified.
                        govIdx   = "_";
                        relnName = "_";
                    }
                }
                if (unescapeParenthesis)
                {
                    word  = word.ReplaceAll(LrbPattern, "(");
                    word  = word.ReplaceAll(RrbPattern, ")");
                    lemma = lemma.ReplaceAll(LrbPattern, "(");
                    lemma = lemma.ReplaceAll(RrbPattern, ")");
                }
                string[] columns = new string[] { token.ToCopyIndex(), word, lemma, upos, pos, featuresString, govIdx, relnName, additionalDepsString, misc };
                sb.Append(string.Join("\t", columns)).Append(eol);
            }
            sb.Append("\n");
            return(sb.ToString());
        }
Beispiel #8
0
        /// <summary>
        /// Adds the mention(s) headed by <paramref name="headword"/> to <paramref name="mentions"/>.
        /// </summary>
        /// <remarks>
        /// Headwords whose tag starts with "PRP" are handed to <c>ExtractPronounForHeadword</c>. Otherwise
        /// the span from <c>GetNPSpan</c> is trimmed (a trailing "," token and a leading "IN"-tagged token
        /// are dropped) and passed to <c>AddMention</c>. If the headword has conjunct children, a second,
        /// shorter mention covering only the first element of the conjunction may be added as well.
        /// </remarks>
        /// <param name="headword">Head of the mention.</param>
        /// <param name="dep">Dependency graph used for span and conjunct lookup.</param>
        /// <param name="s">Sentence annotation supplying the tokens and the basic/enhanced dependencies.</param>
        /// <param name="mentions">Forwarded to <c>AddMention</c> / <c>ExtractPronounForHeadword</c>.</param>
        /// <param name="mentionSpanSet">Forwarded to <c>AddMention</c> / <c>ExtractPronounForHeadword</c>.</param>
        /// <param name="namedEntitySpanSet">Forwarded to <c>AddMention</c> / <c>ExtractPronounForHeadword</c>.</param>
        private void ExtractMentionForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens       = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph     basicDeps    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph     enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // Fall back to the basic dependencies when no enhanced graph is available.
            if (enhancedDeps == null)
            {
                enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            // Pronouns are handled separately.
            if (headword.Tag().StartsWith("PRP"))
            {
                ExtractPronounForHeadword(headword, dep, s, mentions, mentionSpanSet, namedEntitySpanSet);
                return;
            }

            // NP mention: spanEnd is one more than the end position reported by GetNPSpan.
            IntPair span      = GetNPSpan(headword, dep, tokens);
            int     spanBegin = span.Get(0);
            int     spanEnd   = span.Get(1) + 1;

            // Try not to have a span that ends with ","
            if (",".Equals(tokens[spanEnd - 1].Word()))
            {
                spanEnd--;
            }
            // Try to remove a leading IN.
            if ("IN".Equals(tokens[spanBegin].Tag()))
            {
                spanBegin++;
            }
            AddMention(spanBegin, spanEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicDeps, enhancedDeps);

            //
            // Extract the first element in a conjunction (A and B -> extract A here; "A and B" and "B"
            // are extracted elsewhere).
            //
            ICollection <IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
            if (conjuncts.Count == 0)
            {
                return;
            }

            // Make sure we work from the conjunct with the smallest index.
            IndexedWord earliest = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
            foreach (IndexedWord candidate in conjuncts)
            {
                if (candidate.Index() < earliest.Index())
                {
                    earliest = candidate;
                }
            }

            // Walk left from the start of that conjunct past tokens tagged CC or ",".
            int firstElementEnd = SemanticGraphUtils.LeftMostChildVertice(earliest, dep).Index() - 1;
            while (firstElementEnd > spanBegin && tokens[firstElementEnd - 1].Tag().Matches("CC|,"))
            {
                firstElementEnd--;
            }

            // Only add the shortened mention if something remains and it still contains the headword.
            if (firstElementEnd > spanBegin && headword.Index() - 1 < firstElementEnd)
            {
                AddMention(spanBegin, firstElementEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicDeps, enhancedDeps);
            }
        }