コード例 #1
0
        /// <summary>
        /// Demo entry point: builds uncollapsed English dependencies for one hard-coded
        /// parse tree, logs the graph, and logs every match of the Semgrex pattern
        /// "{}=A &lt;&lt;nsubj {}=B" against it.
        /// </summary>
        /// <param name="args">Command-line arguments; never read.</param>
        public static void Main(string[] args)
        {
            // Normally a parser or a treebank reader supplies the tree; a literal
            // bracketed string keeps this example self-contained.
            Tree parse = Tree.ValueOf("(ROOT  (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))");

            // One-shot route: English uncollapsed dependencies as a SemanticGraph.
            // When many graphs are needed, prefer the factory route shown below and
            // build the intermediate GrammaticalStructure from it instead.
            SemanticGraph dependencies = SemanticGraphFactory.GenerateUncollapsedDependencies(parse);

            // Factory route. The English params could be swapped for the Chinese
            // params or those of any other supported language.
            ITreebankLangParserParams    langParams = new EnglishTreebankParserParams();
            IGrammaticalStructureFactory factory    = langParams.TreebankLanguagePack().GrammaticalStructureFactory(langParams.TreebankLanguagePack().PunctuationWordRejectFilter(), langParams.TypedDependencyHeadFinder());
            GrammaticalStructure         structure  = factory.NewGrammaticalStructure(parse);

            log.Info(dependencies);
            SemgrexPattern nsubjAncestor = SemgrexPattern.Compile("{}=A <<nsubj {}=B");

            // Two results are expected on this tree: "likes" is an ancestor of both
            // "dog" and "my" via the nsubj relation.
            for (SemgrexMatcher hits = nsubjAncestor.Matcher(dependencies); hits.Find();)
            {
                log.Info(hits.GetNode("A") + " <<nsubj " + hits.GetNode("B"));
            }
        }
コード例 #2
0
        /// <summary>
        /// Adds a <c>Mention</c> for every subtree of the sentence parse that matches
        /// <c>npOrPrpMentionPattern</c>, unless its token span is already recorded or
        /// <c>InsideNE</c> reports it as lying inside a named-entity span.
        /// </summary>
        /// <param name="s">Sentence providing the tokens, parse tree and enhanced dependencies.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already used; each new mention's span is added to it.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans handed to <c>InsideNE</c>.</param>
        protected internal static void ExtractNPorPRP(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree parse = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

            // Leaf indices are read from IndexAnnotation below, so index the leaves first.
            parse.IndexLeaves();
            SemanticGraph enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            for (TregexMatcher hits = npOrPrpMentionPattern.Matcher(parse); hits.Find();)
            {
                Tree         matched = hits.GetMatch();
                IList <Tree> leaves  = matched.GetLeaves();

                // The span is [start, end): the first leaf's index minus one is the
                // start, the last leaf's index is used unchanged as the end.
                int start = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                int end   = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));

                // Try not to have a span that ends with a comma.
                if (",".Equals(tokens[end - 1].Word()))
                {
                    end--;
                }

                IntPair span = new IntPair(start, end);
                if (mentionSpanSet.Contains(span) || InsideNE(span, namedEntitySpanSet))
                {
                    continue;
                }

                // -1 is a placeholder mention id.
                Mention mention = new Mention(-1, start, end, enhancedDeps, new List <CoreLabel>(tokens.SubList(start, end)), matched);
                mentions.Add(mention);
                mentionSpanSet.Add(span);
            }
        }
コード例 #3
0
ファイル: Mediator.cs プロジェクト: hanifhn/NLIC
        /// <summary>
        /// Builds an XML document from a semantic graph: the graph supplies the root
        /// node, and each semantic cluster is either attached to its real parent or
        /// appended under that root.
        /// </summary>
        /// <param name="semanticGraph">Graph whose clusters are converted.</param>
        /// <returns>The document, with the root node appended to it.</returns>
        public XmlDocument Mediate(SemanticGraph semanticGraph)
        {
            var contract = new XmlDocument();
            var root     = semanticGraph.XmlNode(contract);

            foreach (var cluster in semanticGraph.SemanticClusters)
            {
                // Clusters reporting a different parent attach themselves elsewhere.
                if (cluster.HasDifferentParent())
                {
                    cluster.SetRealParent(semanticGraph, contract);
                    continue;
                }

                // Everything else hangs directly under the root; a null node is skipped.
                var child = cluster.XmlNode(semanticGraph, contract);
                if (child != null)
                {
                    root.AppendChild(child);
                }
            }

            contract.AppendChild(root);
            return contract;
        }
コード例 #4
0
        /// <summary>
        /// Runs a single Semgrex expression and then the same expression combined
        /// with a second one via ':' against the complicated test graph, checking
        /// the expected matches through <c>RunTest</c>.
        /// </summary>
        public virtual void TestPartition()
        {
            SemanticGraph complicated = MakeComplicatedGraph();

            // Single expression: expects A, B, C and D.
            RunTest("{}=a >> {word:E}", complicated, "A", "B", "C", "D");

            // Same expression plus a second one on the same named node: expects only A.
            RunTest("{}=a >> {word:E} : {}=a >> {word:B}", complicated, "A");
        }
コード例 #5
0
        /// <summary>
        /// Writes a report to standard output: the result of <c>Matches()</c> for the
        /// pattern on the graph, the pretty-printed pattern, and then each match found
        /// by <c>Find()</c> together with its named nodes and named relations.
        /// </summary>
        /// <param name="pattern">Pattern to run.</param>
        /// <param name="graph">Graph to search.</param>
        /// <param name="ignored">Not used by this method.</param>
        public static void OutputResults(SemgrexPattern pattern, SemanticGraph graph, params string[] ignored)
        {
            string verdict = pattern.Matcher(graph).Matches() ? "matches" : "doesn't match";

            System.Console.Out.WriteLine("Matching pattern " + pattern + " to\n" + graph + "  :" + verdict);
            System.Console.Out.WriteLine();
            pattern.PrettyPrint();
            System.Console.Out.WriteLine();

            // A fresh matcher is used for enumeration; the one above was only used for Matches().
            for (SemgrexMatcher hits = pattern.Matcher(graph); hits.Find();)
            {
                System.Console.Out.WriteLine("  " + hits.GetMatch());

                ICollection <string> namedNodes = hits.GetNodeNames();
                if (namedNodes != null)
                {
                    foreach (string nodeName in namedNodes)
                    {
                        System.Console.Out.WriteLine("    " + nodeName + ": " + hits.GetNode(nodeName));
                    }
                }

                ICollection <string> namedRelations = hits.GetRelationNames();
                if (namedRelations != null)
                {
                    foreach (string relationName in namedRelations)
                    {
                        System.Console.Out.WriteLine("    " + relationName + ": " + hits.GetRelnString(relationName));
                    }
                }
            }
        }
コード例 #6
0
        /// <summary>Parse a JSON formatted tree into a SemanticGraph.</summary>
        /// <param name="jsonString">
        /// The JSON string tree to parse, e.g:
        /// "[{\"\"dependent\"\": 7, \"\"dep\"\": \"\"root\"\", \"\"governorgloss\"\": \"\"root\"\", \"\"governor\"\": 0, \"\"dependentgloss\"\": \"\"sport\"\"}, {\"\"dependent\"\": 1, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 2, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"is\"\"}, {\"\"dependent\"\": 3, \"\"dep\"\": \"\"neg\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"not\"\"}, {\"\"dependent\"\": 4, \"\"dep\"\": \"\"det\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"a\"\"}, {\"\"dependent\"\": 5, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"physical\"\", \"\"governor\"\": 6, \"\"dependentgloss\"\": \"\"predominantly\"\"}, {\"\"dependent\"\": 6, \"\"dep\"\": \"\"amod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"physical\"\"}, {\"\"dependent\"\": 9, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"yet\"\"}, {\"\"dependent\"\": 10, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"neither\"\"}, {\"\"dependent\"\": 11, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"are\"\"}, {\"\"dependent\"\": 12, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"shooting\"\"}, {\"\"dependent\"\": 13, \"\"dep\"\": \"\"cc\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"and\"\"}, {\"\"dependent\"\": 14, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"curling\"\"}, 
{\"\"dependent\"\": 14, \"\"dep\"\": \"\"conj:and\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"curling\"\"}, {\"\"dependent\"\": 16, \"\"dep\"\": \"\"nsubjpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"which\"\"}, {\"\"dependent\"\": 18, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"fact\"\", \"\"governor\"\": 19, \"\"dependentgloss\"\": \"\"in\"\"}, {\"\"dependent\"\": 19, \"\"dep\"\": \"\"nmod:in\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"fact\"\"}, {\"\"dependent\"\": 21, \"\"dep\"\": \"\"aux\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"has\"\"}, {\"\"dependent\"\": 22, \"\"dep\"\": \"\"auxpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"been\"\"}, {\"\"dependent\"\": 23, \"\"dep\"\": \"\"dep\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"nicknamed\"\"}, {\"\"dependent\"\": 25, \"\"dep\"\": \"\"dobj\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 26, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"ice\"\", \"\"governor\"\": 27, \"\"dependentgloss\"\": \"\"on\"\"}, {\"\"dependent\"\": 27, \"\"dep\"\": \"\"nmod:on\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"ice\"\"}, {\"\"dependent\"\": 29, \"\"dep\"\": \"\"amod\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"5\"\"}]");
        /// </param>
        /// <param name="tokens">The tokens of the sentence, to form the backing labels of the tree.</param>
        /// <returns>A semantic graph of the sentence, according to the given tree.</returns>
        public static SemanticGraph ParseJsonTree(string jsonString, IList <CoreLabel> tokens)
        {
            IJsonReader   json  = Javax.Json.Json.CreateReader(new StringReader(jsonString));
            SemanticGraph tree  = new SemanticGraph();
            IJsonArray    array = json.ReadArray();

            if (array == null || array.IsEmpty())
            {
                return(tree);
            }
            // Vertices are created lazily and cached by their 1-based token index.
            // Slot 0 corresponds to the artificial root and is never populated.
            IndexedWord[] vertices = new IndexedWord[tokens.Count + 2];
            // Add edges
            for (int i = 0; i < array.Count; i++)
            {
                IJsonObject entry = array.GetJsonObject(i);
                // Parse row
                int dependentIndex = entry.GetInt("dependent");
                int governorIndex  = entry.GetInt("governor");
                // Validate both indices BEFORE they are used to index 'vertices' or
                // 'tokens'. Checking only after the array access let out-of-range
                // values escape as exceptions instead of reaching the intended
                // "mismatch" fallback below.
                if (dependentIndex < 1 || dependentIndex > tokens.Count || governorIndex < 0 || governorIndex > tokens.Count)
                {
                    // Bizarre mismatch in sizes; the malt parser seems to do this often
                    return(new SemanticGraph());
                }
                if (vertices[dependentIndex] == null)
                {
                    vertices[dependentIndex] = new IndexedWord(tokens[dependentIndex - 1]);
                }
                IndexedWord dependent = vertices[dependentIndex];
                if (vertices[governorIndex] == null && governorIndex > 0)
                {
                    vertices[governorIndex] = new IndexedWord(tokens[governorIndex - 1]);
                }
                // Null when governorIndex == 0, i.e. the dependent is a root.
                IndexedWord governor = vertices[governorIndex];
                string      relation = entry.GetString("dep");
                // Process row
                if (governorIndex == 0)
                {
                    tree.AddRoot(dependent);
                }
                else
                {
                    tree.AddVertex(dependent);
                    if (!tree.ContainsVertex(governor))
                    {
                        tree.AddVertex(governor);
                    }
                    // "ref" rows still contribute their vertices, but no edge is added for them.
                    if (!"ref".Equals(relation))
                    {
                        tree.AddEdge(governor, dependent, GrammaticalRelation.ValueOf(Language.English, relation), double.NegativeInfinity, false);
                    }
                }
            }
            return(tree);
        }
コード例 #7
0
        // TODO: implement referencing regexes
        /// <summary>
        /// Builds a fixed ten-vertex test graph whose vertices carry the words "A" to "J".
        /// </summary>
        /// <remarks>
        /// "A" is the root. Edges run A to B, C, D; B, C, D to E; E to F, G, I;
        /// F and G to H; H to I; and I to J. The relation attached to each edge is
        /// arbitrary: the graph is not meant to be linguistically meaningful.
        /// </remarks>
        /// <returns>The populated graph.</returns>
        public static SemanticGraph MakeComplicatedGraph()
        {
            string[]      labels = new string[] { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J" };
            IndexedWord[] v      = new IndexedWord[labels.Length];
            SemanticGraph g      = new SemanticGraph();

            int position = 0;
            foreach (string label in labels)
            {
                // Third constructor argument is the 1-based position of the word.
                IndexedWord vertex = new IndexedWord("test", 1, position + 1);
                vertex.SetWord(label);
                vertex.SetValue(label);
                v[position] = vertex;
                g.AddVertex(vertex);
                position++;
            }
            g.SetRoot(v[0]);

            // A -> B, C, D
            g.AddEdge(v[0], v[1], EnglishGrammaticalRelations.Modifier, 1.0, false);
            g.AddEdge(v[0], v[2], EnglishGrammaticalRelations.DirectObject, 1.0, false);
            g.AddEdge(v[0], v[3], EnglishGrammaticalRelations.IndirectObject, 1.0, false);
            // B, C, D -> E
            g.AddEdge(v[1], v[4], EnglishGrammaticalRelations.Marker, 1.0, false);
            g.AddEdge(v[2], v[4], EnglishGrammaticalRelations.Expletive, 1.0, false);
            g.AddEdge(v[3], v[4], EnglishGrammaticalRelations.AdjectivalComplement, 1.0, false);
            // E -> F, G, I
            g.AddEdge(v[4], v[5], EnglishGrammaticalRelations.AdjectivalModifier, 1.0, false);
            g.AddEdge(v[4], v[6], EnglishGrammaticalRelations.AdverbialModifier, 1.0, false);
            g.AddEdge(v[4], v[8], EnglishGrammaticalRelations.Modifier, 1.0, false);
            // F, G -> H
            g.AddEdge(v[5], v[7], EnglishGrammaticalRelations.PossessionModifier, 1.0, false);
            g.AddEdge(v[6], v[7], EnglishGrammaticalRelations.PossessiveModifier, 1.0, false);
            // H -> I -> J
            g.AddEdge(v[7], v[8], EnglishGrammaticalRelations.Agent, 1.0, false);
            g.AddEdge(v[8], v[9], EnglishGrammaticalRelations.Determiner, 1.0, false);
            return g;
        }
コード例 #8
0
 /// <summary>Determine if a tree is cyclic.</summary>
 /// <remarks>
 /// For every non-root vertex, the chain of governors is followed upwards, always
 /// taking the first incoming edge of each node. A cycle is reported as soon as a
 /// node is reached a second time along one chain. A node without any incoming
 /// edge ends its chain.
 /// </remarks>
 /// <param name="tree">The tree to check.</param>
 /// <returns>True if the tree has at least one cycle in it.</returns>
 public static bool IsCyclic(SemanticGraph tree)
 {
     // The root set does not change while scanning, so fetch it once.
     var roots = tree.GetRoots();

     foreach (IndexedWord vertex in tree.VertexSet())
     {
         if (roots.Contains(vertex))
         {
             continue;
         }
         ICollection <IndexedWord> seen = new HashSet <IndexedWord>();
         seen.Add(vertex);
         IndexedWord node = FirstGovernorOrNull(tree, vertex);
         while (node != null)
         {
             if (seen.Contains(node))
             {
                 return(true);
             }
             seen.Add(node);
             node = FirstGovernorOrNull(tree, node);
         }
     }
     return(false);
 }

 /// <summary>
 /// Returns the governor of the first incoming edge of <paramref name="node"/>,
 /// or null when the node has no incoming edge.
 /// </summary>
 private static IndexedWord FirstGovernorOrNull(SemanticGraph tree, IndexedWord node)
 {
     // An enumerator must be advanced with MoveNext() before Current is valid,
     // and both calls must go to the SAME enumerator instance.
     var incoming = tree.IncomingEdgeIterator(node);
     return incoming.MoveNext() ? incoming.Current.GetGovernor() : null;
 }
コード例 #9
0
        /// <summary>
        /// Collects the dependency patterns for the leaf vertices of a sentence's graph,
        /// keyed by zero-based token position.
        /// </summary>
        /// <param name="sent">Sentence instance; it is cast to <c>DataInstanceDep</c> to obtain the graph.</param>
        /// <param name="commonWords">Passed through to <c>GetContext</c>.</param>
        /// <returns>
        /// The pattern map, or null when the graph is null or empty, or when
        /// fetching the leaf vertices throws an <c>ArgumentException</c>.
        /// </returns>
        internal static IDictionary <int, ICollection <DepPattern> > GetPatternsForAllPhrases(DataInstance sent, ICollection <CandidatePhrase> commonWords)
        {
            SemanticGraph depGraph = ((DataInstanceDep)sent).GetGraph();

            if (depGraph == null || depGraph.IsEmpty())
            {
                System.Console.Out.WriteLine("graph is empty or null!");
                return null;
            }

            ICollection <IndexedWord> leaves;
            try
            {
                leaves = depGraph.GetLeafVertices();
            }
            catch (ArgumentException)
            {
                return null;
            }

            IDictionary <int, ICollection <DepPattern> > patternsByToken = new Dictionary <int, ICollection <DepPattern> >();
            foreach (IndexedWord leaf in leaves)
            {
                // Word indices start at 1; the map is keyed from 0.
                patternsByToken[leaf.Index() - 1] = GetContext(leaf, depGraph, commonWords, sent);
            }
            return patternsByToken;
        }
コード例 #10
0
        /// <summary>
        /// Creates a node from <c>nodeString</c>, adds it to the graph as a vertex,
        /// and registers it under <c>nodeName</c>.
        /// </summary>
        /// <param name="sg">Graph that receives the new vertex.</param>
        /// <param name="sm">Current matcher; not read by this edit.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord created = AddDep.FromCheapString(nodeString);

            sg.AddVertex(created);
            AddNamedNode(created, nodeName);
        }
コード例 #11
0
        /// <summary>Remove "case" edges that hang off the target of an "nmod" edge.</summary>
        /// <remarks>
        /// A "case" edge is removed, together with its dependent vertex, when its
        /// governor has an incoming edge whose short relation name is "nmod" and its
        /// dependent has no outgoing edges. This replicates the behavior of the old
        /// Stanford dependencies on universal dependencies.
        /// </remarks>
        /// <param name="tree">The tree to modify in place.</param>
        public static void StripPrepCases(SemanticGraph tree)
        {
            // Collect first and delete afterwards, so the graph is not modified
            // while its edges are being enumerated.
            IList <SemanticGraphEdge> doomed = new List <SemanticGraphEdge>();

            foreach (SemanticGraphEdge candidate in tree.EdgeIterable())
            {
                if (!"case".Equals(candidate.GetRelation().ToString()))
                {
                    continue;
                }

                bool governorHasNmod = false;
                foreach (SemanticGraphEdge parentEdge in tree.IncomingEdgeIterable(candidate.GetGovernor()))
                {
                    if ("nmod".Equals(parentEdge.GetRelation().GetShortName()))
                    {
                        governorHasNmod = true;
                        break;
                    }
                }
                if (!governorHasNmod)
                {
                    continue;
                }

                // Only case markers without outgoing edges of their own are stripped.
                if (tree.OutgoingEdgeIterator(candidate.GetDependent()).MoveNext())
                {
                    continue;
                }
                doomed.Add(candidate);
            }

            foreach (SemanticGraphEdge victim in doomed)
            {
                tree.RemoveEdge(victim);
                tree.RemoveVertex(victim.GetDependent());
                System.Diagnostics.Debug.Assert(IsTree(tree));
            }
        }
        //public ExtractPhraseFromPattern(Namespace curNS) {
        //  this.curNS = curNS;
        //}
        /// <summary>
        /// Searches downwards from <paramref name="parent"/> for <paramref name="child"/>,
        /// giving up once the counter in <paramref name="depths"/> reaches its limit.
        /// </summary>
        /// <param name="g">Graph to walk.</param>
        /// <param name="parent">Node the search starts from.</param>
        /// <param name="child">Node being looked for.</param>
        /// <param name="depths">
        /// Slot 0 holds the limit (<c>int.MaxValue</c> short-circuits to true); slot 1 is
        /// a running counter that this method increments in place and never decrements.
        /// </param>
        /// <returns>True when the child is found before the counter reaches the limit.</returns>
        private bool CheckIfSatisfiedMaxDepth(SemanticGraph g, IndexedWord parent, IndexedWord child, IntPair depths)
        {
            if (depths.Get(0) == int.MaxValue || parent.Equals(child))
            {
                return true;
            }

            // Direct children are checked before the counter is touched.
            foreach (IndexedWord direct in g.GetChildren(parent))
            {
                if (direct.Equals(child))
                {
                    return true;
                }
            }

            // NOTE(review): the counter lives in the shared 'depths' pair, so increments
            // made while exploring one child remain in effect for its siblings.
            depths.Set(1, depths.Get(1) + 1);
            if (depths.Get(1) >= depths.Get(0))
            {
                return false;
            }

            foreach (IndexedWord next in g.GetChildren(parent))
            {
                if (CheckIfSatisfiedMaxDepth(g, next, child, depths))
                {
                    return true;
                }
            }
            return false;
        }
 /// <summary>
 /// Walks the graph below <paramref name="w"/> and appends to
 /// <paramref name="descendantSet"/> every child reached through an edge whose
 /// relation string equals <paramref name="relation"/>.
 /// </summary>
 /// <param name="g">Graph to walk.</param>
 /// <param name="w">Node whose children are inspected.</param>
 /// <param name="relation">Relation string compared against each edge's relation.</param>
 /// <param name="seenNodes">Nodes already visited; updated in place to stop repeat visits.</param>
 /// <param name="descendantSet">Receives the matching children; a node already in it is not expanded.</param>
 private void DescendantsWithReln(SemanticGraph g, IndexedWord w, string relation, IList <IndexedWord> seenNodes, IList <IndexedWord> descendantSet)
 {
     if (seenNodes.Contains(w))
     {
         return;
     }
     seenNodes.Add(w);

     // Stop at nodes already collected, and at nodes carrying an ignored tag
     // when tag filtering is switched on.
     if (descendantSet.Contains(w) || (ignoreCommonTags && ignoreTags.Contains(w.Tag().Trim())))
     {
         return;
     }

     foreach (IndexedWord kid in g.GetChildren(w))
     {
         // One entry is added per matching edge between w and this child.
         foreach (SemanticGraphEdge link in g.GetAllEdges(w, kid))
         {
             if (link.GetRelation().ToString().Equals(relation))
             {
                 descendantSet.Add(kid);
             }
         }
         DescendantsWithReln(g, kid, relation, seenNodes, descendantSet);
     }
 }
コード例 #14
0
        /// <summary>
        /// Computes a zero-based (begin, end) index pair for the phrase headed by
        /// <paramref name="headword"/>, based on its left-most and right-most child vertices.
        /// </summary>
        /// <remarks>
        /// When the head has a copula child, the begin index is recomputed from the
        /// child that follows the copula in the head's child list, or set to the
        /// head's own position when the copula is the last child.
        /// </remarks>
        /// <param name="headword">Head of the phrase.</param>
        /// <param name="dep">Dependency graph containing the head.</param>
        /// <param name="sent">Sentence tokens; not read by this method.</param>
        /// <returns>The begin and end indices as an <c>IntPair</c>.</returns>
        private IntPair GetNPSpanOld(IndexedWord headword, SemanticGraph dep, IList <CoreLabel> sent)
        {
            IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
            Pair <IndexedWord, IndexedWord> extremes = SemanticGraphUtils.LeftRightMostChildVertices(headword, dep);

            // The head itself can be the first or the last word of the span.
            int headPos = headword.Index() - 1;
            int first   = Math.Min(headPos, extremes.first.Index() - 1);
            int last    = Math.Max(headPos, extremes.second.Index() - 1);

            if (copula != null)
            {
                IList <IndexedWord> kids = dep.GetChildList(headword);
                int afterCopula = kids.IndexOf(copula) + 1;

                first = afterCopula < kids.Count
                    ? Math.Min(headPos, SemanticGraphUtils.LeftMostChildVertice(kids[afterCopula], dep).Index() - 1)
                    : headPos;
            }
            return new IntPair(first, last);
        }
コード例 #15
0
        /// <summary>
        /// Runs this Ssurgeon edit script using the supplied Semgrex pattern in place of
        /// the pattern attached to this object.
        /// </summary>
        /// <remarks>
        /// The predicate test, when one is set, still filters matches, and any named
        /// nodes the edits need must be present. Each accepted match is applied to its
        /// own copy of the input graph.
        /// </remarks>
        /// <param name="sg">Graph to match against; the edits are applied to copies of it.</param>
        /// <param name="overridePattern">Pattern used for matching.</param>
        /// <returns>One edited graph copy per accepted match.</returns>
        /// <exception cref="System.Exception"/>
        public virtual ICollection <SemanticGraph> Execute(SemanticGraph sg, SemgrexPattern overridePattern)
        {
            SemgrexMatcher hits = overridePattern.Matcher(sg);
            ICollection <SemanticGraph> results = new List <SemanticGraph>();

            while (hits.Find())
            {
                if (predicateTest != null && !predicateTest.Test(hits))
                {
                    continue;
                }

                // The named-node map is reset for every match, because each unique
                // Semgrex match produces its own separate graph.
                nodeMap = Generics.NewHashMap();

                SemanticGraph edited = new SemanticGraph(sg);
                foreach (SsurgeonEdit step in editScript)
                {
                    step.Evaluate(edited, hits);
                }
                results.Add(edited);
            }
            return results;
        }
コード例 #16
0
        /// <summary>
        /// Hands the sentence's basic dependency graph and parse tree to the feature annotator.
        /// </summary>
        /// <param name="annotation">Enclosing annotation; not read here.</param>
        /// <param name="sentence">Sentence whose graph and tree are annotated.</param>
        protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
        {
            Tree          parse     = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            SemanticGraph basicDeps = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));

            featureAnnotator.AddFeatures(basicDeps, parse, false, true);
        }
コード例 #17
0
        /// <summary>Generate the training features from the CoNLL input file.</summary>
        /// <remarks>
        /// For every document produced by the <c>DocumentMaker</c>, mentions in gold
        /// coreference clusters are added with label "1", and predicted mentions whose
        /// head word is not the head of any gold mention are added with label "0".
        /// Mentions whose head tag starts with "V", and mentions whose head is not
        /// found in the enhanced dependency graph, are skipped.
        /// </remarks>
        /// <param name="props">Properties used to build the dictionaries and the document maker.</param>
        /// <returns>Dataset of feature vectors</returns>
        /// <exception cref="System.Exception"/>
        private static GeneralDataset <string, string> GenerateFeatureVectors(Properties props)
        {
            GeneralDataset <string, string> dataset = new Dataset <string, string>();
            Dictionaries  dict     = new Dictionaries(props);
            DocumentMaker docMaker = new DocumentMaker(props, dict);
            Document      document;

            while ((document = docMaker.NextDoc()) != null)
            {
                SetTokenIndices(document);
                IDictionary <int, CorefCluster> entities = document.goldCorefClusters;
                // Generate features for coreferent mentions with class label 1
                foreach (CorefCluster entity in entities.Values)
                {
                    foreach (Mention mention in entity.GetCorefMentions())
                    {
                        // Ignore verbal mentions. Tags are identifiers, so compare
                        // ordinally rather than with the current culture's rules.
                        if (mention.headWord.Tag().StartsWith("V", System.StringComparison.Ordinal))
                        {
                            continue;
                        }
                        // Skip mentions whose head is missing from the dependency graph.
                        IndexedWord head = mention.enhancedDependency.GetNodeByIndexSafe(mention.headWord.Index());
                        if (head == null)
                        {
                            continue;
                        }
                        List <string> feats = mention.GetSingletonFeatures(dict);
                        dataset.Add(new BasicDatum <string, string>(feats, "1"));
                    }
                }
                // Generate features for singletons with class label 0
                List <CoreLabel> goldHeads = new List <CoreLabel>();
                foreach (Mention goldMention in document.goldMentionsByID.Values)
                {
                    goldHeads.Add(goldMention.headWord);
                }
                foreach (Mention predictedMention in document.predictedMentionsByID.Values)
                {
                    SemanticGraph dep  = predictedMention.enhancedDependency;
                    IndexedWord   head = dep.GetNodeByIndexSafe(predictedMention.headWord.Index());
                    if (head == null || !dep.VertexSet().Contains(head))
                    {
                        continue;
                    }
                    // Ignore verbal mentions (ordinal comparison, as above)
                    if (predictedMention.headWord.Tag().StartsWith("V", System.StringComparison.Ordinal))
                    {
                        continue;
                    }
                    // If the mention is in the gold set, it is not a singleton and thus ignore
                    if (goldHeads.Contains(predictedMention.headWord))
                    {
                        continue;
                    }
                    dataset.Add(new BasicDatum <string, string>(predictedMention.GetSingletonFeatures(dict), "0"));
                }
            }
            dataset.SummaryStatistics();
            return(dataset);
        }
コード例 #18
0
        /// <summary>
        /// Stores the parse results on the sentence: optionally maps the trees, fills in
        /// the parse annotations, optionally stores a binarized copy of the first tree,
        /// and back-fills missing sentence indices on the collapsed dependency graph.
        /// </summary>
        /// <param name="sentence">Sentence receiving the annotations.</param>
        /// <param name="trees">Parse trees for the sentence; the first is the one binarized.</param>
        private void FinishSentence(ICoreMap sentence, IList <Tree> trees)
        {
            if (treeMap != null)
            {
                IList <Tree> remapped = Generics.NewLinkedList();
                foreach (Tree original in trees)
                {
                    Tree converted = treeMap.Apply(original);
                    remapped.Add(converted);
                }
                trees = remapped;
            }

            ParserAnnotatorUtils.FillInParseAnnotations(Verbose, BuildGraphs, gsf, sentence, trees, extraDependencies);

            if (saveBinaryTrees)
            {
                TreeBinarizer toBinary  = TreeBinarizer.SimpleTreeBinarizer(parser.GetTLPParams().HeadFinder(), parser.TreebankLanguagePack());
                Tree          binarized = toBinary.TransformTree(trees[0]);
                Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(binarized);
                sentence.Set(typeof(TreeCoreAnnotations.BinarizedTreeAnnotation), binarized);
            }

            // In some corner cases graph nodes end up without a sentence index, so
            // make one pass and copy the sentence's own index onto any node lacking it.
            SemanticGraph collapsed = sentence.Get(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation));
            if (collapsed == null)
            {
                return;
            }

            foreach (IndexedWord vertex in collapsed.VertexSet())
            {
                if (vertex.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)) != null)
                {
                    continue;
                }
                if (sentence.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)) != null)
                {
                    vertex.SetSentIndex(sentence.Get(typeof(CoreAnnotations.SentenceIndexAnnotation)));
                }
            }
        }
コード例 #19
0
        /// <summary>
        /// Adds a <c>Mention</c> for every subtree of the sentence parse that matches
        /// <c>npOrPrpMentionPattern</c>, unless its span is already recorded or, for
        /// languages other than Chinese, <c>InsideNE</c> reports it inside a named entity.
        /// </summary>
        /// <param name="s">Sentence providing tokens, parse tree and dependency graphs.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already used; each new mention's span is added to it.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans handed to <c>InsideNE</c>.</param>
        public virtual void ExtractNPorPRP(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree parse = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));

            // Leaf indices are read from IndexAnnotation below, so index the leaves first.
            parse.IndexLeaves();
            SemanticGraph basicDeps    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // Fall back to the basic dependencies when no enhanced graph is stored.
            if (enhancedDeps == null)
            {
                enhancedDeps = basicDeps;
            }

            for (TregexMatcher hits = npOrPrpMentionPattern.Matcher(parse); hits.Find();)
            {
                Tree         matched = hits.GetMatch();
                IList <Tree> leaves  = matched.GetLeaves();

                // The span is [start, end). Unlike the trimming variant of this
                // routine, a trailing comma is kept in the span here.
                int start = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                int end   = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));

                IntPair span = new IntPair(start, end);
                if (mentionSpanSet.Contains(span))
                {
                    continue;
                }
                // The named-entity exclusion is not applied when the language is Chinese.
                if (lang != Locale.Chinese && InsideNE(span, namedEntitySpanSet))
                {
                    continue;
                }

                // -1 is a placeholder mention id.
                Mention mention = new Mention(-1, start, end, tokens, basicDeps, enhancedDeps, new List <CoreLabel>(tokens.SubList(start, end)), matched);
                mentions.Add(mention);
                mentionSpanSet.Add(span);
            }
        }
コード例 #20
0
        /// <summary>
        /// Appends a one-line rendering of <paramref name="node"/> and the nodes below it
        /// to <paramref name="sb"/>. A node with outgoing edges is wrapped in parentheses.
        /// </summary>
        /// <param name="sg">Graph being formatted.</param>
        /// <param name="node">Node to render.</param>
        /// <param name="sb">Buffer receiving the output.</param>
        /// <param name="usedOneline">Nodes already expanded in this rendering; updated in place.</param>
        private void FormatSGNodeOnelineHelper(SemanticGraph sg, IndexedWord node, StringBuilder sb, ICollection <IndexedWord> usedOneline)
        {
            usedOneline.Add(node);
            bool hasChildren = sg.OutDegree(node) > 0;

            if (hasChildren)
            {
                sb.Append(Lparen);
            }
            sb.Append(FormatLabel(node));

            foreach (SemanticGraphEdge edge in sg.GetOutEdgesSorted(node))
            {
                IndexedWord target = edge.GetDependent();
                sb.Append(Space);
                if (showRelns)
                {
                    sb.Append(edge.GetRelation());
                    sb.Append(Colon);
                }

                // A node that was already expanded is printed as a bare label instead
                // of being expanded again; this avoids an infinite loop on cycles.
                if (usedOneline.Contains(target) || used.Contains(target))
                {
                    sb.Append(FormatLabel(target));
                }
                else
                {
                    FormatSGNodeOnelineHelper(sg, target, sb, usedOneline);
                }
            }

            if (hasChildren)
            {
                sb.Append(Rparen);
            }
        }
コード例 #21
0
        /// <summary>
        /// Runs a battery of single-relation semgrex patterns against the graph built by
        /// MakeComplicatedGraph and checks which words each pattern matches.
        /// </summary>
        public virtual void TestComplicatedGraph()
        {
            SemanticGraph complicated = MakeComplicatedGraph();

            // Patterns using the "<" and ">" operators.
            RunTest("{} < {word:A}", complicated, "B", "C", "D");
            RunTest("{} > {word:E}", complicated, "B", "C", "D");
            RunTest("{} > {word:J}", complicated, "I");
            RunTest("{} < {word:E}", complicated, "F", "G", "I");
            RunTest("{} < {word:I}", complicated, "J");

            // Patterns using the "<<" operator, one per word A..K.
            // J and K are expected to produce no matches.
            RunTest("{} << {word:A}", complicated, "B", "C", "D", "E", "F", "G", "H", "I", "J");
            RunTest("{} << {word:B}", complicated, "E", "F", "G", "H", "I", "J");
            RunTest("{} << {word:C}", complicated, "E", "F", "G", "H", "I", "J");
            RunTest("{} << {word:D}", complicated, "E", "F", "G", "H", "I", "J");
            RunTest("{} << {word:E}", complicated, "F", "G", "H", "I", "J");
            RunTest("{} << {word:F}", complicated, "H", "I", "J");
            RunTest("{} << {word:G}", complicated, "H", "I", "J");
            RunTest("{} << {word:H}", complicated, "I", "J");
            RunTest("{} << {word:I}", complicated, "J");
            RunTest("{} << {word:J}", complicated);
            RunTest("{} << {word:K}", complicated);

            // Patterns using the ">>" operator, one per word A..K.
            // A and K are expected to produce no matches.
            RunTest("{} >> {word:A}", complicated);
            RunTest("{} >> {word:B}", complicated, "A");
            RunTest("{} >> {word:C}", complicated, "A");
            RunTest("{} >> {word:D}", complicated, "A");
            RunTest("{} >> {word:E}", complicated, "A", "B", "C", "D");
            RunTest("{} >> {word:F}", complicated, "A", "B", "C", "D", "E");
            RunTest("{} >> {word:G}", complicated, "A", "B", "C", "D", "E");
            RunTest("{} >> {word:H}", complicated, "A", "B", "C", "D", "E", "F", "G");
            RunTest("{} >> {word:I}", complicated, "A", "B", "C", "D", "E", "F", "G", "H");
            RunTest("{} >> {word:J}", complicated, "A", "B", "C", "D", "E", "F", "G", "H", "I");
            RunTest("{} >> {word:K}", complicated);
        }
コード例 #22
0
        // public method --------------------------------------------------------------
        /// <summary>
        /// Returns a pretty-printed string representation of the given semantic graph,
        /// on one or more lines.
        /// </summary>
        /// <remarks>
        /// An empty graph is rendered as "[]". A graph with exactly one root is formatted
        /// starting from that root; otherwise every root is emitted on its own line prefixed
        /// with "root_N> ". The final text is wrapped in square brackets unless it already
        /// begins with "[".
        /// This method is not thread-safe: it resets the instance fields <c>@out</c> and
        /// <c>used</c> on every call, so concurrent calls on one instance would interfere.
        /// </remarks>
        /// <param name="sg">The graph to format.</param>
        /// <returns>The bracketed textual form of <paramref name="sg"/>.</returns>
        public virtual string FormatSemanticGraph(SemanticGraph sg)
        {
            if (sg.VertexSet().IsEmpty())
            {
                return("[]");
            }
            // Per-call state lives in fields (not thread-safe); start from a clean slate.
            @out = new StringBuilder();
            used = Generics.NewHashSet();
            if (sg.GetRoots().Count == 1)
            {
                FormatSGNode(sg, sg.GetFirstRoot(), 1);
            }
            else
            {
                int index = 0;
                foreach (IndexedWord root in sg.GetRoots())
                {
                    index += 1;
                    @out.Append("root_").Append(index).Append("> ");
                    // NOTE(review): 9 is presumably the starting column after the "root_N> " prefix - confirm against FormatSGNode.
                    FormatSGNode(sg, root, 9);
                    @out.Append("\n");
                }
            }
            string result = @out.ToString();

            // "[" is a structural marker, not linguistic text: compare ordinally so the
            // check does not depend on the current culture's collation rules.
            if (!result.StartsWith("[", System.StringComparison.Ordinal))
            {
                result = "[" + result + "]";
            }
            return(result);
        }
コード例 #23
0
        /// <summary>
        /// Checks that a relation captured with "=foo" in a semgrex pattern can be read back
        /// through GetRelnString, together with the named "gov" and "dep" nodes. Each pattern
        /// is expected to match exactly once.
        /// </summary>
        public virtual void TestNamedRelation()
        {
            SemanticGraph sentence = SemanticGraph.ValueOf("[ate subj>Bill dobj>[muffins compound>blueberry]]");

            // ">>" from idx 0 to idx 3.
            SemgrexPattern govToDepFar    = SemgrexPattern.Compile("{idx:0}=gov >>=foo {idx:3}=dep");
            SemgrexMatcher govToDepFarHit = govToDepFar.Matcher(sentence);
            NUnit.Framework.Assert.IsTrue(govToDepFarHit.Find());
            NUnit.Framework.Assert.AreEqual("ate", govToDepFarHit.GetNode("gov").ToString());
            NUnit.Framework.Assert.AreEqual("blueberry", govToDepFarHit.GetNode("dep").ToString());
            NUnit.Framework.Assert.AreEqual("compound", govToDepFarHit.GetRelnString("foo"));
            NUnit.Framework.Assert.IsFalse(govToDepFarHit.Find());

            // "<<" from idx 3 to idx 0; here the named relation is reported as "dobj".
            SemgrexPattern depToGovFar    = SemgrexPattern.Compile("{idx:3}=dep <<=foo {idx:0}=gov");
            SemgrexMatcher depToGovFarHit = depToGovFar.Matcher(sentence);
            NUnit.Framework.Assert.IsTrue(depToGovFarHit.Find());
            NUnit.Framework.Assert.AreEqual("ate", depToGovFarHit.GetNode("gov").ToString());
            NUnit.Framework.Assert.AreEqual("blueberry", depToGovFarHit.GetNode("dep").ToString());
            NUnit.Framework.Assert.AreEqual("dobj", depToGovFarHit.GetRelnString("foo"));
            NUnit.Framework.Assert.IsFalse(depToGovFarHit.Find());

            // "<" from idx 3 to idx 2.
            SemgrexPattern depToGovNear    = SemgrexPattern.Compile("{idx:3}=dep <=foo {idx:2}=gov");
            SemgrexMatcher depToGovNearHit = depToGovNear.Matcher(sentence);
            NUnit.Framework.Assert.IsTrue(depToGovNearHit.Find());
            NUnit.Framework.Assert.AreEqual("muffins", depToGovNearHit.GetNode("gov").ToString());
            NUnit.Framework.Assert.AreEqual("blueberry", depToGovNearHit.GetNode("dep").ToString());
            NUnit.Framework.Assert.AreEqual("compound", depToGovNearHit.GetRelnString("foo"));
            NUnit.Framework.Assert.IsFalse(depToGovNearHit.Find());

            // ">" from idx 2 to idx 3.
            SemgrexPattern govToDepNear    = SemgrexPattern.Compile("{idx:2}=gov >=foo {idx:3}=dep");
            SemgrexMatcher govToDepNearHit = govToDepNear.Matcher(sentence);
            NUnit.Framework.Assert.IsTrue(govToDepNearHit.Find());
            NUnit.Framework.Assert.AreEqual("muffins", govToDepNearHit.GetNode("gov").ToString());
            NUnit.Framework.Assert.AreEqual("blueberry", govToDepNearHit.GetNode("dep").ToString());
            NUnit.Framework.Assert.AreEqual("compound", govToDepNearHit.GetRelnString("foo"));
            NUnit.Framework.Assert.IsFalse(govToDepNearHit.Find());
        }
コード例 #24
0
        /// <summary>Determine the case of the pronoun "you" or "it".</summary>
        /// <param name="sg">Graph in which the pronoun is looked up by index.</param>
        /// <param name="word">The pronoun; only its index is used to locate the graph node.</param>
        /// <returns>
        /// "Acc" when the edge from the pronoun's parent carries a relation that Object is an
        /// ancestor of, or a relation that NominalModifier is an ancestor of (or the Root
        /// relation) while the pronoun has a CaseMarker child. "Nom" in every other case,
        /// including when no parent or no edge is found.
        /// </returns>
        private static string PronounCase(SemanticGraph sg, IndexedWord word)
        {
            IndexedWord pronoun  = sg.GetNodeByIndex(word.Index());
            IndexedWord governor = sg.GetParent(pronoun);

            if (governor == null)
            {
                return "Nom";
            }

            SemanticGraphEdge incoming = sg.GetEdge(governor, pronoun);
            if (incoming == null)
            {
                return "Nom";
            }

            if (UniversalEnglishGrammaticalRelations.Object.IsAncestor(incoming.GetRelation()))
            {
                /* The pronoun is an object. */
                return "Acc";
            }

            bool nominalModifierOrRoot = UniversalEnglishGrammaticalRelations.NominalModifier.IsAncestor(incoming.GetRelation()) || incoming.GetRelation() == GrammaticalRelation.Root;
            if (nominalModifierOrRoot && sg.HasChildWithReln(pronoun, UniversalEnglishGrammaticalRelations.CaseMarker))
            {
                /* The pronoun is the head of a prepositional phrase. */
                return "Acc";
            }

            return "Nom";
        }
コード例 #25
0
        /// <summary>
        /// Asserts that <paramref name="pattern"/> produces exactly the expected matches on
        /// <paramref name="graph"/>, in any order.
        /// </summary>
        /// <param name="pattern">Compiled semgrex pattern to run.</param>
        /// <param name="graph">Graph the pattern is matched against.</param>
        /// <param name="expectedMatches">
        /// String form of every expected match; a value may be repeated when the same node is
        /// expected to match more than once.
        /// </param>
        /// <exception cref="AssertionFailedError">
        /// Thrown when there are too few matches, an unexpected match, more occurrences of a
        /// match than expected, or a further match after all expected ones were consumed.
        /// </exception>
        public static void RunTest(SemgrexPattern pattern, SemanticGraph graph, params string[] expectedMatches)
        {
            // results are not in the order I would expect.  Using a counter
            // allows them to be in any order
            IntCounter <string> counts = new IntCounter <string>();

            foreach (string expected in expectedMatches)
            {
                counts.IncrementCount(expected);
            }
            SemgrexMatcher matcher = pattern.Matcher(graph);

            for (int found = 0; found < expectedMatches.Length; ++found)
            {
                if (!matcher.Find())
                {
                    throw new AssertionFailedError("Expected " + expectedMatches.Length + " matches for pattern " + pattern + " on " + graph + ", only got " + found);
                }
                string match = matcher.GetMatch().ToString();
                if (!counts.ContainsKey(match))
                {
                    throw new AssertionFailedError("Unexpected match " + match + " for pattern " + pattern + " on " + graph);
                }
                // Consume one expected occurrence; going negative means a duplicate match.
                counts.DecrementCount(match);
                if (counts.GetCount(match) < 0)
                {
                    throw new AssertionFailedError("Found too many matches for " + match + " for pattern " + pattern + " on " + graph);
                }
            }
            // Every expected match has been consumed; any further match is an error.
            if (matcher.FindNextMatchingNode())
            {
                throw new AssertionFailedError("Found more than " + expectedMatches.Length + " matches for pattern " + pattern + " on " + graph + "... extra match is " + matcher.GetMatch());
            }
        }
コード例 #26
0
        /// <summary>Extracts features from relative and interrogative pronouns.</summary>
        /// <param name="sg">Graph used to look up the word's parent and grandparent.</param>
        /// <param name="word">Word to inspect; only words whose tag starts with "W" get a feature.</param>
        /// <returns>
        /// A new dictionary. It is empty when the tag does not start with "W". Otherwise it
        /// holds a single "PronType" entry: "Rel" when the edge from the grandparent to the
        /// parent is a relative clause modifier, else "Dem" for the word "that" (ignoring
        /// case), else "Int".
        /// </returns>
        private static Dictionary <string, string> GetRelAndIntPronFeatures(SemanticGraph sg, IndexedWord word)
        {
            Dictionary <string, string> features = new Dictionary <string, string>();

            // POS tags are identifiers, so the prefix test is ordinal rather than culture-aware.
            if (word.Tag().StartsWith("W", System.StringComparison.Ordinal))
            {
                bool        isRel  = false;
                IndexedWord parent = sg.GetParent(word);
                if (parent != null)
                {
                    IndexedWord parentParent = sg.GetParent(parent);
                    if (parentParent != null)
                    {
                        SemanticGraphEdge edge = sg.GetEdge(parentParent, parent);
                        // GetEdge is treated as nullable elsewhere (see PronounCase); a missing
                        // edge simply means "not a relative clause" instead of a crash.
                        isRel = edge != null && edge.GetRelation().Equals(UniversalEnglishGrammaticalRelations.RelativeClauseModifier);
                    }
                }
                if (isRel)
                {
                    features["PronType"] = "Rel";
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "that"))
                    {
                        features["PronType"] = "Dem";
                    }
                    else
                    {
                        features["PronType"] = "Int";
                    }
                }
            }
            return(features);
        }
コード例 #27
0
        /// <summary>
        /// Adds a mention for each "m1"/"m2" node matched by enumerationsMentionPattern in the
        /// sentence's parse tree, unless its span is already recorded or lies inside a named entity.
        /// </summary>
        /// <param name="s">Sentence providing tokens, parse tree and enhanced dependencies.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already turned into mentions; new spans are added to it.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans passed to InsideNE to filter candidates.</param>
        protected internal static void ExtractEnumerations(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens       = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            Tree              parse        = s.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            SemanticGraph     enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
            TregexMatcher     matcher      = enumerationsMentionPattern.Matcher(parse);
            IDictionary <IntPair, Tree> subTreeBySpan = Generics.NewHashMap();

            // Pass 1: record the span of every "m1" and "m2" subtree. A later subtree with
            // the same span replaces the earlier one.
            while (matcher.Find())
            {
                matcher.GetMatch();
                Tree[] namedNodes = { matcher.GetNode("m1"), matcher.GetNode("m2") };
                foreach (Tree subTree in namedNodes)
                {
                    IList <Tree> leaves = subTree.GetLeaves();
                    // Begin is the first leaf's IndexAnnotation minus one; end is the last
                    // leaf's IndexAnnotation as-is.
                    int first = ((CoreLabel)leaves[0].Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                    int last  = ((CoreLabel)leaves[leaves.Count - 1].Label()).Get(typeof(CoreAnnotations.IndexAnnotation));
                    subTreeBySpan[new IntPair(first, last)] = subTree;
                }
            }

            // Pass 2: create a mention for each span that is new and not inside a named entity.
            foreach (IntPair span in subTreeBySpan.Keys)
            {
                if (mentionSpanSet.Contains(span) || InsideNE(span, namedEntitySpanSet))
                {
                    continue;
                }
                int placeholderId = -1;
                int start         = span.Get(0);
                int end           = span.Get(1);
                Mention created = new Mention(placeholderId, start, end, enhancedDeps, new List <CoreLabel>(tokens.SubList(start, end)), subTreeBySpan[span]);
                mentions.Add(created);
                mentionSpanSet.Add(span);
            }
        }
コード例 #28
0
        /// <summary>
        /// Populates the CoNLL-U feature dictionary of every node in <paramref name="sg"/> and
        /// optionally fills in lemmas and coarse (universal) POS tags.
        /// </summary>
        /// <param name="sg">Graph whose nodes are annotated in place.</param>
        /// <param name="t">
        /// Parse tree used to find imperatives and to derive coarse tags; may be null, in which
        /// case no imperative is marked and no coarse tag is set.
        /// </param>
        /// <param name="addLemma">When true, a missing or "_" lemma is replaced using <c>morphology</c>.</param>
        /// <param name="addUPOS">When true (and <paramref name="t"/> is not null), coarse tags are set from the mapped tree.</param>
        public virtual void AddFeatures(SemanticGraph sg, Tree t, bool addLemma, bool addUPOS)
        {
            ICollection <int> imperativeIndices;
            if (t != null)
            {
                imperativeIndices = GetImperatives(t);
            }
            else
            {
                imperativeIndices = new HashSet <int>();
            }

            foreach (IndexedWord node in sg.VertexListSorted())
            {
                string tag      = node.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                string text     = node.Get(typeof(CoreAnnotations.TextAnnotation));
                int    position = node.Get(typeof(CoreAnnotations.IndexAnnotation));

                // Reuse the node's feature dictionary, creating and attaching one if absent.
                Dictionary <string, string> feats = node.Get(typeof(CoreAnnotations.CoNLLUFeats));
                if (feats == null)
                {
                    feats = new Dictionary <string, string>();
                    node.Set(typeof(CoreAnnotations.CoNLLUFeats), feats);
                }

                /* Features derived from the token text and its PTB POS tag only. */
                feats.PutAll(GetPOSFeatures(text, tag));
                /* Features derived from the node's place in the graph. */
                feats.PutAll(GetGraphFeatures(sg, node));

                /* Base-form verbs: imperative if flagged by the tree, otherwise infinitive for "VB". */
                if (imperativeIndices.Contains(position))
                {
                    feats["VerbForm"] = "Fin";
                    feats["Mood"]     = "Imp";
                }
                else if (tag.Equals("VB"))
                {
                    feats["VerbForm"] = "Inf";
                }
                /* No present-subjunctive features are assigned: detection is too unreliable. */

                string existingLemma = node.Get(typeof(CoreAnnotations.LemmaAnnotation));
                if (addLemma && (existingLemma == null || existingLemma.Equals("_")))
                {
                    node.Set(typeof(CoreAnnotations.LemmaAnnotation), morphology.Lemma(text, tag));
                }
            }

            if (!addUPOS || t == null)
            {
                return;
            }

            Tree                mappedTree    = UniversalPOSMapper.MapTree(t);
            IList <ILabel>      universalTags = mappedTree.PreTerminalYield();
            IList <IndexedWord> sortedNodes   = sg.VertexListSorted();
            foreach (IndexedWord node in sortedNodes)
            {
                // The node's index, minus one, selects its entry in the pre-terminal yield.
                ILabel universalTag = universalTags[node.Index() - 1];
                node.Set(typeof(CoreAnnotations.CoarseTagAnnotation), universalTag.Value());
            }
        }
コード例 #29
0
        /// <summary>
        /// Looks in the last sentence of <paramref name="paragraph"/> for a token whose lemma is
        /// "report" or "say" and returns the mention id of its "nsubj" dependent when that
        /// dependent heads a mention whose NER string starts with "PER".
        /// </summary>
        /// <param name="paragraph">Sentences of the paragraph; only the last one is examined.</param>
        /// <param name="paragraphOffset">Added to the last sentence's position to form the sentence part of the head position.</param>
        /// <param name="dict">Not used by this method.</param>
        /// <returns>
        /// The mention id as a string, or the empty string when nothing qualifies. When several
        /// subjects qualify, the last one found wins.
        /// </returns>
        private string FindNextParagraphSpeaker(IList <ICoreMap> paragraph, int paragraphOffset, Dictionaries dict)
        {
            ICoreMap finalSentence = paragraph[paragraph.Count - 1];
            string   speakerId     = string.Empty;

            foreach (CoreLabel token in finalSentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                bool hasTargetLemma = token.Get(typeof(CoreAnnotations.LemmaAnnotation)).Equals("report") || token.Get(typeof(CoreAnnotations.LemmaAnnotation)).Equals("say");
                if (!hasTargetLemma)
                {
                    continue;
                }

                string        surface  = token.Get(typeof(CoreAnnotations.TextAnnotation));
                SemanticGraph deps     = finalSentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
                IndexedWord   verbNode = deps.GetNodeByWordPattern(surface);
                foreach (Pair <GrammaticalRelation, IndexedWord> dependent in deps.ChildPairs(verbNode))
                {
                    if (!dependent.First().GetShortName().Equals("nsubj"))
                    {
                        continue;
                    }

                    // Node indices start from 1, hence the "- 1" when building the key.
                    int      subjectIndex = dependent.Second().Index();
                    IntTuple headPosition = new IntTuple(2);
                    headPosition.Set(0, paragraph.Count - 1 + paragraphOffset);
                    headPosition.Set(1, subjectIndex - 1);
                    if (mentionheadPositions.Contains(headPosition) && mentionheadPositions[headPosition].nerString.StartsWith("PER"))
                    {
                        speakerId = int.ToString(mentionheadPositions[headPosition].mentionID);
                    }
                }
            }
            return speakerId;
        }
コード例 #30
0
        /// <summary>
        /// Builds an Alignment from the hypothesis <c>SemanticGraph</c> to the text (passage)
        /// <c>SemanticGraph</c> out of an array of node indexes. Element i of the array holds
        /// the index of the text node to which hypothesis node i is aligned; a negative value
        /// aligns that hypothesis node to <c>IndexedWord.NoWord</c>.
        /// </summary>
        /// <param name="txtGraph">Text (passage) graph; must be non-null and non-empty.</param>
        /// <param name="hypGraph">Hypothesis graph; must be non-null and non-empty.</param>
        /// <param name="indexes">One entry per hypothesis node; its length must equal <c>hypGraph.Size()</c>.</param>
        /// <param name="score">Score passed through to the Alignment.</param>
        /// <param name="justification">Justification passed through to the Alignment.</param>
        /// <returns>The new Alignment wrapping the hypothesis-to-text node map.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when either graph is null or empty, when <paramref name="indexes"/> is null,
        /// or when its length differs from the hypothesis graph size.
        /// </exception>
        public static Edu.Stanford.Nlp.Semgraph.Semgrex.Alignment MakeFromIndexArray(SemanticGraph txtGraph, SemanticGraph hypGraph, int[] indexes, double score, string justification)
        {
            // Validate arguments in a fixed order so the first problem found is the one reported.
            if (txtGraph == null || txtGraph.IsEmpty())
            {
                throw new ArgumentException("Invalid txtGraph " + txtGraph);
            }
            if (hypGraph == null || hypGraph.IsEmpty())
            {
                throw new ArgumentException("Invalid hypGraph " + hypGraph);
            }
            if (indexes == null)
            {
                throw new ArgumentException("Null index array");
            }
            if (indexes.Length != hypGraph.Size())
            {
                throw new ArgumentException("Index array length " + indexes.Length + " does not match hypGraph size " + hypGraph.Size());
            }

            IDictionary <IndexedWord, IndexedWord> hypToTxt = Generics.NewHashMap();
            for (int hypIndex = 0; hypIndex < indexes.Length; hypIndex++)
            {
                IndexedWord hypNode  = hypGraph.GetNodeByIndex(hypIndex);
                int         txtIndex = indexes[hypIndex];
                // A negative entry means "aligned to nothing".
                IndexedWord txtNode = txtIndex >= 0 ? txtGraph.GetNodeByIndex(txtIndex) : IndexedWord.NoWord;
                hypToTxt[hypNode] = txtNode;
            }
            return new Edu.Stanford.Nlp.Semgraph.Semgrex.Alignment(hypToTxt, score, justification);
        }