/// <summary>
/// Parses a single sentence and stores five dependency graphs built from the
/// predicted grammatical structure on that sentence.
/// </summary>
/// <param name="annotation">Enclosing annotation; not read by this method.</param>
/// <param name="sentence">Sentence handed to the parser and updated with the resulting graphs.</param>
protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
{
    GrammaticalStructure structure = parser.Predict(sentence);

    // Build every representation first; the sentence is only updated once all five exist.
    SemanticGraph collapsedGraph = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.Collapsed, extraDependencies, null);
    SemanticGraph basicGraph = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.Basic, extraDependencies, null);
    SemanticGraph ccProcessedGraph = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.Ccprocessed, extraDependencies, null);
    SemanticGraph enhancedGraph = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.Enhanced, extraDependencies, null);
    SemanticGraph enhancedPlusPlusGraph = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.EnhancedPlusPlus, extraDependencies, null);

    sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), collapsedGraph);
    sentence.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), basicGraph);
    sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), ccProcessedGraph);
    sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation), enhancedGraph);
    sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation), enhancedPlusPlusGraph);
}
/// <summary>Runs the edit script once for each accepted match of the pattern.</summary>
/// <remarks>
/// For every match reported by the matcher, the edit script is evaluated against a
/// fresh duplicate of <paramref name="sg"/> (sharing the same nodes), and that
/// duplicate is added to the result.
/// A match that fails the optional predicate test is skipped.
/// If two node names in a match refer to the same node, matching stops and the
/// graphs generated so far are returned.
/// </remarks>
/// <param name="sg">SemanticGraph to match against; edits are applied to duplicates of it.</param>
/// <returns>The graphs produced, one per match that was executed; empty if none were.</returns>
/// <exception cref="System.Exception"/>
public virtual ICollection<SemanticGraph> Execute(SemanticGraph sg)
{
    ICollection<SemanticGraph> results = new List<SemanticGraph>();
    SemgrexMatcher m = semgrexPattern.Matcher(sg);
    while (m.Find())
    {
        // Semgrex can bind two named nodes to the same node; detect that case here.
        ICollection<string> boundNames = m.GetNodeNames();
        ICollection<IndexedWord> visited = Generics.NewHashSet();
        bool collision = false;
        foreach (string boundName in boundNames)
        {
            IndexedWord node = m.GetNode(boundName);
            if (visited.Contains(node))
            {
                collision = true;
                break;
            }
            visited.Add(node);
        }
        if (collision)
        {
            // NOTE(review): this ends the whole search rather than skipping only the
            // colliding match - confirm that is the intended behaviour.
            break;
        }

        // When a predicate test is configured, only matches that pass it are executed.
        if (predicateTest != null && !predicateTest.Test(m))
        {
            continue;
        }

        // Work on a duplicate so the input graph is left alone; the nodes are kept
        // because the matcher refers to them.
        SemanticGraph copy = SemanticGraphFactory.DuplicateKeepNodes(sg);
        nodeMap = Generics.NewHashMap();
        foreach (SsurgeonEdit step in editScript)
        {
            step.Evaluate(copy, m);
        }
        results.Add(copy);
    }
    return results;
}
/// <summary>
/// Exercises lemma matching on a hand-written graph and on a graph generated from a
/// parse tree, after copying each vertex's word into its lemma.
/// </summary>
public virtual void TestLemma()
{
    SemanticGraph sg = SemanticGraph.ValueOf("[ate subj>Bill dobj>[muffins compound>blueberry]]");
    foreach (IndexedWord vertex in sg.VertexSet())
    {
        vertex.SetLemma(vertex.Word());
    }
    RunTest("{lemma:ate}", sg, "ate");

    Tree parse = Tree.ValueOf("(ROOT (S (NP (PRP I)) (VP (VBP love) (NP (DT the) (NN display))) (. .)))");
    sg = SemanticGraphFactory.GenerateCCProcessedDependencies(parse);
    foreach (IndexedWord vertex in sg.VertexSet())
    {
        vertex.SetLemma(vertex.Word());
    }

    // These three checks also give some coverage for a bizarre error a user found,
    // where multiple copies of the same IndexedWord were created.
    RunTest("{}=Obj <dobj {lemma:love}=Pred", sg, "display/NN");
    RunTest("{}=Obj <dobj {}=Pred", sg, "display/NN");
    RunTest("{lemma:love}=Pred >dobj {}=Obj ", sg, "love/VBP");
}
/// <summary>
/// Reads the next CoNLL document, annotates it, extracts gold and predicted mentions,
/// and arranges everything into a <c>Document</c>.
/// </summary>
/// <returns>The arranged document, or null when the reader returns no further document.</returns>
/// <exception cref="System.Exception"/>
public override Document NextDoc()
{
    CoNLL2011DocumentReader.Document conllDoc = reader.GetNextDocument();
    if (conllDoc == null)
    {
        return null;
    }

    Annotation anno = conllDoc.GetAnnotation();
    IList<ICoreMap> goldSentences = anno.Get(typeof(CoreAnnotations.SentencesAnnotation));
    foreach (ICoreMap sent in goldSentences)
    {
        if (Constants.UseGoldParses || replicateCoNLL)
        {
            Tree goldTree = sent.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            treeLemmatizer.TransformTree(goldTree);
            // Generate the dependency graphs; a failure here is logged and the sentence is left without them.
            try
            {
                SemanticGraph enhanced = SemanticGraphFactory.MakeFromTree(
                    goldTree, SemanticGraphFactory.Mode.Enhanced, GrammaticalStructure.Extras.None);
                SemanticGraph basic = SemanticGraphFactory.MakeFromTree(
                    goldTree, SemanticGraphFactory.Mode.Basic, GrammaticalStructure.Extras.None);
                sent.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), basic);
                sent.Set(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation), enhanced);
            }
            catch (Exception e)
            {
                logger.Log(Level.Warning, "Exception caught during extraction of Stanford dependencies. Will ignore and continue...", e);
            }
        }
        else
        {
            // Drop the tree from the annotation so the pipeline parse replaces it.
            sent.Remove(typeof(TreeCoreAnnotations.TreeAnnotation));
        }
    }

    // Number utterances: the counter advances each time the speaker differs from the previous token's.
    string lastSpeaker = null;
    int utteranceIndex = -1;
    foreach (CoreLabel tok in anno.Get(typeof(CoreAnnotations.TokensAnnotation)))
    {
        if (!tok.ContainsKey(typeof(CoreAnnotations.SpeakerAnnotation)))
        {
            tok.Set(typeof(CoreAnnotations.SpeakerAnnotation), string.Empty);
        }
        string speaker = tok.Get(typeof(CoreAnnotations.SpeakerAnnotation));
        if (!speaker.Equals(lastSpeaker))
        {
            utteranceIndex++;
            lastSpeaker = speaker;
        }
        tok.Set(typeof(CoreAnnotations.UtteranceAnnotation), utteranceIndex);
    }

    // Run pipeline
    stanfordProcessor.Annotate(anno);

    IList<IList<CoreLabel>> allWords = new List<IList<CoreLabel>>();
    IList<Tree> allTrees = new List<Tree>();
    foreach (ICoreMap annotated in anno.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        allWords.Add(annotated.Get(typeof(CoreAnnotations.TokensAnnotation)));
        allTrees.Add(annotated.Get(typeof(TreeCoreAnnotations.TreeAnnotation)));
    }

    // Gold mentions come from the CoNLL document; predicted mentions from the mention finder.
    IList<IList<Mention>> allGoldMentions = ExtractGoldMentions(conllDoc);
    IList<IList<Mention>> allPredictedMentions = mentionFinder.ExtractPredictedMentions(anno, maxID, dictionaries);
    try
    {
        RecallErrors(allGoldMentions, allPredictedMentions, anno);
    }
    catch (IOException e)
    {
        throw new Exception(e);
    }

    Document doc = Arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
    doc.conllDoc = conllDoc;
    return doc;
}
/// <summary>
/// Runs coreference resolution over the annotation and stores the resulting chains
/// under <c>CorefCoreAnnotations.CorefChainAnnotation</c>.
/// </summary>
/// <remarks>
/// The named entity tag granularity is set to "coarse" for the duration of the call and
/// set back to "fine" in a <c>finally</c> block, so it is restored on every exit path,
/// including when mention detection or coreference throws. Exceptions propagate to the
/// caller unchanged.
/// If the annotation has no <c>SentencesAnnotation</c>, an error is logged and nothing
/// else is done.
/// </remarks>
/// <param name="annotation">Annotation to read sentences from and write coreference results to.</param>
public virtual void Annotate(Annotation annotation)
{
    // temporarily set the primary named entity tag to the coarse tag
    SetNamedEntityTagGranularity(annotation, "coarse");
    try
    {
        if (performMentionDetection)
        {
            mentionAnnotator.Annotate(annotation);
        }

        // we are only supporting the new annotation standard for this Annotator!
        if (!annotation.ContainsKey(typeof(CoreAnnotations.SentencesAnnotation)))
        {
            log.Error("this coreference resolution system requires SentencesAnnotation!");
            return;
        }

        // extract trees and sentence words
        IList<Tree> trees = new List<Tree>();
        IList<IList<CoreLabel>> sentences = new List<IList<CoreLabel>>();
        bool hasSpeakerAnnotations = false;
        foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
        {
            IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
            sentences.Add(tokens);
            Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            trees.Add(tree);

            // locking here is crucial for correct threading!
            SemanticGraph dependencies = SemanticGraphFactory.MakeFromTree(
                tree, SemanticGraphFactory.Mode.Collapsed, GrammaticalStructure.Extras.None, null, true);
            sentence.Set(typeof(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation), dependencies);

            if (!hasSpeakerAnnotations)
            {
                // check for speaker annotations; one token with a speaker is enough
                foreach (CoreLabel t in tokens)
                {
                    if (t.Get(typeof(CoreAnnotations.SpeakerAnnotation)) != null)
                    {
                        hasSpeakerAnnotations = true;
                        break;
                    }
                }
            }
            MentionExtractor.MergeLabels(tree, tokens);
            MentionExtractor.InitializeUtterance(tokens);
        }

        if (hasSpeakerAnnotations)
        {
            annotation.Set(typeof(CoreAnnotations.UseMarkedDiscourseAnnotation), true);
        }

        // extract all possible mentions
        // this is created for each new annotation because it is not threadsafe
        RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(allowReparsing);
        IList<IList<Mention>> allUnprocessedMentions =
            finder.ExtractPredictedMentions(annotation, 0, corefSystem.Dictionaries());

        // add the relevant info to mentions and order them for coref
        Document document = mentionExtractor.Arrange(annotation, sentences, trees, allUnprocessedMentions);
        IList<IList<Mention>> orderedMentions = document.GetOrderedMentions();
        IDictionary<int, CorefChain> result = corefSystem.CorefReturnHybridOutput(document);
        annotation.Set(typeof(CorefCoreAnnotations.CorefChainAnnotation), result);

        if (OldFormat)
        {
            IDictionary<int, CorefChain> oldResult = corefSystem.Coref(document);
            AddObsoleteCoreferenceAnnotations(annotation, orderedMentions, oldResult);
        }
    }
    finally
    {
        // restore to the fine-grained
        SetNamedEntityTagGranularity(annotation, "fine");
    }
}
/// <summary>Converts basic UD tree to enhanced++ UD graph.</summary>
/// <param name="sg">Graph to convert; it is first turned into a grammatical structure.</param>
/// <returns>The graph generated from that grammatical structure.</returns>
private static SemanticGraph ConvertBasicToEnhancedPlusPlus(SemanticGraph sg)
{
    return SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies(
        SemanticGraphToGrammaticalStructure(sg));
}
/// <summary>Prints out all matches of a semgrex pattern on a file of dependencies.</summary>
/// <remarks>
/// <p>
/// Usage:<br />
/// java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args]
/// <br />
/// See the help() function for a list of possible arguments to provide.
/// </p>
/// Graphs are loaded from the tree files and CoNLL-U files named on the command line.
/// Graphs with no match are skipped. In the list output format each match is logged
/// with a running match number; in the offset output format one line is printed per
/// matching, non-empty graph.
/// If no pattern is supplied, help is printed and the process exits with status 2.
/// </remarks>
/// <param name="args">Command-line arguments; each recognised flag takes one value.</param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    // Every recognised flag consumes exactly one following argument.
    IDictionary<string, int> flagMap = Generics.NewHashMap();
    flagMap[Pattern] = 1;
    flagMap[TreeFile] = 1;
    flagMap[Mode] = 1;
    flagMap[Extras] = 1;
    flagMap[ConlluFile] = 1;
    flagMap[OutputFormatOption] = 1;
    IDictionary<string, string[]> argsMap = StringUtils.ArgsToMap(args, flagMap);

    // TODO: allow patterns to be extracted from a file
    if (!(argsMap.Contains(Pattern)) || argsMap[Pattern].Length == 0)
    {
        Help();
        System.Environment.Exit(2);
    }
    Edu.Stanford.Nlp.Semgraph.Semgrex.SemgrexPattern semgrex =
        Edu.Stanford.Nlp.Semgraph.Semgrex.SemgrexPattern.Compile(argsMap[Pattern][0]);

    // Enum names are upper-cased with the invariant culture so the lookup does not
    // depend on the current culture's casing rules.
    string modeString = DefaultMode;
    if (argsMap.Contains(Mode) && argsMap[Mode].Length > 0)
    {
        modeString = argsMap[Mode][0].ToUpperInvariant();
    }
    SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.ValueOf(modeString);

    string outputFormatString = DefaultOutputFormat;
    if (argsMap.Contains(OutputFormatOption) && argsMap[OutputFormatOption].Length > 0)
    {
        outputFormatString = argsMap[OutputFormatOption][0].ToUpperInvariant();
    }
    SemgrexPattern.OutputFormat outputFormat = SemgrexPattern.OutputFormat.ValueOf(outputFormatString);

    // Extra dependencies are on unless the flag says otherwise.
    bool useExtras = true;
    if (argsMap.Contains(Extras) && argsMap[Extras].Length > 0)
    {
        useExtras = bool.ValueOf(argsMap[Extras][0]);
    }

    IList<SemanticGraph> graphs = Generics.NewArrayList();

    // TODO: allow other sources of graphs, such as dependency files
    if (argsMap.Contains(TreeFile) && argsMap[TreeFile].Length > 0)
    {
        foreach (string treeFile in argsMap[TreeFile])
        {
            log.Info("Loading file " + treeFile);
            MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
            treebank.LoadPath(treeFile);
            foreach (Tree tree in treebank)
            {
                // TODO: allow other languages... this defaults to English
                SemanticGraph graph = SemanticGraphFactory.MakeFromTree(
                    tree, mode, useExtras ? GrammaticalStructure.Extras.Maximal : GrammaticalStructure.Extras.None);
                graphs.Add(graph);
            }
        }
    }

    if (argsMap.Contains(ConlluFile) && argsMap[ConlluFile].Length > 0)
    {
        CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
        foreach (string conlluFile in argsMap[ConlluFile])
        {
            log.Info("Loading file " + conlluFile);
            IEnumerator<SemanticGraph> it = reader.GetIterator(IOUtils.ReaderFromString(conlluFile));
            while (it.MoveNext())
            {
                SemanticGraph graph = it.Current;
                graphs.Add(graph);
            }
        }
    }

    foreach (SemanticGraph graph_1 in graphs)
    {
        SemgrexMatcher matcher = semgrex.Matcher(graph_1);
        if (!matcher.Find())
        {
            continue;
        }

        if (outputFormat == SemgrexPattern.OutputFormat.List)
        {
            log.Info("Matched graph:" + Runtime.LineSeparator() + graph_1.ToString(SemanticGraph.OutputFormat.List));
            int i = 1;
            bool found = true;
            while (found)
            {
                log.Info("Match " + i + " at: " + matcher.GetMatch().ToString(CoreLabel.OutputFormat.ValueIndex));
                IList<string> nodeNames = Generics.NewArrayList();
                Sharpen.Collections.AddAll(nodeNames, matcher.GetNodeNames());
                nodeNames.Sort();
                foreach (string name in nodeNames)
                {
                    log.Info(" " + name + ": " + matcher.GetNode(name).ToString(CoreLabel.OutputFormat.ValueIndex));
                }
                log.Info(" ");
                // Advance the label so successive matches are numbered 1, 2, 3, ...
                i++;
                found = matcher.Find();
            }
        }
        else if (outputFormat == SemgrexPattern.OutputFormat.Offset)
        {
            if (graph_1.VertexListSorted().IsEmpty())
            {
                continue;
            }
            // NOTE(review): this always prints the first CoNLL-U file name and reads
            // argsMap[ConlluFile] even when graphs came only from tree files - confirm
            // offset output is meant to be used with a single CoNLL-U input.
            System.Console.Out.Printf("+%d %s%n",
                graph_1.VertexListSorted()[0].Get(typeof(CoreAnnotations.LineNumberAnnotation)),
                argsMap[ConlluFile][0]);
        }
    }
}
/// <summary>
/// Runs several semgrex patterns over hand-built and tree-derived graphs, printing the
/// matches, and asserts that a case-insensitive match finds "like" governing "do".
/// </summary>
/// <remarks>
/// Only the final pattern is asserted on; the earlier patterns (including the aligned
/// two-graph pattern) just print what they find.
/// </remarks>
public virtual void TestFind()
{
    SemanticGraph h = SemanticGraph.ValueOf("[married/VBN nsubjpass>Hughes/NNP auxpass>was/VBD prep_to>Gracia/NNP]");
    SemanticGraph t = SemanticGraph.ValueOf("[loved/VBD\nnsubj>Hughes/NNP\ndobj>[wife/NN poss>his/PRP$ appos>Gracia/NNP]\nconj_and>[obsessed/JJ\ncop>was/VBD\nadvmod>absolutely/RB\nprep_with>[Elicia/NN poss>his/PRP$ amod>little/JJ compound>daughter/NN]]]");
    string s = "(ROOT\n(S\n(NP (DT The) (NN chimney) (NNS sweeps))\n(VP (VBP do) (RB not)\n(VP (VB like)\n(S\n(VP (VBG working)\n(PP (IN on)\n(NP (DT an) (JJ empty) (NN stomach)))))))\n(. .)))";
    Tree tree = Tree.ValueOf(s);
    SemanticGraph sg = SemanticGraphFactory.MakeFromTree(
        tree, SemanticGraphFactory.Mode.Collapsed, GrammaticalStructure.Extras.Maximal, null);

    // Pattern 1: governor/dependent pairs on the tree-derived graph.
    SemgrexPattern pat = SemgrexPattern.Compile("{}=gov ![>det {}] & > {word:/^(?!not).*$/}=dep");
    sg.PrettyPrint();
    pat.PrettyPrint();
    SemgrexMatcher mat = pat.Matcher(sg);
    while (mat.Find())
    {
        string gov = mat.GetNode("gov").Word();
        string dep = mat.GetNode("dep").Word();
        System.Console.Out.WriteLine(dep + ' ' + gov);
    }

    // Pattern 2: partner relations on the hand-built text graph.
    SemgrexPattern pat2 = SemgrexPattern.Compile("{} [[>/nn|appos/ ({lemma:/wife|husband|partner/} >/poss/ {}=txtPartner)] | [<poss ({}=txtPartner >/nn|appos/ {lemma:/wife|husband|partner/})]" + "| [<nsubj ({$} >> ({word:/wife|husband|partner/} >poss {word:/his|her/} >/nn|appos/ {}))]]");
    SemgrexMatcher mat2 = pat2.Matcher(t);
    while (mat2.Find())
    {
        string match = mat2.GetMatch().Word();
        System.Console.Out.WriteLine(match);
    }

    // Pattern 3: a pattern spanning both graphs, linked through an explicit alignment.
    Dictionary<IndexedWord, IndexedWord> map = new Dictionary<IndexedWord, IndexedWord>();
    map[h.GetNodeByWordPattern("Hughes")] = t.GetNodeByWordPattern("Hughes");
    map[h.GetNodeByWordPattern("Gracia")] = t.GetNodeByWordPattern("Gracia");
    Alignment alignment = new Alignment(map, 0, string.Empty);
    SemgrexPattern fullPat = SemgrexPattern.Compile("({}=partnerOne [[<prep_to ({word:married} >nsubjpass {}=partnerTwo)] | [<nsubjpass ({word:married} >prep_to {}=partnerTwo)]]) @ ({} [[>/nn|appos/ ({lemma:/wife|husband|partner/} >/poss/ {}=txtPartner)] | [<poss ({}=txtPartner >/nn|appos/ {lemma:/wife|husband|partner/})]" + "| [<nsubj ({$} >> ({word:/wife|husband|partner/} >poss {word:/his|her/} >/nn|appos/ {}=txtPartner))]])");
    fullPat.PrettyPrint();
    SemgrexMatcher fullMat = fullPat.Matcher(h, alignment, t);
    if (fullMat.Find())
    {
        System.Console.Out.WriteLine("woo: " + fullMat.GetMatch().Word());
        System.Console.Out.WriteLine(fullMat.GetNode("txtPartner"));
        System.Console.Out.WriteLine(fullMat.GetNode("partnerOne"));
        System.Console.Out.WriteLine(fullMat.GetNode("partnerTwo"));
    }
    else
    {
        System.Console.Out.WriteLine("boo");
    }

    // Pattern 4: checking if ignoring case or not - "LIKE" in the pattern must match "like".
    SemgrexPattern pat3 = SemgrexPattern.Compile("({word:LIKE}=parent >>/aux.*/ {word:/do/}=node)");
    System.Console.Out.WriteLine("pattern is ");
    pat3.PrettyPrint();
    System.Console.Out.WriteLine("tree is ");
    sg.PrettyPrint();
    SemgrexMatcher mat3 = pat3.Matcher(sg, true);
    if (mat3.Find())
    {
        string parent = mat3.GetNode("parent").Word();
        string node = mat3.GetNode("node").Word();
        System.Console.Out.WriteLine("Result: parent is " + parent + " and node is " + node);
        // Expected value first, actual second, so failure messages read correctly.
        NUnit.Framework.Assert.AreEqual("like", parent);
        NUnit.Framework.Assert.AreEqual("do", node);
    }
    else
    {
        NUnit.Framework.Assert.Fail("Expected the case-insensitive pattern to match the graph.");
    }
}