/// <summary>
/// Demonstrates calling the parser with text that is already tokenized, and
/// with raw text that is tokenized as a single sentence first.
/// </summary>
/// <remarks>
/// The second parse is printed through a TreePrint object; the format string
/// given to the TreePrint constructor selects what is printed. According to
/// the original notes, output can also be captured by passing a PrintWriter
/// to TreePrint.printTree, and this code is intended for English.
/// </remarks>
/// <param name="lp">Parser applied to both example sentences.</param>
public static void DemoAPI(LexicalizedParser lp)
{
    // Option 1: parse a list of correctly tokenized words.
    string[] tokens = { "This", "is", "an", "easy", "sentence", "." };
    IList<CoreLabel> tokenLabels = SentenceUtils.ToCoreLabelList(tokens);
    Tree tree = lp.Apply(tokenLabels);
    tree.PennPrint();
    System.Console.Out.WriteLine();

    // Option 2: load and use an explicit tokenizer on raw text.
    string rawText = "This is another sentence.";
    ITokenizerFactory<CoreLabel> factory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
    ITokenizer<CoreLabel> tokenizer = factory.GetTokenizer(new StringReader(rawText));
    IList<CoreLabel> tokenizedLabels = tokenizer.Tokenize();
    tree = lp.Apply(tokenizedLabels);

    // Language pack of the parser (a PennTreebankLanguagePack for English,
    // per the original note).
    ITreebankLanguagePack languagePack = lp.TreebankLanguagePack();
    IGrammaticalStructureFactory structureFactory = languagePack.GrammaticalStructureFactory();
    GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(tree);
    IList<TypedDependency> dependencies = structure.TypedDependenciesCCprocessed();
    System.Console.Out.WriteLine(dependencies);
    System.Console.Out.WriteLine();

    // A TreePrint object can also print trees and dependencies.
    TreePrint printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.PrintTree(tree);
}
/// <summary>
/// Entry point of the Semgrex demo: builds a tree from a bracketed string,
/// converts it to an uncollapsed dependency graph, and logs every match of
/// the pattern <c>{}=A &lt;&lt;nsubj {}=B</c>.
/// </summary>
/// <param name="args">Command line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    // Typically the tree is constructed by parsing or reading a treebank.
    // A literal is used here purely for example purposes.
    string bracketedTree = "(ROOT (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
    Tree tree = Tree.ValueOf(bracketedTree);

    // Creates English uncollapsed dependencies as a SemanticGraph. When
    // creating many SemanticGraphs, the original note recommends using a
    // GrammaticalStructureFactory to produce the intermediate
    // GrammaticalStructure instead.
    SemanticGraph graph = SemanticGraphFactory.GenerateUncollapsedDependencies(tree);

    // Alternatively this could have been the Chinese params or those of any
    // other supported language (the original note, dated 2014, mentions only
    // English and Chinese).
    ITreebankLangParserParams langParams = new EnglishTreebankParserParams();
    var factoryOwner = langParams.TreebankLanguagePack();
    var punctuationFilter = langParams.TreebankLanguagePack().PunctuationWordRejectFilter();
    var headFinder = langParams.TypedDependencyHeadFinder();
    IGrammaticalStructureFactory gsf = factoryOwner.GrammaticalStructureFactory(punctuationFilter, headFinder);

    // The structure is built to show the factory in use; it is not read
    // again in this demo.
    GrammaticalStructure gs = gsf.NewGrammaticalStructure(tree);

    log.Info(graph);

    SemgrexPattern pattern = SemgrexPattern.Compile("{}=A <<nsubj {}=B");
    SemgrexMatcher matcher = pattern.Matcher(graph);

    // Per the original note this produces two results on the given tree:
    // "likes" is an ancestor of both "dog" and "my" via the nsubj relation.
    while (matcher.Find())
    {
        object nodeA = matcher.GetNode("A");
        object nodeB = matcher.GetNode("B");
        log.Info(nodeA + " <<nsubj " + nodeB);
    }
}
/// <summary>
/// Demonstrates turning a file into tokens and then into parse trees.
/// </summary>
/// <remarks>
/// Each tree is printed by calling PennPrint on the Tree object. According
/// to the original notes, a PrintWriter can be passed to pennPrint to
/// capture the output, and this code works with any supported language.
/// Typed dependencies are printed only when the parser's language pack
/// reports that it supports grammatical structures.
/// </remarks>
/// <param name="lp">Parser applied to every sentence of the file.</param>
/// <param name="filename">File handed to DocumentPreprocessor.</param>
public static void DemoDP(LexicalizedParser lp, string filename)
{
    // Language pack of the parser (a PennTreebankLanguagePack for English,
    // per the original note).
    ITreebankLanguagePack languagePack = lp.TreebankLanguagePack();

    // The factory stays null when grammatical structures are unsupported.
    IGrammaticalStructureFactory structureFactory = languagePack.SupportsGrammaticalStructures()
        ? languagePack.GrammaticalStructureFactory()
        : null;

    // DocumentPreprocessor loads, sentence-segments and tokenizes the file.
    // A tokenizer could also be created here and passed to it.
    foreach (IList<IHasWord> sentence in new DocumentPreprocessor(filename))
    {
        Tree tree = lp.Apply(sentence);
        tree.PennPrint();
        System.Console.Out.WriteLine();

        if (structureFactory == null)
        {
            continue;
        }

        GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(tree);
        ICollection tdl = structure.TypedDependenciesCCprocessed();
        System.Console.Out.WriteLine(tdl);
        System.Console.Out.WriteLine();
    }
}
// TODO: when this method throws an exception (for whatever reason)
// a waiting client might hang. There should be some graceful
// handling of that.
/// <summary>
/// Parses <paramref name="arg"/> and writes the requested typed
/// dependencies to <paramref name="outStream"/>, one per line, as UTF-8.
/// </summary>
/// <remarks>
/// Nothing is written when parsing yields no tree. The writer is flushed
/// but not closed, so the underlying stream stays open.
/// </remarks>
/// <param name="arg">Text handed to <c>Parse</c>.</param>
/// <param name="outStream">Stream that receives the dependency lines.</param>
/// <param name="commandArgs">
/// Dependency type to output, matched case-insensitively. Only
/// <c>COLLAPSED_TREE</c> is implemented.
/// </param>
/// <exception cref="System.NotSupportedException">
/// <paramref name="commandArgs"/> names a dependency type that is not implemented.
/// </exception>
/// <exception cref="System.IO.IOException"/>
public virtual void HandleDependencies(string arg, OutputStream outStream, string commandArgs)
{
    Tree tree = Parse(arg, false);
    if (tree == null)
    {
        return;
    }

    // TODO: this might throw an exception if the parser doesn't support dependencies. Handle that cleaner?
    GrammaticalStructure gs = parser.GetTLPParams().GetGrammaticalStructure(tree, parser.TreebankLanguagePack().PunctuationWordRejectFilter(), parser.GetTLPParams().TypedDependencyHeadFinder());

    ICollection<TypedDependency> deps;
    // The keyword is a protocol token, not user-facing text, so it is
    // normalized with the invariant culture rather than the current one.
    switch (commandArgs.ToUpperInvariant())
    {
        case "COLLAPSED_TREE":
        {
            deps = gs.TypedDependenciesCollapsedTree();
            break;
        }

        default:
        {
            throw new NotSupportedException("Dependencies type not implemented: " + commandArgs);
        }
    }

    OutputStreamWriter osw = new OutputStreamWriter(outStream, "utf-8");
    foreach (TypedDependency dep in deps)
    {
        osw.Write(dep.ToString());
        osw.Write("\n");
    }
    osw.Flush();
}
/// <summary>
/// Demonstrates parsing a pre-tokenized sentence and a raw sentence that is
/// tokenized with an explicit tokenizer, then prints typed dependencies and
/// a TreePrint rendering of the second parse.
/// </summary>
/// <param name="lp">Parser applied to both example sentences.</param>
public static void DemoAPI(LexicalizedParser lp)
{
    // Option 1: parse a list of correctly tokenized words.
    string[] tokens = { "This", "is", "an", "easy", "sentence", "." };
    java.util.List tokenLabels = Sentence.toCoreLabelList(tokens);
    Tree tree = lp.apply(tokenLabels);
    tree.pennPrint();

    // Option 2: load and use an explicit tokenizer.
    const string RawSentence = "This is another sentence.";
    TokenizerFactory factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new StringReader(RawSentence);
    java.util.List tokenizedLabels = factory.getTokenizer(reader).tokenize();
    tree = lp.apply(tokenizedLabels);

    // Typed dependencies of the second parse, built with the Penn Treebank
    // language pack.
    var languagePack = new PennTreebankLanguagePack();
    GrammaticalStructureFactory structureFactory = languagePack.grammaticalStructureFactory();
    GrammaticalStructure structure = structureFactory.newGrammaticalStructure(tree);
    java.util.List dependencies = structure.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", dependencies);

    // The TreePrint format string selects which outputs are printed.
    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.printTree(tree);
}
/// <summary>
/// Records the <c>GrammaticalStructure</c> this node belongs to and
/// recursively does the same for every node below it.
/// </summary>
/// <param name="tg">The grammatical structure to point this subtree at.</param>
protected void SetTreeGraph(GrammaticalStructure tg)
{
    // The parameter shadows the field of the same name, hence "this.".
    this.tg = tg;

    // Each child applies the same assignment to its own subtree.
    foreach (TreeGraphNode childNode in _children)
    {
        childNode.SetTreeGraph(tg);
    }
}
/// <summary>Reads typed dependencies in CoNLL-X format.</summary>
/// <param name="filename">
/// File passed on to
/// <c>GrammaticalStructure.ReadCoNLLXGrammaticalStructureCollection</c>.
/// </param>
/// <returns>
/// One collection of typed dependencies per grammatical structure read,
/// in the order the structures were returned.
/// </returns>
/// <exception cref="System.IO.IOException"/>
protected internal static IList<ICollection<TypedDependency>> ReadDepsCoNLLX(string filename)
{
    IList<GrammaticalStructure> structures = GrammaticalStructure.ReadCoNLLXGrammaticalStructureCollection(
        filename,
        new FakeShortNameToGRel(),
        new GraphLessGrammaticalStructureFactory());

    // Pre-size the result: exactly one entry is added per structure.
    IList<ICollection<TypedDependency>> result = new List<ICollection<TypedDependency>>(structures.Count);
    foreach (GrammaticalStructure structure in structures)
    {
        result.Add(structure.TypedDependencies());
    }

    return result;
}
/// <summary>
/// Runs the dependency parser on a sentence and returns its enhanced++
/// dependency graph.
/// </summary>
/// <param name="sentence">Sentence handed to <c>parser.Predict</c>.</param>
/// <returns>
/// The graph produced by
/// <c>SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies</c> for the
/// predicted grammatical structure.
/// </returns>
private static SemanticGraph GetParse(ICoreMap sentence)
{
    GrammaticalStructure gs = parser.Predict(sentence);
    return SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies(gs);
}
/// <summary>
/// Parses one sentence and stores its collapsed, basic, CC-processed,
/// enhanced and enhanced++ dependency graphs on the sentence.
/// </summary>
/// <param name="annotation">The enclosing annotation; not read by this method.</param>
/// <param name="sentence">Sentence to parse; receives the five dependency annotations.</param>
protected internal override void DoOneSentence(Annotation annotation, ICoreMap sentence)
{
    GrammaticalStructure structure = parser.Predict(sentence);

    // All graphs are built before any annotation is set, so a failure while
    // building leaves the sentence without partial results.
    SemanticGraph collapsed = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.Collapsed, extraDependencies, null);
    SemanticGraph basic = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.Basic, extraDependencies, null);
    SemanticGraph ccProcessed = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.Ccprocessed, extraDependencies, null);
    SemanticGraph enhanced = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.Enhanced, extraDependencies, null);
    SemanticGraph enhancedPlusPlus = SemanticGraphFactory.MakeFromTree(
        structure, SemanticGraphFactory.Mode.EnhancedPlusPlus, extraDependencies, null);

    sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), collapsed);
    sentence.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), basic);
    sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), ccProcessed);
    sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation), enhanced);
    sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation), enhancedPlusPlus);
}
/// <summary>
/// Demonstrates loading, sentence-segmenting and tokenizing a file with
/// DocumentPreprocessor, then parsing each sentence and printing its tree
/// and CC-processed typed dependencies.
/// </summary>
/// <param name="lp">Parser applied to every sentence of the file.</param>
/// <param name="fileName">File handed to DocumentPreprocessor.</param>
public static void DemoDP(LexicalizedParser lp, string fileName)
{
    var languagePack = new PennTreebankLanguagePack();
    GrammaticalStructureFactory structureFactory = languagePack.grammaticalStructureFactory();

    // A tokenizer could also be created here and passed to
    // DocumentPreprocessor.
    foreach (List sentence in new DocumentPreprocessor(fileName))
    {
        Tree tree = lp.apply(sentence);
        tree.pennPrint();

        GrammaticalStructure structure = structureFactory.newGrammaticalStructure(tree);
        java.util.List dependencies = structure.typedDependenciesCCprocessed(true);
        Console.WriteLine("\n{0}\n", dependencies);
    }
}
/// <summary>
/// Entry point of the dependency parser demo: tags and parses a fixed
/// example sentence and logs the resulting grammatical structure.
/// </summary>
/// <param name="args">
/// Optional <c>-tagger &lt;path&gt;</c> and <c>-model &lt;path&gt;</c> pairs
/// overriding the default tagger and parser model locations.
/// </param>
/// <exception cref="System.ArgumentException">
/// An argument is not recognized, or a flag is given without a value.
/// </exception>
public static void Main(string[] args)
{
    string modelPath = DependencyParser.DefaultModel;
    string taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

    // Every recognized flag consumes two slots: the flag and its value.
    for (int argIndex = 0; argIndex < args.Length; argIndex += 2)
    {
        string flag = args[argIndex];
        bool hasValue = argIndex + 1 < args.Length;
        switch (flag)
        {
            case "-tagger":
            {
                if (!hasValue)
                {
                    throw new ArgumentException("Missing value for argument " + flag);
                }
                taggerPath = args[argIndex + 1];
                break;
            }

            case "-model":
            {
                if (!hasValue)
                {
                    throw new ArgumentException("Missing value for argument " + flag);
                }
                modelPath = args[argIndex + 1];
                break;
            }

            default:
            {
                throw new ArgumentException("Unknown argument " + flag);
            }
        }
    }

    string text = "I can almost always tell when movies use fake dinosaurs.";
    MaxentTagger tagger = new MaxentTagger(taggerPath);
    DependencyParser parser = DependencyParser.LoadFromModelFile(modelPath);
    DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
    foreach (IList<IHasWord> sentence in tokenizer)
    {
        IList<TaggedWord> tagged = tagger.TagSentence(sentence);
        GrammaticalStructure gs = parser.Predict(tagged);

        // Print typed dependencies
        log.Info(gs);
    }
}
/// <summary>
/// Converts a semantic graph to a grammatical structure and generates
/// enhanced++ dependencies from it (basic UD tree to enhanced++ UD graph).
/// </summary>
/// <param name="sg">The graph to convert.</param>
/// <returns>The enhanced++ graph generated from <paramref name="sg"/>.</returns>
private static SemanticGraph ConvertBasicToEnhancedPlusPlus(SemanticGraph sg)
{
    return SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies(SemanticGraphToGrammaticalStructure(sg));
}
/// <summary>Shows a few more ways of providing input to a parser.</summary>
/// <remarks>
/// Usage: ParserDemo2 [grammar [textFile]]
/// With a text file, its sentences are read through DocumentPreprocessor;
/// otherwise three built-in example sentences are used.
/// </remarks>
/// <param name="args">Optional grammar path, optionally followed by a text file.</param>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    string grammar;
    if (args.Length > 0)
    {
        grammar = args[0];
    }
    else
    {
        grammar = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    }

    string[] options = { "-maxLength", "80", "-retainTmpSubcategories" };
    LexicalizedParser lp = (LexicalizedParser)LexicalizedParser.LoadModel(grammar, options);
    ITreebankLanguagePack tlp = lp.GetOp().Langpack();
    IGrammaticalStructureFactory gsf = tlp.GrammaticalStructureFactory();

    IEnumerable<IList<IHasWord>> sentences;
    if (args.Length > 1)
    {
        // Read every sentence of the given file into a list up front.
        DocumentPreprocessor preprocessor = new DocumentPreprocessor(args[1]);
        IList<IList<IHasWord>> fileSentences = new List<IList<IHasWord>>();
        foreach (IList<IHasWord> fileSentence in preprocessor)
        {
            fileSentences.Add(fileSentence);
        }
        sentences = fileSentences;
    }
    else
    {
        // Showing tokenization and parsing in code a couple of different ways.

        // 1. Words that are already tokenized, wrapped as Word objects.
        string[] easyTokens = { "This", "is", "an", "easy", "sentence", "." };
        IList<IHasWord> easySentence = new List<IHasWord>();
        foreach (string token in easyTokens)
        {
            easySentence.Add(new Word(token));
        }

        // 2. Raw text run through the default tokenizer for this
        //    TreebankLanguagePack.
        string rawText = ("This is a slightly longer and more complex " + "sentence requiring tokenization.");
        ITokenizer<IHasWord> tokenizer = tlp.GetTokenizerFactory().GetTokenizer(new StringReader(rawText));
        IList<IHasWord> tokenizedSentence = tokenizer.Tokenize();

        // 3. Words with explicit tags; per the original note the parser gets
        //    the second "can" wrong without help.
        string[] taggedTokens = { "It", "can", "can", "it", "." };
        string[] tags = { "PRP", "MD", "VB", "PRP", "." };
        IList<TaggedWord> taggedSentence = new List<TaggedWord>();
        for (int index = 0; index < taggedTokens.Length; index++)
        {
            taggedSentence.Add(new TaggedWord(taggedTokens[index], tags[index]));
        }
        Tree taggedParse = lp.Parse(taggedSentence);
        taggedParse.PennPrint();

        IList<IList<IHasWord>> examples = new List<IList<IHasWord>>();
        examples.Add(easySentence);
        examples.Add(tokenizedSentence);
        examples.Add(taggedSentence);
        sentences = examples;
    }

    foreach (IList<IHasWord> current in sentences)
    {
        Tree tree = lp.Parse(current);
        tree.PennPrint();
        System.Console.Out.WriteLine();

        GrammaticalStructure structure = gsf.NewGrammaticalStructure(tree);
        IList<TypedDependency> dependencies = structure.TypedDependenciesCCprocessed();
        System.Console.Out.WriteLine(dependencies);
        System.Console.Out.WriteLine();

        System.Console.Out.WriteLine("The words of the sentence:");
        foreach (ILabel label in tree.Yield())
        {
            // CoreLabels are printed in value-map format; any other label is
            // printed as-is.
            CoreLabel coreLabel = label as CoreLabel;
            if (coreLabel != null)
            {
                System.Console.Out.WriteLine(coreLabel.ToString(CoreLabel.OutputFormat.ValueMap));
            }
            else
            {
                System.Console.Out.WriteLine(label);
            }
        }
        System.Console.Out.WriteLine();
        System.Console.Out.WriteLine(tree.TaggedYield());
        System.Console.Out.WriteLine();
    }

    // This overload turns the string into a single sentence using the
    // default tokenizer for the TreebankLanguagePack.
    string lastSentence = "This is one last test!";
    lp.Parse(lastSentence).PennPrint();
}
/// <summary>Produces an enhanced++ dependencies SemanticGraph.</summary>
/// <param name="gs">Grammatical structure to take the dependencies from.</param>
/// <returns>
/// The graph built in <c>EnhancedPlusPlus</c> mode with no extras and no filter.
/// </returns>
public static SemanticGraph GenerateEnhancedPlusPlusDependencies(GrammaticalStructure gs)
{
    SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.EnhancedPlusPlus;
    GrammaticalStructure.Extras extras = GrammaticalStructure.Extras.None;
    return MakeFromTree(gs, mode, extras, null);
}
/// <summary>Produces a CC-processed dependencies SemanticGraph.</summary>
/// <param name="gs">Grammatical structure to take the dependencies from.</param>
/// <returns>
/// The graph built in <c>Ccprocessed</c> mode with no extras and no filter.
/// </returns>
public static SemanticGraph GenerateCCProcessedDependencies(GrammaticalStructure gs)
{
    SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.Ccprocessed;
    GrammaticalStructure.Extras extras = GrammaticalStructure.Extras.None;
    return MakeFromTree(gs, mode, extras, null);
}
/// <summary>Produces an uncollapsed (basic) SemanticGraph.</summary>
/// <param name="gs">Grammatical structure to take the dependencies from.</param>
/// <returns>
/// The graph built in <c>Basic</c> mode with no extras and no filter.
/// </returns>
public static SemanticGraph GenerateUncollapsedDependencies(GrammaticalStructure gs)
{
    SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.Basic;
    GrammaticalStructure.Extras extras = GrammaticalStructure.Extras.None;
    return MakeFromTree(gs, mode, extras, null);
}
/// <summary>
/// Builds a SemanticGraph from a grammatical structure using the defaults:
/// <c>Basic</c> mode, no extras, no filter.
/// </summary>
/// <param name="structure">Grammatical structure to take the dependencies from.</param>
/// <returns>The resulting graph.</returns>
public static SemanticGraph MakeFromTree(GrammaticalStructure structure)
{
    SemanticGraphFactory.Mode defaultMode = SemanticGraphFactory.Mode.Basic;
    GrammaticalStructure.Extras noExtras = GrammaticalStructure.Extras.None;
    return MakeFromTree(structure, defaultMode, noExtras, null);
}
/// <summary>
/// Builds a SemanticGraph from a grammatical structure, translating the
/// boolean extras switch to the <c>GrammaticalStructure.Extras</c> value
/// used by the main overload.
/// </summary>
/// <param name="tree">Grammatical structure to take the dependencies from.</param>
/// <param name="mode">Which kind of typed dependencies to extract.</param>
/// <param name="includeExtras">
/// <c>true</c> maps to <c>Extras.Maximal</c>, <c>false</c> to <c>Extras.None</c>.
/// </param>
/// <param name="filter">Optional dependency filter, forwarded unchanged.</param>
/// <returns>The resulting graph.</returns>
public static SemanticGraph MakeFromTree(GrammaticalStructure tree, SemanticGraphFactory.Mode mode, bool includeExtras, IPredicate<TypedDependency> filter)
{
    GrammaticalStructure.Extras extras = GrammaticalStructure.Extras.None;
    if (includeExtras)
    {
        extras = GrammaticalStructure.Extras.Maximal;
    }

    return MakeFromTree(tree, mode, extras, filter);
}
// TODO: these booleans would be more readable as enums similar to Mode.
// Then the arguments would make more sense
/// <summary>
/// Builds a SemanticGraph from the typed dependencies of a grammatical
/// structure, selected by <paramref name="mode"/> and optionally filtered.
/// </summary>
/// <param name="gs">Grammatical structure to take the dependencies from.</param>
/// <param name="mode">Which kind of typed dependencies to extract.</param>
/// <param name="includeExtras">
/// Passed to the extraction call for the <c>Collapsed</c>, <c>Ccprocessed</c>
/// and <c>Basic</c> modes; not used for the other modes.
/// </param>
/// <param name="filter">
/// When non-null, only dependencies for which <c>filter.Test</c> returns
/// true are kept.
/// </param>
/// <returns>A graph built from the selected dependencies; it may be empty.</returns>
/// <exception cref="System.ArgumentException">The mode is not one of the handled values.</exception>
public static SemanticGraph MakeFromTree(GrammaticalStructure gs, SemanticGraphFactory.Mode mode, GrammaticalStructure.Extras includeExtras, IPredicate<TypedDependency> filter)
{
    ICollection<TypedDependency> selected;
    switch (mode)
    {
        case SemanticGraphFactory.Mode.Enhanced:
            selected = gs.TypedDependenciesEnhanced();
            break;

        case SemanticGraphFactory.Mode.EnhancedPlusPlus:
            selected = gs.TypedDependenciesEnhancedPlusPlus();
            break;

        case SemanticGraphFactory.Mode.CollapsedTree:
            selected = gs.TypedDependenciesCollapsedTree();
            break;

        case SemanticGraphFactory.Mode.Collapsed:
            selected = gs.TypedDependenciesCollapsed(includeExtras);
            break;

        case SemanticGraphFactory.Mode.Ccprocessed:
            selected = gs.TypedDependenciesCCprocessed(includeExtras);
            break;

        case SemanticGraphFactory.Mode.Basic:
            selected = gs.TypedDependencies(includeExtras);
            break;

        default:
            throw new ArgumentException("Unknown mode " + mode);
    }

    // There used to be an if clause that filtered out the case of empty
    // dependencies. However, I could not understand (or replicate) the error
    // it alluded to, and it led to empty dependency graphs for very short
    // fragments, which meant they were ignored by the RTE system. Changed. (pado)
    // See also the SemanticGraph constructor.
    if (filter == null)
    {
        return new SemanticGraph(selected);
    }

    IList<TypedDependency> accepted = Generics.NewArrayList();
    foreach (TypedDependency candidate in selected)
    {
        if (filter.Test(candidate))
        {
            accepted.Add(candidate);
        }
    }

    return new SemanticGraph(accepted);
}
/// <summary>Produces a collapsed dependencies SemanticGraph.</summary>
/// <param name="gs">Grammatical structure to take the dependencies from.</param>
/// <param name="extras">Extras setting forwarded to the main overload.</param>
/// <returns>The graph built in <c>Collapsed</c> mode with no filter.</returns>
public static SemanticGraph GenerateCollapsedDependencies(GrammaticalStructure gs, GrammaticalStructure.Extras extras)
{
    SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.Collapsed;
    return MakeFromTree(gs, mode, extras, null);
}
/// <summary>
/// Creates a new <c>TreeGraphNode</c> with the same tree structure and label
/// values as an existing tree (but no shared storage), and attaches the
/// whole new subtree to the given grammatical structure.
/// </summary>
/// <param name="t">The tree to copy.</param>
/// <param name="graph">The graph of which this node is a part.</param>
public TreeGraphNode(Tree t, GrammaticalStructure graph)
    : this(t, (TreeGraphNode)null)
{
    // The chained constructor builds the node; the graph pointer is then
    // set here for this node and everything below it.
    SetTreeGraph(graph);
}
/// <summary>
/// Builds a <c>TreeGraphNode</c> that mirrors the structure and label values
/// of an existing tree (but shares no storage with it) and registers the
/// new nodes with the given grammatical structure.
/// </summary>
/// <param name="t">The tree to copy.</param>
/// <param name="graph">The graph of which this node is a part.</param>
public TreeGraphNode(Tree t, GrammaticalStructure graph)
    : this(t, (TreeGraphNode)null)
{
    // SetTreeGraph recurses, so one call covers the entire copied subtree.
    SetTreeGraph(graph);
}