/// <summary>
/// Demonstrates turning a file into tokenized sentences and then into
/// parse trees.
/// </summary>
/// <remarks>
/// Every sentence that <c>DocumentPreprocessor</c> yields for
/// <paramref name="filename"/> is handed to the parser, and the resulting
/// tree is printed by calling <c>PennPrint</c> on the <c>Tree</c> object,
/// followed by a blank line. When the parser's language pack reports
/// support for grammatical structures, the CC-processed typed
/// dependencies of the parse are printed as well, again followed by a
/// blank line.
/// Per the original notes, a writer can reportedly be passed to the
/// print call to capture the output, and the routine is intended to work
/// with any supported language.
/// </remarks>
/// <param name="lp">Parser applied to each sentence; also supplies the treebank language pack.</param>
/// <param name="filename">File handed to <c>DocumentPreprocessor</c> for loading, sentence-segmenting and tokenizing.</param>
public static void DemoDP(LexicalizedParser lp, string filename)
{
    // Loading, sentence-segmenting and tokenizing are all delegated to
    // DocumentPreprocessor. (Original note: the pack is a
    // PennTreebankLanguagePack for English.)
    ITreebankLanguagePack languagePack = lp.TreebankLanguagePack();

    // Only languages with grammatical-structure support get a factory;
    // null means the dependency output is skipped below.
    IGrammaticalStructureFactory structureFactory = languagePack.SupportsGrammaticalStructures()
        ? languagePack.GrammaticalStructureFactory()
        : null;

    // A tokenizer could also be created here and passed to
    // DocumentPreprocessor.
    foreach (IList<IHasWord> words in new DocumentPreprocessor(filename))
    {
        Tree tree = lp.Apply(words);
        tree.PennPrint();
        System.Console.Out.WriteLine();

        if (structureFactory == null)
        {
            continue;
        }

        GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(tree);
        ICollection dependencies = structure.TypedDependenciesCCprocessed();
        System.Console.Out.WriteLine(dependencies);
        System.Console.Out.WriteLine();
    }
}