// Use Stanford.NLP.Net to parse a sentence into a phrase-structure tree.
static Tree Parse(string sent)
{
    // Load the English PCFG parser from the models directory.
    var parser = LexicalizedParser.loadModel(modelsDirectory + "\\lexparser\\englishPCFG.ser.gz");

    // Tokenize the raw sentence with the PTB tokenizer.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new java.io.StringReader(sent);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();

    var tree = parser.apply(tokens);

    // Derive CC-processed typed dependencies from the lexical tree.
    var langPack = new PennTreebankLanguagePack();
    var structureFactory = langPack.grammaticalStructureFactory();
    var structure = structureFactory.newGrammaticalStructure(tree);
    var dependencies = structure.typedDependenciesCCprocessed();

    // Print the tree in Penn Treebank format.
    //var printer = new TreePrint("penn,typedDependenciesCollapsed");
    var printer = new TreePrint("penn");
    printer.printTree(tree);

    return tree;
}
static void Main()
{
    // Path to the models extracted from `stanford-parser-3.6.0-models.jar`.
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-parser-full-2016-10-31\models\";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Load the English PCFG parser from file.
    var parser = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // Sample 1: parse a list of correctly tokenized words.
    var words = new[] { "This", "is", "an", "easy", "sentence", "." };
    var labels = SentenceUtils.toCoreLabelList(words);
    var tree = parser.apply(labels);
    tree.pennPrint();

    // Sample 2: load and use an explicit tokenizer.
    var text = "This is another sentence.";
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new StringReader(text);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();
    var tree2 = parser.apply(tokens);

    // Extract CC-processed typed dependencies from the lexical tree.
    var langPack = new PennTreebankLanguagePack();
    var structure = langPack.grammaticalStructureFactory().newGrammaticalStructure(tree2);
    var dependencies = structure.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", dependencies);

    // Print the tree plus its collapsed dependencies.
    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.printTree(tree2);
}
/// <summary>
/// Returns a new
/// <c>SemanticGraph</c>
/// constructed from a given
/// <see cref="Edu.Stanford.Nlp.Trees.Tree"/>
/// with given options.
/// This factory method is intended to replace a profusion of highly similar
/// factory methods, such as
/// <c>typedDependencies()</c>,
/// <c>typedDependenciesCollapsed()</c>,
/// <c>allTypedDependencies()</c>,
/// <c>allTypedDependenciesCollapsed()</c>, etc.
/// For a fuller explanation of the meaning of the boolean arguments, see
/// <see cref="Edu.Stanford.Nlp.Trees.GrammaticalStructure"/>.
/// </summary>
/// <param name="tree">A tree representing a phrase structure parse</param>
/// <param name="includeExtras">
/// Whether to include extra dependencies, which may
/// result in a non-tree
/// </param>
/// <param name="filter">A filter to exclude certain dependencies; ignored if null</param>
/// <param name="originalDependencies">
/// generate original Stanford dependencies instead of new
/// Universal Dependencies
/// </param>
/// <returns>A SemanticGraph</returns>
public static SemanticGraph MakeFromTree(Tree tree, SemanticGraphFactory.Mode mode, GrammaticalStructure.Extras includeExtras, IPredicate<TypedDependency> filter, bool originalDependencies, bool includePunctuationDependencies)
{
    GrammaticalStructure gs;
    if (originalDependencies)
    {
        // Original Stanford dependencies: punctuation is filtered by word.
        IPredicate<string> wordFilter;
        if (includePunctuationDependencies)
        {
            wordFilter = Filters.AcceptFilter();
        }
        else
        {
            wordFilter = new PennTreebankLanguagePack().PunctuationWordRejectFilter();
        }
        gs = new EnglishGrammaticalStructure(tree, wordFilter, new SemanticHeadFinder(true));
    }
    else
    {
        // Universal Dependencies: punctuation is filtered by POS tag.
        IPredicate<string> tagFilter;
        if (includePunctuationDependencies)
        {
            tagFilter = Filters.AcceptFilter();
        }
        else
        {
            tagFilter = new PennTreebankLanguagePack().PunctuationTagRejectFilter();
        }
        gs = new UniversalEnglishGrammaticalStructure(tree, tagFilter, new UniversalSemanticHeadFinder(true));
    }
    return MakeFromTree(gs, mode, includeExtras, filter);
}
public void ParseEasySentence()
{
    // Case 1: parse a pre-tokenized word list.
    var words = new[] { "This", "is", "an", "easy", "sentence", "." };
    var labels = SentenceUtils.toCoreLabelList(words);
    var parse = _lp.apply(labels);
    Assert.NotNull(parse);
    parse.pennPrint();

    // Case 2: tokenize explicitly, then parse.
    var text = "This is another sentence.";
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    using var reader = new StringReader(text);
    var tokens = factory.getTokenizer(reader).tokenize();
    parse = _lp.apply(tokens);
    Assert.NotNull(parse);

    // Log the CC-processed typed dependencies.
    var langPack = new PennTreebankLanguagePack();
    var structure = langPack.grammaticalStructureFactory().newGrammaticalStructure(parse);
    var dependencies = structure.typedDependenciesCCprocessed();
    TestContext.Out.WriteLine($"\n{dependencies}\n");

    // Print the tree with collapsed dependencies as well.
    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    Assert.NotNull(printer);
    printer.printTree(parse);
}
public static void DemoAPI(LexicalizedParser lp)
{
    // Sample 1: parse a pre-tokenized word list.
    var words = new[] { "This", "is", "an", "easy", "sentence", "." };
    var labels = Sentence.toCoreLabelList(words);
    var parse = lp.apply(labels);
    parse.pennPrint();

    // Sample 2: tokenize with an explicit tokenizer, then parse.
    const string Sent2 = "This is another sentence.";
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new StringReader(Sent2);
    var tokens = factory.getTokenizer(reader).tokenize();
    parse = lp.apply(tokens);

    // Print CC-processed typed dependencies plus the collapsed-dependency tree.
    var langPack = new PennTreebankLanguagePack();
    var structure = langPack.grammaticalStructureFactory().newGrammaticalStructure(parse);
    var dependencies = structure.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", dependencies);
    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.printTree(parse);
}
/// <summary>
/// Parses a sentence with the shared dependency-parser model and returns its
/// CC-processed typed dependencies, one per line (each line preceded by '\n').
/// </summary>
/// <param name="sentence">Raw sentence text to tokenize and parse.</param>
/// <returns>Newline-separated typed dependencies; empty string when there are none.</returns>
public string parse(string sentence)
{
    // Tokenize with an explicit PTB tokenizer.
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new StringReader(sentence);
    var rawWords = tokenizerFactory.getTokenizer(reader).tokenize();
    reader.close(); // FIX: the reader was previously never closed

    var parse = _sdpModel.apply(rawWords);

    // Extract CC-processed typed dependencies from the lexical tree.
    var tlp = new PennTreebankLanguagePack();
    var gs = tlp.grammaticalStructureFactory().newGrammaticalStructure(parse);
    var tdl = gs.typedDependenciesCCprocessed();

    System.Console.WriteLine();

    // FIX: build the result with a StringBuilder instead of O(n^2)
    // string concatenation inside the loop.
    var result = new System.Text.StringBuilder();
    for (var it = tdl.iterator(); it.hasNext(); )
    {
        result.Append('\n').Append(it.next());
    }
    return result.ToString();
}
public void ProcessText(string inputText)
{
    // Location of the models extracted from the parser distribution.
    var jarRoot = "C:\\stanford-parser-full-2016-10-31\\stanford-parser-3.7.0-models";
    var modelsDirectory = jarRoot + "\\edu\\stanford\\nlp\\models";

    // Load the English PCFG parser from file.
    var parser = LexicalizedParser.loadModel(modelsDirectory + "\\lexparser\\englishPCFG.ser.gz");

    // Tokenize the text with an explicit tokenizer.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new StringReader(inputText);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();
    var tree = parser.apply(tokens);

    // Extract CC-processed dependencies from the lexical tree.
    var langPack = new PennTreebankLanguagePack();
    var structure = langPack.grammaticalStructureFactory().newGrammaticalStructure(tree);
    var dependencies = structure.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", dependencies);

    // Print the tree plus its collapsed dependencies.
    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.printTree(tree);
}
/// <summary>
/// Checks a list of sentences for correct grammar. Returns a new set of the
/// sentences that parse as grammatical, with terminal punctuation appended
/// ('.' for declaratives, '?' for questions).
/// </summary>
/// <param name="sentences">A list of strings that will have their grammar checked.</param>
/// <returns>
/// A new set of the sentences with correct grammar, or null when the input is
/// empty (null return kept for backward compatibility with existing callers).
/// </returns>
public static HashSet<string> CheckGrammer(HashSet<string> sentences)
{
    HashSet<string> answers = new HashSet<string>();
    Console.WriteLine(sentences.Count + " potential sentences\n");
    if (sentences.Count == 0)
    {
        return null;
    }
    Console.WriteLine("Loading Stanford NLP...");
    // Loading english PCFG parser from file
    var lp = LexicalizedParser.loadModel("..\\..\\..\\packages\\englishPCFG.ser.gz");
    Console.WriteLine("Stanford Parser Loaded!\n");

    // FIX: the tokenizer factory and language pack are loop-invariant;
    // previously they were recreated for every sentence.
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var tlp = new PennTreebankLanguagePack();

    //Test the grammar of each potential sentence that has all english words
    foreach (var curSentence in sentences)
    {
        //Convert the sentence to a tree that Stanford NLP library generates from its parsing
        var tempSentence = curSentence;
        var sentReader = new StringReader(tempSentence);
        var tree = lp.apply(tokenizerFactory.getTokenizer(sentReader).tokenize());
        sentReader.close();

        // Determine whether the parse is a sentence: a top-level S clause is
        // declarative; SINV/SBARQ/SQ clause types indicate a question.
        string strTree = tree.ToString();
        bool isSentence = false;
        if (strTree.Contains("(S "))
        {
            tempSentence = curSentence + ".";
            isSentence = true;
        }
        else if (strTree.Contains("(SINV ") || strTree.Contains("(SBARQ ") || strTree.Contains("(SQ "))
        {
            tempSentence = curSentence + "?";
            isSentence = true;
        }
        if (isSentence)
        {
            // Keep only sentences that contain a nominal-subject dependency.
            string strRel = tlp.grammaticalStructureFactory().newGrammaticalStructure(tree).typedDependenciesCCprocessed().ToString();
            if (strRel.Contains("nsubj("))
            {
                answers.Add(tempSentence);
            }
        }
    }
    return answers;
}
public virtual void TestBasicCategory()
{
    ITreebankLanguagePack tlp = new PennTreebankLanguagePack();
    // Table of { expected basic category, input label } pairs.
    var cases = new[]
    {
        new[] { "NP", "NP-SBJ-R" },
        new[] { "-", "-" },
        new[] { "-LRB-", "-LRB-" },
        new[] { "-", "--PU" },
        new[] { "-", "--PU-U" },
        new[] { "-LRB-", "-LRB--PU" },
        new[] { "-LRB-", "-LRB--PU-U" },
    };
    foreach (var c in cases)
    {
        NUnit.Framework.Assert.AreEqual(c[0], tlp.BasicCategory(c[1]));
    }
}
/// <param name="args"/> public static void Main(string[] args) { ITreebankLanguagePack tlp = new PennTreebankLanguagePack(); System.Console.Out.WriteLine("Start symbol: " + tlp.StartSymbol()); string start = tlp.StartSymbol(); System.Console.Out.WriteLine("Should be true: " + (tlp.IsStartSymbol(start))); string[] strs = new string[] { "-", "-LLB-", "NP-2", "NP=3", "NP-LGS", "NP-TMP=3" }; foreach (string str in strs) { System.Console.Out.WriteLine("String: " + str + " basic: " + tlp.BasicCategory(str) + " basicAndFunc: " + tlp.CategoryAndFunction(str)); } }
public NlpService()
{
    // Load the serialized English PCFG model; fail fast when it cannot be loaded.
    const string parserFileOrUrl = "englishPCFG.ser.gz";
    _lp = LexicalizedParser.loadModel(parserFileOrUrl);
    if (_lp == null)
    {
        throw new InvalidOperationException("couldn't load " + parserFileOrUrl);
    }

    // Shared, reusable NLP components.
    _tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    _tlp = new PennTreebankLanguagePack();
    _structureFactory = _tlp.grammaticalStructureFactory();
}
public static List<DependencyRelationship> ParseDepencyRelationshipsInSentence(string sentence)
{
    // Tokenize the sentence and parse it with the shared parser model.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new StringReader(sentence);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();
    var tree = LoadedLexicalizedParserModel.Instance.apply(tokens);

    // Extract dependencies from lexical tree and convert them to relationships.
    var langPack = new PennTreebankLanguagePack();
    var structure = langPack.grammaticalStructureFactory().newGrammaticalStructure(tree);
    var dependencies = structure.typedDependencies();
    return ParseJavaDependecyRelationships(dependencies);
}
public static IEnumerable<TypedDependency> ComputeDependencies(Parse parse)
{
    // Build a grammatical structure over the parse tree and read off its typed dependencies.
    var languagePack = new PennTreebankLanguagePack();
    var factory = languagePack.GrammaticalStructureFactory();
    var tree = new ParseTree(parse);
    try
    {
        return factory.NewGrammaticalStructure(tree).TypedDependencies();
    }
    catch (Exception)
    {
        // Best-effort: dependency extraction can fail on malformed parses;
        // log and return an empty result rather than propagating.
        Console.WriteLine("Exception when computing deps for {0}", parse);
        return new List<TypedDependency>();
    }
}
public static void DemoDP(LexicalizedParser lp, string fileName)
{
    // Sentence-segment and tokenize a file via DocumentPreprocessor,
    // then parse each sentence and print its CC-processed dependencies.
    var langPack = new PennTreebankLanguagePack();
    var structureFactory = langPack.grammaticalStructureFactory();
    // A custom tokenizer could be created here and passed to DocumentPreprocessor.
    foreach (List sentence in new DocumentPreprocessor(fileName))
    {
        var tree = lp.apply(sentence);
        tree.pennPrint();
        var structure = structureFactory.newGrammaticalStructure(tree);
        var dependencies = structure.typedDependenciesCCprocessed(true);
        System.Console.WriteLine("\n{0}\n", dependencies);
    }
}
public static void DemoDP(LexicalizedParser lp, string fileName)
{
    // Sentence-segment and tokenize a file using DocumentPreprocessor,
    // then parse every sentence and print its typed dependencies.
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    // A custom tokenizer could be created here and passed to DocumentPreprocessor.
    foreach (List sentence in new DocumentPreprocessor(fileName))
    {
        var parse = lp.apply(sentence);
        parse.pennPrint();
        var gs = gsf.newGrammaticalStructure(parse);
        var tdl = gs.typedDependenciesCCprocessed(true);
        Console.WriteLine("\n{0}\n", tdl);
    }
}
public static void Start(string model, string fileName)
{
    // Use the supplied model when given, otherwise fall back to the default.
    var grammar = String.IsNullOrEmpty(model) ? Program.ParserModel : model;
    var options = new[] { "-maxLength", "80", "-retainTmpSubcategories" };
    var lp = LexicalizedParser.loadModel(grammar, options);
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();

    // Collect sentences: either from a file, or from two built-in samples.
    var sentences = new List<ArrayList>();
    if (string.IsNullOrEmpty(fileName))
    {
        var easyWords = new[] { "This", "is", "an", "easy", "sentence", "." };
        var arrList = new ArrayList();
        foreach (var w in easyWords)
        {
            arrList.Add(new Word(w));
        }
        sentences.Add(arrList);

        const string Sent2 = "This is a slightly longer and more complex sentence requiring tokenization.";
        var tokenizer = tlp.getTokenizerFactory().getTokenizer(new StringReader(Sent2));
        sentences.Add((ArrayList)tokenizer.tokenize());
    }
    else
    {
        sentences.AddRange(new DocumentPreprocessor(fileName).Cast<ArrayList>());
    }

    // Parse each sentence and print the tree, tagged yield and typed dependencies.
    foreach (var sentence in sentences)
    {
        var parse = lp.apply(sentence);
        parse.pennPrint();
        System.Console.WriteLine("\n{0}\n", (parse.taggedYield()));
        var gs = gsf.newGrammaticalStructure(parse);
        var tdl = gs.typedDependenciesCCprocessed(true);
        System.Console.WriteLine("{0}\n", tdl);
    }
}
public static void Start(string model, string fileName)
{
    // Prefer the caller-supplied model path; otherwise use the default one.
    var grammar = String.IsNullOrEmpty(model) ? Program.ParserModel : model;
    var options = new[] { "-maxLength", "80", "-retainTmpSubcategories" };
    var parser = LexicalizedParser.loadModel(grammar, options);
    var langPack = new PennTreebankLanguagePack();
    var structureFactory = langPack.grammaticalStructureFactory();

    // Build the sentence list from a file or from two built-in examples.
    var sentenceList = new List<ArrayList>();
    if (string.IsNullOrEmpty(fileName))
    {
        var wordList = new[] { "This", "is", "an", "easy", "sentence", "." };
        var pretokenized = new ArrayList();
        foreach (var word in wordList)
        {
            pretokenized.Add(new Word(word));
        }
        sentenceList.Add(pretokenized);

        const string Sent2 = "This is a slightly longer and more complex sentence requiring tokenization.";
        var tokenizer = langPack.getTokenizerFactory().getTokenizer(new StringReader(Sent2));
        sentenceList.Add((ArrayList)tokenizer.tokenize());
    }
    else
    {
        sentenceList.AddRange(new DocumentPreprocessor(fileName).Cast<ArrayList>());
    }

    // Parse each sentence; print its tree, tagged yield and typed dependencies.
    foreach (var sentence in sentenceList)
    {
        var tree = parser.apply(sentence);
        tree.pennPrint();
        System.Console.WriteLine("\n{0}\n", (tree.taggedYield()));
        var structure = structureFactory.newGrammaticalStructure(tree);
        var dependencies = structure.typedDependenciesCCprocessed(true);
        System.Console.WriteLine("{0}\n", dependencies);
    }
}
public void LoadSentencesFromFile()
{
    // Sentence-segment and tokenize a sample file with DocumentPreprocessor,
    // then parse every sentence and log its typed dependencies.
    var langPack = new PennTreebankLanguagePack();
    var structureFactory = langPack.grammaticalStructureFactory();
    // A custom tokenizer could be created here and passed to DocumentPreprocessor.
    var preprocessor = new DocumentPreprocessor(Files.DataFile("SampleText.txt"));
    foreach (var sentence in preprocessor.ToSeq().Cast<List>())
    {
        var tree = _lp.apply(sentence);
        Assert.NotNull(tree);
        tree.pennPrint();
        var structure = structureFactory.newGrammaticalStructure(tree);
        var dependencies = structure.typedDependenciesCCprocessed(true);
        TestContext.Out.WriteLine($"\n{dependencies}\n");
    }
}
static void Main()
{
    // Path to the models extracted from `stanford-parser-3.6.0-models.jar`.
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-parser-full-2015-12-09\models\";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Load the English PCFG parser from file.
    var parser = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // Sample 1: parse a pre-tokenized word list.
    var words = new[] { "This", "is", "an", "easy", "sentence", "." };
    var labels = Sentence.toCoreLabelList(words);
    var tree = parser.apply(labels);
    tree.pennPrint();

    // Sample 2: tokenize explicitly, then parse.
    var text = "This is another sentence.";
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new StringReader(text);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();
    var tree2 = parser.apply(tokens);

    // Extract CC-processed typed dependencies from the lexical tree.
    var langPack = new PennTreebankLanguagePack();
    var structure = langPack.grammaticalStructureFactory().newGrammaticalStructure(tree2);
    var dependencies = structure.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", dependencies);

    // Print the tree with collapsed dependencies.
    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.printTree(tree2);
}
// Use Stanford.NLP.Net to parse a sentence into a phrase-structure tree.
Tree Parse(string sent)
{
    // Tokenize the sentence with the PTB tokenizer.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new java.io.StringReader(sent);
    var tokens = factory.getTokenizer(reader).tokenize();
    reader.close();
    var tree = lp.apply(tokens);

    // Derive CC-processed typed dependencies from the lexical tree.
    var langPack = new PennTreebankLanguagePack();
    var structure = langPack.grammaticalStructureFactory().newGrammaticalStructure(tree);
    var dependencies = structure.typedDependenciesCCprocessed();

    // Print the tree in Penn Treebank format.
    //var printer = new TreePrint("penn,typedDependenciesCollapsed");
    var printer = new TreePrint("penn");
    printer.printTree(tree);
    return tree;
}
public static void DemoAPI(LexicalizedParser lp)
{
    // Sample 1: parse a correctly tokenized word list.
    var words = new[] { "This", "is", "an", "easy", "sentence", "." };
    var labels = Sentence.toCoreLabelList(words);
    var parse = lp.apply(labels);
    parse.pennPrint();

    // Sample 2: tokenize explicitly, then parse.
    const string Sent2 = "This is another sentence.";
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new StringReader(Sent2);
    var tokens = factory.getTokenizer(reader).tokenize();
    parse = lp.apply(tokens);

    // Print CC-processed dependencies, then the collapsed-dependency tree.
    var langPack = new PennTreebankLanguagePack();
    var structure = langPack.grammaticalStructureFactory().newGrammaticalStructure(parse);
    var dependencies = structure.typedDependenciesCCprocessed();
    System.Console.WriteLine("\n{0}\n", dependencies);
    var printer = new TreePrint("penn,typedDependenciesCollapsed");
    printer.printTree(parse);
}
/// <summary>Lets you test out the TreeBinarizer on the command line.</summary>
/// <remarks>
/// Lets you test out the TreeBinarizer on the command line.
/// This main method doesn't yet handle as many flags as one would like.
/// But it does have:
/// <ul>
/// <li> -tlp TreebankLanguagePack
/// <li>-tlpp TreebankLangParserParams
/// <li>-insideFactor
/// <li>-markovOrder
/// </ul>
/// </remarks>
/// <param name="args">
/// Command line arguments: flags as above, as above followed by
/// treebankPath
/// </param>
public static void Main(string[] args)
{
    ITreebankLangParserParams tlpp = null;
    ITreeReaderFactory trf = null;
    string fileExt = "mrg";
    IHeadFinder hf = new ModCollinsHeadFinder();
    ITreebankLanguagePack tlp = new PennTreebankLanguagePack();
    bool insideFactor = false;
    bool mf = false;
    int mo = 1;
    bool uwl = false;
    bool uat = false;
    double sst = 20.0;
    bool mfs = false;
    bool simpleLabels = false;
    bool noRebinarization = false;

    // Consume leading "-flag" arguments; the first non-flag is the treebank path.
    int i = 0;
    while (i < args.Length && args[i].StartsWith("-"))
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-tlp") && i + 1 < args.Length)
        {
            try
            {
                tlp = (ITreebankLanguagePack)System.Activator.CreateInstance(Sharpen.Runtime.GetType(args[i + 1]));
            }
            catch (Exception e)
            {
                log.Info("Couldn't instantiate: " + args[i + 1]);
                throw new Exception(e);
            }
            i++;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-tlpp") && i + 1 < args.Length)
        {
            try
            {
                tlpp = (ITreebankLangParserParams)System.Activator.CreateInstance(Sharpen.Runtime.GetType(args[i + 1]));
            }
            catch (Exception e)
            {
                log.Info("Couldn't instantiate: " + args[i + 1]);
                throw new Exception(e);
            }
            i++;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-insideFactor"))
        {
            insideFactor = true;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-markovOrder") && i + 1 < args.Length)
        {
            i++;
            mo = System.Convert.ToInt32(args[i]);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-simpleLabels"))
        {
            simpleLabels = true;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-noRebinarization"))
        {
            noRebinarization = true;
        }
        else
        {
            log.Info("Unknown option:" + args[i]);
        }
        i++;
    }

    if (i >= args.Length)
    {
        log.Info("usage: java TreeBinarizer [-tlpp class|-markovOrder int|...] treebankPath");
        System.Environment.Exit(0);
    }

    // A parser-params class, when supplied, overrides the treebank reader,
    // file extension, language pack and head finder.
    Treebank treebank;
    if (tlpp != null)
    {
        treebank = tlpp.MemoryTreebank();
        tlp = tlpp.TreebankLanguagePack();
        fileExt = tlp.TreebankFileExtension();
        hf = tlpp.HeadFinder();
    }
    else
    {
        treebank = new DiskTreebank(trf);
    }
    treebank.LoadPath(args[i], fileExt, true);

    // Binarize every tree and print the before/after pair.
    ITreeTransformer tt = new Edu.Stanford.Nlp.Parser.Lexparser.TreeBinarizer(hf, tlp, insideFactor, mf, mo, uwl, uat, sst, mfs, simpleLabels, noRebinarization);
    foreach (Tree t in treebank)
    {
        Tree newT = tt.TransformTree(t);
        System.Console.Out.WriteLine("Original tree:");
        t.PennPrint();
        System.Console.Out.WriteLine("Binarized tree:");
        newT.PennPrint();
        System.Console.Out.WriteLine();
    }
}
/// <summary>
/// Parses a sentence, prints its collapsed dependencies, detects the first
/// shape keyword appearing as a dependent, and associates that shape's
/// properties with the dependency list.
/// </summary>
/// <param name="sent2">Raw sentence text.</param>
public void SentenceParser(string sent2)
{
    var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

    // Loading english PCFG parser from file
    var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // FIX: strings are immutable, so ToLower() returns a new string that must
    // be assigned back; the original call discarded the result.
    sent2 = sent2.ToLower();

    // Tokenize with an explicit tokenizer.
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new java.io.StringReader(sent2);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close();
    var tree2 = lp.apply(rawWords2);

    // Extract dependencies from lexical tree
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(tree2);
    var tdl = gs.typedDependenciesCCprocessed();

    // Print the tree and its collapsed dependencies.
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(tree2);

    // Find the first configured keyword appearing as a dependent word.
    ArrayList dep = gs.typedDependenciesCollapsed() as ArrayList;
    foreach (TypedDependency td in dep)
    {
        for (int i = 0; i < keyword.Length; i++)
        {
            if (td.dep().originalText().Equals(keyword[i]))
            {
                keyFlag = true;
                key = keyword[i];
                break;
            }
        }
        if (keyFlag)
        {
            break;
        }
    }
    keyFlag = false;

    // Map the detected keyword to its shape and associate its properties
    // with the dependency list; unknown keys leave state untouched.
    switch (key)
    {
        case "circle":
            shape = new Circle().GetProps();
            propsUsed = Associator(shape, dep);
            break;
        case "rectangle":
            shape = new Rectangle().GetProps();
            propsUsed = Associator(shape, dep);
            break;
        case "triangle":
            shape = new Triangle().GetProps();
            propsUsed = Associator(shape, dep);
            break;
        case "square":
            shape = new Square().GetProps();
            propsUsed = Associator(shape, dep);
            break;
        default:
            break;
    }
    dependency = tdl.ToString();
}
/// <summary>
/// Demo entry point: tokenizes a sample sentence with the OpenNLP
/// maximum-entropy tokenizer, parses it with the English treebank parser,
/// then prints the typed dependencies extracted from the parse tree.
/// </summary>
/// <remarks>
/// Earlier experiments (output redirection, tokenizer/sentence-detector
/// training, detokenization, tokenization-issue scans) were dead
/// commented-out code and have been removed.
/// </remarks>
private static void Main(string[] args)
{
    // Tokenize and parse a sample sentence.
    var sentence = "This is a generic bank response, which indicates simply that they are not willing to accept the transaction.";
    var tokenizer = new EnglishMaximumEntropyTokenizer(currentDirectory + "../Resources/Models/EnglishTok.nbin");
    var tokens = tokenizer.Tokenize(sentence);
    var modelPath = currentDirectory + "../Resources/Models/";
    var parser = new OpenNLP.Tools.Parser.EnglishTreebankParser(modelPath, true, false);
    var parse = parser.DoParse(tokens);

    // Extract dependencies from lexical tree
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.GrammaticalStructureFactory();
    var tree = new ParseTree(parse);
    Console.WriteLine(tree);
    var gs = gsf.NewGrammaticalStructure(tree);
    var dependencies = gs.TypedDependencies();
    foreach (var dep in dependencies)
    {
        Console.WriteLine(dep);
    }

    Console.WriteLine("===========");
    Console.WriteLine("OK");
    Console.ReadKey();
}
/// <summary>
/// Gets the collapsed typed dependencies from an annotated sentence.
/// For every nominal-subject dependency an extra complement entry with the
/// governor/dependent role flipped is added as well.
/// </summary>
/// <param name="annotation">Annotation holding the sentence's parse tree.</param>
/// <returns>The collected dependencies.</returns>
private NotenizerDependencies GetDepencencies(Annotation annotation)
{
    var tree = annotation.get(typeof(TreeCoreAnnotations.TreeAnnotation)) as Tree;
    TreebankLanguagePack treeBankLangPack = new PennTreebankLanguagePack();
    var gramStructFact = treeBankLangPack.grammaticalStructureFactory();
    var gramStruct = gramStructFact.newGrammaticalStructure(tree);
    var typedDependencies = gramStruct.typedDependenciesCollapsed();

    var dependencies = new NotenizerDependencies();
    foreach (TypedDependency typedDependency in (typedDependencies as java.util.ArrayList))
    {
        var dep = new NotenizerDependency(typedDependency);
        dependencies.Add(dep);

        if (dep.Relation.IsNominalSubject())
        {
            // Mirror the nsubj entry so both ends of the relation are tracked.
            var nsubjComplement = new NotenizerDependency(typedDependency);
            nsubjComplement.TokenType = dep.TokenType == TokenType.Dependent ? TokenType.Governor : TokenType.Dependent;
            dependencies.Add(nsubjComplement);
        }
    }
    return dependencies;
}
// Add WordNet search paths to this as the 'object' parameter?
/// <summary>
/// Oxymoron: A terse paradox; the yoking of two contradictory terms.
/// Scans every sentence's collapsed typed dependencies for modifier/object/
/// subject pairs, then walks the WordNet search-path tree to test whether the
/// paired words are contradictory; matches are merged into a.Figures.
/// </summary>
/// <param name="a"></param>
/// <param name="windowSize"></param>
public static void FindOxymoron(Analyzer a, int? windowSize, object greedy)
{
    int ws = windowSize ?? 1; // Not used. The window size is one sentence.
    bool greedySearch = (bool?)greedy ?? false;

    // Extracts the 0-based token index encoded in a dependency node's label
    // (the trailing "-<digits>" suffix produced by the Stanford parser).
    GetDependencyIndexDelegate GetDependencyIndex = delegate(TreeGraphNode t)
    {
        return Convert.ToInt32(Regex.Match(t.toString(), "^.*?-(\\d+)\\'*$").Result("$1")) - 1;
    };

    // Visitor run over the WordNet search-path tree: expands each node's word
    // list via the node's relation (synonyms / antonyms / derivational forms)
    // and, once an Antonym link has been traversed, records the overlap with
    // the second word's derived forms in the shared OxymoronData accumulator.
    Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationVisitor = (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
    {
        if (n.IsRoot()) return;
        var oxymoronData = (OxymoronData)o;
        if (oxymoronData.Overlap.Value != 0) return; // already found a match; stop expanding
        var w1 = oxymoronData.W1;
        var derivedFormsW2 = oxymoronData.GetDerivedFormsW2();
        bool checkedAntonyms = false;
        // Walk up toward the root to see whether an Antonym edge was already crossed.
        var currentNode = n;
        while (!currentNode.Parent.IsRoot())
        {
            currentNode = currentNode.Parent;
            if (currentNode.Value.Relation == WordNetEngine.SynSetRelation.Antonym)
            {
                checkedAntonyms = true;
                break;
            }
        }
        var p = n.Parent;
        // Seed candidates from the parent node's words (or w1 at the first level).
        var candidates = new List<string> { w1 };
        if (!p.IsRoot()) candidates = p.Value.Words;
        var relation = n.Value.Relation;
        switch(relation)
        {
            case WordNetEngine.SynSetRelation.SimilarTo:
                n.Value.Words = Token.FindSynonyms(candidates);
                break;
            case WordNetEngine.SynSetRelation.Antonym:
                n.Value.Words = Token.FindAntonyms(candidates);
                if (!checkedAntonyms) checkedAntonyms = true;
                break;
            case WordNetEngine.SynSetRelation.DerivationallyRelated:
                n.Value.Words = Token.FindDerivationalForms(candidates, Analyzer.SimilarityPrefixes, Analyzer.MostCommonSimilaritySuffixes, useAllForms: greedySearch ? true : false);
                if (checkedAntonyms)
                {
                    // Also try negation-prefixed variants of w1 (e.g. "un" + w1).
                    var negations = new List<string>(Analyzer.NegationPrefixes.Select(x => (string)(x.Clone()) + w1));
                    n.Value.Words.AddRange(Token.FindDerivationalForms(negations, null, null, useAllForms: greedySearch ? true : false));
                }
                break;
        }
        if (!checkedAntonyms) n.Value.Words.AddRange(candidates);
        n.Value.Words = n.Value.Words.Distinct().ToList(); // Remove duplicates.
        if (oxymoronData.Debug)
        {
            Console.WriteLine("===================================================");
            Console.WriteLine("Relation: " + relation.ToString());
            //Console.WriteLine("Parent relation: " + p.Value.Relation.ToString());
            Console.WriteLine("Child count: " + n.Children.Count());
            Console.WriteLine("Node candidates:");
            if (n.IsRoot() || n.Value.Words.Count == 0) Console.WriteLine(" None");
            else
            {
                foreach (var w in n.Value.Words) Console.WriteLine(" " + w.ToString());
            }
            if (n.IsLeaf()) Console.WriteLine("LEAF NODE");
            Console.WriteLine("===================================================");
        }
        // Only count overlap after an antonym step: that is what makes it an oxymoron.
        if (checkedAntonyms) oxymoronData.Overlap.Value = n.Value.Words.Intersect(derivedFormsW2).Count();
    };

    // Visitor that resets every node's word list between candidate pairs.
    Action<Miscellaneous.TreeNode<Analyzer.WordNetRelation>, object> WordNetRelationNullVisitor = (Miscellaneous.TreeNode<Analyzer.WordNetRelation> n, object o) =>
    {
        //Console.WriteLine(n.Value.Relation.ToString());
        n.Value.Words = null;
    };

    // Dependency relations whose word pairs are considered oxymoron candidates.
    string dependencySymbols = @"^(amod|advmod|acomp|dobj|nsubj|prep)$";
    var allSubsequences = new List<List<Subsequence>>();
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    for (int i = 0; i < a.Document.Sentences.Count; ++i)
    {
        var sentence = a.Document.Sentences[i];
        var subsequenceTokens = new List<SubsequenceToken>();
        foreach (var token in sentence.Tokens) subsequenceTokens.Add(new SubsequenceToken(token, sentence));
        var phrases = sentence.Phrases;
        if (phrases.Count > 0)
        {
            var subsequence = new Subsequence(subsequenceTokens, sentence, phrases[0].Subsequences[0].ContainingSubsequence, i);
            var tree = sentence.Tree;
            GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
            java.util.Collection tdc = gs.typedDependenciesCollapsed();
            // Collect candidate word pairs from the matching dependencies.
            var candidates = new List<Subsequence>();
            for (java.util.Iterator j = tdc.iterator(); j.hasNext(); )
            {
                var td = (TypedDependency)j.next();
                var relation = td.reln().getShortName();
                if (Regex.IsMatch(relation, dependencySymbols))
                {
                    var governorIndex = GetDependencyIndex(td.gov());
                    var dependentIndex = GetDependencyIndex(td.dep());
                    var index = Math.Min(governorIndex, dependentIndex);
                    var count = Math.Abs(dependentIndex - governorIndex) + 1;
                    // "prep" keeps the whole span; other relations keep only the two endpoints.
                    var ss = relation == "prep" ? subsequence.GetRange(index, count) : subsequence.Where((n, k) => k == governorIndex | k == dependentIndex).ToList();
                    // Remove any leftover punctuation from the candidate subsequences.
                    ss.RemoveAll(n => Regex.IsMatch(n.Tag, Analyzer.PunctuationPatterns));
                    candidates.Add(new Subsequence(ss, sentence, subsequence.ContainingSubsequence, i));
                }
            }
            // Determine whether the candidate pairs are oxymorons.
            for (int k = 0; k < candidates.Count; ++k)
            {
                var list = new List<Subsequence>();
                Token[] pair = { candidates[k][0], candidates[k][candidates[k].Count - 1] };
                // Clear (i.e. null) all the word lists in the WordNet search-path tree.
                a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);
                var overlap = new OxymoronData.IntClass(0);
                a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair, overlap, greedy: greedySearch, debug: false));
                if (overlap.Value == 0)
                {
                    // No match in one direction: retry with the pair reversed.
                    a.WordNetSearchPath.Traverse(WordNetRelationNullVisitor);
                    a.WordNetSearchPath.Traverse(WordNetRelationVisitor, new OxymoronData(pair.Reverse().ToArray(), overlap, greedy: greedySearch, debug: false));
                }
                if (overlap.Value != 0)
                {
                    list.Add(candidates[k]);
                    allSubsequences.Add(list);
                }
            }
        }
    }
    // Remove duplicate instances and merge those contained in others.
    var figures = MergeFigures(allSubsequences, RhetoricalFigures.Oxymoron, multiWindow: true);
    a.Figures.AddRange(figures);
}
public string Tags(string input)
{
    // Path to models extracted from `stanford-parser-3.6.0-models.jar`
    var jarRoot = @"";
    var modelsDirectory = jarRoot;
    var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // Tokenize the input with an explicit tokenizer.
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var reader = new java.io.StringReader(input);
    var tokens = tokenizerFactory.getTokenizer(reader).tokenize();
    reader.close();
    var tree = lp.apply(tokens);

    // Extract CC-processed dependencies from the lexical tree.
    var tlp = new PennTreebankLanguagePack();
    var gs = tlp.grammaticalStructureFactory().newGrammaticalStructure(tree);
    var tdl = gs.typedDependenciesCCprocessed();

    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    UnityEngine.Debug.Log(tdl);
    //tp.printTree(tree);

    // Walk the dependency list, picking out action, subject and target words.
    for (int i = 0; i < tdl.size(); i++)
    {
        var node = (TypedDependency)tdl.get(i);
        var relation = node.reln().getShortName();
        if (relation.Contains("nsubj"))
        {
            action = node.gov().value();
            UnityEngine.Debug.Log("This is the action " + action);
            subj = node.dep().value();
            UnityEngine.Debug.Log("This is the subject " + subj);
        }
        if (relation.Contains("dobj"))
        {
            action = node.gov().value();
            UnityEngine.Debug.Log("This is the action " + action);
            target = node.dep().value();
            UnityEngine.Debug.Log("This is the target " + target);
        }
        if (relation.Contains("nmod"))
        {
            second_target = node.dep().value();
            UnityEngine.Debug.Log("This is the target second " + second_target);
        }
    }
    return tdl.ToString();
}