public void ParseEasySentence()
{
    // This option shows parsing a list of correctly tokenized words
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    var rawWords = SentenceUtils.toCoreLabelList(sent);
    var parse = _lp.apply(rawWords);
    Assert.NotNull(parse);
    parse.pennPrint();

    // This option shows loading and using an explicit tokenizer
    var sent2 = "This is another sentence.";
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    using var sent2Reader = new StringReader(sent2);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    parse = _lp.apply(rawWords2);
    Assert.NotNull(parse);

    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(parse);
    var tdl = gs.typedDependenciesCCprocessed();
    TestContext.Out.WriteLine($"\n{tdl}\n");

    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    Assert.NotNull(tp);
    tp.printTree(parse);
}
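// The `_lp` field used by the test above is not shown in this snippet. A minimal
// sketch of the fixture setup it implies, assuming an NUnit [OneTimeSetUp] and a
// local copy of the englishPCFG model (the path below is a placeholder, not the
// original project's path):
private LexicalizedParser _lp;

[OneTimeSetUp]
public void Setup()
{
    // Hypothetical model location; point this at wherever the models jar was extracted.
    _lp = LexicalizedParser.loadModel(@"models\lexparser\englishPCFG.ser.gz");
}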
public List<string> FindTag(string text, List<string> tag)
{
    List<string> adj = new List<string>();

    // Split the input text into tokenized sentences
    object[] sentences = MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        // POS-tag each sentence and collect every word whose tag is in the requested set
        var taggedSentence = this.tagger.tagSentence(sentence);
        foreach (CoreLabel word in SentenceUtils.toCoreLabelList(taggedSentence).toArray())
        {
            if (tag.Contains(word.tag()))
            {
                adj.Add(word.word());
            }
        }
    }
    return adj;
}
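// A minimal usage sketch for FindTag, assuming it is called on an instance whose
// `tagger` (a MaxentTagger) has already been initialized. The Penn Treebank
// adjective tags are passed in; the sample sentence and variable names are
// illustrative only:
var adjectives = FindTag(
    "The quick brown fox jumps over the lazy dog.",
    new List<string> { "JJ", "JJR", "JJS" });
// adjectives should now hold the adjective tokens, e.g. "quick", "brown", "lazy".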
public List<string> FindNouns(string text)
{
    List<string> nouns = new List<string>();

    // Split the input text into tokenized sentences
    object[] sentences = MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray();
    foreach (ArrayList sentence in sentences)
    {
        // POS-tag each sentence and keep the common nouns, singular (NN) and plural (NNS)
        var taggedSentence = this.tagger.tagSentence(sentence);
        foreach (CoreLabel word in SentenceUtils.toCoreLabelList(taggedSentence).toArray())
        {
            switch (word.tag())
            {
                case "NN":
                case "NNS":
                    nouns.Add(word.word());
                    break;
            }
        }
    }
    return nouns;
}
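// A usage sketch for FindNouns (illustrative only). Note that the switch above
// matches only common nouns; proper nouns (NNP/NNPS) are deliberately not included:
var nouns = FindNouns("The cat sat on the mat.");
// Expected to yield the common nouns of the sentence, e.g. "cat" and "mat".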
public List<TargetCandidate> GetAllNounPhrases(string[] sentence, string[] target)
{
    // Parse the pre-tokenized sentence and derive its typed dependencies
    var tree = lexParser.apply(SentenceUtils.toCoreLabelList(sentence));
    var dependencies = grammaticalStructureFactory.newGrammaticalStructure(tree).typedDependenciesCCprocessed();

    // Collect every NP subtree of the parse as a candidate noun phrase
    List<TargetCandidate> nounPhrases = new List<TargetCandidate>();
    var subTrees = tree.subTreeList();
    for (int i = 0; i < subTrees.size(); i++)
    {
        Tree subTree = (Tree)subTrees.get(i);
        if (subTree.label().value() == "NP")
        {
            NounPhrase phrase = NounPhrase.SetSentence(sentence, tree, dependencies, target);
            phrase.SetPhrase(SentenceUtils.listToString(subTree.yield()));
            nounPhrases.Add(new TargetCandidate(phrase, caching));
        }
    }
    return nounPhrases;
}
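// A usage sketch for GetAllNounPhrases, assuming the class's `lexParser`,
// `grammaticalStructureFactory`, and `caching` members are initialized elsewhere
// (they are not shown in this snippet); NounPhrase and TargetCandidate are
// project-specific types whose API is defined outside this listing:
var candidates = GetAllNounPhrases(
    new[] { "The", "quick", "brown", "fox", "jumps", "." },
    new[] { "fox" });
foreach (var candidate in candidates)
{
    // Each candidate wraps one NP subtree of the parse, e.g. "The quick brown fox".
}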
static void Main()
{
    // Path to the models extracted from the Stanford Parser models jar
    var jarRoot = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-parser-full-2018-10-17\models\";
    var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

    // Load the English PCFG parser from file
    var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // This sample shows parsing a list of correctly tokenized words
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    var rawWords = SentenceUtils.toCoreLabelList(sent);
    var tree = lp.apply(rawWords);
    tree.pennPrint();

    // This option shows loading and using an explicit tokenizer
    var sent2 = "This is another sentence.";
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sent2Reader = new StringReader(sent2);
    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    sent2Reader.close();
    var tree2 = lp.apply(rawWords2);

    // Extract dependencies from the lexicalized tree
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.grammaticalStructureFactory();
    var gs = gsf.newGrammaticalStructure(tree2);
    var tdl = gs.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", tdl);

    // Print the tree together with its collapsed dependencies
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(tree2);
}
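// Beyond printing the whole dependency list at once, the typed dependencies can be
// inspected individually. A sketch of a fragment that could follow the `tdl`
// assignment inside Main above, using the same IKVM-style iteration as the other
// samples (TypedDependency, reln(), gov(), and dep() are Stanford Parser API;
// the output format shown is illustrative):
foreach (TypedDependency td in tdl.toArray())
{
    Console.WriteLine("{0}({1}-{2}, {3}-{4})",
        td.reln().getShortName(),
        td.gov().word(), td.gov().index(),
        td.dep().word(), td.dep().index());
}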