/// <summary>
/// Builds a filter pattern from a tag-sequence expression, prints it, and then
/// prints every item produced by filtering the Treebank tagged sentences with it.
/// </summary>
/// <remarks>
/// The corpus is read from <c>Data/Treebank</c> under the current user's
/// "My Documents" folder. The selector handed to <c>Filter</c> returns the text
/// of the whole regex match (group 0) for each tagged sentence.
/// NOTE(review): the expression looks like a noun-phrase tag pattern (adjective/noun
/// tags ending in a noun tag), going by the method name — confirm against
/// <c>RegexTools.regex_filter_pattern</c>.
/// </remarks>
private static void test_regex_filter_for_noun_phrases_from_wsj()
{
    var nounPhrasePattern = RegexTools.regex_filter_pattern("{J\\S+|N\\S+}{J\\S+|N\\S+|IN\\S|TO\\S}*{N\\S+}");
    Console.WriteLine(nounPhrasePattern);

    // Resolve the corpus directory only after the pattern has been echoed,
    // mirroring the order in which the work was originally performed.
    var documentsDir = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
    var corpusRoot = Path.Combine(documentsDir, @"Data/Treebank");
    var reader = new NLTKTreebankCorpusReader(corpusRoot);

    var matches = reader.read_tagged_sents()
        .Filter(sentence => Regex.Match(sentence, nounPhrasePattern).Groups[0].Value);

    foreach (var match in matches)
    {
        Console.WriteLine(match);
    }
}
/// <summary>
/// Prints every item returned by the Treebank reader's <c>words()</c> call,
/// one per console line.
/// </summary>
/// <remarks>
/// The corpus is read from <c>Data/Treebank</c> under the current user's
/// "My Documents" folder.
/// </remarks>
private static void words_from_nltk_treebank()
{
    var documentsDir = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
    var corpusRoot = Path.Combine(documentsDir, @"Data/Treebank");
    var reader = new NLTKTreebankCorpusReader(corpusRoot);

    var tokens = reader.words();
    foreach (var token in tokens)
    {
        Console.WriteLine(token);
    }
}
/// <summary>
/// Prints every item returned by the Treebank reader's <c>read_raw()</c> call,
/// one per console line.
/// </summary>
/// <remarks>
/// The corpus is read from <c>Data/Treebank</c> under the current user's
/// "My Documents" folder.
/// </remarks>
private static void raw_text_from_nltk_treebank()
{
    var documentsDir = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
    var corpusRoot = Path.Combine(documentsDir, @"Data/Treebank");
    var reader = new NLTKTreebankCorpusReader(corpusRoot);

    var rawChunks = reader.read_raw();
    foreach (var chunk in rawChunks)
    {
        Console.WriteLine(chunk);
    }
}