/// <summary>
/// Parses <paramref name="sentence"/> with the cached treebank parser,
/// creating that parser from <c>mModelPath</c> the first time it is needed.
/// </summary>
/// <param name="sentence">The sentence text handed to <c>DoParse</c>.</param>
/// <returns>The parse produced by <c>mParser.DoParse</c>.</returns>
private OpenNLP.Tools.Parser.Parse ParseSentence(string sentence)
{
    // Fast path: a parser built by an earlier call is reused as-is.
    if (mParser != null)
    {
        return mParser.DoParse(sentence);
    }

    // First call: build the parser once and keep it for later calls.
    mParser = new OpenNLP.Tools.Parser.EnglishTreebankParser(mModelPath, true, false);
    return mParser.DoParse(sentence);
}
/// <summary>
/// Parses <paramref name="sentence"/> using the shared treebank parser.
/// The parser is constructed from <c>mModelPath</c> on first use and stored
/// in <c>mParser</c> so later calls reuse it.
/// </summary>
/// <param name="sentence">The sentence text handed to <c>DoParse</c>.</param>
/// <returns>The parse produced by the parser's <c>DoParse</c> method.</returns>
public static OpenNLP.Tools.Parser.Parse ParseSentence(string sentence)
{
    var sharedParser = mParser;

    if (sharedParser == null)
    {
        // Nothing cached yet: create the parser and remember it.
        sharedParser = new OpenNLP.Tools.Parser.EnglishTreebankParser(mModelPath, true, false);
        mParser = sharedParser;
    }

    return sharedParser.DoParse(sentence);
}
/// <summary>
/// Console entry point. Tokenizes and parses one hard-coded sentence, prints
/// the resulting tree and its typed dependencies, then waits for a key press.
/// Several earlier experiments are kept below as disabled (commented-out) code.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
private static void Main(string[] args)
{
    // --- disabled: redirect console output to a file ---
    /*FileStream ostrm;
     * StreamWriter writer;
     * TextWriter oldOut = Console.Out;
     * try
     * {
     *     ostrm = new FileStream("C:\\Users\\Alexandre\\Desktop\\vs_output_2.txt", FileMode.OpenOrCreate, FileAccess.Write);
     *     writer = new StreamWriter(ostrm);
     * }
     * catch (Exception e)
     * {
     *     Console.WriteLine("Cannot open Redirect.txt for writing");
     *     Console.WriteLine(e.Message);
     *     return;
     * }
     * Console.SetOut(writer);*/

    // --- disabled: train the tokenizer from a file ---
    /*// read file
     * var tokenizerTrainingFilePath = currentDirectory + "Input/tokenizer.train";
     * var outputFilePath = currentDirectory + "Output/EnglishTok.nbin";
     * MaximumEntropyTokenizer.Train(tokenizerTrainingFilePath, outputFilePath);*/

    // --- disabled: test detokenization ---
    /*var tokens = new List<string>() {"do", "n't", "commit"};
     * var detokenizer = new DictionaryDetokenizer();
     * var result = detokenizer.Detokenize(tokens.ToArray());
     * Console.WriteLine(result);*/

    // --- disabled: train a sentence-detection model file ---
    /*// train model file
     * var inputFilePath = currentDirectory + "Input/sentences.train";
     * var outputFilePath = currentDirectory + "Output/" + Path.GetFileNameWithoutExtension(inputFilePath) + ".nbin";
     * var iterations = 100;
     * var cut = 5;
     * var endOfSentenceScanner = new CharactersSpecificEndOfSentenceScanner();
     * Console.WriteLine("Training model...");
     * var model = MaximumEntropySentenceDetector.TrainModel(inputFilePath, iterations, cut, endOfSentenceScanner);
     * Console.WriteLine("Writing output file '{0}'...", outputFilePath);
     * new BinaryGisModelWriter().Persist(model, outputFilePath);
     * Console.WriteLine("Output file written.");*/

    // --- disabled: tokenize tests ---
    /*// tokenize tests
     * var modelPath = currentDirectory + "../Resources/Models/";
     * var tokenizer = new EnglishMaximumEntropyTokenizer(modelPath + "EnglishTok.nbin");
     *
     * var input = "It was built of a bright brick throughout; its skyline was fantastic, and even its ground plan was wild.";
     * var tokens = tokenizer.Tokenize(input);
     * Console.WriteLine(string.Join(" | ", tokens));*/

    // --- disabled: detect tokenization issues ---
    /*var pathToFile = currentDirectory + "Input/tokenizerIssues.txt";
     * var modelPath = currentDirectory + "../Resources/Models/";
     * var tokenizer = new EnglishMaximumEntropyTokenizer(modelPath + "EnglishTok.nbin");
     * var allLines = File.ReadAllLines(pathToFile);
     * foreach (var line in allLines)
     * {
     *     var tokens = tokenizer.Tokenize(line);
     *     Console.WriteLine(string.Join(" | ", tokens));
     * }*/

    // --- active: parsing ---
    var inputText = "This is a generic bank response, which indicates simply that they are not willing to accept the transaction.";

    // Split the sentence into tokens with the maximum-entropy tokenizer model.
    var tokenizerModelFile = currentDirectory + "../Resources/Models/EnglishTok.nbin";
    var wordTokenizer = new EnglishMaximumEntropyTokenizer(tokenizerModelFile);
    var words = wordTokenizer.Tokenize(inputText);

    // Build the treebank parser from the models directory and parse the tokens.
    var modelsDirectory = currentDirectory + "../Resources/Models/";
    var treebankParser = new OpenNLP.Tools.Parser.EnglishTreebankParser(modelsDirectory, true, false);
    var parsedSentence = treebankParser.DoParse(words);

    // Extract dependencies from lexical tree
    var structureFactory = new PennTreebankLanguagePack().GrammaticalStructureFactory();
    var lexicalTree = new ParseTree(parsedSentence);
    Console.WriteLine(lexicalTree);

    var typedDependencies = structureFactory.NewGrammaticalStructure(lexicalTree).TypedDependencies();
    foreach (var dependency in typedDependencies)
    {
        Console.WriteLine(dependency);
    }

    Console.WriteLine("===========");
    Console.WriteLine("OK");

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}