/// <summary>
/// Splits a sentence into word tokens.
/// The maximum-entropy tokenizer is created lazily on first use from the model directory.
/// </summary>
/// <param name="sentence">The sentence to tokenize.</param>
/// <returns>The tokens found in the sentence.</returns>
public string[] TokenizeSentence(string sentence)
{
    // Build the tokenizer once; reuse it for every subsequent call.
    mTokenizer = mTokenizer ?? new EnglishMaximumEntropyTokenizer(mModelPath + "EnglishTok.nbin");
    return mTokenizer.Tokenize(sentence);
}
/// <summary>
/// Splits a sentence into word tokens.
/// The underlying OpenNLP tokenizer is constructed on demand from the model directory.
/// </summary>
/// <param name="sentence">The sentence to tokenize.</param>
/// <returns>The tokens found in the sentence.</returns>
public string[] TokenizeSentence(string sentence)
{
    if (mTokenizer == null)
    {
        var modelFile = mModelPath + "EnglishTok.nbin";
        mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(modelFile);
    }
    return mTokenizer.Tokenize(sentence);
}
/// <summary>
/// Exercises the chunk/dechunk pipeline on a set of sample sentences:
/// tokenizes each input, POS-tags the tokens, groups them into chunks,
/// detokenizes each chunk back into a string, then reassembles ("dechunks")
/// the chunk strings into a sentence and prints each stage to the console.
/// </summary>
private void TestDechunk()
{
    // detokenize
    var inputs = new string[]
    {
        "- Harry's your sister. - Look, what exactly am I supposed to be doing here?",
        "\"Piss off!\"",
        "- Sorry Mrs. Hudson, I'll skip the tea. Off out. - Both of you?",
        "I love playing half-life; that's just who I am!",
        "That's why I... have just begun to write a book.",
        "And they lived happily ever after...",
        "It's gonna be $1.5 sir."
    };

    // var tokenizer = new EnglishMaximumEntropyTokenizer(currentDirectory + "../Resources/Models/EnglishTok.nbin");
    var chunker = new EnglishTreebankChunker(currentDirectory + "../Resources/Models/EnglishChunk.nbin");
    var dechunker = new RegexDictionaryDechunker();
    var detokenizer = new DictionaryDetokenizer(); // renamed: was misspelled "detokienizer"
    var englishPosPath = currentDirectory + "../Resources/Models/EnglishPOS.nbin";
    var tagDictPath = currentDirectory + "../Resources/Models/Parser/tagdict";
    var posTagger = new EnglishMaximumEntropyPosTagger(englishPosPath, tagDictPath);

    foreach (var input in inputs)
    {
        string[] tokens = tokenizer.Tokenize(input);
        string[] tags = posTagger.Tag(tokens);
        var chunks = chunker.GetChunks(tokens, tags);
        // Rebuild each chunk's words into a readable string, then join the chunk strings.
        var chunksStrings = chunks
            .Select(ch => detokenizer.Detokenize(ch.TaggedWords.Select(tw => tw.Word).ToArray()))
            .ToArray();
        var output = dechunker.Dechunk(chunksStrings);

        Console.WriteLine("input: " + input);
        Console.WriteLine("chunks: " + string.Join(" | ", chunks));
        Console.WriteLine("output: " + output); // fixed typo in console label: was "ouput: "
        Console.WriteLine("--");
    }
}
/// <summary>
/// Entry point: tokenizes a sample sentence with the English maximum-entropy
/// tokenizer and prints the resulting tokens separated by " | ".
/// (Dead commented-out experiments — tokenizer training, detokenization test,
/// and sentence-detector model training — have been removed; recover them from
/// version control if needed.)
/// </summary>
static void Main(string[] args)
{
    // tokenize tests
    var modelPath = currentDirectory + "../Resources/Models/";
    var tokenizer = new EnglishMaximumEntropyTokenizer(modelPath + "EnglishTok.nbin");
    var input = "It was built of a bright brick throughout; its skyline was fantastic, and even its ground plan was wild.";
    var tokens = tokenizer.Tokenize(input);
    Console.WriteLine(string.Join(" | ", tokens));

    Console.WriteLine("OK");
    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Entry point: parses a sample sentence with the English treebank parser,
/// prints the parse tree, and then extracts and prints its typed dependencies
/// via the Penn Treebank grammatical-structure machinery.
/// (Dead commented-out experiments — console redirection, tokenizer training,
/// detokenization, sentence-model training, tokenizer-issue scanning, and
/// alternative test sentences — have been removed; recover them from version
/// control if needed.)
/// </summary>
private static void Main(string[] args)
{
    // parsing
    var sentence = "The kids actually get involved in mapping out where the community center should be";
    var tokenizer = new EnglishMaximumEntropyTokenizer(currentDirectory + "../Resources/Models/EnglishTok.nbin");
    var tokens = tokenizer.Tokenize(sentence);

    var modelPath = currentDirectory + "../Resources/Models/";
    var parser = new EnglishTreebankParser(modelPath, true, false);
    var parse = parser.DoParse(tokens);

    // Extract dependencies from lexical tree
    var tlp = new PennTreebankLanguagePack();
    var gsf = tlp.GrammaticalStructureFactory();
    var tree = new ParseTree(parse);
    Console.WriteLine(tree);
    var gs = gsf.NewGrammaticalStructure(tree);
    var dependencies = gs.TypedDependencies();
    foreach (var dep in dependencies)
    {
        Console.WriteLine(dep);
    }
    Console.WriteLine("===========");

    Console.WriteLine("OK");
    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
/// <summary>
/// Create a parser and default goals.
/// Spawns background threads to load the external parser and restorer, wires up
/// the tokenizer and POS tagger in the meantime, registers the robot's default
/// voice/motion goals, then blocks until both background loads complete.
/// </summary>
/// <param name="brain">The brain</param>
/// <param name="robot">The robot</param>
public override void Init(Brain.CBrain brain, IRobot robot)
{
    this.brain = brain;
    this.robot = robot;

    // Load external NLP systems in their own threads
    restorerLoaded = new AutoResetEvent(false);
    parserLoaded = new AutoResetEvent(false);
    Thread parserLoader = new Thread(InitParser);
    Thread restorerLoader = new Thread(InitRestorer);
    parserLoader.Start();
    restorerLoader.Start();

    // Load up other systems in the meantime
    this.tokenizer = new EnglishMaximumEntropyTokenizer(sharpNLPPath + "EnglishTok.nbin");
    this.tagger = new EnglishMaximumEntropyPosTagger(sharpNLPPath + "EnglishPOS.nbin", sharpNLPPath + @"\Parser\tagdict");

    // Make default goals
    // "GotoX": when a destination argument is present, announce it, drive there,
    // announce arrival, then clear the destination and finish.
    GoalBuilder gotoGoal = new GoalBuilder("GotoX", brain);
    int ruleIndex = gotoGoal.AddRule("GotoX");
    gotoGoal.AddAndAntecedent(ruleIndex, "CurrentDestination Arg *=*;1");
    gotoGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"Going to the $CurrentDestination_\"");
    gotoGoal.AddConsequent(ruleIndex, "Execs", "Motion GoTo $CurrentDestination close");
    gotoGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"Commander, I finished moving to the $CurrentDestination_\"");
    gotoGoal.AddConsequent(ruleIndex, "Remove", "CurrentDestination Arg");
    gotoGoal.AddConsequent(ruleIndex, "Quit", "");
    gotoGoal.Commit();

    // "DidNotUnderstand": spoken fallback used when an utterance cannot be handled.
    failureGoal = new GoalBuilder("DidNotUnderstand", brain);
    ruleIndex = failureGoal.AddRule("DidNotUnderstand");
    failureGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"I didn't understand what you said.\"");
    failureGoal.AddConsequent(ruleIndex, "Quit", "");
    failureGoal.Commit();

    // "SayGoingToX": report the current destination when one is set.
    GoalBuilder goingToGoal = new GoalBuilder("SayGoingToX", brain);
    ruleIndex = goingToGoal.AddRule("SayGoingToX");
    goingToGoal.AddAndAntecedent(ruleIndex, "CurrentDestination Arg *=*;1");
    goingToGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"I'm Going to the $CurrentDestination_\"");
    goingToGoal.AddConsequent(ruleIndex, "Quit", "");
    goingToGoal.Commit();

    // "SayGoingNowhere": report that no destination is currently set.
    GoalBuilder goingNowhereGoal = new GoalBuilder("SayGoingNowhere", brain);
    ruleIndex = goingNowhereGoal.AddRule("SayGoingNowhere");
    goingNowhereGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"I'm not going anywhere right now.\"");
    goingNowhereGoal.AddConsequent(ruleIndex, "Quit", "");
    goingNowhereGoal.Commit();

    // "DontKnowHowToX": apologize for an unknown command stored in $DontKnow,
    // then clear the argument and finish.
    GoalBuilder dontKnowGoal = new GoalBuilder("DontKnowHowToX", brain);
    ruleIndex = dontKnowGoal.AddRule("DontKnowHowToX");
    dontKnowGoal.AddAndAntecedent(ruleIndex, "DontKnow Arg *=*;1");
    dontKnowGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"Sorry, but I don't know how to $DontKnow_\"");
    dontKnowGoal.AddConsequent(ruleIndex, "Remove", "DontKnow Arg");
    dontKnowGoal.AddConsequent(ruleIndex, "Quit", "");
    dontKnowGoal.Commit();

    // Wait for all systems to finish loading
    restorerLoaded.WaitOne();
    parserLoaded.WaitOne();
    this.semantics = new SemanticsInterface();
}
/// <summary>
/// Builds the NLP pipeline components (sentence detector, tokenizer,
/// POS tagger, chunker and treebank parser) from the model files in ModelDir.
/// </summary>
private void initComponents()
{
    var sdModel = Path.Combine(ModelDir, "EnglishSD.nbin");
    var tokModel = Path.Combine(ModelDir, "EnglishTok.nbin");
    var posModel = Path.Combine(ModelDir, "EnglishPOS.nbin");
    var chunkModel = Path.Combine(ModelDir, "EnglishChunk.nbin");

    sentenceDetector = new EnglishMaximumEntropySentenceDetector(sdModel);
    tokenizer = new EnglishMaximumEntropyTokenizer(tokModel);
    posTagger = new EnglishMaximumEntropyPosTagger(posModel);
    chunker = new EnglishTreebankChunker(chunkModel);
    // The parser takes the model directory itself, with a trailing separator ensured.
    parser = new EnglishTreebankParser(FileUtils.WithSeparator(ModelDir), true, false);
}