/// <summary>
/// Splits a sentence into tokens, creating the shared tokenizer on first use.
/// </summary>
/// <param name="sentence">The sentence text to tokenize.</param>
/// <returns>The tokens produced by the tokenizer for <paramref name="sentence"/>.</returns>
public string[] TokenizeSentence(string sentence)
{
    // Fast path: the tokenizer was already built by an earlier call.
    if (mTokenizer != null)
    {
        return mTokenizer.Tokenize(sentence);
    }

    // First call: build the tokenizer from the model file under mModelPath and keep it.
    string tokenizerModelFile = mModelPath + "EnglishTok.nbin";
    mTokenizer = new EnglishMaximumEntropyTokenizer(tokenizerModelFile);
    return mTokenizer.Tokenize(sentence);
}
Esempio n. 2
0
 /// <summary>
 /// Tokenizes the given sentence with the cached English tokenizer, constructing
 /// that tokenizer from the "EnglishTok.nbin" model the first time it is needed.
 /// </summary>
 /// <param name="sentence">The sentence text to tokenize.</param>
 /// <returns>The tokens returned by the tokenizer.</returns>
 public string[] TokenizeSentence(string sentence)
 {
     bool tokenizerMissing = mTokenizer == null;
     if (tokenizerMissing)
     {
         // The model file name is appended directly to mModelPath.
         string modelFile = mModelPath + "EnglishTok.nbin";
         mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(modelFile);
     }

     string[] tokens = mTokenizer.Tokenize(sentence);
     return tokens;
 }
Esempio n. 3
0
        /// <summary>
        /// Runs a fixed set of sample sentences through tokenizing, POS tagging, chunking,
        /// per-chunk detokenizing and dechunking, and prints the input, the chunks and the
        /// dechunked result for each sentence to the console.
        /// </summary>
        private void TestDechunk()
        {
            // Sample sentences: dialogue dashes, quotes, abbreviations, ellipses, currency.
            var inputs = new string[]
            {
                "- Harry's your sister. - Look, what exactly am I supposed to be doing here?",
                "\"Piss off!\"",
                "- Sorry Mrs. Hudson, I'll skip the tea. Off out. - Both of you?",
                "I love playing half-life; that's just who I am!",
                "That's why I... have just begun to write a book.",
                "And they lived happily ever after...",
                "It's gonna be $1.5 sir."
            };

            // Every model file is resolved against the same directory, so build it once.
            var modelDirectory = currentDirectory + "../Resources/Models/";
            var tokenizer = new EnglishMaximumEntropyTokenizer(modelDirectory + "EnglishTok.nbin");
            var chunker = new EnglishTreebankChunker(modelDirectory + "EnglishChunk.nbin");
            var dechunker = new RegexDictionaryDechunker();
            var detokenizer = new DictionaryDetokenizer();
            var posTagger = new EnglishMaximumEntropyPosTagger(
                modelDirectory + "EnglishPOS.nbin",
                modelDirectory + "Parser/tagdict");

            foreach (var input in inputs)
            {
                string[] tokens = tokenizer.Tokenize(input);
                string[] tags = posTagger.Tag(tokens);

                // Detokenize the words of each chunk into one string, then join the chunks back.
                var chunks = chunker.GetChunks(tokens, tags);
                var chunkStrings = chunks
                    .Select(chunk => detokenizer.Detokenize(chunk.TaggedWords.Select(tagged => tagged.Word).ToArray()))
                    .ToArray();
                var output = dechunker.Dechunk(chunkStrings);

                Console.WriteLine("input: " + input);
                Console.WriteLine("chunks: " + string.Join(" | ", chunks));
                // Label corrected from the misspelled "ouput: ".
                Console.WriteLine("output: " + output);
                Console.WriteLine("--");
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Console entry point: tokenizes one hard-coded sentence with the English tokenizer
        /// model, prints the tokens separated by " | ", prints "OK" and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Disabled experiment: train the tokenizer model from a training file.
            /*// read file
            var tokenizerTrainingFilePath = currentDirectory + "Input/tokenizer.train";
            var outputFilePath = currentDirectory + "Output/EnglishTok.nbin";
            MaximumEntropyTokenizer.Train(tokenizerTrainingFilePath, outputFilePath);*/

            // Disabled experiment: detokenize a short token list.
            // test detokenization
            /*var tokens = new List<string>() {"do", "n't", "commit"};
            var detokenizer = new DictionaryDetokenizer();
            var result = detokenizer.Detokenize(tokens.ToArray());
            Console.WriteLine(result);*/

            // Disabled experiment: train and persist a sentence detection model.
            /*// train model file
            var inputFilePath = currentDirectory + "Input/sentences.train";
            var outputFilePath = currentDirectory + "Output/" + Path.GetFileNameWithoutExtension(inputFilePath) + ".nbin";
            var iterations = 100;
            var cut = 5;
            var endOfSentenceScanner = new CharactersSpecificEndOfSentenceScanner();
            Console.WriteLine("Training model...");
            var model = MaximumEntropySentenceDetector.TrainModel(inputFilePath, iterations, cut, endOfSentenceScanner);
            Console.WriteLine("Writing output file '{0}'...", outputFilePath);
            new BinaryGisModelWriter().Persist(model, outputFilePath);
            Console.WriteLine("Output file written.");*/

            // Active test: tokenize a single sample sentence.
            var tokenizerModelFile = currentDirectory + "../Resources/Models/" + "EnglishTok.nbin";
            var englishTokenizer = new EnglishMaximumEntropyTokenizer(tokenizerModelFile);

            var sampleSentence = "It was built of a bright brick throughout; its skyline was fantastic, and even its ground plan was wild.";
            var sampleTokens = englishTokenizer.Tokenize(sampleSentence);
            var joinedTokens = string.Join(" | ", sampleTokens);
            Console.WriteLine(joinedTokens);

            // Keep the console window open until a key is pressed.
            Console.WriteLine("OK");
            Console.ReadKey();
        }
Esempio n. 5
0
        /// <summary>
        /// Console entry point: tokenizes and parses one hard-coded sentence, prints the
        /// resulting parse tree, then prints every typed dependency derived from that tree.
        /// Finishes by printing a separator and "OK" and waiting for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            // Disabled: redirect console output to a file.
            /*FileStream ostrm;
            StreamWriter writer;
            TextWriter oldOut = Console.Out;
            try
            {
                ostrm = new FileStream("C:\\Users\\Alexandre\\Desktop\\vs_output_2.txt", FileMode.OpenOrCreate, FileAccess.Write);
                writer = new StreamWriter(ostrm);
            }
            catch (Exception e)
            {
                Console.WriteLine("Cannot open Redirect.txt for writing");
                Console.WriteLine(e.Message);
                return;
            }
            Console.SetOut(writer);*/

            // Disabled: train the tokenizer model from a training file.
            /*// read file
            var tokenizerTrainingFilePath = currentDirectory + "Input/tokenizer.train";
            var outputFilePath = currentDirectory + "Output/EnglishTok.nbin";
            MaximumEntropyTokenizer.Train(tokenizerTrainingFilePath, outputFilePath);*/

            // Disabled: detokenize a short token list.
            // test detokenization
            /*var tokens = new List<string>() {"do", "n't", "commit"};
            var detokenizer = new DictionaryDetokenizer();
            var result = detokenizer.Detokenize(tokens.ToArray());
            Console.WriteLine(result);*/

            // Disabled: train and persist a sentence detection model.
            /*// train model file
            var inputFilePath = currentDirectory + "Input/sentences.train";
            var outputFilePath = currentDirectory + "Output/" + Path.GetFileNameWithoutExtension(inputFilePath) + ".nbin";
            var iterations = 100;
            var cut = 5;
            var endOfSentenceScanner = new CharactersSpecificEndOfSentenceScanner();
            Console.WriteLine("Training model...");
            var model = MaximumEntropySentenceDetector.TrainModel(inputFilePath, iterations, cut, endOfSentenceScanner);
            Console.WriteLine("Writing output file '{0}'...", outputFilePath);
            new BinaryGisModelWriter().Persist(model, outputFilePath);
            Console.WriteLine("Output file written.");*/

            // Disabled: tokenize every line of an issues file and print the tokens.
            // detect tokenization issues
            /*var pathToFile = currentDirectory + "Input/tokenizerIssues.txt";
            var modelPath = currentDirectory + "../Resources/Models/";
            var tokenizer = new EnglishMaximumEntropyTokenizer(modelPath + "EnglishTok.nbin");
            var allLines = File.ReadAllLines(pathToFile);
            foreach (var line in allLines)
            {
                var tokens = tokenizer.Tokenize(line);
                Console.WriteLine(string.Join(" | ", tokens));
            }*/

            // Active test: parsing. Alternative sample sentences are kept below for quick swapping.
            //var sentence = "If she wakes up, and she's a little more hurt, or she's a little more concerned that she might not make it out of there in one piece, and I had something to pick a lock with...";
            //var sentence = "that you can do what you set out to do";
            //var sentence = "This is a test.";
            var sampleSentence = "The kids actually get involved in mapping out where the community center should be";
            var modelDirectory = currentDirectory + "../Resources/Models/";

            // Tokenize first; the parser consumes the token array.
            var englishTokenizer = new EnglishMaximumEntropyTokenizer(modelDirectory + "EnglishTok.nbin");
            var sentenceTokens = englishTokenizer.Tokenize(sampleSentence);

            var treebankParser = new EnglishTreebankParser(modelDirectory, true, false);
            var parseResult = treebankParser.DoParse(sentenceTokens);

            // Extract dependencies from lexical tree
            var structureFactory = new PennTreebankLanguagePack().GrammaticalStructureFactory();
            var parseTree = new ParseTree(parseResult);
            Console.WriteLine(parseTree);

            var typedDependencies = structureFactory.NewGrammaticalStructure(parseTree).TypedDependencies();
            foreach (var dependency in typedDependencies)
            {
                Console.WriteLine(dependency);
            }

            // Keep the console window open until a key is pressed.
            Console.WriteLine("===========");
            Console.WriteLine("OK");
            Console.ReadKey();
        }
        /// <summary>
        /// Stores the brain and robot, starts the parser and restorer loaders on their own
        /// threads, creates the tokenizer and POS tagger, registers the default goals, and
        /// then waits for both loaders before creating the semantics interface.
        /// </summary>
        /// <param name="brain">The brain</param>
        /// <param name="robot">The robot</param>
        public override void Init(Brain.CBrain brain, IRobot robot)
        {
            this.brain = brain;
            this.robot = robot;

            // Load external NLP systems in their own threads
            restorerLoaded = new AutoResetEvent(false);
            parserLoaded = new AutoResetEvent(false);
            Thread parserLoader = new Thread(InitParser);
            Thread restorerLoader = new Thread(InitRestorer);
            parserLoader.Start();
            restorerLoader.Start();

            // Load up other systems in the meantime
            this.tokenizer = new EnglishMaximumEntropyTokenizer(sharpNLPPath + "EnglishTok.nbin");
            // Build the tag dictionary path with Path.Combine instead of appending a
            // backslash-prefixed literal: this avoids a doubled separator (sharpNLPPath is
            // used above as if it already ends with one) and Windows-only separators.
            string tagDictionaryPath = System.IO.Path.Combine(sharpNLPPath, "Parser", "tagdict");
            this.tagger = new EnglishMaximumEntropyPosTagger(sharpNLPPath + "EnglishPOS.nbin", tagDictionaryPath);

            // Make default goals
            // GotoX: announce, move to the current destination, confirm, then clear it.
            GoalBuilder gotoGoal = new GoalBuilder("GotoX", brain);
            int ruleIndex = gotoGoal.AddRule("GotoX");
            gotoGoal.AddAndAntecedent(ruleIndex, "CurrentDestination Arg *=*;1");
            gotoGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"Going to the $CurrentDestination_\"");
            gotoGoal.AddConsequent(ruleIndex, "Execs", "Motion GoTo $CurrentDestination close");
            gotoGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"Commander, I finished moving to the $CurrentDestination_\"");
            gotoGoal.AddConsequent(ruleIndex, "Remove", "CurrentDestination Arg");
            gotoGoal.AddConsequent(ruleIndex, "Quit", "");
            gotoGoal.Commit();

            // DidNotUnderstand: kept in a field (failureGoal) rather than a local.
            failureGoal = new GoalBuilder("DidNotUnderstand", brain);
            ruleIndex = failureGoal.AddRule("DidNotUnderstand");
            failureGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"I didn't understand what you said.\"");
            failureGoal.AddConsequent(ruleIndex, "Quit", "");
            failureGoal.Commit();

            // SayGoingToX: report the current destination without moving.
            GoalBuilder goingToGoal = new GoalBuilder("SayGoingToX", brain);
            ruleIndex = goingToGoal.AddRule("SayGoingToX");
            goingToGoal.AddAndAntecedent(ruleIndex, "CurrentDestination Arg *=*;1");
            goingToGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"I'm Going to the $CurrentDestination_\"");
            goingToGoal.AddConsequent(ruleIndex, "Quit", "");
            goingToGoal.Commit();

            // SayGoingNowhere: no antecedent, just a spoken reply.
            GoalBuilder goingNowhereGoal = new GoalBuilder("SayGoingNowhere", brain);
            ruleIndex = goingNowhereGoal.AddRule("SayGoingNowhere");
            goingNowhereGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"I'm not going anywhere right now.\"");
            goingNowhereGoal.AddConsequent(ruleIndex, "Quit", "");
            goingNowhereGoal.Commit();

            // DontKnowHowToX: apologise for the unknown action, then clear it.
            GoalBuilder dontKnowGoal = new GoalBuilder("DontKnowHowToX", brain);
            ruleIndex = dontKnowGoal.AddRule("DontKnowHowToX");
            dontKnowGoal.AddAndAntecedent(ruleIndex, "DontKnow Arg *=*;1");
            dontKnowGoal.AddConsequent(ruleIndex, "Execs", "Voice Say \"Sorry, but I don't know how to $DontKnow_\"");
            dontKnowGoal.AddConsequent(ruleIndex, "Remove", "DontKnow Arg");
            dontKnowGoal.AddConsequent(ruleIndex, "Quit", "");
            dontKnowGoal.Commit();

            // Wait for all systems to finish loading
            // NOTE(review): presumably InitRestorer/InitParser signal these events when done;
            // there is no timeout, so a loader that fails before signalling blocks here forever.
            restorerLoaded.WaitOne();
            parserLoaded.WaitOne();
            this.semantics = new SemanticsInterface();
        }
Esempio n. 7
0
 /// <summary>
 /// Creates the sentence detector, tokenizer, POS tagger, chunker and parser from the
 /// model files located under <c>ModelDir</c>.
 /// </summary>
 private void initComponents()
 {
     // Each component reads its own model file from ModelDir.
     string sentenceModelFile = Path.Combine(ModelDir, "EnglishSD.nbin");
     sentenceDetector = new EnglishMaximumEntropySentenceDetector(sentenceModelFile);

     string tokenizerModelFile = Path.Combine(ModelDir, "EnglishTok.nbin");
     tokenizer = new EnglishMaximumEntropyTokenizer(tokenizerModelFile);

     string posModelFile = Path.Combine(ModelDir, "EnglishPOS.nbin");
     posTagger = new EnglishMaximumEntropyPosTagger(posModelFile);

     string chunkModelFile = Path.Combine(ModelDir, "EnglishChunk.nbin");
     chunker = new EnglishTreebankChunker(chunkModelFile);

     // The parser is given the model directory itself, passed through FileUtils.WithSeparator.
     string parserModelDirectory = FileUtils.WithSeparator(ModelDir);
     parser = new EnglishTreebankParser(parserModelDirectory, true, false);
 }