Exemplo n.º 1
0
        /// <summary>
        /// Parses a sentence with the parser held in <c>mParser</c>, creating that
        /// parser from <c>mModelPath</c> the first time it is needed.
        /// </summary>
        /// <param name="sentence">The sentence text handed to <c>DoParse</c>.</param>
        /// <returns>The parse produced by <c>DoParse</c> for <paramref name="sentence"/>.</returns>
        private OpenNLP.Tools.Parser.Parse ParseSentence(string sentence)
        {
            // Reuse the parser already stored in mParser; build and store one only if none exists yet.
            var parser = mParser;
            if (parser == null)
            {
                parser = new OpenNLP.Tools.Parser.EnglishTreebankParser(mModelPath, true, false);
                mParser = parser;
            }

            return parser.DoParse(sentence);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Parses a sentence using the shared parser in <c>mParser</c>. The parser is
        /// constructed from <c>mModelPath</c> on the first call and kept for later calls.
        /// </summary>
        /// <param name="sentence">The sentence text passed to <c>DoParse</c>.</param>
        /// <returns>The result of <c>DoParse</c> for <paramref name="sentence"/>.</returns>
        public static OpenNLP.Tools.Parser.Parse ParseSentence(string sentence)
        {
            // Keep the existing parser when there is one; otherwise create it now.
            mParser = mParser ?? new OpenNLP.Tools.Parser.EnglishTreebankParser(mModelPath, true, false);

            return mParser.DoParse(sentence);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Console entry point: tokenizes and parses one hard-coded English sentence,
        /// prints the resulting tree followed by each of its typed dependencies,
        /// then waits for a key press before returning.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        private static void Main(string[] args)
        {
            // The tokenizer model file and the parser are both given paths under this directory.
            var modelsDirectory = currentDirectory + "../Resources/Models/";
            var text = "This is a generic bank response, which indicates simply that they are not willing to accept the transaction.";

            // Split the sentence into tokens.
            var maxentTokenizer = new EnglishMaximumEntropyTokenizer(modelsDirectory + "EnglishTok.nbin");
            var words = maxentTokenizer.Tokenize(text);

            // Parse the tokens.
            var treebankParser = new OpenNLP.Tools.Parser.EnglishTreebankParser(modelsDirectory, true, false);
            var parsed = treebankParser.DoParse(words);

            // Extract dependencies from the lexical tree.
            var structureFactory = new PennTreebankLanguagePack().GrammaticalStructureFactory();
            var lexicalTree = new ParseTree(parsed);
            Console.WriteLine(lexicalTree);

            var typedDependencies = structureFactory.NewGrammaticalStructure(lexicalTree).TypedDependencies();
            foreach (var dependency in typedDependencies)
            {
                Console.WriteLine(dependency);
            }

            Console.WriteLine("===========");
            Console.WriteLine("OK");

            // Block until a key is pressed so the output stays visible.
            Console.ReadKey();
        }