/// <summary>
/// Verifies that a serialized (optionally GZIP-compressed) PCFG model can be
/// deserialized into a <c>LexicalizedParser</c> via a Java ObjectInputStream.
/// </summary>
public void LexicalizedParserTest()
{
    // GZIPed model in the file
    var model = Files.Parser.Models("lexparser/englishPCFG.ser.gz");

    // ROBUSTNESS: open read-only — FileMode.Open alone requests ReadWrite
    // access, which fails on read-only model files.
    using var fs  = new FileStream(model, FileMode.Open, FileAccess.Read);
    using var isw = new ikvm.io.InputStreamWrapper(fs);

    // BUG FIX (CA1310): EndsWith(string) is culture-sensitive by default;
    // an extension check must be an ordinal comparison.
    using var ois =
              model.EndsWith(".gz", StringComparison.OrdinalIgnoreCase)
            ? new ObjectInputStream(new GZIPInputStream(isw))
            : new ObjectInputStream(isw);

    var lp = LexicalizedParser.loadModel(ois);

    Assert.NotNull(lp);
}
// ----- Example #2 -----
        /// <summary>
        /// Parses <paramref name="sentence"/> with the English PCFG model and returns
        /// the text of every leaf whose parent's label is one of <c>nounLabels</c>.
        /// </summary>
        /// <param name="sentence">Raw sentence text to tokenize and parse.</param>
        /// <returns>List of noun tokens found in the parse tree.</returns>
        public static List<string> ExtractNounsFromSemantics(string sentence)
        {
            // NOTE(review): CodeBase is a file:// URI and is obsolete on modern .NET;
            // kept for behavioral compatibility — Assembly.Location is preferred.
            string assemblyPath = Assembly.GetExecutingAssembly().GetName().CodeBase;
            // Walk three directory levels up from the assembly to reach the project root.
            string projectPath = Directory.GetParent(new Uri(Path.GetDirectoryName(Path.GetDirectoryName(Path.GetDirectoryName(assemblyPath)))).LocalPath).FullName;
            // IDIOM FIX: build the models path with Path.Combine instead of raw concatenation.
            string modelsDirectory = Path.GetFullPath(Path.Combine(projectPath, @"Parser\CoreNLP-3.9.1-Models\edu\stanford\nlp\models"));

            // Loading english PCFG parser from file
            LexicalizedParser lp = LexicalizedParser.loadModel(Path.Combine(modelsDirectory, @"lexparser\englishPCFG.ser.gz"));

            // This shows loading and using an explicit tokenizer
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader      = new java.io.StringReader(sentence);
            var rawWords         = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

            sent2Reader.close();
            var tree = lp.apply(rawWords);

            // A leaf is a noun when its immediate parent (the POS tag node) carries
            // one of the configured noun labels.
            return tree.toArray()
                       .Cast<LabeledScoredTreeNode>()
                       .Where(n => n.isLeaf() && nounLabels.Contains(n.parent(tree).label().value()))
                       .Select(n => n.label().ToString())
                       .ToList();
        }
// ----- Example #3 -----
        /// <summary>
        /// Wires up every NLP resource the identificator needs: the Stanford parser,
        /// a grammatical-structure factory, a lemmatizer, WordNet, a word2vec
        /// vocabulary, an ML.NET prediction engine, a Wikipedia client and a cache.
        /// </summary>
        /// <param name="word2VecModelPath">Path to the binary word2vec model to load.</param>
        public TargetIdentificator(string word2VecModelPath)
        {
            // Stanford parsing pipeline.
            lexParser = LexicalizedParser.loadModel(@"Resources\englishPCFG.ser.gz");
            grammaticalStructureFactory = new PennTreebankLanguagePack().grammaticalStructureFactory();

            // Lemmatization support.
            lemmatizer = IStemmer.GetLemmatizer();

            // WordNet lexical database.
            wordNetEngine = new WordNetEngine();
            wordNetEngine.LoadFromDirectory(@"Resources\WordNet");

            // Word embeddings.
            word2VecVocabulary = new Word2VecBinaryReader().Read(word2VecModelPath);

            // ML.NET model and its prediction engine.
            var mlContext = new MLContext();
            var mlModel   = mlContext.Model.Load("MLModel.zip", out _);
            predictionEngine = mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(mlModel);

            // External lookups.
            wiki = new Wiki("NLP/1.0", "https://en.wikipedia.org", "https://en.wikipedia.org/w/api.php");
            caching = new CachingFile();
        }
        /// <summary>
        /// Parses either the sentences of <paramref name="fileName"/> or two built-in
        /// demo sentences with the given (or default) grammar, printing the penn tree,
        /// tagged yield and CC-processed typed dependencies for each sentence.
        /// </summary>
        /// <param name="model">Path to a serialized grammar; falls back to Program.ParserModel when empty.</param>
        /// <param name="fileName">Optional text file to parse; when empty, demo sentences are used.</param>
        public static void Start(string model, string fileName)
        {
            // CONSISTENCY FIX: use the `string` keyword alias throughout — the original
            // mixed `String.IsNullOrEmpty` here with `string.IsNullOrEmpty` below.
            var grammar = !string.IsNullOrEmpty(model) ? model : Program.ParserModel;
            var options = new[] { "-maxLength", "80", "-retainTmpSubcategories" };
            var lp      = LexicalizedParser.loadModel(grammar, options);
            var tlp     = new PennTreebankLanguagePack();
            var gsf     = tlp.grammaticalStructureFactory();

            var sentences = new List<ArrayList>();

            if (!string.IsNullOrEmpty(fileName))
            {
                // Each element produced by DocumentPreprocessor is one tokenized sentence.
                sentences.AddRange(new DocumentPreprocessor(fileName).Cast<ArrayList>());
            }
            else
            {
                // Demo sentence #1: already tokenized, wrapped word by word.
                var sent    = new[] { "This", "is", "an", "easy", "sentence", "." };
                var arrList = new ArrayList();
                foreach (var s in sent)
                {
                    arrList.Add(new Word(s));
                }
                sentences.Add(arrList);

                // Demo sentence #2: raw text run through the language pack's tokenizer.
                const string Sent2 = "This is a slightly longer and more complex sentence requiring tokenization.";
                var          toke  = tlp.getTokenizerFactory().getTokenizer(new StringReader(Sent2));
                sentences.Add((ArrayList)toke.tokenize());
            }

            foreach (var sentence in sentences)
            {
                var parse = lp.apply(sentence);
                parse.pennPrint();
                System.Console.WriteLine("\n{0}\n", (parse.taggedYield()));

                var gs  = gsf.newGrammaticalStructure(parse);
                var tdl = gs.typedDependenciesCCprocessed(true);
                System.Console.WriteLine("{0}\n", tdl);
            }
        }
// ----- Example #5 -----
        /// <summary>
        /// Demonstrates the Stanford parser: parses a pre-tokenized sentence, then a
        /// raw sentence via an explicit tokenizer, and prints its typed dependencies.
        /// </summary>
        static void Main()
        {
            // Path to models extracted from `stanford-parser-3.6.0-models.jar`
            var jarRoot         = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-parser-full-2015-12-09\models\";
            // PATH FIX: jarRoot already ends with '\'; the original prepended another
            // '\' to the suffix, producing a doubled separator in the final path.
            var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // This sample shows parsing a list of correctly tokenized words
            var sent     = new[] { "This", "is", "an", "easy", "sentence", "." };
            var rawWords = Sentence.toCoreLabelList(sent);
            var tree     = lp.apply(rawWords);

            tree.pennPrint();

            // This option shows loading and using an explicit tokenizer
            var sent2            = "This is another sentence.";
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader      = new StringReader(sent2);
            var rawWords2        = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

            sent2Reader.close();
            var tree2 = lp.apply(rawWords2);

            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs  = gsf.newGrammaticalStructure(tree2);
            var tdl = gs.typedDependenciesCCprocessed();

            Console.WriteLine("\n{0}\n", tdl);

            // Extract collapsed dependencies from parsed tree
            var tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.printTree(tree2);
        }
// ----- Example #6 -----
        // Set up the test environment (model paths, verb dictionary, parser).
        /// <summary>
        /// Initializes model paths, loads the verb dictionary from JSON, and loads
        /// the English PCFG parser. Failures are logged rather than rethrown.
        /// </summary>
        public void Init()
        {
            // Path to models extracted from `stanford-parser-3.9.1-models.jar`
            jarRoot         = Path.Combine(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "..\\..\\stanford-corenlp-full-2018-02-27");
            modelsDirectory = jarRoot + "\\edu\\stanford\\nlp\\models";

            // We should change current directory, so StanfordCoreNLP could find all the model files automatically
            exeDir = Environment.CurrentDirectory;

            // Load JSON to populate the verb dictionary.
            // NOTE(review): each JSON item is indexed as item[0] -> key, item[3] -> value;
            // the schema of `verbs-dictionaries.json` fixes these positions.
            try
            {
                // CLEANUP: removed an unused `JsonSerializer serializer` local.
                string json   = System.IO.File.ReadAllText(Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) + "\\..\\..\\verbs-dictionaries.json");
                JArray values = JsonConvert.DeserializeObject<JArray>(json);
                foreach (var item in values.Children())
                {
                    dict.Add((string)item[0], (string)item[3]);
                }
            }
            catch (Exception exp)
            {
                Console.WriteLine("Fail loading verb dictionary:\n {0}", exp);
            }

            // Loading english PCFG parser from file
            try
            {
                lp = LexicalizedParser.loadModel(modelsDirectory + "\\lexparser\\englishPCFG.ser.gz");

                // BUG FIX: report success only when loadModel actually succeeded;
                // previously this message printed even after a load failure.
                Console.WriteLine("\nParser successfully loaded!\n");
            }
            catch (Exception exp)
            {
                Console.WriteLine("Fail loading parser model:\n{0}", exp);
            }
        }
        /// <summary>
        /// Test fixture setup: loads the serialized English PCFG grammar once.
        /// </summary>
        public void SetUp()
        {
            // Resolve the model path and deserialize the parser in one step.
            _lp = LexicalizedParser.loadModel(Files.Parser.Models("lexparser/englishPCFG.ser.gz"));
        }
// ----- Example #8 -----
        /// <summary>
        /// Parses <paramref name="sent2"/>, prints its penn tree and collapsed
        /// dependencies, then scans the collapsed dependencies for the first shape
        /// keyword and associates that shape's properties with the dependency list.
        /// </summary>
        /// <param name="sent2">The raw sentence to parse.</param>
        public void SentenceParser(string sent2)
        {
            var modelsDirectory = jarRoot + @"edu\stanford\nlp\models";

            // Loading english PCFG parser from file
            var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // BUG FIX: the original called `sent2.ToLower()` and discarded the result
            // (strings are immutable), so the sentence was never actually lower-cased.
            sent2 = sent2.ToLower();

            // Tokenize with an explicit PTB tokenizer.
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader      = new java.io.StringReader(sent2);
            var rawWords2        = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

            sent2Reader.close();
            var tree2 = lp.apply(rawWords2);

            // Extract dependencies from lexical tree
            var tlp = new PennTreebankLanguagePack();
            var gsf = tlp.grammaticalStructureFactory();
            var gs  = gsf.newGrammaticalStructure(tree2);
            var tdl = gs.typedDependenciesCCprocessed();

            // Print the tree plus collapsed typed dependencies.
            var tp = new TreePrint("penn,typedDependenciesCollapsed");
            tp.printTree(tree2);

            // NOTE(review): if typedDependenciesCollapsed() ever stops returning an
            // ArrayList, `as` yields null and the foreach below throws — confirm the
            // IKVM binding guarantees this concrete type.
            ArrayList dep = gs.typedDependenciesCollapsed() as ArrayList;

            // Find the first dependency whose dependent token matches a shape keyword.
            foreach (TypedDependency td in dep)
            {
                for (int i = 0; i < keyword.Length; i++)
                {
                    if (td.dep().originalText().Equals(keyword[i]))
                    {
                        keyFlag = true;
                        key     = keyword[i];
                        break;
                    }
                }
                if (keyFlag)
                {
                    break;
                }
            }

            // Reset the flag for the next invocation.
            keyFlag = false;

            // Associate the recognized shape's properties with the dependency list.
            // NOTE(review): `key` is a field — an unmatched sentence reuses the
            // previous sentence's shape; confirm this carry-over is intended.
            switch (key)
            {
            case "circle":
                Circle circle = new Circle();
                shape     = circle.GetProps();
                propsUsed = Associator(shape, dep);
                break;

            case "rectangle":
                Rectangle rect = new Rectangle();
                shape     = rect.GetProps();
                propsUsed = Associator(shape, dep);
                break;

            case "triangle":
                Triangle tri = new Triangle();
                shape     = tri.GetProps();
                propsUsed = Associator(shape, dep);
                break;

            case "square":
                Square square = new Square();
                shape     = square.GetProps();
                propsUsed = Associator(shape, dep);
                break;

            default:
                break;
            } //End of Switch

            dependency = tdl.ToString();
        } //End of SentenceParser
// ----- Example #9 -----
        // Constructors and finalizers:
        /// <summary>
        /// Singleton constructor: resolves all data/model folder paths, then loads the
        /// WordNet engine, the OpenNLP models (sentence detector, tokenizer, name
        /// finder, POS tagger, chunker, optional parser), the Stanford parser, and a
        /// Porter stemmer. Load order follows field dependency order; each OpenNLP
        /// model stream is closed immediately after its model is constructed.
        /// </summary>
        private Repository()
        {
            // Extract the simple assembly name (text before the first comma) from the full name.
            _assemblyName = Regex.Match(_assemblyFullName, "^(.*?),.*$").Result("$1");

            // Folder layout: all paths are built from the root drive with '\' normalized
            // to the platform separator `Dsc`.
            _rootDrive = ("/usr/project/xtmp/dp195/Poetix18/").Replace(@"\", Dsc);
            _nlpFolder = ("rhetorica/nlp/").Replace(@"\", Dsc);

            _openNlpModelsFolder = ("OpenNLP/models/").Replace(@"\", Dsc);
            _openNlpModelsPath   = RootDrive + _nlpFolder + _openNlpModelsFolder;

            _wordNetFolder = ("WordNet_3/").Replace(@"\", Dsc);
            _wordNetPath   = RootDrive + _nlpFolder + _wordNetFolder;

            _grammarFolder = ("StanfordParser/grammar/").Replace(@"\", Dsc);
            _grammarPath   = RootDrive + _nlpFolder + _grammarFolder;

            _dataFolder   = ("data/").Replace(@"\", Dsc);
            _nlpTextsPath = RootDrive + _dataFolder;

            // Local development text path: three levels up from the assembly, then "data".
            string[] localTextDirectoryParts =
            {
                CurrentAssemblyDirectoryPath,
                "..",                        "..","..", "data"
                //"..", "..", "text"
            };
            _localTextPath = Path.Combine(localTextDirectoryParts) + "/"; // For development use

            // WordNet engine:
            Console.Write("Loading WordNet engine.... ");
            _wordNetEngine = new WordNetEngine(WordNetPath, true);
            Console.WriteLine("Done.");

            // OpenNLP sentence detector:
            // NOTE: the same `modelInputStream` variable is reused for every OpenNLP
            // model below — each stream is opened, consumed, and closed in turn.
            Console.Write("Loading OpenNLP sentence detector.... ");
            java.io.FileInputStream modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-sent.bin");
            _sentenceModel = new SentenceModel(modelInputStream);
            modelInputStream.close();
            _sentenceDetector = new SentenceDetectorME(_sentenceModel);
            Console.WriteLine("Done.");

            // OpenNLP tokenizer:
            Console.Write("Loading OpenNLP tokenizer.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-token.bin");
            _tokenizerModel  = new opennlp.tools.tokenize.TokenizerModel(modelInputStream);
            modelInputStream.close();
            _tokenizer = new opennlp.tools.tokenize.TokenizerME(_tokenizerModel);
            Console.WriteLine("Done.");

            // OpenNLP name finder:
            Console.Write("Loading OpenNLP name finder.... ");
            modelInputStream      = new java.io.FileInputStream(OpenNlpModelsPath + "en-ner-person.bin");
            _tokenNameFinderModel = new TokenNameFinderModel(modelInputStream);
            modelInputStream.close();
            _nameFinder = new NameFinderME(_tokenNameFinderModel);
            Console.WriteLine("Done.");

            // OpenNLP POS tagger:
            Console.Write("Loading OpenNLP POS tagger.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-pos-maxent.bin");
            _posModel        = new POSModel(modelInputStream);
            modelInputStream.close();
            _tagger = new POSTaggerME(_posModel);
            Console.WriteLine("Done.");

            // OpenNLP chunker:
            Console.Write("Loading OpenNLP chunker.... ");
            modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-chunker.bin");
            _chunkerModel    = new ChunkerModel(modelInputStream);
            modelInputStream.close();
            _chunker = new ChunkerME(_chunkerModel);
            Console.WriteLine("Done.");

            // OpenNLP parser (optional — gated by the `_loadParser` flag because it is
            // the most expensive model to load):
            if (_loadParser)
            {
                Console.Write("Loading OpenNLP parser.... ");
                modelInputStream = new java.io.FileInputStream(OpenNlpModelsPath + "en-parser-chunking.bin");
                _parserModel     = new ParserModel(modelInputStream);
                modelInputStream.close();
                _parser = ParserFactory.create(_parserModel);
                Console.WriteLine("Done.");
            }

            // Stanford parser:
            //_stanfordParser = new LexicalizedParser(GrammarPath + "englishPCFG.ser.gz"); // Obsolete method
            _stanfordParser = LexicalizedParser.loadModel(GrammarPath + "englishPCFG.ser.gz");

            // Porter stemmer:
            _porterStemmer = new PorterStemmer();
        }
// ----- Example #10 -----
    /// <summary>
    /// Parses <paramref name="input"/> and extracts the action / subject / target
    /// fields from its nsubj, dobj and nmod typed dependencies.
    /// </summary>
    /// <param name="input">Raw sentence to tokenize and parse.</param>
    /// <returns>The string form of the CC-processed typed-dependency list.</returns>
    public string Tags(string input)
    {
        // Path to models extracted from `stanford-parser-3.6.0-models.jar`
        var jarRoot         = @"";
        var modelsDirectory = jarRoot;

        var lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

        // This option shows loading and using an explicit tokenizer
        var sent2            = input;
        var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        var sent2Reader      = new java.io.StringReader(sent2);
        var rawWords2        = tokenizerFactory.getTokenizer(sent2Reader).tokenize();

        sent2Reader.close();
        var tree2 = lp.apply(rawWords2);

        // Extract dependencies from lexical tree
        var tlp = new PennTreebankLanguagePack();
        var gsf = tlp.grammaticalStructureFactory();
        var gs  = gsf.newGrammaticalStructure(tree2);
        var tdl = gs.typedDependenciesCCprocessed();

        // CLEANUP: removed an unused TreePrint local whose only use was commented out.
        UnityEngine.Debug.Log(tdl);

        for (int i = 0; i < tdl.size(); i++)
        {
            TypedDependency node = (TypedDependency)tdl.get(i);

            string relation = node.reln().getShortName();

            // nsubj: governor = action verb, dependent = grammatical subject.
            if (relation.Contains("nsubj"))
            {
                IndexedWord act = node.gov();
                action = act.value();

                UnityEngine.Debug.Log("This is the action " + action);

                IndexedWord subject = node.dep();
                subj = subject.value();

                UnityEngine.Debug.Log("This is the subject " + subj);
            }

            // dobj: governor = action verb, dependent = direct object (primary target).
            if (relation.Contains("dobj"))
            {
                IndexedWord act = node.gov();
                action = act.value();
                UnityEngine.Debug.Log("This is the action " + action);

                IndexedWord tar = node.dep();
                target = tar.value();
                UnityEngine.Debug.Log("This is the target " + target);
            }

            // nmod: dependent = secondary target (nominal modifier).
            if (relation.Contains("nmod"))
            {
                IndexedWord tar_two = node.dep();
                second_target = tar_two.value();
                UnityEngine.Debug.Log("This is the target second " + second_target);
            }
        }

        return tdl.ToString();
    }
 /// <summary>Loads a serialized Stanford lexicalized parser from the given model file.</summary>
 /// <param name="jarLexicalizedParserFile">Path of the serialized parser model to deserialize.</param>
 /// <returns>The loaded <c>LexicalizedParser</c> instance.</returns>
 public edu.stanford.nlp.parser.lexparser.LexicalizedParser LoadLexicalizedParser(string jarLexicalizedParserFile) =>
     LexicalizedParser.loadModel(jarLexicalizedParserFile);