/// <summary>
/// Demonstrates two ways of handing a sentence to the parser: as an
/// already-tokenized word array, and as a raw string that is first run
/// through an explicitly created PTB tokenizer.
/// </summary>
/// <remarks>
/// The first parse is printed with <c>pennPrint()</c>. For the second parse
/// the CC-processed typed dependencies are written to the console, followed
/// by a <c>TreePrint</c> using the "penn,typedDependenciesCollapsed" format.
/// </remarks>
/// <param name="lp">Parser applied to both example sentences.</param>
public static void DemoAPI(LexicalizedParser lp)
{
    // Option 1: parse a list of correctly tokenized words.
    var sent = new[] { "This", "is", "an", "easy", "sentence", "." };
    java.util.List rawWords = Sentence.toCoreLabelList(sent);
    Tree parse = lp.apply(rawWords);
    parse.pennPrint();

    // Option 2: load and use an explicit tokenizer on raw text.
    const string Sent2 = "This is another sentence.";
    TokenizerFactory tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    java.util.List rawWords2;
    var sent2Reader = new StringReader(Sent2);
    try
    {
        rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
    }
    finally
    {
        // The reader is only needed while tokenizing; close it even if
        // tokenization throws so it is never left open.
        sent2Reader.close();
    }
    parse = lp.apply(rawWords2);

    // Derive typed dependencies from the second parse and print them.
    var tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    java.util.List tdl = gs.typedDependenciesCCprocessed();
    Console.WriteLine("\n{0}\n", tdl);

    // Print the same tree again, this time through TreePrint.
    var tp = new TreePrint("penn,typedDependenciesCollapsed");
    tp.printTree(parse);
}
/// <summary>
/// Creates the service by loading the parser model from
/// "englishPCFG.ser.gz" and then building the tokenizer factory, the Penn
/// Treebank language pack and its grammatical-structure factory.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when the model loader returns <c>null</c>.
/// </exception>
public NlpService()
{
    const string modelLocation = "englishPCFG.ser.gz";

    _lp = LexicalizedParser.loadModel(modelLocation);
    if (_lp == null)
    {
        // Fail during construction rather than on the first parse request.
        throw new InvalidOperationException("couldn't load " + modelLocation);
    }

    _tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

    // The structure factory is obtained from the language pack, so the
    // pack has to be created first.
    _tlp = new PennTreebankLanguagePack();
    _structureFactory = _tlp.grammaticalStructureFactory();
}
/// <summary>
/// Parses every sentence that <c>DocumentPreprocessor</c> yields for
/// <paramref name="fileName"/>, printing each parse tree followed by its
/// CC-processed typed dependencies.
/// </summary>
/// <param name="lp">Parser applied to each sentence.</param>
/// <param name="fileName">Passed to the <c>DocumentPreprocessor</c> constructor.</param>
public static void DemoDP(LexicalizedParser lp, string fileName)
{
    // This demo relies on DocumentPreprocessor for loading the file,
    // sentence segmentation and tokenization.
    var structureFactory = new PennTreebankLanguagePack().grammaticalStructureFactory();

    // A tokenizer could also be created here and passed to
    // DocumentPreprocessor instead of using its default.
    var sentences = new DocumentPreprocessor(fileName);

    foreach (List words in sentences)
    {
        Tree tree = lp.apply(words);
        tree.pennPrint();

        var dependencies = structureFactory
            .newGrammaticalStructure(tree)
            .typedDependenciesCCprocessed(true);
        Console.WriteLine("\n{0}\n", dependencies);
    }
}
/// <summary>
/// Creates the identificator and loads everything it holds: the parser
/// model and WordNet data from the "Resources" directory, the word2vec
/// vocabulary, the ML.NET model from "MLModel.zip", a wiki client for
/// en.wikipedia.org and a <c>CachingFile</c>.
/// </summary>
/// <param name="word2VecModelPath">
/// Path handed to <c>Word2VecBinaryReader.Read</c> to load the vocabulary.
/// </param>
public TargetIdentificator(string word2VecModelPath)
{
    // Build the resource paths with Path.Combine instead of a hard-coded
    // backslash so they also resolve on hosts whose separator is '/'.
    // On Windows the resulting strings are the same as before.
    string parserModelPath = System.IO.Path.Combine("Resources", "englishPCFG.ser.gz");
    string wordNetDirectory = System.IO.Path.Combine("Resources", "WordNet");

    lexParser = LexicalizedParser.loadModel(parserModelPath);
    grammaticalStructureFactory = new PennTreebankLanguagePack().grammaticalStructureFactory();
    lemmatizer = IStemmer.GetLemmatizer();

    wordNetEngine = new WordNetEngine();
    wordNetEngine.LoadFromDirectory(wordNetDirectory);

    word2VecVocabulary = new Word2VecBinaryReader().Read(word2VecModelPath);

    // Load the trained model; the schema reported through the out
    // parameter is not used here.
    MLContext mlContext = new MLContext();
    ITransformer mlModel = mlContext.Model.Load("MLModel.zip", out _);
    predictionEngine = mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(mlModel);

    wiki = new Wiki("NLP/1.0", "https://en.wikipedia.org", "https://en.wikipedia.org/w/api.php");
    caching = new CachingFile();
}