/// <summary>
/// Parses the document searching for sentences where the entity is found.
/// Produces one result row per matching sentence: original file, entity,
/// the sentence itself, and the syntax analysis (parse tree) of the sentence.
/// </summary>
/// <param name="text">Document text.</param>
/// <param name="entity">Entity to search for.</param>
/// <param name="origFile">Original file name (copied into each result row).</param>
/// <param name="language">Language key used to select the parser model files.</param>
/// <returns>List of rows: { origFile, entity, sentence, parseTree }.</returns>
public static List<string[]> Parse(string text, string entity, string origFile, string language)
{
    var results = new List<string[]>();

    // Load the parser model for the requested language.
    var modelsDirectory = StanfordEnv.PARSER_MODELS;
    var lexparserDirectory = modelsDirectory + StanfordEnv.GetParserLanguageFiles(language);
    var lp = LexicalizedParser.loadModel(lexparserDirectory);

    string[] splittedText = SplitText(text);
    List<string> entityLines = GetEntitiesLines(splittedText, entity);

    // The tokenizer factory does not depend on the line being parsed:
    // create it once instead of once per iteration (was rebuilt inside the loop).
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

    foreach (var line in entityLines)
    {
        // Tokenize and parse the sentence containing the entity.
        var sentReader = new java.io.StringReader(line);
        try
        {
            var rawWords = tokenizerFactory.getTokenizer(sentReader).tokenize();
            var tree = lp.apply(rawWords);
            results.Add(new string[] { origFile, entity, line, tree.ToString() });
        }
        finally
        {
            // Release the reader even when tokenizing/parsing throws
            // (was only closed on the success path).
            sentReader.close();
        }
    }

    return results;
}
/// <summary>
/// Converts MediaWiki markup to XHTML using the WikiModel parser:
/// the MediaWiki parser drives an XHTML print listener that writes into
/// an <see cref="XhtmlPrinter"/>, whose accumulated text is returned.
/// </summary>
/// <param name="markup">MediaWiki source text.</param>
/// <returns>The rendered XHTML produced by the printer.</returns>
public static string MediaWikiToXHTML(string markup)
{
    string retVal = null;

    using (XhtmlPrinter printer = new XhtmlPrinter())
    {
        java.io.Reader rdr = new java.io.StringReader(markup);
        try
        {
            // The XHTML print listener emits its output into our printer.
            org.wikimodel.wem.IWemListener listener =
                new org.wikimodel.wem.xhtml.PrintListener(printer);

            var mep = new org.wikimodel.wem.mediawiki.MediaWikiParser();
            mep.parse(rdr, listener);

            retVal = printer.Text;
        }
        finally
        {
            // Close the reader even if parse() throws
            // (was only closed on the success path).
            rdr.close();
        }
    } // End Using printer

    return retVal;
}
/// <summary>
/// Compiles the editor's current text with JPortal, skipping the compile when
/// the source hash is unchanged since the last successful run, and forwards
/// each non-empty compiler log line to the form.
/// </summary>
/// <returns>
/// true when a database is available (from this run or a previous one);
/// false when the source is unchanged and the cached database is reused.
/// </returns>
public bool Run()
{
    string code = editor.Document.TextContent;

    // check if we already have compiled this code before
    int hashCode = code.GetHashCode();
    if (database != null && this.hashCode == hashCode)
    {
        return (false);
    }
    this.hashCode = hashCode;

    // Java-interop streams: the reader feeds the compiler, the writer
    // captures its log output through a PrintWriter.
    java.io.StringReader reader = new java.io.StringReader(code);
    try
    {
        java.io.StringWriter writer = new java.io.StringWriter();
        try
        {
            java.io.PrintWriter log = new java.io.PrintWriter(writer);
            try
            {
                Database db;
                db = JPortal.run(name, reader, log);
                // Keep the previously compiled database when this run fails
                // (JPortal.run returning null).
                if (db != null)
                {
                    database = db;
                }
            }
            finally
            {
                // Flush so the StringWriter holds the complete log before we
                // read it below; then release the PrintWriter.
                log.flush();
                log.close();
            }

            // Relay each non-empty, trimmed log line to the form.
            string result = writer.ToString();
            char[] sep = { '\n' };
            string[] lines = result.Split(sep);
            for (int i = 0; i < lines.Length; i++)
            {
                string line = lines[i].Trim();
                if (line.Length == 0)
                {
                    continue;
                }
                form.LogInfo = line;
            }
        }
        finally
        {
            writer.close();
        }
    }
    finally
    {
        reader.close();
    }
    return (database != null);
}
/// <summary>
/// Runs the Stanford English PCFG parser over a sentence and returns the
/// labels of the leaf tokens whose immediate parent constituent carries one
/// of the configured noun labels.
/// </summary>
/// <param name="sentence">The sentence to analyse.</param>
/// <returns>Labels of the leaf nodes identified as nouns.</returns>
public static List<string> ExtractNounsFromSemantics(string sentence)
{
    // Resolve the models directory relative to the executing assembly's location.
    string assemblyPath = Assembly.GetExecutingAssembly().GetName().CodeBase;
    string threeLevelsUp = Path.GetDirectoryName(Path.GetDirectoryName(Path.GetDirectoryName(assemblyPath)));
    string projectPath = Directory.GetParent(new Uri(threeLevelsUp).LocalPath).FullName;
    string modelsDirectory = Path.GetFullPath(projectPath + @"\Parser\CoreNLP-3.9.1-Models\edu\stanford\nlp\models");

    // Loading english PCFG parser from file
    LexicalizedParser parser = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // Tokenize the sentence with an explicit PTB tokenizer.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sentenceReader = new java.io.StringReader(sentence);
    var tokens = factory.getTokenizer(sentenceReader).tokenize();
    sentenceReader.close();

    var tree = parser.apply(tokens);

    // Walk the parse tree and keep each leaf whose parent node is a noun label.
    var nouns = new List<string>();
    foreach (var node in tree.toArray().Cast<LabeledScoredTreeNode>())
    {
        if (node.isLeaf() && nounLabels.Contains(node.parent(tree).label().value()))
        {
            nouns.Add(node.label().ToString());
        }
    }
    return nouns;
}