/// <summary>
/// Builds an RSTDocument from an XML string: reads the token table and the
/// Penn-treebank style RST tree embedded in the "rstview" element.
/// </summary>
/// <param name="str">XML content of the RST document.</param>
/// <param name="name">Name assigned to the resulting document.</param>
/// <returns>The populated RSTDocument (tokens plus parsed tree root).</returns>
public RSTDocument ReadDocumentContent(string str, string name)
{
    RSTDocument result = new RSTDocument();
    result.name = name;

    var documentxml = XElement.Parse(str);

    // Token attributes: id, word, lemma, eduidx (EDU index) and sidx (sentence index).
    var tokensquery = from c in documentxml.Elements("tokens").Elements("token") select c;
    foreach (var token in tokensquery)
    {
        var tk = new Common.Token();
        tk.name = int.Parse(token.Attribute("id").Value);
        tk.word = token.Attribute("word").Value;
        tk.lemma = token.Attribute("lemma").Value;
        tk.eduid = int.Parse(token.Attribute("eduidx").Value);
        tk.sentence = int.Parse(token.Attribute("sidx").Value) + 1; // here from 0, Stanford from 1
        result.Tokens.Add(tk);
    }

    // The "rstview" element carries the tree in Penn treebank bracket notation.
    var treebankstr = (from c in documentxml.Elements("rstview") select c).First().Value;
    var input = new java.io.StringReader(treebankstr);
    var treeReader = new edu.stanford.nlp.trees.PennTreeReader(input);
    try
    {
        result.root = new RSTNode();
        result.root.Load(treeReader.readTree(), result.Tokens);
    }
    finally
    {
        // Fix: close the tree reader (and its underlying StringReader) instead of
        // leaking it — sibling methods in this file close their readers.
        treeReader.close();
    }

    return result;
}
/// <summary>
/// Parse the document searching for sentences where the entity is found.
/// Returns CSV-style rows with the file, the entity, the sentence and the
/// syntax analysis of the sentence.
/// </summary>
/// <param name="text">Document text.</param>
/// <param name="entity">Entity.</param>
/// <param name="origFile">Original file.</param>
/// <param name="language">Language key used to pick the parser model.</param>
/// <returns>One string[4] row { origFile, entity, sentence, parse tree } per matching sentence.</returns>
public static List<string[]> Parse(string text, string entity, string origFile, string language)
{
    var results = new List<string[]>();

    // Load the language-specific lexicalized parser model.
    var modelsDirectory = StanfordEnv.PARSER_MODELS;
    var lexparserDirectory = modelsDirectory + StanfordEnv.GetParserLanguageFiles(language);
    var lp = LexicalizedParser.loadModel(lexparserDirectory);

    string[] splittedText = SplitText(text);
    List<string> entityLines = GetEntitiesLines(splittedText, entity);

    // Fix: the tokenizer factory is loop-invariant — create it once instead of
    // once per sentence as the original did.
    var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

    foreach (var line in entityLines)
    {
        // Tokenize and parse the sentence.
        var sent2Reader = new java.io.StringReader(line);
        var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
        sent2Reader.close();

        var tree2 = lp.apply(rawWords2);
        results.Add(new string[] { origFile, entity, line, tree2.ToString() });
    }

    return results;
}
/// <summary>
/// Converts MediaWiki markup to XHTML using the WikiModel parser, writing
/// the rendered output through an XhtmlPrinter.
/// </summary>
/// <param name="markup">MediaWiki source text.</param>
/// <returns>The XHTML produced by the printer.</returns>
public static string MediaWikiToXHTML(string markup)
{
    string xhtml = null;

    using (XhtmlPrinter printer = new XhtmlPrinter())
    {
        java.io.Reader source = new java.io.StringReader(markup);
        org.wikimodel.wem.IWemListener sink = new org.wikimodel.wem.xhtml.PrintListener(printer);
        org.wikimodel.wem.mediawiki.MediaWikiParser wikiParser = new org.wikimodel.wem.mediawiki.MediaWikiParser();

        wikiParser.parse(source, sink);
        xhtml = printer.Text;

        source.close();
        source = null;
        sink = null;
        wikiParser = null;
    } // End Using printer

    return xhtml;
}
/// <summary>
/// Loads an RST tree from an XML file: the token table plus the Penn-treebank
/// style tree stored in the "rstview" element. Populates this.Root.
/// </summary>
/// <param name="Path">Path of the XML file to load.</param>
public void Load(string Path)
{
    var reader = XElement.Load(Path);

    // Read the token table (id, word, EDU index, sentence index).
    var tokensquery = from c in reader.Elements("tokens").Elements("token") select c;
    var tokens = new List<RSTWord>();
    foreach (var item in tokensquery)
    {
        var tk = new RSTWord();
        tk.id = int.Parse(item.Attribute("id").Value);
        tk.Text = item.Attribute("word").Value;
        tk.eduid = int.Parse(item.Attribute("eduidx").Value);
        tk.sentenceid = int.Parse(item.Attribute("sidx").Value);
        tokens.Add(tk);
    }

    // Fix: removed the unused "rsttree/node" query the original built and never consumed.
    var treebankstr = (from c in reader.Elements("rstview") select c).First().Value;
    var input = new java.io.StringReader(treebankstr);
    var treeReader = new edu.stanford.nlp.trees.PennTreeReader(input);
    this.Root = new RSTNode();
    Root.Load(treeReader.readTree(), tokens);
}
/// <summary>
/// Builds a J48 decision tree from a complete ARFF document supplied as a
/// string. The last attribute is used as the class attribute.
/// </summary>
/// <param name="ARFF">Full ARFF content (header plus data).</param>
/// <param name="count">Initial instance capacity, parsed as an integer.</param>
/// <returns>A J48 classifier trained on the parsed instances.</returns>
public static J48 ByARFF(string ARFF, string count)
{
    var source = new java.io.StringReader(ARFF);
    var loader = new ArffLoader.ArffReader(source, Convert.ToInt32(count), true);

    // Header gives us the dataset structure; last attribute is the class.
    Instances dataset = loader.getStructure();
    dataset.setClassIndex(dataset.numAttributes() - 1);

    // Pull instances one at a time and accumulate them into the dataset.
    for (Instance row = loader.readInstance(dataset); row != null; row = loader.readInstance(dataset))
    {
        dataset.add(row);
    }

    var classifier = new J48();
    classifier.buildClassifier(dataset);
    return classifier;
}
/// <summary>
/// Compiles the current editor content with JPortal, skipping the run when the
/// text is unchanged since the last compile (hash comparison). Compiler output
/// is forwarded line by line to the form log.
/// </summary>
/// <returns>true when a database is available after the call; false when the
/// unchanged source was skipped.</returns>
public bool Run()
{
    string code = editor.Document.TextContent;

    // check if we already have compiled this code before
    int newHash = code.GetHashCode();
    if (database != null && this.hashCode == newHash)
    {
        return false;
    }
    this.hashCode = newHash;

    java.io.StringReader reader = new java.io.StringReader(code);
    try
    {
        java.io.StringWriter writer = new java.io.StringWriter();
        try
        {
            java.io.PrintWriter log = new java.io.PrintWriter(writer);
            try
            {
                Database db = JPortal.run(name, reader, log);
                if (db != null)
                {
                    database = db;
                }
            }
            finally
            {
                log.flush();
                log.close();
            }

            // Forward every non-empty compiler message to the form.
            string output = writer.ToString();
            foreach (string raw in output.Split('\n'))
            {
                string line = raw.Trim();
                if (line.Length != 0)
                {
                    form.LogInfo = line;
                }
            }
        }
        finally
        {
            writer.close();
        }
    }
    finally
    {
        reader.close();
    }

    return database != null;
}
/// <summary>
/// Splits raw text into sentences with Stanford's DocumentPreprocessor,
/// rejoining each token list with its original whitespace.
/// </summary>
/// <param name="input">Raw text to segment.</param>
/// <returns>One string per detected sentence.</returns>
public static List<string> Go(string input)
{
    var sentences = new List<string>();

    java.io.Reader textReader = new java.io.StringReader(input);
    var preprocessor = new DocumentPreprocessor(textReader);
    preprocessor.setTokenizerFactory(TokenizerFactory);

    foreach (java.util.List tokens in preprocessor)
    {
        sentences.Add(StringUtils.joinWithOriginalWhiteSpace(tokens));
    }

    return sentences;
}
/// <summary>
/// Copies every mapped field into the given Lucene document. Fields carrying
/// boost information are run through the analyzer and wrapped in a
/// BoostingTokenFilter; plain fields are added as unstored, analyzed text.
/// </summary>
/// <param name="document">Target Lucene document.</param>
public void CopyTo(Document document)
{
    foreach (var entry in _fieldMap)
    {
        var name = entry.Key;
        var mappedField = entry.Value;

        if (!mappedField.HasBoosts())
        {
            document.add(new Field(name, mappedField.GetText(), Field.Store.NO, Field.Index.ANALYZED));
            continue;
        }

        // Boosted field: analyze the text and attach per-offset boosts.
        var textReader = new java.io.StringReader(mappedField.GetText());
        var stream = _analyzer.tokenStream(name, textReader);
        document.add(new Field(name, new BoostingTokenFilter(stream, mappedField.GetStartOffsets(), mappedField.GetBoosts())));
    }
}
/// <summary>
/// Parses a sentence with the English PCFG parser and returns the leaf words
/// whose parent node carries a noun label (per nounLabels).
/// </summary>
/// <param name="sentence">Sentence to parse.</param>
/// <returns>The labels of noun leaves, in tree order.</returns>
public static List<string> ExtractNounsFromSemantics(string sentence)
{
    // Resolve the models directory relative to the executing assembly.
    string assemblyPath = Assembly.GetExecutingAssembly().GetName().CodeBase;
    string projectPath = Directory.GetParent(new Uri(Path.GetDirectoryName(Path.GetDirectoryName(Path.GetDirectoryName(assemblyPath)))).LocalPath).FullName;
    string modelsDirectory = Path.GetFullPath(projectPath + @"\Parser\CoreNLP-3.9.1-Models\edu\stanford\nlp\models");

    // Loading english PCFG parser from file
    LexicalizedParser pcfgParser = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

    // Tokenize with an explicit PTB tokenizer.
    var factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    var sentenceReader = new java.io.StringReader(sentence);
    var words = factory.getTokenizer(sentenceReader).tokenize();
    sentenceReader.close();

    var parseTree = pcfgParser.apply(words);

    // Keep only the leaves whose immediate parent is tagged as a noun.
    return parseTree.toArray()
        .Cast<LabeledScoredTreeNode>()
        .Where(n => n.isLeaf() && nounLabels.Contains(n.parent(parseTree).label().value()))
        .Select(n => n.label().ToString())
        .ToList();
}
/// <summary>
/// Builds a J48 classifier from a separate ARFF header and an ARFF data
/// section. The last attribute declared in the header is the class attribute.
/// </summary>
/// <param name="header">ARFF header (relation and attribute declarations).</param>
/// <param name="dataARFF">ARFF data rows (no header).</param>
/// <returns>A J48 tree trained on the supplied instances.</returns>
public static J48 ByHeaderAndData(string header, string dataARFF)
{
    // Read the structure (attributes + class index) from the header alone.
    java.io.StringReader headerReader = new java.io.StringReader(header);
    ArffLoader.ArffReader arff = new ArffLoader.ArffReader(headerReader, 100, false);
    Instances structure = arff.getStructure();
    structure.setClassIndex(structure.numAttributes() - 1);

    // Fix: the original created a StringReader over dataARFF but never passed it
    // to an ArffReader — it kept reading from the exhausted header reader, so no
    // data row was ever parsed. Build a reader that consumes dataARFF against
    // the header's template (ArffReader(Reader, Instances template, int lines)).
    java.io.StringReader dataReader = new java.io.StringReader(dataARFF);
    ArffLoader.ArffReader dataArff = new ArffLoader.ArffReader(dataReader, structure, 0);

    Instance inst;
    while ((inst = dataArff.readInstance(structure)) != null)
    {
        structure.add(inst);
    }

    J48 tree = new J48();            // new instance of tree
    tree.buildClassifier(structure); // build classifier
    return tree;
}
/// <summary>
/// Driver: parses a custom AMR-style bracketed tree string, loads the RST tree
/// for a document and scores its EDUs (O'Donnell weights), attaches those
/// weights to the AMR sentences loaded from an RDF/XML graph, and persists the
/// weighted document to the Neo4j repository.
/// </summary>
static void Main(string[] args)
{
    // Parse a bracketed tree using custom label factory / normalizer / tokenizer.
    var tf = new edu.stanford.nlp.trees.LabeledScoredTreeFactory(new CustomStringLabelFactory());
    var str = "(x2 / score :null_edge(x1 / null_tag) :null_edge(x3 / null_tag) :time(xap0 / before :quant(x5 / temporal - quantity :unit(y / year) :null_edge(x4 / null_tag))))";
    var input = new java.io.StringReader(str);
    var treeReader = new edu.stanford.nlp.trees.PennTreeReader(input, tf, new CustomTreeNormalizer(), new CustomTokenizerAdapter(input));
    var t = treeReader.readTree();
    TreePrint p = new TreePrint("penn");
    p.printTree(t);

    //READ RST INFORMATION
    RSTTree tree = new RSTTree("lincon");
    tree.Load(Path.Combine(Root, "rst.xml"));
    tree.EvaluateODonell();
    // Per-EDU summary carrying the O'Donnell weight for each EDU id.
    var sum = tree.Summarize();

    //READ AMR INFORMATION FOR EACH EDU AND ASSOCIATTE THE ODONELL SCORE
    IGraph g = new Graph();
    var parser = new VDS.RDF.Parsing.RdfXmlParser();
    // NTriplesParser ntparser = new NTriplesParser();
    parser.Load(g, Path.Combine(Root, "output.xml"));
    var document = new AMRDocument();
    document.Load(g);

    // Attach each EDU's RST weight (matched by EDU id) to its AMR sentence.
    foreach (var item in document.EDUSentences)
    {
        item.ApplyRSTWeight(sum.Where(c => c.edu == item.Id).Select(c => c.Weight).First());
    }

    //var rstdocument = new RSTDocumentRepository();
    //rstdocument.DeleteAllNodes();
    //rstdocument.Save(tree);

    // Persist the weighted AMR document to Neo4j (wiping previous nodes first).
    AMRNEORepository repo = new AMRNEORepository();
    repo.DeleteAllNodes();
    repo.SaveDocument(document);

    // NOTE(review): everything below is kept-for-reference dead code (ad-hoc
    // SPARQL queries and graph dumps), retained verbatim.
    //var ids = Helper.ReadIds(g);
    //foreach (var item in ids)
    //{
    //    item.sentence = Helper.GetSentence(g, item);
    //    item.AddNodes(g);
    //    if (item.id == 22)
    //    {
    //        Console.WriteLine(item.urlid);
    //        Console.WriteLine(item.sentence);
    //        Console.WriteLine(item.Root.uriid);
    //        Console.WriteLine(item.Root.Term.uriid);
    //        Console.WriteLine(item.Root.Term.type);
    //    }
    //}
    //SparqlQueryParser qparser = new SparqlQueryParser();
    ////Then we can parse a SPARQL string into a query
    //StringBuilder querystr = new StringBuilder();
    //querystr.AppendLine("PREFIX amr-core: <http://amr.isi.edu/rdf/core-amr#>");
    //querystr.AppendLine("PREFIX amr-data: <http://amr.isi.edu/amr_data#>");
    //querystr.AppendLine("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>");
    //querystr.AppendLine("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>");
    //querystr.AppendLine("PREFIX amr-terms: <http://amr.isi.edu/rdf/amr-terms#>");
    ////querystr.AppendLine("SELECT ?p WHERE { ?s rdf:type ?p }");
    ////querystr.Append("SELECT ?s ?sentence ?id ?root ?rtype ?amrtype");
    //querystr.Append("SELECT ?root ?rtype ?amrtypelbl ");
    //querystr.Append("WHERE {");
    //querystr.Append("?s amr-core:has-sentence ?sentence.");
    //querystr.Append("?s amr-core:has-id ?id.");
    //querystr.Append("?s amr-core:root ?root. ");
    //querystr.Append("?root rdf:type ?rtype. ");
    //querystr.Append("?rtype rdf:type ?amrtype. ");
    //querystr.Append("?amrtype rdfs:label ?amrtypelbl. ");
    //querystr.Append("}");
    //SparqlQuery q = qparser.ParseFromString(querystr.ToString());
    ////http://amr.isi.edu/rdf/core-amr#has-id
    //var rset = (SparqlResultSet)g.ExecuteQuery(q);
    //var SB = new StringBuilder();
    //if (rset.Result && rset.Results.Count > 0)
    //{
    //    foreach (var result in rset.Results)
    //    {
    //        foreach (var r in result)
    //        {
    //            Console.WriteLine(r.Key + " " + r.Value);
    //        }
    //        //Do what you want with each result
    //    }
    //}
    //File.WriteAllText("dic.txt", SB.ToString());
    //http://amr.isi.edu/amr_data/22#root01
    //foreach (var item in g.Triples)
    //{
    //    Console.WriteLine(item.Subject);
    //}
    //foreach (var node in g.Nodes)
    //{
    //    Console.WriteLine(node.ToString());
    //}
    //g.SaveToFile("output.rdf");
}
/// <summary>
/// Tokenizes and POS-tags the source text with the Stanford MaxentTagger.
/// Gaps between tokens in the original text are emitted as a " " token with an
/// empty tag so the text layout can be reconstructed from the output arrays.
/// </summary>
/// <param name="strSource">Text to tag.</param>
/// <param name="outToken">Tokens (plus single-space separators) in order.</param>
/// <param name="outTag">POS tag for each entry of outToken ("" for separators).</param>
/// <returns>Always true.</returns>
public bool AddPOSTag(string strSource, out string[] outToken, out string[] outTag)
{
    // Lazily initialize the shared tagger on first use.
    if (tagger == null)
    {
        InitTagger();
    }

    java.io.StringReader reader = new java.io.StringReader(strSource);
    List tokens = MaxentTagger.tokenizeText(reader);

    List<string> lstToken = new List<string>();
    List<string> lstTag = new List<string>();
    int iLastEndPosition = 0;

    for (int i = 0; i < tokens.size(); i++)
    {
        List sen = (List)tokens.get(i);
        ArrayList res = tagger.tagSentence(sen);

        for (int j = 0; j < res.size(); j++)
        {
            edu.stanford.nlp.ling.TaggedWord tw = (edu.stanford.nlp.ling.TaggedWord)res.get(j);
            string wd = tw.word();
            string tg = tw.tag();

            // Only when tagging preserved the token count can tagged words be
            // mapped back onto source positions to detect whitespace gaps.
            if (sen.size() == res.size())
            {
                edu.stanford.nlp.ling.Word w = (edu.stanford.nlp.ling.Word)sen.get(j);
                if (w.beginPosition() > iLastEndPosition)
                {
                    lstToken.Add(" ");
                    lstTag.Add("");
                }
                iLastEndPosition = w.endPosition();
            }

            lstToken.Add(wd);
            lstTag.Add(tg);
        }
    }

    // Fix: removed a stray duplicate ';' that followed the ToArray() call.
    outToken = lstToken.ToArray();
    outTag = lstTag.ToArray();
    return true;
}
/// <summary>
/// Splits raw text into sentence strings. Each sentence is rebuilt from its
/// tokens, inserting a single space wherever the original text had a gap
/// between token positions, then trimmed.
/// </summary>
/// <param name="strSourceText">Text to segment into sentences.</param>
/// <returns>One rebuilt string per detected sentence.</returns>
public static string[] Text2Sentence(string strSourceText)
{
    java.io.StringReader sourceReader = new java.io.StringReader(strSourceText);
    List sentences = MaxentTagger.tokenizeText(sourceReader);

    string[] result = new string[sentences.size()];

    for (int s = 0; s < sentences.size(); s++)
    {
        List sentence = (List)sentences.get(s);
        var builder = new StringBuilder();
        int previousEnd = 0;

        for (int w = 0; w < sentence.size(); w++)
        {
            var word = (edu.stanford.nlp.ling.Word)sentence.get(w);
            // A gap between the previous token's end and this token's start
            // means the source had whitespace there.
            if (word.beginPosition() > previousEnd)
            {
                builder.Append(" ");
            }
            builder.Append(word.word());
            previousEnd = word.endPosition();
        }

        result[s] = builder.ToString().Trim();
    }

    return result;
}