Esempio n. 1
0
        /// <summary>
        /// Builds an RSTDocument from raw XML content: reads the token list and
        /// parses the Penn-treebank style "rstview" string into the node tree.
        /// </summary>
        /// <param name="str">XML document content.</param>
        /// <param name="name">Name assigned to the resulting document.</param>
        /// <returns>The populated RSTDocument.</returns>
        public RSTDocument ReadDocumentContent(string str, string name)
        {
            var document = new RSTDocument();
            document.name = name;

            var xml = XElement.Parse(str);

            // Collect every <token> element under <tokens>.
            foreach (var element in xml.Elements("tokens").Elements("token"))
            {
                var token = new Common.Token();
                token.name     = int.Parse(element.Attribute("id").Value);
                token.word     = element.Attribute("word").Value;
                token.lemma    = element.Attribute("lemma").Value;
                token.eduid    = int.Parse(element.Attribute("eduidx").Value);
                token.sentence = int.Parse(element.Attribute("sidx").Value) + 1; //here from 0 stanford from 1
                document.Tokens.Add(token);
            }

            // The <rstview> element holds a Penn-treebank string for the RST tree.
            var treebank   = xml.Elements("rstview").First().Value;
            var treeInput  = new java.io.StringReader(treebank);
            var treeReader = new edu.stanford.nlp.trees.PennTreeReader(treeInput);

            document.root = new RSTNode();
            document.root.Load(treeReader.readTree(), document.Tokens);

            return document;
        }
Esempio n. 2
0
        /// <summary>
        /// Parse the document searching for sentences where the entity found.
        /// Returns a csv line with the file, the entity the sentence and the sintax analisis of the sentences
        /// </summary>
        /// <param name="text">Document text</param>
        /// <param name="entity">Entity.</param>
        /// <param name="origFile">Original file.</param>
        /// <summary>
        /// Parse the document searching for sentences where the entity is found.
        /// Returns csv lines with the file, the entity, the sentence and the syntax analysis of the sentence.
        /// </summary>
        /// <param name="text">Document text</param>
        /// <param name="entity">Entity.</param>
        /// <param name="origFile">Original file.</param>
        /// <param name="language">Language key used to select the parser model.</param>
        /// <returns>One string[4] per matching line: { origFile, entity, line, parse tree }.</returns>
        public static List <string[]> Parse(string text, string entity, string origFile, string language)
        {
            var results = new List <string[]>();
            //Load language-specific models.
            var modelsDirectory    = StanfordEnv.PARSER_MODELS;
            var lexparserDirectory = modelsDirectory + StanfordEnv.GetParserLanguageFiles(language);
            var lp = LexicalizedParser.loadModel(lexparserDirectory);

            string[]      splittedText = SplitText(text);
            List <string> entityLines  = GetEntitiesLines(splittedText, entity);

            // The tokenizer factory is loop-invariant: create it once instead of
            // rebuilding it for every line (it was previously inside the loop).
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

            foreach (var line in entityLines)
            {
                //Tokenize and parse the sentence.
                var sent2Reader = new java.io.StringReader(line);
                var rawWords2   = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
                sent2Reader.close();
                var tree2 = lp.apply(rawWords2);

                results.Add(new string[] { origFile, entity, line, tree2.ToString() });
            }

            return(results);
        }
Esempio n. 3
0
        /// <summary>
        /// Converts MediaWiki markup to XHTML via the WikiModel event stream.
        /// </summary>
        /// <param name="markup">MediaWiki source text.</param>
        /// <returns>The rendered XHTML.</returns>
        public static string MediaWikiToXHTML(string markup)
        {
            string retVal = null;

            using (XhtmlPrinter printer = new XhtmlPrinter())
            {
                java.io.Reader rdr = new java.io.StringReader(markup);
                try
                {
                    // The print listener writes XHTML into the printer while the
                    // MediaWiki parser walks the markup and fires events.
                    org.wikimodel.wem.IWemListener listener = new org.wikimodel.wem.xhtml.PrintListener(printer);

                    org.wikimodel.wem.mediawiki.MediaWikiParser mep =
                        new org.wikimodel.wem.mediawiki.MediaWikiParser();
                    mep.parse(rdr, listener);
                    retVal = printer.Text;
                }
                finally
                {
                    // Close the reader even when parse() throws (it was previously
                    // only closed on the success path, leaking on error).
                    rdr.close();
                }
            } // End Using printer

            return(retVal);
        }
Esempio n. 4
0
        /// <summary>
        /// Loads the RST tree from an XML file: reads the token list and the
        /// Penn-treebank "rstview" string, then builds the node tree under Root.
        /// </summary>
        /// <param name="Path">Path of the XML file to load.</param>
        public void Load(string Path)
        {
            var reader      = XElement.Load(Path);
            var tokensquery = from c in reader.Elements("tokens").Elements("token")
                              select c;
            var tokens = new List <RSTWord>();

            foreach (var item in tokensquery)
            {
                var tk = new RSTWord();
                tk.id         = int.Parse(item.Attribute("id").Value);
                tk.Text       = item.Attribute("word").Value;
                tk.eduid      = int.Parse(item.Attribute("eduidx").Value);
                tk.sentenceid = int.Parse(item.Attribute("sidx").Value);
                tokens.Add(tk);
            }

            // NOTE: an unused query over <rsttree>/<node> elements was removed here;
            // the tree is rebuilt from the <rstview> treebank string instead.
            var treebankstr = (from c in reader.Elements("rstview") select c).First().Value;
            var input       = new java.io.StringReader(treebankstr);
            var treeReader  = new edu.stanford.nlp.trees.PennTreeReader(input);

            this.Root = new RSTNode();
            Root.Load(treeReader.readTree(), tokens);
        }
Esempio n. 5
0
        /// <summary>
        /// Builds a J48 decision tree classifier from a complete ARFF document,
        /// reading the instances incrementally.
        /// </summary>
        /// <param name="ARFF">Full ARFF content (header plus data).</param>
        /// <param name="count">Buffer size passed to the incremental ARFF reader.</param>
        /// <returns>The trained J48 classifier.</returns>
        public static J48 ByARFF(string ARFF, string count)
        {
            var arffInput  = new java.io.StringReader(ARFF);
            var arffReader = new ArffLoader.ArffReader(arffInput, Convert.ToInt32(count), true);

            // Incremental mode: read the header first, then pull instances one by one.
            Instances structure = arffReader.getStructure();
            structure.setClassIndex(structure.numAttributes() - 1);

            for (Instance inst = arffReader.readInstance(structure);
                 inst != null;
                 inst = arffReader.readInstance(structure))
            {
                structure.add(inst);
            }

            // Train a J48 tree (default options) on the collected instances.
            J48 tree = new J48();
            tree.buildClassifier(structure);

            return tree;
        }
Esempio n. 6
0
        /// <summary>
        /// Compiles the editor's current text with JPortal and forwards every
        /// non-blank line of the compiler log to the form. Skips recompilation
        /// when the exact same text was already compiled successfully.
        /// </summary>
        /// <returns>true when a database definition is available.</returns>
        public bool Run()
        {
            string code = editor.Document.TextContent;

            // Same text as last time and we already have a database: nothing to do.
            int newHash = code.GetHashCode();
            if (database != null && this.hashCode == newHash)
            {
                return(false);
            }
            this.hashCode = newHash;

            java.io.StringReader reader = new java.io.StringReader(code);
            try
            {
                java.io.StringWriter writer = new java.io.StringWriter();
                try
                {
                    java.io.PrintWriter log = new java.io.PrintWriter(writer);
                    try
                    {
                        // Keep the previous database when this run yields none.
                        Database db = JPortal.run(name, reader, log);
                        if (db != null)
                        {
                            database = db;
                        }
                    }
                    finally
                    {
                        log.flush();
                        log.close();
                    }

                    // Forward each non-empty log line to the form.
                    foreach (string rawLine in writer.ToString().Split('\n'))
                    {
                        string line = rawLine.Trim();
                        if (line.Length > 0)
                        {
                            form.LogInfo = line;
                        }
                    }
                }
                finally
                {
                    writer.close();
                }
            }
            finally
            {
                reader.close();
            }
            return(database != null);
        }
Esempio n. 7
0
        /// <summary>
        /// Segments the input text into sentences, keeping the original
        /// whitespace inside each sentence.
        /// </summary>
        /// <param name="input">Raw text to segment.</param>
        /// <returns>One string per detected sentence.</returns>
        public static List <string> Go(string input)
        {
            var preprocessor = new DocumentPreprocessor(new java.io.StringReader(input));
            preprocessor.setTokenizerFactory(TokenizerFactory);

            var sentences = new List <string>();

            // Each element yielded by the preprocessor is one tokenized sentence.
            foreach (java.util.List tokens in preprocessor)
            {
                sentences.Add(StringUtils.joinWithOriginalWhiteSpace(tokens));
            }

            return(sentences);
        }
Esempio n. 8
0
        /// <summary>
        /// Copies every mapped field into the given Lucene document. Fields that
        /// carry per-token boosts are analyzed through a boosting token filter;
        /// plain fields are added as analyzed, unstored text.
        /// </summary>
        /// <param name="document">Target Lucene document.</param>
        public void CopyTo(Document document)
        {
            foreach (var entry in _fieldMap)
            {
                var name  = entry.Key;
                var value = entry.Value;

                if (!value.HasBoosts())
                {
                    document.add(new Field(name, value.GetText(), Field.Store.NO, Field.Index.ANALYZED));
                    continue;
                }

                // Wrap the analyzer's token stream so each token picks up the
                // boost recorded at its start offset.
                var stream = _analyzer.tokenStream(name, new java.io.StringReader(value.GetText()));
                document.add(new Field(name, new BoostingTokenFilter(stream, value.GetStartOffsets(), value.GetBoosts())));
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Parses a sentence with the English PCFG model and returns the words
        /// whose parent constituent label is one of the configured noun labels.
        /// </summary>
        /// <param name="sentence">Sentence to analyze.</param>
        /// <returns>The labels of the noun leaves, in tree order.</returns>
        public static List <string> ExtractNounsFromSemantics(string sentence)
        {
            // Resolve the models directory relative to the executing assembly.
            string assemblyPath    = Assembly.GetExecutingAssembly().GetName().CodeBase;
            string projectPath     = Directory.GetParent(new Uri(Path.GetDirectoryName(Path.GetDirectoryName(Path.GetDirectoryName(assemblyPath)))).LocalPath).FullName;
            string modelsDirectory = Path.GetFullPath(projectPath + @"\Parser\CoreNLP-3.9.1-Models\edu\stanford\nlp\models");

            // Loading english PCFG parser from file
            LexicalizedParser parser = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // Tokenize explicitly before parsing.
            var factory        = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sentenceReader = new java.io.StringReader(sentence);
            var words          = factory.getTokenizer(sentenceReader).tokenize();

            sentenceReader.close();
            var parseTree = parser.apply(words);

            // Keep only the leaves sitting directly under a noun-labeled constituent.
            return parseTree.toArray()
                   .Cast <LabeledScoredTreeNode>()
                   .Where(n => n.isLeaf() && nounLabels.Contains(n.parent(parseTree).label().value()))
                   .Select(n => n.label().ToString())
                   .ToList();
        }
Esempio n. 10
0
        /// <summary>
        /// Builds a J48 decision tree from a separate ARFF header and ARFF data block.
        /// </summary>
        /// <param name="header">ARFF header (attribute declarations).</param>
        /// <param name="dataARFF">ARFF data rows (no header).</param>
        /// <returns>The trained J48 classifier.</returns>
        public static J48 ByHeaderAndData(string header, string dataARFF)
        {
            // Read the structure (attributes, class index) from the header only.
            java.io.StringReader  ArffReader = new java.io.StringReader(header);
            ArffLoader.ArffReader ARFFData   = new ArffLoader.ArffReader(ArffReader, 100, false);
            Instances             structure  = ARFFData.getStructure();

            structure.setClassIndex(structure.numAttributes() - 1);

            // BUG FIX: the data reader was created but never handed to an
            // ArffReader, so instances were still being pulled from the exhausted
            // header stream (and getData() on an incremental reader yields no
            // batch data). Build a new incremental reader over the data block,
            // using the header's structure as the template.
            ArffReader = new java.io.StringReader(dataARFF);
            ARFFData   = new ArffLoader.ArffReader(ArffReader, structure, 0);

            Instance inst;

            while ((inst = ARFFData.readInstance(structure)) != null)
            {
                structure.add(inst);
            }
            J48 tree = new J48();            // new instance of tree

            tree.buildClassifier(structure); // build classifier

            return(tree);
        }
Esempio n. 11
0
        /// <summary>
        /// Demo driver: parses a hard-coded AMR-like tree string with a custom
        /// PennTreeReader setup, loads an RST tree from rst.xml and scores it
        /// (O'Donnell), loads AMR RDF from output.xml, applies the RST weights to
        /// each EDU sentence, and persists the document to a Neo4j repository.
        /// Large commented-out sections are kept as exploratory/reference code.
        /// </summary>
        static void Main(string[] args)
        {
            // Tree factory with a custom label factory for the AMR-style tokens below.
            var tf = new edu.stanford.nlp.trees.LabeledScoredTreeFactory(new CustomStringLabelFactory());

            var str   = "(x2 / score :null_edge(x1 / null_tag) :null_edge(x3 / null_tag)	:time(xap0 / before	:quant(x5 / temporal - quantity	:unit(y / year) :null_edge(x4 / null_tag))))";
            var input = new java.io.StringReader(str);

            // Custom normalizer/tokenizer pair so the non-standard markup parses.
            var treeReader = new edu.stanford.nlp.trees.PennTreeReader(input, tf, new CustomTreeNormalizer(), new CustomTokenizerAdapter(input));

            var t = treeReader.readTree();


            // Print the parsed tree in Penn-treebank format (console check).
            TreePrint p = new TreePrint("penn");

            p.printTree(t);



            //READ RST INFORMATION
            RSTTree tree = new RSTTree("lincon");

            tree.Load(Path.Combine(Root, "rst.xml"));
            tree.EvaluateODonell();

            // Per-EDU summary weights derived from the O'Donnell scoring.
            var sum = tree.Summarize();

            //READ AMR INFORMATION FOR EACH EDU AND ASSOCIATTE THE ODONELL SCORE
            IGraph g      = new Graph();
            var    parser = new VDS.RDF.Parsing.RdfXmlParser();

            //   NTriplesParser ntparser = new NTriplesParser();
            parser.Load(g, Path.Combine(Root, "output.xml"));
            var document = new AMRDocument();

            document.Load(g);

            // Attach each EDU sentence's RST weight (matched by EDU id).
            foreach (var item in document.EDUSentences)
            {
                item.ApplyRSTWeight(sum.Where(c => c.edu == item.Id).Select(c => c.Weight).First());
            }

            //var rstdocument = new RSTDocumentRepository();
            //rstdocument.DeleteAllNodes();
            //rstdocument.Save(tree);

            // Persist the weighted AMR document to Neo4j (wipes existing nodes first).
            AMRNEORepository repo = new AMRNEORepository();

            repo.DeleteAllNodes();
            repo.SaveDocument(document);



            //var ids = Helper.ReadIds(g);
            //foreach (var item in ids)
            //{

            //    item.sentence = Helper.GetSentence(g, item);
            //    item.AddNodes(g);


            //    if (item.id == 22)
            //    {
            //        Console.WriteLine(item.urlid);
            //        Console.WriteLine(item.sentence);
            //        Console.WriteLine(item.Root.uriid);
            //        Console.WriteLine(item.Root.Term.uriid);
            //        Console.WriteLine(item.Root.Term.type);
            //    }

            //}

            //SparqlQueryParser qparser = new SparqlQueryParser();
            ////Then we can parse a SPARQL string into a query

            //StringBuilder querystr = new StringBuilder();
            //querystr.AppendLine("PREFIX amr-core: <http://amr.isi.edu/rdf/core-amr#>");
            //querystr.AppendLine("PREFIX amr-data: <http://amr.isi.edu/amr_data#>");
            //querystr.AppendLine("PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>");
            //querystr.AppendLine("PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>");
            //querystr.AppendLine("PREFIX amr-terms: <http://amr.isi.edu/rdf/amr-terms#>");
            ////querystr.AppendLine("SELECT  ?p WHERE { ?s rdf:type ?p }");
            ////querystr.Append("SELECT ?s ?sentence ?id ?root ?rtype ?amrtype");
            //querystr.Append("SELECT ?root ?rtype  ?amrtypelbl ");
            //querystr.Append("WHERE {");
            //querystr.Append("?s amr-core:has-sentence ?sentence.");
            //querystr.Append("?s amr-core:has-id ?id.");
            //querystr.Append("?s amr-core:root ?root. ");
            //querystr.Append("?root rdf:type ?rtype. ");
            //querystr.Append("?rtype rdf:type ?amrtype. ");
            //querystr.Append("?amrtype rdfs:label ?amrtypelbl. ");
            //querystr.Append("}");

            //SparqlQuery q = qparser.ParseFromString(querystr.ToString());

            ////http://amr.isi.edu/rdf/core-amr#has-id
            //var rset = (SparqlResultSet)g.ExecuteQuery(q);

            //var SB = new StringBuilder();
            //if (rset.Result && rset.Results.Count > 0)
            //{
            //    foreach (var result in rset.Results)
            //    {
            //        foreach (var r in result)
            //        {
            //            Console.WriteLine(r.Key + " " + r.Value);
            //        }

            //        //Do what you want with each result
            //    }
            //}
            //File.WriteAllText("dic.txt", SB.ToString());
            //http://amr.isi.edu/amr_data/22#root01

            //foreach (var item in g.Triples)
            //{


            //    Console.WriteLine(item.Subject);

            //}



            //foreach (var node in g.Nodes)
            //{
            //    Console.WriteLine(node.ToString());
            //}

            //g.SaveToFile("output.rdf");
        }
Esempio n. 12
0
        /// <summary>
        /// Tokenizes and POS-tags the source text with the Stanford tagger.
        /// When tagging keeps a 1:1 alignment with the input tokens, a single
        /// space token (with an empty tag) is inserted wherever the original
        /// text had whitespace between tokens.
        /// </summary>
        /// <param name="strSource">Text to tag.</param>
        /// <param name="outToken">Tokens (plus whitespace markers) in order.</param>
        /// <param name="outTag">Tag for each entry in outToken ("" for whitespace markers).</param>
        /// <returns>Always true.</returns>
        public bool AddPOSTag(string strSource, out string[] outToken, out string[] outTag)
        {
            if (tagger == null)
            {
                InitTagger();
            }
            java.io.StringReader reader = new java.io.StringReader(strSource);
            List tokens = MaxentTagger.tokenizeText(reader);
            reader.close(); // tokenizeText consumes the reader fully; release it (was leaked)

            List<string> lstToken = new List<string>();
            List<string> lstTag = new List<string>();
            int iLastEndPosition = 0;
            for (int i = 0; i < tokens.size(); i++)
            {
                List sen = (List)tokens.get(i);
                ArrayList res = tagger.tagSentence(sen);
                for (int j = 0; j < res.size(); j++)
                {
                    edu.stanford.nlp.ling.TaggedWord tw = (edu.stanford.nlp.ling.TaggedWord)res.get(j);
                    string wd = tw.word();
                    string tg = tw.tag();
                    // Offsets are only trustworthy when the tagger returned one
                    // tagged word per input token.
                    if (sen.size() == res.size())
                    {
                        edu.stanford.nlp.ling.Word w = (edu.stanford.nlp.ling.Word) sen.get(j);
                        if (w.beginPosition() > iLastEndPosition)
                        {
                            lstToken.Add(" ");
                            lstTag.Add("");
                        }
                        iLastEndPosition = w.endPosition();
                    }
                    lstToken.Add(wd);
                    lstTag.Add(tg);
                }
            }

            outToken = lstToken.ToArray(); // (stray empty statement removed)
            outTag = lstTag.ToArray();

            return true;
        }
Esempio n. 13
0
        /// <summary>
        /// Splits the source text into sentences; each sentence is rebuilt from
        /// its tokens, restoring a single space wherever the original text had a
        /// gap between consecutive tokens.
        /// </summary>
        /// <param name="strSourceText">Text to segment.</param>
        /// <returns>One trimmed string per sentence.</returns>
        public static string[] Text2Sentence(string strSourceText)
        {
            java.io.StringReader reader = new java.io.StringReader(strSourceText);
            List tokens = MaxentTagger.tokenizeText(reader);
            reader.close(); // tokenizeText consumes the reader fully; release it (was leaked)

            string[] outSentences = new string[tokens.size()];

            for (int i = 0; i < tokens.size(); i++)
            {
                List res = (List)tokens.get(i);
                StringBuilder sb = new StringBuilder();

                int iLastEndingPosition = 0;
                for (int j = 0; j < res.size(); j++)
                {
                    edu.stanford.nlp.ling.Word w = (edu.stanford.nlp.ling.Word)res.get(j);
                    string wd = w.word();
                    // A gap between this token's start and the previous token's
                    // end means the source had whitespace there.
                    if (w.beginPosition() > iLastEndingPosition)
                    {
                        sb.Append(" ");
                    }
                    sb.Append(wd);
                    iLastEndingPosition = w.endPosition();
                }
                outSentences[i] = sb.ToString().Trim();
            }

            return outSentences;
        }