Пример #1
0
        /// <summary>
        /// Parses the document searching for sentences in which the entity occurs.
        /// Returns one CSV-style row per matching sentence with the file, the entity,
        /// the sentence and the syntax analysis (parse tree) of that sentence.
        /// </summary>
        /// <param name="text">Document text.</param>
        /// <param name="entity">Entity to search for.</param>
        /// <param name="origFile">Original file name, echoed into each result row.</param>
        /// <param name="language">Language key used to select the parser model files.</param>
        /// <returns>A list of string[4] rows: { origFile, entity, sentence, parse tree }.</returns>
        public static List <string[]> Parse(string text, string entity, string origFile, string language)
        {
            var results = new List <string[]>();

            // Load the language-specific parser model.
            var modelsDirectory    = StanfordEnv.PARSER_MODELS;
            var lexparserDirectory = modelsDirectory + StanfordEnv.GetParserLanguageFiles(language);
            var lp = LexicalizedParser.loadModel(lexparserDirectory);

            string[]      splittedText = SplitText(text);
            List <string> entityLines  = GetEntitiesLines(splittedText, entity);

            // The tokenizer factory is loop-invariant; create it once instead of
            // once per sentence (it was previously rebuilt on every iteration).
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");

            foreach (var line in entityLines)
            {
                var sent2Reader = new java.io.StringReader(line);
                try
                {
                    // Tokenize and parse the sentence.
                    var rawWords2 = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
                    var tree2     = lp.apply(rawWords2);

                    results.Add(new string[] { origFile, entity, line, tree2.ToString() });
                }
                finally
                {
                    // Previously the reader leaked if tokenization/parsing threw;
                    // close it unconditionally.
                    sent2Reader.close();
                }
            }

            return(results);
        }
Пример #2
0
        /// <summary>
        /// Converts MediaWiki markup to XHTML using the WikiModel MediaWiki parser.
        /// </summary>
        /// <param name="markup">MediaWiki source text.</param>
        /// <returns>The XHTML accumulated by the printer after parsing.</returns>
        public static string MediaWikiToXHTML(string markup)
        {
            using (XhtmlPrinter printer = new XhtmlPrinter())
            {
                java.io.Reader rdr = new java.io.StringReader(markup);
                try
                {
                    // The PrintListener emits XHTML into the printer as the
                    // parser walks the markup.
                    org.wikimodel.wem.IWemListener listener =
                        new org.wikimodel.wem.xhtml.PrintListener(printer);

                    org.wikimodel.wem.mediawiki.MediaWikiParser mep =
                        new org.wikimodel.wem.mediawiki.MediaWikiParser();
                    mep.parse(rdr, listener);

                    return(printer.Text);
                }
                finally
                {
                    // Previously the reader was only closed on the success path
                    // and leaked when parse() threw; close it unconditionally.
                    rdr.close();
                }
            } // End Using printer
        }
Пример #3
0
        /// <summary>
        /// Compiles the editor's current text through JPortal and stores the
        /// resulting database, skipping the run when the text is unchanged
        /// since the last successful compile (hash-code comparison).
        /// </summary>
        /// <returns>
        /// false when the cached result was reused (unchanged text);
        /// otherwise whether a database is available after the run.
        /// </returns>
        public bool Run()
        {
            string code = editor.Document.TextContent;
            // check if we already have compiled this code before
            int hashCode = code.GetHashCode();

            if (database != null && this.hashCode == hashCode)
            {
                return(false);
            }
            this.hashCode = hashCode;
            // Java-interop reader/writer pair: JPortal reads the source from
            // `reader` and writes its log through `log` into `writer`.
            java.io.StringReader reader = new java.io.StringReader(code);
            try
            {
                java.io.StringWriter writer = new java.io.StringWriter();
                try
                {
                    java.io.PrintWriter log = new java.io.PrintWriter(writer);
                    try
                    {
                        Database db;
                        db = JPortal.run(name, reader, log);
                        // Only overwrite the cached database on success; a null
                        // result keeps the previous one.
                        if (db != null)
                        {
                            database = db;
                        }
                    }
                    finally
                    {
                        // Flush before closing so buffered log output reaches
                        // `writer` before it is read below.
                        log.flush();
                        log.close();
                    }
                    // Forward each non-empty log line to the form. NOTE(review):
                    // `form.LogInfo` is assigned per line, so it presumably
                    // appends rather than replaces — confirm against the form.
                    string   result = writer.ToString();
                    char[]   sep    = { '\n' };
                    string[] lines  = result.Split(sep);
                    for (int i = 0; i < lines.Length; i++)
                    {
                        string line = lines[i].Trim();
                        if (line.Length == 0)
                        {
                            continue;
                        }
                        form.LogInfo = line;
                    }
                }
                finally
                {
                    writer.close();
                }
            }
            finally
            {
                reader.close();
            }
            return(database != null);
        }
Пример #4
0
        /// <summary>
        /// Parses a sentence with the Stanford English PCFG parser and returns the
        /// surface forms of the leaves whose parent label is a noun tag
        /// (per the <c>nounLabels</c> set).
        /// </summary>
        /// <param name="sentence">The sentence to parse.</param>
        /// <returns>The noun tokens of the sentence, in tree order.</returns>
        public static List <string> ExtractNounsFromSemantics(string sentence)
        {
            // NOTE(review): this walks three directory levels up from the assembly
            // location to find the models folder — fragile if the build layout
            // changes; confirm against the deployment structure.
            string assemblyPath    = Assembly.GetExecutingAssembly().GetName().CodeBase;
            string projectPath     = Directory.GetParent(new Uri(Path.GetDirectoryName(Path.GetDirectoryName(Path.GetDirectoryName(assemblyPath)))).LocalPath).FullName;
            string modelsDirectory = Path.GetFullPath(projectPath + @"\Parser\CoreNLP-3.9.1-Models\edu\stanford\nlp\models");

            // Loading english PCFG parser from file. NOTE(review): the model is
            // reloaded on every call — consider caching it if this is hot.
            LexicalizedParser lp = LexicalizedParser.loadModel(modelsDirectory + @"\lexparser\englishPCFG.ser.gz");

            // Tokenize with an explicit PTB tokenizer.
            var tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
            var sent2Reader      = new java.io.StringReader(sentence);
            edu.stanford.nlp.util.@object rawWordsPlaceholder; // (unused) — see tokenization below
            java.util.List rawWords;
            try
            {
                rawWords = tokenizerFactory.getTokenizer(sent2Reader).tokenize();
            }
            finally
            {
                // Previously the reader leaked if tokenization threw;
                // close it unconditionally.
                sent2Reader.close();
            }

            var tree = lp.apply(rawWords);

            // Keep leaves whose parent (POS tag) is in nounLabels.
            return(tree.toArray().Cast <LabeledScoredTreeNode>().Where(n => n.isLeaf() && nounLabels.Contains(n.parent(tree).label().value())).Select(n => n.label().ToString()).ToList());
        }