예제 #1
0
 /// <summary>
 /// Type initializer: creates the shared <c>ServerPipeline</c> client, configured for
 /// tokenization, sentence splitting, POS tagging, lemmatization, NER, parsing and
 /// coreference resolution.
 /// </summary>
 static Client()
 {
     var annotatorConfig = new java.util.Properties();

     // NOTE(review): "coref.algorithm" is set while the annotator list names "dcoref";
     // confirm the server applies this setting to the annotator actually requested.
     annotatorConfig.setProperty("coref.algorithm", "neural");
     annotatorConfig.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");

     // Host and port come from the application settings; the thread count from ClientThreads.
     var settings = Properties.Settings.Default;
     ServerPipeline = new StanfordCoreNLPClient(
         annotatorConfig,
         settings.CoreNLP_ServerHost,
         settings.CoreNLP_ServerPort,
         ClientThreads);
 }
예제 #2
0
        /// <summary>
        /// Type initializer: builds the shared <c>pipeline</c> with the tokenize, ssplit and
        /// pos annotators.
        /// </summary>
        /// <remarks>
        /// The process working directory is temporarily switched to the <c>english</c>
        /// sub-folder of the current directory while the pipeline is constructed, and is
        /// always switched back afterwards, even when construction throws.
        /// </remarks>
        static EnglishPOSExtractor()
        {
            var props = new java.util.Properties();

            props.setProperty("annotators", "tokenize, ssplit, pos");
            props.setProperty("ner.useSUTime", "0");

            var curDir = Environment.CurrentDirectory;

            Directory.SetCurrentDirectory($"{curDir}/english");
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                // The working directory is process-wide state; never leave it pointing
                // at the models folder if the pipeline fails to load.
                Directory.SetCurrentDirectory(curDir);
            }
        }
예제 #3
0
        /// <summary>
        /// Prepares the configuration for a CoreNLP pipeline that tokenises the input and
        /// splits it into sentences.
        /// </summary>
        /// <remarks>
        /// NOTE(review): in the code visible here no pipeline object is ever constructed and
        /// the configured properties are not used; the working directory is switched to the
        /// models folder and straight back. Confirm whether a construction step is missing.
        /// </remarks>
        private void InitPipeline()
        {
            // Machine-specific absolute path to the extracted CoreNLP models.
            var modelsDirectory = @"C:\Users\Luca\Documents\University\General\UG3\SCC300\SCC300cs\SCC300cs\stanford-corenlp-3.7.0-models";

            // Annotation pipeline configuration
            var pipelineConfig = new java.util.Properties();
            pipelineConfig.setProperty("annotators", "tokenize, ssplit");
            pipelineConfig.setProperty("ner.useSUTime", "0");

            var originalDirectory = Environment.CurrentDirectory;

            // Change the working directory to the models folder, then restore it.
            Directory.SetCurrentDirectory(modelsDirectory);
            Directory.SetCurrentDirectory(originalDirectory);
        }
예제 #4
0
        // Sample from https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
        //
        // Sends one hard-coded sentence to a CoreNLP server at http://localhost:9000 and
        // prints the text, part-of-speech tag and named-entity tag of every token.
        static void Main()
        {
            // Client configured for POS tagging, lemmatization, NER, parsing and coreference resolution.
            var annotatorConfig = new java.util.Properties();
            annotatorConfig.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");

            var client = new StanfordCoreNLPClient(annotatorConfig, "http://localhost", 9000, 2);

            // Wrap the input text in an empty Annotation and run all annotators on it.
            var input = "Kosgi Santosh sent an email to Stanford University.";
            var document = new Annotation(input);
            client.annotate(document);

            var sentenceList = document.get(sentencesAnnotationClass) as java.util.AbstractList;

            foreach (CoreMap sentence in sentenceList)
            {
                var tokenList = sentence.get(tokensAnnotationClass) as java.util.AbstractList;

                // Separator line between sentences.
                Console.WriteLine("----");

                foreach (CoreLabel token in tokenList)
                {
                    Console.WriteLine(
                        "{0}\t[pos={1};\tner={2};",
                        token.get(textAnnotationClass),
                        token.get(partOfSpeechAnnotationClass),
                        token.get(namedEntityTagAnnotationClass));
                }
            }
        }
예제 #5
0
 /// <summary>
 /// Creates an annotator that keeps the supplied embedding network and prepares the
 /// CoreNLP annotator configuration.
 /// </summary>
 /// <param name="net">The word-embedding network stored for later use.</param>
 public ScenarioAnnotator(IEmbeddingNetwork net)
 {
     // Word-embedding model
     _net = net;

     // Annotators in pipeline order.
     // Reference: https://stanfordnlp.github.io/CoreNLP/annotators.html
     // NOTE(review): the CoreNLP documentation lists "natlog" as a prerequisite of
     // "openie"; confirm that the pipeline built from these properties accepts this list.
     var annotatorNames = new[]
     {
         "tokenize",  // https://stanfordnlp.github.io/CoreNLP/tokenize.html
         // "cleanxml" is intentionally left out: https://stanfordnlp.github.io/CoreNLP/cleanxml.html
         "ssplit",    // sentence splitting: https://stanfordnlp.github.io/CoreNLP/ssplit.html
         "pos",       // part of speech: https://stanfordnlp.github.io/CoreNLP/pos.html
         "lemma",     // https://stanfordnlp.github.io/CoreNLP/lemma.html
         "ner",       // named entity recognition: https://stanfordnlp.github.io/CoreNLP/ner.html
         "depparse",  // dependency parsing: https://stanfordnlp.github.io/CoreNLP/parse.html
         "openie"     // Open Information Extraction: https://stanfordnlp.github.io/CoreNLP/openie.html
     };

     // Annotation properties
     _props = new java.util.Properties();
     _props.setProperty("annotators", string.Join(", ", annotatorNames));
 }
예제 #6
0
 /// <summary>
 /// Builds the CoreNLP <c>pipeline</c> with the tokenize, ssplit, pos and lemma annotators.
 /// </summary>
 /// <param name="jarRoot">
 /// Folder that becomes the working directory while the pipeline is constructed, so that
 /// model files can be located relative to it.
 /// </param>
 /// <remarks>
 /// The previous working directory is always restored, even when pipeline construction
 /// throws.
 /// </remarks>
 public void StanfordCore(string jarRoot = @"..\..\models")
 {
     var props = new java.util.Properties();
     props.setProperty("annotators", "tokenize, ssplit, pos, lemma");

     var curDir = Environment.CurrentDirectory;
     Directory.SetCurrentDirectory(jarRoot);
     try
     {
         pipeline = new StanfordCoreNLP(props);
     }
     finally
     {
         // The working directory is process-wide; restore it on every exit path.
         Directory.SetCurrentDirectory(curDir);
     }
 }
예제 #7
0
        /// <summary>
        /// Creates the wrapper and builds the underlying CoreNLP pipeline with the
        /// tokenize, ssplit, parse and sentiment annotators.
        /// </summary>
        /// <remarks>
        /// The working directory is switched to <c>_modelsFolder</c> while the pipeline is
        /// constructed and is always restored afterwards, even when construction throws.
        /// </remarks>
        public StanfordCoreNLP()
        {
            var props = new java.util.Properties();

            props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");

            var dictBackup = Environment.CurrentDirectory;

            System.IO.Directory.SetCurrentDirectory(_modelsFolder);
            try
            {
                _nlp = new edu.stanford.nlp.pipeline.StanfordCoreNLP(props);
            }
            finally
            {
                // The working directory is process-wide; restore it on every exit path.
                System.IO.Directory.SetCurrentDirectory(dictBackup);
            }
        }
예제 #8
0
        /// <summary inherit="yes"/>
        /// <remarks>
        /// Returns a new property set: it is seeded with <c>defaultOutputProperties</c> as
        /// defaults when those are available, and then receives a copy of every entry
        /// currently held in <c>props</c>.
        /// </remarks>

        public override JProperties GetOutputProperties()
        {
            JProperties merged;
            if (defaultOutputProperties != null)
            {
                merged = new JProperties(defaultOutputProperties);
            }
            else
            {
                merged = new JProperties();
            }

            // Copy each explicitly set property on top of the defaults.
            for (java.util.Enumeration keys = props.keys(); keys.hasMoreElements();)
            {
                string key = (string)keys.nextElement();
                merged.setProperty(key, (string)props.get(key));
            }

            return merged;
        }
예제 #9
0
        /// <summary>
        /// Checks that the first token of the sample sentence is tagged as a PERSON entity
        /// and as a proper noun (NNP).
        /// </summary>
        public void Pos()
        {
            var config = new java.util.Properties();
            config.setProperty("ner.useSUTime", "0");

            var sentence = new Sentence("Lucy is in the sky with diamonds.");

            // Named-entity tag of the first token.
            Assert.AreEqual("PERSON", sentence.nerTags(config).get(0));

            // Part-of-speech tag of the first token.
            Assert.AreEqual("NNP", sentence.posTag(0));
        }
        /// <summary>
        /// Creates the CoreNLP pipeline (tokenizer plus sentence splitter) unless
        /// <c>Initialized</c> already reports success.
        /// </summary>
        /// <returns>
        /// <c>true</c> if <c>Initialized</c> was already set on entry; otherwise the value of
        /// <c>Initialized</c> after the construction attempt.
        /// </returns>
        public bool Initialize()
        {
            // Nothing to do when an earlier call already succeeded.
            if (this.Initialized)
            {
                return true;
            }

            var config = new java.util.Properties();

            // Only tokenize and sentence-split.
            config.setProperty("annotators", "tokenize, ssplit");

            // NOTE(review): the four "tokenize.options" assignments below all write the same
            // key, so each one replaces the previous value and only "splitHyphenated=true"
            // is still present when the pipeline is built.

            // Intent: do not split words on whitespace only,
            // e.g. ***THIS IS... = *** + THIS + IS...
            config.setProperty("tokenize.options", "whitespace=false");

            // Intent: leave parentheses and brackets alone - we don't care for them.
            // NOTE (twolf): this doesn't really work - so we take care of it in
            // |QualifiedWords|
            config.setProperty("tokenize.options", "normalizeOtherBrackets=false");
            config.setProperty("tokenize.options", "normalizeParentheses=false");

            // Version 3.7.0 of CoreNLP supports splitting hyphenated words, yet keeps a
            // whitelist of hyphenated words - so we let it do the work for us and avoid
            // really long words such as:
            // pg345.txt: two-pages-to-the-week-with-Sunday-squeezed-in-a-corner
            config.setProperty("tokenize.options", "splitHyphenated=true");

            // Two or more newlines count as a sentence break; this is especially
            // important for tables of contents.
            config.setProperty(StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY, "two");

            // Tokens matching this pattern are discarded by the sentence splitter.
            config.setProperty("ssplit.tokenPatternsToDiscard", "\\p{Punct}");

            // Best effort: any construction failure leaves the pipeline unset.
            try
            {
                _pipeLine = new StanfordCoreNLP(config);
            }
            catch (Exception)
            {
                _pipeLine = null;
            }

            return this.Initialized;
        }
예제 #11
0
파일: Xslt.cs 프로젝트: nuxleus/saxonica
 /// <summary>
 /// Writes an XML representation of the compiled stylesheet code to the given destination,
 /// for diagnostics and instrumentation. The output is indented by two spaces.
 /// </summary>
 /// <param name="destination">The destination for the diagnostic output</param>
 
 public void Explain(XmlDestination destination) {
     // Serialization settings for the diagnostic output.
     JProperties serialization = new JProperties();
     serialization.setProperty("indent", "yes");
     serialization.setProperty("{http://saxon.sf.net/}indent-spaces", "2");

     JConfiguration config = pss.getConfiguration();
     JResult result = destination.GetResult(config.makePipelineConfiguration());

     // A second pipeline configuration is created for the receiver, as in the original flow.
     JReceiver receiver = config.getSerializerFactory().getReceiver(
         result, config.makePipelineConfiguration(), serialization);

     pss.explain(new JExpressionPresenter(config, receiver));
 }
예제 #12
0
        /// <summary>Set a serialization property</summary>
        /// <remarks>In the case of XSLT, properties set within the serializer override
        /// any properties set in <c>xsl:output</c> declarations in the stylesheet.
        /// Similarly, with XQuery, they override any properties set in the Query
        /// prolog using <c>declare option saxon:output</c>.</remarks>
        /// <example>
        ///   <code>
        ///     Serializer qout = new Serializer();
        ///     qout.SetOutputProperty(Serializer.METHOD, "xml");
        ///     qout.SetOutputProperty(Serializer.INDENT, "yes");
        ///     qout.SetOutputProperty(Serializer.SAXON_INDENT_SPACES, "1");
        ///   </code>
        /// </example>
        /// <param name="name">The name of the serialization property to be set</param>
        /// <param name="value">The value to be set for the serialization property. May be null
        /// to unset the property (that is, to set it back to the default value).</param>

        public void SetOutputProperty(QName name, String value)
        {
            if (value == null)
            {
                // A java.util.Properties table rejects null values, so "unset" is
                // implemented by removing the key.
                // NOTE(review): assumes props is a java.util.Properties - confirm.
                props.remove(name.ClarkName);
            }
            else
            {
                props.setProperty(name.ClarkName, value);
            }
        }
예제 #13
0
        /// <summary>
        /// Builds the CoreNLP property set: model and resource locations resolved against
        /// <c>localModelPath</c>, the annotator list, and the tokenizer/NER/SUTime switches.
        /// </summary>
        /// <returns>A newly created, fully populated property set.</returns>
        private java.util.Properties InitProperties()
        {
            var properties = new java.util.Properties();

            // Property name -> resource location; each location is prefixed with localModelPath.
            string[,] modelResources =
            {
                { "parse.model", "edu/stanford/nlp/models/srparser/englishSR.ser.gz" },
                { "sentiment.model", "edu/stanford/nlp/models/sentiment/sentiment.ser.gz" },
                { "pos.model", "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger" },
                { "ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz" },
                { "dcoref.demonym", "edu/stanford/nlp/models/dcoref/demonyms.txt" },
                { "dcoref.states", "edu/stanford/nlp/models/dcoref/state-abbreviations.txt" },
                { "dcoref.animate", "edu/stanford/nlp/models/dcoref/animate.unigrams.txt" },
                { "dcoref.inanimate", "edu/stanford/nlp/models/dcoref/inanimate.unigrams.txt" },
                { "dcoref.big.gender.number", "edu/stanford/nlp/models/dcoref/gender.data.gz" },
                { "dcoref.countries", "edu/stanford/nlp/models/dcoref/countries" },
                { "dcoref.states.provinces", "edu/stanford/nlp/models/dcoref/statesandprovinces" },
                { "dcoref.singleton.model", "edu/stanford/nlp/models/dcoref/singleton.predictor.ser" }
            };

            for (int row = 0; row < modelResources.GetLength(0); row++)
            {
                properties.setProperty(modelResources[row, 0], localModelPath + modelResources[row, 1]);
            }

            // Pipeline composition and language.
            properties.setProperty("annotators", "tokenize, ssplit, pos, parse, lemma, ner, sentiment");
            properties.setProperty("tokenize.language", "en");

            // SUTime switches and rule files (two comma-separated locations).
            properties.setProperty("ner.useSUTime", "0");
            properties.setProperty("sutime.binders", "0");
            properties.setProperty("sutime.rules", localModelPath + "edu/stanford/nlp/models/sutime/defs.sutime.txt, " + localModelPath + "edu/stanford/nlp/models/sutime/english.sutime.txt");

            return properties;
        }
예제 #14
0
        /// <summary>
        /// Type initializer: builds the shared <c>pipeline</c> for Chinese text using a custom
        /// segmenter annotator, sentence splitting and POS tagging.
        /// </summary>
        /// <remarks>
        /// NER and parser models are configured as well, although the annotator list
        /// requested here contains only segment, ssplit and pos.
        /// </remarks>
        static ChinesePOSExtractor()
        {
            var config = new java.util.Properties();

            // Pipeline composition; "segment" is bound to the Chinese segmenter annotator class.
            config.setProperty("annotators", "segment, ssplit, pos");
            config.setProperty("customAnnotatorClass.segment", "edu.stanford.nlp.pipeline.ChineseSegmenterAnnotator");

            // Word segmentation
            config.setProperty("segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz");
            config.setProperty("segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese");
            config.setProperty("segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz");
            config.setProperty("segment.sighanPostProcessing", "true");

            // Sentence splitting: ASCII and full-width sentence terminators
            config.setProperty("ssplit.boundaryTokenRegex", "[.]|[!?]+|[。]|[!?]+");

            // Part-of-speech tagging
            config.setProperty("pos.model", "edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger");

            // Named-entity recognition
            config.setProperty("ner.model", "edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz");
            config.setProperty("ner.applyNumericClassifiers", "false");
            config.setProperty("ner.useSUTime", "false");

            // Parsing
            config.setProperty("parse.model", "edu/stanford/nlp/models/lexparser/chineseFactored.ser.gz");

            pipeline = new StanfordCoreNLP(config);
        }
예제 #15
0
        /// <summary>Set a serialization property</summary>
        /// <remarks>In the case of XSLT, properties set within the serializer override
        /// any properties set in <c>xsl:output</c> declarations in the stylesheet.
        /// Similarly, with XQuery, they override any properties set in the Query
        /// prolog using <c>declare option saxon:output</c>.</remarks>
        /// <example>
        ///   <code>
        ///     Serializer qout = new Serializer();
        ///     qout.SetOutputProperty(Serializer.METHOD, "xml");
        ///     qout.SetOutputProperty(Serializer.INDENT, "yes");
        ///     qout.SetOutputProperty(Serializer.SAXON_INDENT_SPACES, "1");
        ///   </code>
        /// </example>
        /// <param name="name">The name of the serialization property to be set</param>
        /// <param name="value">The value to be set for the serialization property. May be null
        /// to unset the property (that is, to set it back to the default value).</param>

        public void SetOutputProperty(QName name, String value)
        {
            if (value == null)
            {
                // A java.util.Properties table rejects null values, so "unset" is
                // implemented by removing the key.
                // NOTE(review): assumes props is a java.util.Properties - confirm.
                props.remove(name.ClarkName);
            }
            else
            {
                props.setProperty(name.ClarkName, value);
            }

            // The value (including null) is forwarded unchanged to the underlying serializer.
            // NOTE(review): confirm the s9api serializer treats null as "remove the property".
            serializer.setOutputProperty(net.sf.saxon.s9api.Serializer.getProperty(name.UnderlyingQName()), value);
        }
예제 #16
0
 /// <summary>
 /// Sets a property by forwarding the key/value pair to <c>underlyingModel</c>.
 /// </summary>
 /// <param name="key">Name of the property to set.</param>
 /// <param name="value">Value to assign to the property.</param>
 public void SetProperty(string key, string value)
 {
     // Pure delegation: no validation or transformation happens here.
     underlyingModel.setProperty(key, value);
 }
        // Analyses the given text with CoreNLP and returns the extracted keys.
        //
        // The annotation result is written to .\xml\Relay.xml, which is then handed to
        // ner(...) to assemble the keys. The working directory is switched to the models
        // folder only while the pipeline is constructed and is always restored; the output
        // stream is always closed, even when annotation output fails.
        //
        // NOTE(review): a new pipeline is constructed on every call.
        private List<string> nlp(string sentence)
        {
            List<string> return_key = new List<string>();
            string Relay_file = ".\\xml";
            string Relay_name = "Relay" + ".xml";
            string Relay_path = Relay_file + "\\" + Relay_name;

            // Path to the folder with models extracted from the CoreNLP models jar.
            // NOTE(review): in a verbatim string "\\" is two literal backslashes - confirm intended.
            var jarRoot = @"stanford-corenlp-3.5.2-models\\";

            // Annotation pipeline configuration
            var props = new java.util.Properties();
            props.setProperty("ner.useSUTime", "false");
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            props.setProperty("sutime.binders", "0");

            // Change the current directory so StanfordCoreNLP can find the model files
            // automatically; restore it on every exit path because it is process-wide state.
            var curDir = Environment.CurrentDirectory;
            StanfordCoreNLP pipeline;
            System.IO.Directory.SetCurrentDirectory(jarRoot);
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                System.IO.Directory.SetCurrentDirectory(curDir);
            }

            // Annotation
            var annotation = new Annotation(sentence);
            pipeline.annotate(annotation);

            // Write the NLP analysis result to Relay.xml, closing the stream even on failure.
            FileOutputStream os = new FileOutputStream(new File(Relay_file, Relay_name));
            try
            {
                pipeline.xmlPrint(annotation, os);
            }
            finally
            {
                os.close();
            }

            // Let ner(...) combine single words into meaningful keys and collect them.
            foreach (string k in ner(Relay_path))
            {
                return_key.Add(k);
            }

            return return_key;
        }