/// <summary>
/// Configures the shared remote CoreNLP client with the full annotation
/// stack (POS tagging, lemmatization, NER, parsing, coreference) and the
/// neural coreference algorithm, using host/port/thread-count settings.
/// </summary>
static Client()
{
    var clientProps = new java.util.Properties();
    clientProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    clientProps.setProperty("coref.algorithm", "neural");
    ServerPipeline = new StanfordCoreNLPClient(
        clientProps,
        Properties.Settings.Default.CoreNLP_ServerHost,
        Properties.Settings.Default.CoreNLP_ServerPort,
        ClientThreads);
}
/// <summary>
/// Builds the shared English pipeline (tokenize, ssplit, pos).
/// Temporarily switches the working directory to ./english so CoreNLP can
/// locate its model files, then restores the original directory.
/// </summary>
static EnglishPOSExtractor()
{
    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos");
    props.setProperty("ner.useSUTime", "0");
    var curDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory($"{curDir}/english");
    try
    {
        pipeline = new StanfordCoreNLP(props);
    }
    finally
    {
        // Restore the working directory even when model loading throws,
        // so a failed initialization doesn't poison later relative paths.
        Directory.SetCurrentDirectory(curDir);
    }
}
/// <summary>
/// Initialises CoreNLP pipeline to tokenise the input into sentences
/// </summary>
/// <remarks>
/// NOTE(review): this method configures annotator properties and briefly
/// switches the working directory, but never constructs a StanfordCoreNLP
/// instance from <c>props</c> — as written it has no lasting effect.
/// Confirm whether the pipeline construction was lost in an edit.
/// </remarks>
private void InitPipeline()
{
    // Folder holding the extracted stanford-corenlp-3.7.0 model files.
    var jarRoot = @"C:\Users\Luca\Documents\University\General\UG3\SCC300\SCC300cs\SCC300cs\stanford-corenlp-3.7.0-models";
    // Annotation pipeline configuration
    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit");
    props.setProperty("ner.useSUTime", "0");
    var CurDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot); //change working dir to locate models
    Directory.SetCurrentDirectory(CurDir);
}
// Sample from https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
static void Main()
{
    // Configure a client pipeline with POS tagging, lemmatization, NER,
    // parsing, and coreference resolution against a local CoreNLP server.
    var serverProps = new java.util.Properties();
    serverProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    var client = new StanfordCoreNLPClient(serverProps, "http://localhost", 9000, 2);

    // Annotate a short sample text.
    var text = "Kosgi Santosh sent an email to Stanford University.";
    var document = new Annotation(text);
    client.annotate(document);

    // Walk the sentences and print one word/POS/NER triple per token.
    var sentences = document.get(sentencesAnnotationClass) as java.util.AbstractList;
    foreach (CoreMap sentence in sentences)
    {
        var tokens = sentence.get(tokensAnnotationClass) as java.util.AbstractList;
        Console.WriteLine("----");
        foreach (CoreLabel token in tokens)
        {
            var word = token.get(textAnnotationClass);
            var pos = token.get(partOfSpeechAnnotationClass);
            var ner = token.get(namedEntityTagAnnotationClass);
            Console.WriteLine("{0}\t[pos={1};\tner={2};", word, pos, ner);
        }
    }
}
/// <summary>
/// Creates the annotator and configures the CoreNLP annotator chain:
/// tokenize, ssplit, pos, lemma, ner, depparse, openie.
/// Reference: https://stanfordnlp.github.io/CoreNLP/annotators.html
/// (each stage is documented at .../CoreNLP/&lt;stage&gt;.html; "cleanxml"
/// was deliberately left out of the chain).
/// </summary>
/// <param name="net">Word-embedding model used by this annotator.</param>
public ScenarioAnnotator(IEmbeddingNetwork net)
{
    _net = net;
    _props = new java.util.Properties();
    _props.setProperty(
        "annotators",
        "tokenize, ssplit, pos, lemma, ner, depparse, openie");
}
/// <summary>
/// Creates the CoreNLP pipeline (tokenize, ssplit, pos, lemma), loading the
/// model files from <paramref name="jarRoot"/>.
/// </summary>
/// <param name="jarRoot">Folder containing the extracted CoreNLP model files.</param>
public void StanfordCore(string jarRoot = @"..\..\models")
{
    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
    // props.setProperty("ner.useSUTime", "0");
    var curDir = Environment.CurrentDirectory;
    // CoreNLP resolves model paths relative to the working directory.
    Directory.SetCurrentDirectory(jarRoot);
    try
    {
        pipeline = new StanfordCoreNLP(props);
    }
    finally
    {
        // Always restore the working directory, even when model loading fails.
        Directory.SetCurrentDirectory(curDir);
    }
}
/// <summary>
/// Builds the sentiment pipeline (tokenize, ssplit, parse, sentiment),
/// loading models from <c>_modelsFolder</c>.
/// </summary>
public StanfordCoreNLP()
{
    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
    var dictBackup = Environment.CurrentDirectory;
    // CoreNLP resolves model paths relative to the working directory.
    System.IO.Directory.SetCurrentDirectory(_modelsFolder);
    try
    {
        _nlp = new edu.stanford.nlp.pipeline.StanfordCoreNLP(props);
    }
    finally
    {
        // Always restore the working directory, even when model loading fails.
        System.IO.Directory.SetCurrentDirectory(dictBackup);
    }
}
/// <summary inherit="yes"/>
public override JProperties GetOutputProperties()
{
    // Start from the default properties (when present), then overlay every
    // explicitly-set property on top.
    JProperties merged = defaultOutputProperties == null
        ? new JProperties()
        : new JProperties(defaultOutputProperties);
    for (java.util.Enumeration keys = props.keys(); keys.hasMoreElements();)
    {
        String key = (String)keys.nextElement();
        merged.setProperty(key, (String)(props.get(key)));
    }
    return merged;
}
public void Pos()
{
    var props = new java.util.Properties();
    props.setProperty("ner.useSUTime", "0");

    var sent = new Sentence("Lucy is in the sky with diamonds.");

    // "Lucy" should be recognised as a PERSON entity...
    Assert.AreEqual("PERSON", sent.nerTags(props).get(0));
    // ...and its first token tagged as a proper noun.
    Assert.AreEqual("NNP", sent.posTag(0));
}
/// <summary>
/// Lazily builds the tokenize/ssplit pipeline. Safe to call repeatedly;
/// returns true once the pipeline has been constructed successfully.
/// </summary>
/// <returns>Whether the pipeline is ready for use.</returns>
public bool Initialize()
{
    if (this.Initialized)
    {
        return(true);
    }
    var props = new java.util.Properties();
    // we tokenize and sentece split
    props.setProperty("annotators", "tokenize, ssplit");
    // BUG FIX: java.util.Properties.setProperty REPLACES the previous value
    // for a key, so setting "tokenize.options" four separate times silently
    // kept only the last option. All options must be one comma-separated value:
    //  * whitespace=false — don't separate words only when whitespace is
    //    encountered, e.g. ***THIS IS... = *** + THIS + IS...
    //  * normalizeOtherBrackets=false / normalizeParentheses=false — totally
    //    ignores parentheses and brackets - we don't care for them
    //    NOTE (twolf): this doesn't really work - so we take care of it in
    //    |QualifiedWords|
    //  * splitHyphenated=true — version 3.7.0 of CoreNLP supports splitting
    //    hyphenated words yet has a whitelist of hyphenated words - so we let
    //    it do the work for us and refrain from really long words like:
    //    pg345.txt: two-pages-to-the-week-with-Sunday-squeezed-in-a-corner
    props.setProperty(
        "tokenize.options",
        "whitespace=false,normalizeOtherBrackets=false,normalizeParentheses=false,splitHyphenated=true");
    // two or more newlines should be treated as a sentece break
    // this is especially important for tables of contents
    props.setProperty(
        StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY, "two");
    // ignore pure-punctuation tokens when splitting sentences
    props.setProperty("ssplit.tokenPatternsToDiscard", "\\p{Punct}");
    try
    {
        _pipeLine = new StanfordCoreNLP(props);
    }
    catch (Exception)
    {
        // Best effort: a failed model load leaves the pipeline null,
        // so |Initialized| stays false.
        _pipeLine = null;
    }
    return(this.Initialized);
}
/// <summary>
/// Output an XML representation of the compiled code of the stylesheet, for purposes of
/// diagnostics and instrumentation
/// </summary>
/// <param name="destination">The destination for the diagnostic output</param>
public void Explain(XmlDestination destination)
{
    JConfiguration config = pss.getConfiguration();
    JResult result = destination.GetResult(config.makePipelineConfiguration());

    // Serialize the explanation as indented XML (2-space indent).
    JProperties outputProps = new JProperties();
    outputProps.setProperty("indent", "yes");
    outputProps.setProperty("{http://saxon.sf.net/}indent-spaces", "2");

    JReceiver receiver = config.getSerializerFactory().getReceiver(
        result, config.makePipelineConfiguration(), outputProps);
    pss.explain(new JExpressionPresenter(config, receiver));
}
/// <summary>Set a serialization property</summary>
/// <remarks>In the case of XSLT, properties set within the serializer override
/// any properties set in <c>xsl:output</c> declarations in the stylesheet.
/// Similarly, with XQuery, they override any properties set in the Query
/// prolog using <c>declare option saxon:output</c>.</remarks>
/// <example>
/// <code>
/// Serializer qout = new Serializer();
/// qout.SetOutputProperty(Serializer.METHOD, "xml");
/// qout.SetOutputProperty(Serializer.INDENT, "yes");
/// qout.SetOutputProperty(Serializer.SAXON_INDENT_SPACES, "1");
/// </code>
/// </example>
/// <param name="name">The name of the serialization property to be set</param>
/// <param name="value">The value to be set for the serialization property. May be null
/// to unset the property (that is, to set it back to the default value).</param>
public void SetOutputProperty(QName name, String value)
    => props.setProperty(name.ClarkName, value);
/// <summary>
/// Builds the CoreNLP configuration: model/data file locations rooted at
/// |localModelPath|, the sentiment annotator chain, and SUTime rules.
/// </summary>
/// <returns>A fully-populated property set for pipeline construction.</returns>
private java.util.Properties InitProperties()
{
    var properties = new java.util.Properties();

    // Model and data files living under |localModelPath|; each value is the
    // path relative to that root.
    var localFiles = new System.Collections.Generic.Dictionary<string, string>
    {
        ["parse.model"] = "edu/stanford/nlp/models/srparser/englishSR.ser.gz",
        ["sentiment.model"] = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz",
        ["pos.model"] = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger",
        ["ner.model"] = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz",
        ["dcoref.demonym"] = "edu/stanford/nlp/models/dcoref/demonyms.txt",
        ["dcoref.states"] = "edu/stanford/nlp/models/dcoref/state-abbreviations.txt",
        ["dcoref.animate"] = "edu/stanford/nlp/models/dcoref/animate.unigrams.txt",
        ["dcoref.inanimate"] = "edu/stanford/nlp/models/dcoref/inanimate.unigrams.txt",
        ["dcoref.big.gender.number"] = "edu/stanford/nlp/models/dcoref/gender.data.gz",
        ["dcoref.countries"] = "edu/stanford/nlp/models/dcoref/countries",
        ["dcoref.states.provinces"] = "edu/stanford/nlp/models/dcoref/statesandprovinces",
        ["dcoref.singleton.model"] = "edu/stanford/nlp/models/dcoref/singleton.predictor.ser",
    };
    foreach (var entry in localFiles)
    {
        properties.setProperty(entry.Key, localModelPath + entry.Value);
    }

    // Pipeline configuration.
    properties.setProperty("annotators", "tokenize, ssplit, pos, parse, lemma, ner, sentiment");
    properties.setProperty("tokenize.language", "en");
    properties.setProperty("ner.useSUTime", "0");
    properties.setProperty("sutime.binders", "0");
    properties.setProperty(
        "sutime.rules",
        localModelPath + "edu/stanford/nlp/models/sutime/defs.sutime.txt, " +
        localModelPath + "edu/stanford/nlp/models/sutime/english.sutime.txt");

    return properties;
}
/// <summary>
/// Builds the shared Chinese pipeline: custom word segmenter, sentence
/// splitting, POS tagging, NER and parsing, all backed by the Chinese
/// model files.
/// </summary>
static ChinesePOSExtractor()
{
    var chineseProps = new java.util.Properties();
    chineseProps.setProperty("annotators", "segment, ssplit, pos");

    // Word segmentation (Chinese text has no whitespace word boundaries).
    chineseProps.setProperty("customAnnotatorClass.segment", "edu.stanford.nlp.pipeline.ChineseSegmenterAnnotator");
    chineseProps.setProperty("segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz");
    chineseProps.setProperty("segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese");
    chineseProps.setProperty("segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz");
    chineseProps.setProperty("segment.sighanPostProcessing", "true");

    // Sentence split on both ASCII and full-width terminators.
    chineseProps.setProperty("ssplit.boundaryTokenRegex", "[.]|[!?]+|[。]|[!?]+");

    // Part-of-speech tagging.
    chineseProps.setProperty("pos.model", "edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger");

    // Named-entity recognition (numeric classifiers and SUTime are
    // English-only, so both stay off).
    chineseProps.setProperty("ner.model", "edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz");
    chineseProps.setProperty("ner.applyNumericClassifiers", "false");
    chineseProps.setProperty("ner.useSUTime", "false");

    // Parsing.
    chineseProps.setProperty("parse.model", "edu/stanford/nlp/models/lexparser/chineseFactored.ser.gz");

    pipeline = new StanfordCoreNLP(chineseProps);
}
/// <summary>Set a serialization property</summary>
/// <remarks>In the case of XSLT, properties set within the serializer override
/// any properties set in <c>xsl:output</c> declarations in the stylesheet.
/// Similarly, with XQuery, they override any properties set in the Query
/// prolog using <c>declare option saxon:output</c>.</remarks>
/// <example>
/// <code>
/// Serializer qout = new Serializer();
/// qout.SetOutputProperty(Serializer.METHOD, "xml");
/// qout.SetOutputProperty(Serializer.INDENT, "yes");
/// qout.SetOutputProperty(Serializer.SAXON_INDENT_SPACES, "1");
/// </code>
/// </example>
/// <param name="name">The name of the serialization property to be set</param>
/// <param name="value">The value to be set for the serialization property. May be null
/// to unset the property (that is, to set it back to the default value).</param>
public void SetOutputProperty(QName name, String value)
{
    // Record locally, then mirror onto the underlying s9api serializer.
    props.setProperty(name.ClarkName, value);
    serializer.setOutputProperty(
        net.sf.saxon.s9api.Serializer.getProperty(name.UnderlyingQName()),
        value);
}
/// <summary>Sets a key/value property on the underlying model.</summary>
/// <param name="key">Property name.</param>
/// <param name="value">Property value.</param>
public void SetProperty(string key, string value)
    => underlyingModel.setProperty(key, value);
/// <summary>
/// Runs the CoreNLP pipeline over <paramref name="sentence"/>, writes the
/// XML analysis to .\xml\Relay.xml, and assembles keys from it via ner().
/// </summary>
/// <param name="sentence">Text to analyse.</param>
/// <returns>Keys assembled from the recognised named entities.</returns>
private List<string> nlp(string sentence)
{
    List<string> return_key = new List<string>();
    string Relay_file = ".\\xml";
    string Relay_name = "Relay" + ".xml";
    string Relay_path = Relay_file + "\\" + Relay_name;

    // Path to the folder with models extracted from `stanford-corenlp-3.4-models.jar`
    var jarRoot = @"stanford-corenlp-3.5.2-models\\";

    // Annotation pipeline configuration
    var props = new java.util.Properties();
    props.setProperty("ner.useSUTime", "false");
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    props.setProperty("sutime.binders", "0");

    // We should change current directory, so StanfordCoreNLP could find all
    // the model files automatically. Restore it even if model loading throws,
    // so a failure doesn't poison later relative-path operations.
    var curDir = Environment.CurrentDirectory;
    System.IO.Directory.SetCurrentDirectory(jarRoot);
    StanfordCoreNLP pipeline;
    try
    {
        pipeline = new StanfordCoreNLP(props);
    }
    finally
    {
        System.IO.Directory.SetCurrentDirectory(curDir);
    }

    // Annotation
    var annotation = new Annotation(sentence);
    pipeline.annotate(annotation);

    // Dump the NLP analysis to Relay.xml; close the stream even when
    // xmlPrint throws so the file handle is never leaked.
    FileOutputStream os = new FileOutputStream(new File(Relay_file, Relay_name));
    try
    {
        pipeline.xmlPrint(annotation, os);
    }
    finally
    {
        os.close();
    }

    // Let ner() combine individual words into meaningful keys.
    foreach (string k in ner(Relay_path))
    {
        return_key.Add(k);
    }
    return return_key;
}