private setProperty ( global par0, global par1 ) : global::java.lang.Object | ||
par0 | global | |
par1 | global | |
Result | global::java.lang.Object |
/// <summary>
/// Setup extended tagger that includes POS, lemma and entity analysis.
/// </summary>
private void SetupExtendedTagger()
{
    PerformanceTester.StartMET("NLP");

    // Get path to Stanford NLP models
    var jarRoot = Path.Combine(Utility.GetResourcesFolder(), @"stanford-corenlp-3.9.2-models");

    // Turn off logging
    RedwoodConfiguration.current().clear().apply();

    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");

    // Makes Named Entity Recognition work in the library
    props.setProperty("ner.useSUTime", "0");
    props.put("ner.applyFineGrained", "0");
    props.put("ner.fine.regexner.mapping", jarRoot + @"\edu\stanford\nlp\models\kbp\english\");

    // Switch to the models folder so StanfordCoreNLP can locate its model
    // files, then restore the working directory afterwards.
    // Fix: `curDir` was previously captured but never used (the cwd change
    // leaked), and an unused `modelsDirectory` local has been removed.
    var curDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot);

    // Load Stanford NLP
    Tagger = new StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(curDir);

    PerformanceTester.StopMET("NLP");
}
static void Main()
{
    // Simple demo of calling the Chinese Word Segmenter programmatically.
    // It assumes a UTF-8 input file and runs correctly from the distribution
    // home directory; elsewhere, the dictionary/normalization properties
    // must be configured explicitly. (Original demo by Christopher Manning.)
    var segmenterData = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-segmenter-2015-12-09\data";
    var sampleData = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-segmenter-2015-12-09\test.simp.utf8";

    // `test.simp.utf8` contains the following text:
    // 面对新世纪,世界各国人民的共同愿望是:继续发展人类以往创造的一切文明成果,克服20世纪困扰着人类的战争和贫
    // 困问题,推进和平与发展的崇高事业,创造一个美好的世界。

    // Segmenter configuration. `serDictionary` must be set because
    // CTBSegDocumentIteratorFactory accesses it.
    var config = new Properties();
    config.setProperty("sighanCorporaDict", segmenterData);
    config.setProperty("serDictionary", segmenterData + @"\dict-chris6.ser.gz");
    config.setProperty("testFile", sampleData);
    config.setProperty("inputEncoding", "UTF-8");
    config.setProperty("sighanPostProcessing", "true");

    // Load the CRF classifier and segment the sample file.
    var segmenter = new CRFClassifier(config);
    segmenter.loadClassifierNoExceptions(segmenterData + @"\ctb.gz", config);
    segmenter.classifyAndWriteAnswers(sampleData);
}
/// <summary>
/// Creates the lemmatization pipeline from the extracted CoreNLP models.
/// </summary>
/// <param name="jarRootPath">Path to the folder with models extracted from 'stanford-corenlp-3.5.2-models.jar'</param>
/// <exception cref="DirectoryNotFoundException">Thrown when the model folder does not exist.</exception>
public StanfordLemmatizer(string jarRootPath)
{
    if (!Directory.Exists(jarRootPath))
    {
        string fullPath = Path.GetFullPath(jarRootPath);
        throw new DirectoryNotFoundException(
            "Folder(s) extracted from 'stanford-corenlp-3.5.2-models.jar' was not found in path: . " +
            "-->" + fullPath + "<--. " +
            "Please make sure correct path is listed in .config file.");
    }

    // Annotators required for lemmatization only.
    var pipelineProps = new java.util.Properties();
    pipelineProps.setProperty("annotators", "tokenize, ssplit, pos, lemma");
    pipelineProps.setProperty("ner.useSUTime", "0");

    // StanfordCoreNLP resolves model files relative to the current
    // directory, so switch there while loading and restore afterwards.
    string originalDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRootPath);
    _pipeLine = new StanfordCoreNLP(pipelineProps);
    Directory.SetCurrentDirectory(originalDir);

    // Annotation keys reused by the lemmatization calls.
    _sentencesAnnotation = new CoreAnnotations.SentencesAnnotation();
    _tokensAnnotation = new CoreAnnotations.TokensAnnotation();
    _lemmaAnnotation = new CoreAnnotations.LemmaAnnotation();
}
public void StanfordCoreNlpDemoThatChangeCurrentDirectory()
{
    const string Text = "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.";

    // Pipeline configuration.
    var props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    props.setProperty("sutime.binders", "0");

    // StanfordCoreNLP resolves its model files relative to the current
    // directory, so switch to the jar root while constructing the pipeline
    // and switch back immediately afterwards.
    var previousDirectory = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(Config.JarRoot);
    var pipeline = new edu.stanford.nlp.pipeline.StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(previousDirectory);

    // Run the annotators over the sample text.
    var annotation = new Annotation(Text);
    pipeline.annotate(annotation);

    // Pretty-print the full annotation to the console.
    using (var stream = new ByteArrayOutputStream())
    {
        pipeline.prettyPrint(annotation, new PrintWriter(stream));
        Console.WriteLine(stream.toString());
    }

    this.CustomAnnotationPrint(annotation);
}
static void Main(string[] args)
{
    // Broker address and benchmark parameters.
    var host = "192.168.33.12:9092";
    var topic = "test2";
    var count = 50000000;
    var size = 100;

    // Producer configuration.
    // Fix: use the `host` local instead of repeating the broker address
    // literal — `host` was previously declared but never used.
    var prop = new Properties();
    prop.setProperty("bootstrap.servers", host);
    prop.setProperty("acks", "1");
    prop.setProperty("buffer.memory", "67108864");
    prop.setProperty("batch.size", "8196");

    var producer = new KafkaProducer(prop, new ByteArraySerializer(), new ByteArraySerializer());

    // Fixed payload of `size` 'a' bytes, reused for every record.
    var payload = new byte[size];
    for (int i = 0; i < size; i++)
        payload[i] = (byte)'a';
    var record = new ProducerRecord(topic, payload);

    // Stats reporter: prints progress every 5000 ms.
    var stats = new Stats(count, 5000, Console.WriteLine);

    for (int i = 0; i < count; i++)
    {
        //var payload = Encoding.UTF8.GetBytes(i.ToString());
        //var record = new ProducerRecord(topic, payload);
        var sendStart = DateTimeExtensions.CurrentTimeMillis();
        var cb = new StatsCallback { Action = stats.NextCompletion(sendStart, payload.Length, stats) };
        producer.send(record, cb);
    }

    producer.close();
    stats.PrintTotal();
}
private void CoreNLPForm_Load(object sender, EventArgs e)
{
    // Populate the output-format selector and default to XML.
    foreach (var format in new[] { "XML", "Json", "Conll", "Pretty" })
    {
        OutputType.Items.Add(format);
    }
    OutputType.Text = "XML";
    OutputText.ScrollBars = ScrollBars.Both;
    InputText.Text = "";

    // Show a "please wait" window and hide this form while the (slow)
    // pipeline loads.
    PWF = new PleaseWaitForm();
    PWF.Show();
    this.Visible = false;

    // ---- Stanford CoreNLP setup ----
    // Models live next to the executable; the pipeline resolves them
    // relative to the current directory, which is restored afterwards.
    jarRoot = Path.GetDirectoryName(Process.GetCurrentProcess().MainModule.FileName) + @"\stanford-corenlp-full-2016-10-31";
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
    props.setProperty("ner.useSUTime", "0");
    curDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot);
    pipeline = new StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(curDir);
    // ---- end CoreNLP setup ----

    this.Visible = true;
    PWF.Close();
}
static void Main()
{
    // Folder with models extracted from `stanford-corenlp-3.7.0-models.jar`.
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2016-10-31\models";

    // Sample input.
    var text = "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.";

    // Annotation pipeline configuration.
    var props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner,dcoref");
    props.setProperty("ner.useSUTime", "0");

    // Switch into the models folder while the pipeline loads so it can find
    // every model file automatically, then switch back.
    var previousDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot);
    var pipeline = new StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(previousDir);

    // Annotate the sample text.
    var annotation = new Annotation(text);
    pipeline.annotate(annotation);

    // Pretty-print the result to the console.
    using (var stream = new ByteArrayOutputStream())
    {
        pipeline.prettyPrint(annotation, new PrintWriter(stream));
        Console.WriteLine(stream.toString());
        stream.close();
    }
}
/// <summary>
/// Builds the annotation pipeline configuration. You can easily add more
/// annotators here.
/// </summary>
/// <returns>Configured <c>Properties</c> for StanfordCoreNLP.</returns>
private Properties GetPipelineProperties()
{
    var pipelineProps = new Properties();
    // Full analysis chain through entity mentions and sentiment.
    pipelineProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner, entitymentions, sentiment");
    // Turns off NER's SUTime component.
    pipelineProps.setProperty("ner.useSUTime", "0");
    return pipelineProps;
}
public void InitializeNLP()
{
    // Capture the caller's working directory BEFORE switching to the model
    // folder. Fix: the previous code captured it AFTER
    // Directory.SetCurrentDirectory(jarRoot), so the "restore" below set the
    // directory back to jarRoot instead of the original location.
    string curDir = Environment.CurrentDirectory;

    // We should change current directory, so StanfordCoreNLP could find all
    // the model files automatically.
    Directory.SetCurrentDirectory(jarRoot);

    // Annotation pipeline configuration.
    java.util.Properties props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
    props.setProperty("ner.useSUTime", "0");

    pipeline = new StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(curDir);
}
/// <summary>
/// Builds a CoreNLP pipeline with explicit model paths and annotates the
/// instance's <c>text</c>.
/// </summary>
/// <returns>The annotated document.</returns>
private Annotation PrepareAnnotation()
{
    var props = new Properties();
    // Explicit model locations. Consistency fix: use setProperty throughout
    // instead of mixing java.util.Properties.put and setProperty — both
    // store string values in the same table, setProperty is the string-typed
    // form used elsewhere in this file.
    props.setProperty("pos.model", modelsDir + "pos-tagger/english-left3words/english-left3words-distsim.tagger");
    props.setProperty("ner.model", modelsDir + "ner/english.conll.4class.distsim.crf.ser.gz");
    props.setProperty("parse.model", modelsDir + "lexparser/englishPCFG.ser.gz");
    props.setProperty("sutime.rules", modelsDir + "sutime/defs.sutime.txt, " + modelsDir + "sutime/english.sutime.txt");
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
    props.setProperty("sutime.binders", "0");
    props.setProperty("ner.useSUTime", "0");

    var pipeline = new StanfordCoreNLP(props);
    var annotatedText = new Annotation(text);
    pipeline.annotate(annotatedText);
    return annotatedText;
}
private static void Main()
{
    // Models extracted from `stanford-corenlp-3.6.0-models.jar`.
    var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2015-12-09\models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Build the annotation pipeline by hand: tokenizer, sentence splitter,
    // POS tagger and the SUTime temporal annotator.
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    var tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

    // SUTime rule files, comma-separated.
    var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                      + modelsDirectory + @"\sutime\english.sutime.txt";
    var sutimeProps = new Properties();
    sutimeProps.setProperty("sutime.rules", sutimeRules);
    sutimeProps.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", sutimeProps));

    // Sample text, anchored to a fixed document date.
    var text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    pipeline.annotate(annotation);

    Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));

    // Print each recognized time expression with its token span.
    var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
    foreach (CoreMap cm in timexAnnsAll)
    {
        var tokens = cm.get(new CoreAnnotations.TokensAnnotation().getClass()) as List;
        var first = tokens.get(0);
        var last = tokens.get(tokens.size() - 1);
        var time = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;
        Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}", cm, first, last, time.getTemporal());
    }
}
public StanfordLemmatizer()
{
    // Folder with models extracted from `stanford-corenlp-3.6.0-models.jar`.
    var jarRoot = @"C:\Work\NLP\Stanford\stanford-corenlp-full-2015-12-09\stanford-corenlp-3.6.0-models";

    // Unique per-instance separator token (GUID); consumed elsewhere in this class.
    _separator = Guid.NewGuid().ToString();

    // Annotation pipeline configuration (dcoref intentionally excluded).
    var props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner");
    props.setProperty("ner.useSUTime", "0");

    // Switch into the models folder so StanfordCoreNLP can find all the
    // model files automatically, then restore the working directory.
    var previousDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot);
    _pipeline = new StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(previousDir);
}
public void Initialize()
{
    var nlpProps = new java.util.Properties();
    nlpProps.setProperty("annotators", "tokenize, ssplit, parse, sentiment");

    // If we're using an external sentence-segmentation strategy, tell
    // CoreNLP to treat each input as exactly one sentence.
    // https://stackoverflow.com/a/28017131
    if (!useBuiltInSentenceSplitter)
    {
        nlpProps.put("ssplit.isOneSentence", "true");
    }

    nlpProps.setProperty("sutime.binders", "0");

    // NOTE(review): the working directory is switched to jarRoot and never
    // restored here — confirm callers do not depend on the original cwd.
    Directory.SetCurrentDirectory(jarRoot);
    pipeline = new StanfordCoreNLP(nlpProps);
}
/// <summary>
/// Builds a tokenize/ssplit/POS/SUTime pipeline and annotates
/// <paramref name="text"/>, anchoring relative time expressions to
/// <paramref name="currentDate"/>.
/// </summary>
private Annotation PrepareAnnotation(string text, DateTime currentDate)
{
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // POS tagger.
    // NOTE(review): this path has no separator between modelsDir and
    // "pos-tagger", while the sutime paths below insert a leading '\' —
    // presumably modelsDir ends with a separator; confirm.
    var tagger = new MaxentTagger(modelsDir + @"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
    pipeline.addAnnotator(new POSTaggerAnnotator(tagger));

    // SUTime rule files, comma-separated.
    var ruleFiles = modelsDir + @"\sutime\defs.sutime.txt,"
                    + modelsDir + @"\sutime\english.holidays.sutime.txt,"
                    + modelsDir + @"\sutime\english.sutime.txt";
    var sutimeProps = new Properties();
    sutimeProps.setProperty("sutime.rules", ruleFiles);
    sutimeProps.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", sutimeProps));

    // Annotate, setting the document date for relative expressions.
    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), currentDate.ToString("yyyy-MM-dd"));
    pipeline.annotate(annotation);
    return annotation;
}
static void Main(string[] args)
{
    if (args.Length != 1)
    {
        System.Console.WriteLine("usage: StanfordSegmenter.Csharp.Samples.exe filename");
        return;
    }

    const string dataDir = @"..\..\..\..\temp\stanford-segmenter-2013-06-20\data";

    var props = new Properties();
    props.setProperty("sighanCorporaDict", dataDir);
    // Optional normalization settings:
    //   props.setProperty("NormalizationTable", dataDir + @"\norm.simp.utf8");
    //   props.setProperty("normTableEncoding", "UTF-8");
    // serDictionary is needed because CTBSegDocumentIteratorFactory accesses it.
    props.setProperty("serDictionary", dataDir + @"\dict-chris6.ser.gz");
    props.setProperty("testFile", args[0]);
    props.setProperty("inputEncoding", "UTF-8");
    props.setProperty("sighanPostProcessing", "true");

    // Load the classifier and segment the input file.
    var segmenter = new CRFClassifier(props);
    segmenter.loadClassifierNoExceptions(dataDir + @"\ctb.gz", props);
    segmenter.classifyAndWriteAnswers(args[0]);
}
public void SUTimeDefautTest()
{
    // Manual pipeline: tokenizer, sentence splitter, POS tagger, SUTime.
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new PTBTokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    var posTagger = new MaxentTagger(
        Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger"));
    pipeline.addAnnotator(new POSTaggerAnnotator(posTagger));

    // SUTime rule files, joined into one comma-separated property value.
    var ruleFiles = new[]
    {
        Config.GetModel(@"sutime\defs.sutime.txt"),
        Config.GetModel(@"sutime\english.holidays.sutime.txt"),
        Config.GetModel(@"sutime\english.sutime.txt")
    };
    var sutimeProps = new Properties();
    sutimeProps.setProperty("sutime.rules", String.Join(",", ruleFiles));
    sutimeProps.setProperty("sutime.binders", "0");
    pipeline.addAnnotator(new TimeAnnotator("sutime", sutimeProps));

    // Annotate a sample sentence anchored to a fixed document date.
    const string text = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
    pipeline.annotate(annotation);

    Console.WriteLine(annotation.get(new CoreAnnotations.TextAnnotation().getClass()) + "\n");

    // Dump each recognized time expression with its token span.
    var timexAnns = (ArrayList)annotation.get(new TimeAnnotations.TimexAnnotations().getClass());
    foreach (CoreMap cm in timexAnns)
    {
        var tokens = (java.util.List)cm.get(new CoreAnnotations.TokensAnnotation().getClass());
        var firstToken = tokens.get(0);
        var lastToken = tokens.get(tokens.size() - 1);
        var expression = (TimeExpression)cm.get(new TimeExpression.Annotation().getClass());
        Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}", cm, firstToken, lastToken, (expression.getTemporal()));
    }
}
public static void Start(string modelLocation = null)
{
    var originalDirectory = Environment.CurrentDirectory;

    // An explicit model location overrides the configured default.
    if (!string.IsNullOrEmpty(modelLocation))
    {
        _modelLocation = modelLocation;
    }

    try
    {
        // Annotation pipeline configuration.
        var props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        props.setProperty("sutime.binders", "false");
        props.setProperty("ner.useSUTime", "false");

        // Switch into the model folder so StanfordCoreNLP can find all the
        // model files automatically; the finally block restores the working
        // directory even if pipeline construction throws.
        Directory.SetCurrentDirectory(HostingEnvironment.MapPath(ModelLocation));
        pipeline = new StanfordCoreNLP(props);
    }
    finally
    {
        Directory.SetCurrentDirectory(originalDirectory);
    }
}
//*************************************************/
// METHODS
//*************************************************/
#region Methods

/// <summary>
/// Setup tagger including POS.
/// </summary>
private void SetupTagger()
{
    PerformanceTester.StartMET("NLP");

    // Get path to Stanford NLP models
    var jarRoot = Path.Combine(Utility.GetResourcesFolder(), @"stanford-corenlp-3.9.2-models");

    // Turn off logging
    RedwoodConfiguration.current().clear().apply();

    // Set properties
    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos");

    // Switch to the models folder so StanfordCoreNLP finds its files, and
    // restore the working directory afterwards.
    // Fix: `curDir` was previously captured but never used (the cwd change
    // leaked), and an unused `modelsDirectory` local has been removed.
    var curDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot);

    // Load Stanford NLP
    Tagger = new StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(curDir);

    PerformanceTester.StopMET("NLP");
}
/// <summary>
/// Appends <paramref name="values"/> to the property <paramref name="name"/>,
/// joining pieces with <paramref name="separator"/> and keeping whatever
/// value the property already had at the end of the new value.
/// (Decompiled IKVM output — String.instancehelper_* are IKVM's renderings
/// of java.lang.String instance methods; code left byte-for-byte intact.)
/// </summary>
public static void appendProperty(Properties properties, string name, string separator, params string[] values)
{
    if (values == null || values.Length == 0)
        return;
    string str1 = "";
    string[] strArray = values;
    int length = strArray.Length;
    // Concatenate all non-null, non-empty values, inserting the separator
    // between pieces (but not doubling it when str1 already ends with it).
    for (int index = 0; index < length; ++index)
    {
        string str2 = strArray[index];
        if (str2 != null && String.instancehelper_length(str2) != 0)
        {
            if (String.instancehelper_length(str1) > 0 && !String.instancehelper_endsWith(str1, separator))
                str1 = new StringBuilder().append(str1).append(separator).toString();
            str1 = new StringBuilder().append(str1).append(str2).toString();
        }
    }
    // The pre-existing property value is appended AFTER the new values.
    // NOTE(review): when every entry in `values` is empty but the property
    // is already set, this produces a leading separator — presumably
    // harmless; confirm against the upstream javacpp Loader source.
    string property = properties.getProperty(name, "");
    if (String.instancehelper_length(property) > 0)
        str1 = new StringBuilder().append(str1).append(separator).toString();
    properties.setProperty(name, new StringBuilder().append(str1).append(property).toString());
}
/// <summary>
/// Lazily creates the shared CoreNLP pipeline and loads the list of
/// relevant POS tags from configuration. No-op when already initialized.
/// </summary>
private void Initialize()
{
    if (pipeline != null)
    {
        return;
    }

    // Model folder comes from configuration.
    var jarRoot = ConfigurationManager.AppSettings["NlpModulePath"];

    // Annotation pipeline configuration.
    var props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
    props.setProperty("sutime.binders", "0");
    props.setProperty("ner.useSUTime", "false");

    // Switch into the model folder while loading so StanfordCoreNLP can
    // find all the model files automatically, then restore the cwd.
    var previousDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot);
    NlpProvider.pipeline = new StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(previousDir);

    NlpProvider.relevantPos = ConfigurationManager.AppSettings["NlpFos"]
        .Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries)
        .ToList();
}
/// <summary>
/// Merges platform-specific build settings from the @Properties/@Platform
/// annotations on <paramref name="cls"/> into <paramref name="properties"/>.
/// (Decompiled IKVM output of javacpp's Loader.appendProperties — code left
/// byte-for-byte intact, only comments added.)
/// </summary>
public static void appendProperties(Properties properties, Class cls)
{
    string property1 = properties.getProperty("platform.name");
    // A @Properties annotation wraps several @Platform entries; fall back to
    // a single direct @Platform annotation when @Properties is absent.
    Properties properties1 = (Properties) cls.getAnnotation((Class) ClassLiteral<Properties>.Value);
    Platform[] platformArray1;
    if (properties1 == null)
    {
        Platform platform1 = (Platform) cls.getAnnotation((Class) ClassLiteral<Platform>.Value);
        if (platform1 == null)
            return;
        Platform[] platformArray2 = new Platform[1];
        int index = 0;
        Platform platform2 = platform1;
        platformArray2[index] = platform2;
        platformArray1 = platformArray2;
    }
    else
        platformArray1 = properties1.value();
    // Accumulators for the settings of the last matching @Platform entry.
    string[] strArray1 = new string[0];   // define
    string[] strArray2 = new string[0];   // include
    string[] strArray3 = new string[0];   // cinclude
    string[] strArray4 = new string[0];   // includepath
    string[] strArray5 = new string[0];   // options
    string[] strArray6 = new string[0];   // linkpath
    string[] strArray7 = new string[0];   // link
    string[] strArray8 = new string[0];   // preloadpath
    string[] strArray9 = new string[0];   // preload
    Platform[] platformArray3 = platformArray1;
    int length1 = platformArray3.Length;
    for (int index1 = 0; index1 < length1; ++index1)
    {
        Platform platform = platformArray3[index1];
        // strArray13[0] = platform.value() (platform names this entry applies to),
        // strArray13[1] = platform.not()   (platform names it must NOT apply to).
        string[][] strArray10 = new string[2][];
        int index2 = 0;
        string[] strArray11 = platform.value();
        strArray10[index2] = strArray11;
        int index3 = 1;
        string[] strArray12 = platform.not();
        strArray10[index3] = strArray12;
        string[][] strArray13 = strArray10;
        bool[] flagArray1 = new bool[2];
        int index4 = 0;
        int num1 = 0;
        flagArray1[index4] = num1 != 0;
        int index5 = 1;
        int num2 = 0;
        flagArray1[index5] = num2 != 0;
        bool[] flagArray2 = flagArray1;
        // flagArray2[i] becomes true when platform.name starts with any
        // pattern in the corresponding list.
        for (int index6 = 0; index6 < strArray13.Length; ++index6)
        {
            string[] strArray14 = strArray13[index6];
            int length2 = strArray14.Length;
            for (int index7 = 0; index7 < length2; ++index7)
            {
                string str = strArray14[index7];
                if (String.instancehelper_startsWith(property1, str))
                {
                    flagArray2[index6] = true;
                    break;
                }
            }
        }
        // The entry applies when the "value" list is empty or matched, and
        // the "not" list is empty or NOT matched.
        if ((strArray13[0].Length == 0 || flagArray2[0]) && (strArray13[1].Length == 0 || !flagArray2[1]))
        {
            if (platform.define().Length > 0)
                strArray1 = platform.define();
            if (platform.include().Length > 0)
                strArray2 = platform.include();
            if (platform.cinclude().Length > 0)
                strArray3 = platform.cinclude();
            if (platform.includepath().Length > 0)
                strArray4 = platform.includepath();
            if (platform.options().Length > 0)
                strArray5 = platform.options();
            if (platform.linkpath().Length > 0)
                strArray6 = platform.linkpath();
            if (platform.link().Length > 0)
                strArray7 = platform.link();
            if (platform.preloadpath().Length > 0)
                strArray8 = platform.preloadpath();
            if (platform.preload().Length > 0)
                strArray9 = platform.preload();
        }
    }
    string property2 = properties.getProperty("path.separator");
    // "\0" separates generator entries; path.separator separates path lists.
    Loader.appendProperty(properties, "generator.define", "\0", strArray1);
    Loader.appendProperty(properties, "generator.include", "\0", strArray2);
    Loader.appendProperty(properties, "generator.cinclude", "\0", strArray3);
    Loader.appendProperty(properties, "compiler.includepath", property2, strArray4);
    if (strArray5.Length > 0)
    {
        // Each non-empty entry in `options` names a suffix of a
        // "compiler.options.*" property whose value replaces the base
        // "compiler.options" value before being appended back.
        string property3 = properties.getProperty("compiler.options");
        properties.setProperty("compiler.options", "");
        for (int index1 = 0; index1 < strArray5.Length; ++index1)
        {
            string str1 = property3;
            if (String.instancehelper_length(strArray5[index1]) > 0)
                str1 = properties.getProperty(new StringBuilder().append("compiler.options.").append(strArray5[index1]).toString());
            Properties properties2 = properties;
            string name = "compiler.options";
            string separator = " ";
            string[] strArray10 = new string[1];
            int index2 = 0;
            string str2 = str1;
            strArray10[index2] = str2;
            Loader.appendProperty(properties2, name, separator, strArray10);
        }
    }
    Loader.appendProperty(properties, "compiler.linkpath", property2, strArray6);
    Loader.appendProperty(properties, "compiler.link", property2, strArray7);
    // NOTE(review): link paths/libraries are also appended to the loader
    // preload lists — this mirrors the decompiled source; confirm against
    // the upstream javacpp Loader before "fixing".
    Loader.appendProperty(properties, "loader.preloadpath", property2, strArray6);
    Loader.appendProperty(properties, "loader.preloadpath", property2, strArray8);
    Loader.appendProperty(properties, "loader.preload", property2, strArray7);
    Loader.appendProperty(properties, "loader.preload", property2, strArray9);
}
/// <summary>
/// Background worker: loads the CoreNLP sentiment pipeline, scores every
/// *.txt file in the selected folder, and writes two CSV outputs — one row
/// per document and one row per sentence.
/// Largely taken from https://github.com/sergey-tihon/Stanford.NLP.NET/issues/39
/// </summary>
private void BgWorkerClean_DoWork(object sender, DoWorkEventArgs e)
{
    DictionaryData DictData = (DictionaryData)e.Argument;

    //report what we're working on (Invoke: we are on the worker thread)
    FilenameLabel.Invoke((MethodInvoker) delegate { FilenameLabel.Text = "Loading CoreNLP models... please wait..."; });

    // Pipeline configuration; the model folder lives next to the binary.
    var jarRoot = @"stanford-corenlp-full-2018-02-27\";
    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
    props.setProperty("sutime.binders", "0");
    // NOTE(review): curDir is captured but the working directory is never
    // restored after the pipeline loads — confirm nothing later in this
    // form depends on the original cwd.
    var curDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(Path.Combine(Path.GetDirectoryName(AppDomain.CurrentDomain.BaseDirectory), jarRoot));
    var pipeline = new StanfordCoreNLP(props);

    //selects the text encoding based on user selection
    Encoding SelectedEncoding = null;
    this.Invoke((MethodInvoker) delegate () { SelectedEncoding = Encoding.GetEncoding(EncodingDropdown.SelectedItem.ToString()); });

    //get the list of files (optionally recursing into subfolders)
    var SearchDepth = SearchOption.TopDirectoryOnly;
    if (ScanSubfolderCheckbox.Checked)
    {
        SearchDepth = SearchOption.AllDirectories;
    }
    var files = Directory.EnumerateFiles(DictData.TextFileFolder, "*.txt", SearchDepth);

    //try
    //{

    //open up the output file (document-level results)
    using (StreamWriter outputFile = new StreamWriter(new FileStream(DictData.OutputFileLocation, FileMode.Create), SelectedEncoding))
    {
        // Sentence-level results go to a sibling "_Sentences" file.
        using (StreamWriter outputFileSentences = new StreamWriter(new FileStream(AddSuffix(DictData.OutputFileLocation, "_Sentences"), FileMode.Create), SelectedEncoding))
        {
            //write the header row to the output file
            StringBuilder HeaderString = new StringBuilder();
            HeaderString.Append("\"Filename\",\"Sentences\",\"Classification\",\"Classification_M\",\"Classification_SD\"");
            outputFile.WriteLine(HeaderString.ToString());

            StringBuilder HeaderStringSentence = new StringBuilder();
            HeaderStringSentence.Append("\"Filename\",\"SentNumber\",\"SentenceText\",\"Classification\",\"Class_Prob\",\"Class_Number\"");
            outputFileSentences.WriteLine(HeaderStringSentence.ToString());

            foreach (string fileName in files)
            {
                //set up our variables to report
                string Filename_Clean = Path.GetFileName(fileName);
                // NOTE(review): DictionaryResults is never read or written in
                // this method — presumably vestigial; confirm before removal.
                Dictionary <string, int> DictionaryResults = new Dictionary <string, int>();

                //report what we're working on
                FilenameLabel.Invoke((MethodInvoker) delegate { FilenameLabel.Text = "Analyzing: " + Filename_Clean; });

                //read in the text file, convert everything to lowercase
                string InputText = System.IO.File.ReadAllText(fileName, SelectedEncoding).Trim();

                // ---- Analyze text: run the full pipeline over the document ----
                var annotation = new edu.stanford.nlp.pipeline.Annotation(InputText);
                pipeline.annotate(annotation);

                List <double> SentimentValues = new List <double>();

                var sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as ArrayList;

                int SentenceCount = 0;

                foreach (CoreMap sentence in sentences)
                {
                    SentenceCount++;
                    Tree tree = sentence.get(new SentimentCoreAnnotations.SentimentAnnotatedTree().getClass()) as Tree;

                    //add this sentence to our overall list of sentiment scores
                    SentimentValues.Add(RNNCoreAnnotations.getPredictedClass(tree));

                    // ---- Write output: one CSV row per sentence ----
                    string[] OutputString_SentenceLevel = new string[6];

                    string Classification = GetClassification((double)RNNCoreAnnotations.getPredictedClass(tree));

                    OutputString_SentenceLevel[0] = "\"" + Filename_Clean + "\"";
                    OutputString_SentenceLevel[1] = SentenceCount.ToString();
                    // Sentence text is CSV-quoted; embedded quotes are doubled.
                    OutputString_SentenceLevel[2] = "\"" + sentence.ToString().Replace("\"", "\"\"") + "\"";
                    OutputString_SentenceLevel[3] = Classification;
                    OutputString_SentenceLevel[4] = RNNCoreAnnotations.getPredictedClassProb(tree.label()).ToString();
                    OutputString_SentenceLevel[5] = RNNCoreAnnotations.getPredictedClass(tree).ToString();

                    outputFileSentences.WriteLine(String.Join(",", OutputString_SentenceLevel));
                }

                //write output at the file level (mean and SD over sentences)
                string[] OutputString = new string[5];
                OutputString[0] = "\"" + Filename_Clean + "\"";
                OutputString[1] = SentenceCount.ToString();
                OutputString[2] = GetClassification(SentimentValues.Average());
                OutputString[3] = SentimentValues.Average().ToString();
                OutputString[4] = StandardDeviation(SentimentValues).ToString();
                outputFile.WriteLine(String.Join(",", OutputString));
            }

        //this is the closing bracket for the sentence-level "using" filestream
        }

    //this is the closing bracket for the document-level "using" filestream
    }

    //}
    //catch
    //{
    //    MessageBox.Show("Senti-Gent encountered an issue somewhere while trying to analyze your texts. The most common cause of this is trying to open your output file while Senti-Gent is still running. Did any of your input files move, or is your output file being opened/modified by another application?", "Error while analyzing", MessageBoxButtons.OK, MessageBoxIcon.Error);
    //}
}
public void StanfordCoreNlpDemoManualConfiguration()
{
    Console.WriteLine(Environment.CurrentDirectory);
    const string Text = "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.";

    // Annotation pipeline configuration: every model location is supplied
    // explicitly instead of relying on the current directory.
    var props = new Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");

    // Model files resolved through Config.GetModel, keyed by property name.
    var modelTable = new[,]
    {
        { "pos.model", @"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger" },
        { "ner.model", @"ner\english.all.3class.distsim.crf.ser.gz" },
        { "parse.model", @"lexparser\englishPCFG.ser.gz" },
        { "dcoref.demonym", @"dcoref\demonyms.txt" },
        { "dcoref.states", @"dcoref\state-abbreviations.txt" },
        { "dcoref.animate", @"dcoref\animate.unigrams.txt" },
        { "dcoref.inanimate", @"dcoref\inanimate.unigrams.txt" },
        { "dcoref.male", @"dcoref\male.unigrams.txt" },
        { "dcoref.neutral", @"dcoref\neutral.unigrams.txt" },
        { "dcoref.female", @"dcoref\female.unigrams.txt" },
        { "dcoref.plural", @"dcoref\plural.unigrams.txt" },
        { "dcoref.singular", @"dcoref\singular.unigrams.txt" },
        { "dcoref.countries", @"dcoref\countries" },
        { "dcoref.extra.gender", @"dcoref\namegender.combine.txt" },
        { "dcoref.states.provinces", @"dcoref\statesandprovinces" },
        { "dcoref.singleton.predictor", @"dcoref\singleton.predictor.ser" },
        { "dcoref.big.gender.number", @"dcoref\gender.data.gz" }
    };
    for (var i = 0; i < modelTable.GetLength(0); i++)
    {
        props.setProperty(modelTable[i, 0], Config.GetModel(modelTable[i, 1]));
    }

    // SUTime rule files, joined into a single comma-separated property.
    var sutimeRules = new[]
    {
        Config.GetModel(@"sutime\defs.sutime.txt"),
        Config.GetModel(@"sutime\english.holidays.sutime.txt"),
        Config.GetModel(@"sutime\english.sutime.txt")
    };
    props.setProperty("sutime.rules", String.Join(",", sutimeRules));
    props.setProperty("sutime.binders", "0");

    var pipeline = new edu.stanford.nlp.pipeline.StanfordCoreNLP(props);

    // Annotate the sample text.
    var annotation = new Annotation(Text);
    pipeline.annotate(annotation);

    // Pretty-print the result to the console.
    using (var stream = new ByteArrayOutputStream())
    {
        pipeline.prettyPrint(annotation, new PrintWriter(stream));
        Console.WriteLine(stream.toString());
    }

    this.CustomAnnotationPrint(annotation);
}
/// <summary>
/// Extracts time expressions from <paramref name="text"/> with SUTime and
/// classifies each one as tPeriod / tTrigger / tStamp (results stored in
/// the typeTime/valueTime fields) relative to a hard-coded "present" date.
/// </summary>
public void extractTime(string text)
{
    sentenceInput = text;
    // Reference date for relative expressions; also parsed below for
    // before/after comparisons.
    string presentDate = "2015-10-10";
    string curr = Environment.CurrentDirectory;
    var jarRoot = curr + @"\stanford-corenlp-3.5.2-models";
    var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

    // Annotation pipeline configuration
    var pipeline = new AnnotationPipeline();
    pipeline.addAnnotator(new TokenizerAnnotator(false));
    pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

    // SUTime configuration (time ranges enabled)
    var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt," + modelsDirectory + @"\sutime\english.holidays.sutime.txt," + modelsDirectory + @"\sutime\english.sutime.txt";
    var props = new Properties();
    props.setProperty("sutime.rules", sutimeRules);
    props.setProperty("sutime.binders", "0");
    props.setProperty("sutime.markTimeRanges", "true");
    props.setProperty("sutime.includeRange", "true");
    pipeline.addAnnotator(new TimeAnnotator("sutime", props));

    // Annotate the input, anchored to the present date
    var annotation = new Annotation(text);
    annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), presentDate);
    pipeline.annotate(annotation);

    // Console.WriteLine("{0}\n", annotation.get(new CoreAnnotations.TextAnnotation().getClass()));
    var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
    foreach (CoreMap cm in timexAnnsAll)
    {
        var time = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;
        string typeTimex = time.getTemporal().getTimexType().toString();
        // DURATION expressions become periods directly.
        if (typeTimex.ToLower() == "duration")
        {
            typeTime = "tPeriod";
            valueTime = time.getTemporal().toISOString();
            Console.WriteLine(valueTime);
        }
        // TIME/DATE expressions: POS-tag the sentence and inspect the word
        // immediately before the expression (after/since/before) to decide
        // whether it is a trigger or a plain stamp.
        if (typeTimex.ToLower() == "time" || typeTimex.ToLower() == "date")
        {
            string textOftime = time.getText().ToString();
            char[] delimiterChars = { ' ' };
            string[] words = textOftime.Split(delimiterChars);
            string mainword = words[0];
            var tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
            var sentences = MaxentTagger.tokenizeText(new StringReader(text));
            var first = sentences.get(0) as ArrayList;
            int size = first.size();
            int i = 0;
            // Locate the expression's first word in the tokenized sentence.
            // NOTE(review): -3 is a "not found" sentinel; if mainword is not
            // matched, taggedSentence.get(index - 1) below calls get(-4) and
            // will throw — confirm whether that case can actually occur.
            int index = -3;
            while (i < size)
            {
                if (first.get(i).ToString() == mainword)
                    index = i;
                i++;
            }
            var taggedSentence = tagger.tagSentence(first);
            // checker = tagged form ("word/TAG") of the token preceding the
            // time expression.
            string checker = taggedSentence.get(index - 1).ToString();
            if (checker.ToLower() == "after/in" || checker.ToLower() == "since/in")
            {
                // "after X"/"since X" → the expression starts an interval.
                typeTime = "tTrigger";
                valueTime = "Start : " + time.getTemporal().toISOString();
                Console.WriteLine(valueTime);
            }
            else if (checker.ToLower() == "before/in")
            {
                if (typeTimex == "TIME")
                {
                    // "before <time>" → the expression ends an interval.
                    typeTime = "tTrigger";
                    valueTime = "End : " + time.getTemporal().toISOString();
                    Console.WriteLine(valueTime);
                }
                else
                {
                    // "before <date>": compare the date against presentDate to
                    // decide whether the interval is still open.
                    DateTime result = new DateTime();
                    DateTime current = DateTime.ParseExact(presentDate, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
                    // NOTE(review): `dt` is computed but never used.
                    string dt = time.getTemporal().toString();
                    char[] delimiter = { '-', '-', '-' };
                    string[] partsOfDate = time.getTemporal().toISOString().Split(delimiter);
                    int count = partsOfDate.Length;
                    // Pad year-only / year-month values to a full date.
                    if (count == 1)
                    {
                        result = Convert.ToDateTime("01-01-" + partsOfDate[0]);
                    }
                    if (count == 2)
                    {
                        result = Convert.ToDateTime("01-" + partsOfDate[1] + "-" + partsOfDate[0]);
                    }
                    // result = DateTime.ParseExact(dt, "yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);
                    int comp = DateTime.Compare(current, result);
                    if (comp < 0)
                    {
                        // Date is in the future: interval runs from now to it.
                        typeTime = "tTrigger";
                        valueTime = "Start now (" + presentDate + ") End :" + time.getTemporal().toString();
                        Console.WriteLine(valueTime);
                    }
                    else
                    {
                        typeTime = "tTrigger";
                        valueTime = "End : " + time.getTemporal().toString();
                        Console.WriteLine(valueTime);
                    }
                }
            }
            else
            {
                // No recognized preposition: a plain time stamp.
                typeTime = "tStamp";
                valueTime = time.getTemporal().toISOString();
                Console.WriteLine(valueTime);
            }
        }
    }
}
/// <summary>
/// Parses a document file into [0] its metadata Document and [1] a "wordIndex" DataTable
/// with one row per token (plus synthetic "\n" rows for blank lines), recording value,
/// length, symbol flag, lemma, chapter, line, column, sentence and ordinal positions.
/// Returns null when metadata extraction fails.
/// </summary>
/// <param name="filename">File name relative to LUAttributes.filesPathFromRoot.</param>
/// <returns>ArrayList of { Document, DataTable } or null.</returns>
public static System.Collections.ArrayList parse(string filename) {
    string fileLocation = LUAttributes.filesPathFromRoot + filename;
    //get metadata
    Document document = getMetadata(fileLocation);
    if (document == null) {
        return(null);
    }
    // Path to the folder with models extracted from `stanford-corenlp-3.9.2-models.jar`
    var jarRoot = Path.Combine(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..\\..\\"), @"Code\\stanford");
    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit,pos, lemma");
    // "two": only a double newline ends a sentence.
    props.setProperty("ssplit.newlineIsSentenceBreak", "two");
    props.setProperty("tokenize.keepeol", "true");
    // We should change current directory, so StanfordCoreNLP could find all the model files automatically
    var curDir = Environment.CurrentDirectory;
    Directory.SetCurrentDirectory(jarRoot);
    var pipeline = new StanfordCoreNLP(props);
    Directory.SetCurrentDirectory(curDir);
    // First pass: annotate line-by-line to record, for each token in document order,
    // its line number and its column relative to the line's first token.
    string[] lines = readAllLinesSkipMD(fileLocation);
    List <int> linenumbers = new List <int>();
    List <int> columnnumbers = new List <int>();
    List <CoreMap> blankSentencesList = new List <CoreMap>();
    java.util.ArrayList lineTokens;
    CoreLabel firstTokenInLine;
    int firstTokenInLinePos = 0;
    int columnInLine = 0;
    int lineNumber = 0;
    foreach (string line in lines) {
        Annotation annotation2 = new Annotation(line);
        pipeline.annotate(annotation2);
        lineTokens = (java.util.ArrayList)annotation2.get(typeof(CoreAnnotations.TokensAnnotation));
        if (line.Trim() != "") {
            // Columns are measured from the first token, so leading indentation is ignored.
            firstTokenInLine = (CoreLabel)lineTokens.get(0);
            firstTokenInLinePos = firstTokenInLine.beginPosition();
        } else {
            firstTokenInLinePos = 0;
        }
        foreach (CoreLabel token2 in lineTokens) {
            columnInLine = token2.beginPosition() - firstTokenInLinePos;
            token2.set(typeof(CoreAnnotations.LineNumberAnnotation), lineNumber);
            blankSentencesList.Add(token2);
            linenumbers.Add(lineNumber);
            columnnumbers.Add(columnInLine);
        }
        lineNumber += 1;
    }
    // Second pass: annotate the whole text (metadata stripped) for sentence structure
    // and lemmas. NOTE(review): assumes this tokenization yields tokens in the same
    // order/count as the per-line pass above, since linenumbers/columnnumbers are
    // indexed by wordNumInText — confirm.
    string mdEnd = LUAttributes.metadataEnd;
    string txt = File.ReadAllText(Path.Combine(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..\\..\\"), fileLocation));
    // +2 skips the marker's trailing line break; presumably CRLF — TODO confirm.
    txt = txt.Substring(txt.IndexOf(mdEnd) + mdEnd.Length + 2);
    var annotation = new Annotation(txt);
    pipeline.annotate(annotation);
    //build datatable
    DataTable wordIndex = new DataTable("wordIndex");
    wordIndex.Columns.Add(new DataColumn("value", typeof(string)));
    wordIndex.Columns.Add(new DataColumn("length", typeof(int)));
    wordIndex.Columns.Add(new DataColumn("isSymbol", typeof(int)));
    wordIndex.Columns.Add(new DataColumn("lemma", typeof(string)));
    wordIndex.Columns.Add(new DataColumn("chapter", typeof(int)));
    wordIndex.Columns.Add(new DataColumn("line", typeof(int)));
    wordIndex.Columns.Add(new DataColumn("column", typeof(int)));
    wordIndex.Columns.Add(new DataColumn("sentence", typeof(int)));
    wordIndex.Columns.Add(new DataColumn("wordOrdinal", typeof(int)));
    wordIndex.Columns.Add(new DataColumn("tokenOrdinal", typeof(int)));
    //extract words into datatable
    var sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as java.util.ArrayList;
    string word = "";
    // wordNumInText indexes into linenumbers/columnnumbers; lastTokenLine tracks the
    // last emitted line so blank lines can be materialized as "\n" rows.
    int indexInSentence = 0, indexOfWordInSentence = 0, chapter = 1, isAposSkip = 0, lastTokenLine = -1;
    int sentenceCounter = 0;
    DataRow dr;
    CoreLabel token;
    int wordNumInText = 0;
    foreach (CoreMap sentence in sentences) {
        indexInSentence = 0;
        indexOfWordInSentence = 0;
        var tokens = sentence.get(new CoreAnnotations.TokensAnnotation().getClass()) as java.util.ArrayList;
        for (int i = 0; i < tokens.size(); i++) {
            token = (CoreLabel)(tokens.get(i));
            isAposSkip = 0;
            //check if next token contains an apostrophe - also check if both current and next tokens are not symbols
            if (i + 1 < tokens.size() && ((CoreLabel)tokens.get(i + 1)).word().IndexOf("'") >= 0 && !isSymbol(token.word()) && !isSymbol(((CoreLabel)tokens.get(i + 1)).word())) {
                //concat words (e.g. "don" + "'t"), consuming the apostrophe token
                word = String.Concat(token.word(), ((CoreLabel)tokens.get(i + 1)).word());
                //skip next word
                i++;
                wordNumInText++;
                isAposSkip = 1;
            } else {
                word = token.word();
            }
            //add newline rows: one synthetic "\n" row per blank line between tokens
            while (lastTokenLine + 1 < linenumbers[wordNumInText]) {
                lastTokenLine++;
                dr = wordIndex.NewRow();
                dr["value"] = "\\n";
                dr["length"] = 2;
                dr["isSymbol"] = 1;
                dr["lemma"] = "nolemma";
                dr["chapter"] = chapter;
                dr["line"] = lastTokenLine;
                dr["column"] = 0;
                dr["sentence"] = -1;
                dr["wordOrdinal"] = -1;
                dr["tokenOrdinal"] = 0;
                wordIndex.Rows.Add(dr);
            }
            //check if new chapter: the marker itself is stored with the new chapter number
            // and resets sentence/token counters (set to -1; incremented below to 0).
            if (word == LUAttributes.chapterMark) {
                chapter++;
                sentenceCounter = -1;
                indexInSentence = -1;
            }
            dr = wordIndex.NewRow();
            // Normalize PTB-style tokens back to their literal characters.
            word = word.Replace("''", "\"").Replace("``", "\"").Replace("-LRB-", "(").Replace("-RRB-", ")");
            dr["value"] = word;
            dr["length"] = word.Length;
            dr["isSymbol"] = isSymbol(word);
            dr["chapter"] = chapter;
            dr["line"] = linenumbers[wordNumInText];
            // For apostrophe-merged words, use the column of the FIRST constituent token.
            dr["column"] = columnnumbers[wordNumInText - isAposSkip];
            dr["sentence"] = sentenceCounter;
            dr["tokenOrdinal"] = indexInSentence;
            if (!isSymbol(word)) {
                dr["wordOrdinal"] = indexOfWordInSentence;
                // NOTE(review): lemma comes from the first token only when two tokens
                // were merged above — confirm that is intended.
                dr["lemma"] = token.lemma().ToLower();
            } else {
                // Symbols carry no word ordinal or lemma.
                dr["wordOrdinal"] = -1;
                dr["lemma"] = "nolemma";
            }
            wordIndex.Rows.Add(dr);
            lastTokenLine = linenumbers[wordNumInText];
            wordNumInText++;
            indexInSentence++;
            if (!isSymbol(word)) {
                indexOfWordInSentence++;
            }
        }
        sentenceCounter++;
    }
    System.Collections.ArrayList parsedDocList = new System.Collections.ArrayList();
    parsedDocList.Add(document);
    parsedDocList.Add(wordIndex);
    return(parsedDocList);
}
//使用nlp將文章分析後回傳key private List<string> nlp(string sentence) { List<string> return_key = new List<string>(); string Relay_file = ".\\xml"; string Relay_name = "Relay.xml"; string Relay_path = Relay_file+ "\\" + Relay_name; // Path to the folder with models extracted from `stanford-corenlp-3.4-models.jar` var jarRoot = @"stanford-corenlp-3.5.2-models\"; // Annotation pipeline configuration var props = new java.util.Properties(); props.setProperty("ner.useSUTime", "false"); props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref"); props.setProperty("sutime.binders", "0"); // We should change current directory, so StanfordCoreNLP could find all the model files automatically var curDir = Environment.CurrentDirectory; System.IO.Directory.SetCurrentDirectory(jarRoot); var pipeline = new StanfordCoreNLP(props); System.IO.Directory.SetCurrentDirectory(curDir); // Annotation var annotation = new Annotation(sentence); pipeline.annotate(annotation); //輸出nlp分析結果至Relay.xml FileOutputStream os = new FileOutputStream(new File(Relay_file, Relay_name)); pipeline.xmlPrint(annotation, os); os.close(); //呼叫ner將單字組合為有意義的key組裝 foreach(string k in ner(Relay_path)) { return_key.Add(k); } return return_key; }