/// <summary>
/// Sets up the extended tagger pipeline that includes POS, lemma and named-entity analysis.
/// NOTE(review): changes the process-wide current directory to the models folder and does not
/// restore it afterwards — preserved because the model loader resolves paths relative to the
/// current directory; confirm no other component depends on the previous cwd.
/// </summary>
private void SetupExtendedTagger()
{
    PerformanceTester.StartMET("NLP");

    // Path to the unpacked Stanford CoreNLP model files.
    var jarRoot = Path.Combine(Utility.GetResourcesFolder(), @"stanford-corenlp-3.9.2-models");

    // Turn off the library's (Redwood) logging.
    RedwoodConfiguration.current().clear().apply();

    var props = new java.util.Properties();
    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");

    // Makes Named Entity Recognition work in the library.
    // setProperty is used consistently (the original mixed put/setProperty).
    props.setProperty("ner.useSUTime", "0");
    props.setProperty("ner.applyFineGrained", "0");
    props.setProperty("ner.fine.regexner.mapping", jarRoot + @"\edu\stanford\nlp\models\kbp\english\");

    // The model loader resolves model paths relative to the current directory.
    // (Removed unused locals curDir/modelsDirectory that were computed but never read.)
    Directory.SetCurrentDirectory(jarRoot);

    // Load the Stanford NLP pipeline (expensive: reads all model files from disk).
    Tagger = new StanfordCoreNLP(props);

    PerformanceTester.StopMET("NLP");
}
/// <summary>
/// Builds a CoreNLP pipeline (tokenize, ssplit, pos, lemma, ner, parse) with explicit model
/// locations and annotates the current <c>text</c> field.
/// NOTE(review): a new StanfordCoreNLP pipeline is constructed on every call, which reloads
/// every model from disk — consider caching the pipeline if this is called repeatedly.
/// </summary>
/// <returns>The fully annotated text.</returns>
private Annotation PrepareAnnotation()
{
    var props = new Properties();

    // Explicit model file locations (assumes modelsDir ends with a path separator —
    // TODO confirm against the field's initializer).
    // setProperty is used consistently (the original mixed put/setProperty).
    props.setProperty("pos.model", modelsDir + "pos-tagger/english-left3words/english-left3words-distsim.tagger");
    props.setProperty("ner.model", modelsDir + "ner/english.conll.4class.distsim.crf.ser.gz");
    props.setProperty("parse.model", modelsDir + "lexparser/englishPCFG.ser.gz");
    props.setProperty("sutime.rules", modelsDir + "sutime/defs.sutime.txt, " + modelsDir + "sutime/english.sutime.txt");

    props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
    props.setProperty("sutime.binders", "0");
    props.setProperty("ner.useSUTime", "0");

    var pipeline = new StanfordCoreNLP(props);
    var annotatedText = new Annotation(text);
    pipeline.annotate(annotatedText);
    return annotatedText;
}
/// <summary>
/// Creates the tokenize/ssplit/parse/sentiment pipeline used by this instance.
/// </summary>
public void Initialize()
{
    var properties = new java.util.Properties();
    properties.setProperty("annotators", "tokenize, ssplit, parse, sentiment");

    // When an external sentence segmentation strategy is in use, tell CoreNLP to treat
    // each input as exactly one sentence — see https://stackoverflow.com/a/28017131
    if (!useBuiltInSentenceSplitter)
    {
        properties.put("ssplit.isOneSentence", "true");
    }

    properties.setProperty("sutime.binders", "0");

    // Models are resolved relative to the current directory.
    Directory.SetCurrentDirectory(jarRoot);
    pipeline = new StanfordCoreNLP(properties);
}
/// <summary>
/// Kafka consumer throughput benchmark: consumes <c>count</c> messages from <c>topic</c>
/// and reports MB/sec and Msg/sec.
/// NOTE(review): if the topic holds fewer than <c>count</c> messages this loop never
/// terminates (poll returning nothing does not decrement i) — confirm that is intended
/// for a benchmark harness.
/// </summary>
static void Main(string[] args)
{
    var host = "192.168.33.12:9092";
    var topic = "test2";
    var count = 50000000;

    // Java-client consumer configuration (all values are strings).
    var prop = new Properties();
    prop.put("bootstrap.servers", host);
    prop.put("group.id", "test3");
    prop.put("auto.offset.reset", "earliest");
    prop.put("enable.auto.commit", "true");
    prop.put("auto.commit.interval.ms", "1000");
    prop.put("socket.receive.buffer.bytes", (2 * 1024 * 1024).ToString());
    prop.put("fetch.message.max.bytes", (1024 * 1024).ToString());

    var c = new KafkaConsumer(prop, new ByteArrayDeserializer(), new ByteArrayDeserializer());
    var topics = new ArrayList(1);
    topics.add(topic);

    var time = DateTime.UtcNow;
    c.subscribe(topics);

    // long, not int: 50M messages can easily exceed int.MaxValue (~2 GiB) total bytes,
    // which silently wrapped the MB/sec figure in the original.
    long bytes = 0;
    var i = count;
    var recordCount = 0;
    while (i > 0)
    {
        var r = c.poll(1000);
        var records = r.records(topic);
        for (var it = records.iterator(); it.hasNext() && i > 0; i--, recordCount++)
        {
            var rec = (ConsumerRecord)it.next();
            var b = (byte[])rec.value();
            bytes += b.Length;
        }
        // Progress indicator: total records consumed so far, once per poll.
        Console.WriteLine(recordCount);
    }

    // Close the consumer: releases sockets and leaves the group cleanly
    // (the original leaked the consumer).
    c.close();

    var mb = bytes / 1024.0 / 1024.0;
    var seconds = (DateTime.UtcNow - time).TotalSeconds;
    Console.WriteLine($"{mb / seconds} MB/sec");
    Console.WriteLine($"{count / seconds} Msg/sec");
}