Example 1
        /// <summary>
        /// Sets up the extended Stanford CoreNLP tagger (POS, lemma and NER annotators).
        /// </summary>
        /// <remarks>
        /// CoreNLP resolves model files relative to the process working directory, so this
        /// temporarily switches to the models folder while the pipeline loads, then restores
        /// the previous directory (the original implementation never restored it).
        /// </remarks>
        private void SetupExtendedTagger()
        {
            PerformanceTester.StartMET("NLP");
            // Path to the extracted Stanford CoreNLP models package.
            var jarRoot = Path.Combine(Utility.GetResourcesFolder(), @"stanford-corenlp-3.9.2-models");

            // Turn off Stanford's Redwood logging.
            RedwoodConfiguration.current().clear().apply();

            var props = new java.util.Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
            // Makes Named Entity Recognition work in the library
            props.setProperty("ner.useSUTime", "0");
            props.put("ner.applyFineGrained", "0");
            props.put("ner.fine.regexner.mapping", jarRoot + @"\edu\stanford\nlp\models\kbp\english\");

            // Remember the caller's working directory so we can put it back afterwards.
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                // Load Stanford NLP (reads model files relative to the current directory).
                Tagger = new StanfordCoreNLP(props);
            }
            finally
            {
                // BUG FIX: restore the working directory — the original leaked this
                // process-wide state change. (Unused `modelsDirectory` local removed.)
                Directory.SetCurrentDirectory(curDir);
            }
            PerformanceTester.StopMET("NLP");
        }
        /// <summary>
        /// Builds a CoreNLP pipeline with the full annotator chain (tokenize through parse)
        /// and runs it over the current text.
        /// </summary>
        /// <returns>The annotated document produced by the pipeline.</returns>
        private Annotation PrepareAnnotation()
        {
            var pipelineProps = new Properties();

            // Model file locations, resolved against the models directory.
            pipelineProps.put("pos.model", modelsDir + "pos-tagger/english-left3words/english-left3words-distsim.tagger");
            pipelineProps.put("ner.model", modelsDir + "ner/english.conll.4class.distsim.crf.ser.gz");
            pipelineProps.put("parse.model", modelsDir + "lexparser/englishPCFG.ser.gz");
            pipelineProps.put("sutime.rules", modelsDir + "sutime/defs.sutime.txt, " + modelsDir + "sutime/english.sutime.txt");

            // Annotator chain and SUTime switches.
            pipelineProps.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
            pipelineProps.setProperty("sutime.binders", "0");
            pipelineProps.setProperty("ner.useSUTime", "0");

            var nlp = new StanfordCoreNLP(pipelineProps);
            var document = new Annotation(text);
            nlp.annotate(document);
            return document;
        }
        /// <summary>
        /// Creates the sentiment pipeline (tokenize, ssplit, parse, sentiment).
        /// </summary>
        public void Initialize()
        {
            var pipelineProps = new java.util.Properties();
            pipelineProps.setProperty("annotators", "tokenize, ssplit, parse, sentiment");

            // When an external sentence segmentation strategy is in use, tell CoreNLP to
            // treat each input as a single sentence.
            // https://stackoverflow.com/a/28017131
            if (!useBuiltInSentenceSplitter)
            {
                pipelineProps.put("ssplit.isOneSentence", "true");
            }

            pipelineProps.setProperty("sutime.binders", "0");

            // Model files are resolved relative to the current directory.
            Directory.SetCurrentDirectory(jarRoot);
            pipeline = new StanfordCoreNLP(pipelineProps);
        }
Example 4
        /// <summary>
        /// Kafka consumer throughput benchmark: consumes <c>count</c> messages from a
        /// single topic and reports MB/sec and Msg/sec.
        /// </summary>
        static void Main(string[] args)
        {
            var host = "192.168.33.12:9092";
            var topic = "test2";
            var count = 50000000;

            var prop = new Properties();
            prop.put("bootstrap.servers", host);
            prop.put("group.id", "test3");
            prop.put("auto.offset.reset", "earliest");
            prop.put("enable.auto.commit", "true");
            prop.put("auto.commit.interval.ms", "1000");
            prop.put("socket.receive.buffer.bytes", (2 * 1024 * 1024).ToString());
            prop.put("fetch.message.max.bytes", (1024 * 1024).ToString());

            var c = new KafkaConsumer(prop, new ByteArrayDeserializer(), new ByteArrayDeserializer());

            var topics = new ArrayList(1);
            topics.add(topic);
            var time = DateTime.UtcNow;
            c.subscribe(topics);

            // BUG FIX: use a 64-bit accumulator — summing payload sizes over up to
            // 50,000,000 messages overflows Int32 and corrupts the MB/sec figure.
            long bytes = 0;
            var i = count;
            var recordCount = 0;
            while (i > 0)
            {
                // Blocks up to 1000 ms per poll; keeps looping until `count` messages read.
                var r = c.poll(1000);
                var records = r.records(topic);
                for (var it = records.iterator(); it.hasNext() && i > 0; i--, recordCount++)
                {
                    var rec = (ConsumerRecord)it.next();
                    var b = (byte[])rec.value();
                    bytes += b.Length;
                }
                Console.WriteLine(recordCount);
            }

            var mb = bytes / 1024.0 / 1024.0;
            var seconds = (DateTime.UtcNow - time).TotalSeconds;
            Console.WriteLine($"{mb / seconds} MB/sec");
            Console.WriteLine($"{count / seconds} Msg/sec");
        }