Inheritance: global::java.util.Hashtable
Example #1
0
        /// <summary>
        /// Setup extended tagger that includes POS, lemma and entity analysis.
        /// </summary>
        /// <remarks>
        /// The process-wide current directory is switched to the models folder only while the
        /// pipeline is being constructed, and is restored afterwards even if construction throws.
        /// </remarks>
        private void SetupExtendedTagger()
        {
            PerformanceTester.StartMET("NLP");
            // Get path to Stanford NLP models
            var jarRoot = Path.Combine(Utility.GetResourcesFolder(), @"stanford-corenlp-3.9.2-models");

            // Turn off logging
            RedwoodConfiguration.current().clear().apply();
            var props = new java.util.Properties();

            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
            // Makes Named Entity Recognition work in the library
            props.setProperty("ner.useSUTime", "0");
            props.put("ner.applyFineGrained", "0");
            props.put("ner.fine.regexner.mapping", jarRoot + @"\edu\stanford\nlp\models\kbp\english\");

            // Remember the caller's working directory so it can be put back afterwards
            var curDir = Environment.CurrentDirectory;

            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                // Load Stanford NLP
                Tagger = new StanfordCoreNLP(props);
            }
            finally
            {
                // The current directory is process-global state; never leave it pointing at the models
                Directory.SetCurrentDirectory(curDir);
            }

            PerformanceTester.StopMET("NLP");
        }
Example #2
0
        /// <summary>
        /// Creates a lemmatizer backed by a Stanford CoreNLP pipeline
        /// (tokenize, ssplit, pos, lemma).
        /// </summary>
        /// <param name="jarRootPath">Path to the folder with models extracted from 'stanford-corenlp-3.5.2-models.jar'</param>
        /// <exception cref="DirectoryNotFoundException">Thrown when <paramref name="jarRootPath"/> does not exist.</exception>
        public StanfordLemmatizer(string jarRootPath)
        {
            if (!Directory.Exists(jarRootPath))
            {
                string fullPath = Path.GetFullPath(jarRootPath);
                throw new DirectoryNotFoundException("Folder(s) extracted from 'stanford-corenlp-3.5.2-models.jar' was not found in path: . " +
                                                     "-->" + fullPath + "<--. " +
                                                     "Please make sure correct path is listed in .config file.");
            }

            // Set properties required for lemma
            java.util.Properties props = new java.util.Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
            props.setProperty("ner.useSUTime", "0");

            // We should change current directory, so StanfordCoreNLP could find all the model files automatically
            string curDir = Environment.CurrentDirectory;

            Directory.SetCurrentDirectory(jarRootPath);
            try
            {
                _pipeLine = new StanfordCoreNLP(props);
            }
            finally
            {
                // Restore the working directory even when model loading fails
                Directory.SetCurrentDirectory(curDir);
            }

            // Instantiate annotation
            _sentencesAnnotation = new CoreAnnotations.SentencesAnnotation();
            _tokensAnnotation    = new CoreAnnotations.TokensAnnotation();
            _lemmaAnnotation     = new CoreAnnotations.LemmaAnnotation();
        }
        /// <summary>
        /// Builds a MIME message from FromAddress/FromName, ToAddress/ToName, Subject and
        /// MessageString and hands it to the JavaMail transport.
        /// </summary>
        /// <returns>An already-completed task; sending happens synchronously.</returns>
        /// <remarks>
        /// Sending is best effort: failures are reported on the console and are not rethrown.
        /// </remarks>
        public Task SendEMail()
        {
            // Z:\jsc.svn\examples\java\hybrid\ubuntu\UbuntuCommonsEmail\Program.cs
            // https://developers.google.com/appengine/docs/java/mail/usingjavamail

            Properties props = new Properties();
            Session session = Session.getDefaultInstance(props, null);

            try
            {
                Message msg = new MimeMessage(session);
                msg.setFrom(new InternetAddress(FromAddress, FromName));
                msg.addRecipient(Message.RecipientType.TO, new InternetAddress(ToAddress, ToName));
                msg.setSubject(Subject);
                msg.setText(MessageString);
                Transport.send(msg);
            }
            catch (Exception ex)
            {
                // Keep the best-effort contract, but do not throw away the reason for the failure
                Console.WriteLine("fail!");
                Console.WriteLine(ex);
            }

            return Task.FromResult(default(object));
        }
Example #4
0
        /// <summary>
        /// Kafka producer throughput benchmark: sends <c>count</c> identical records of
        /// <c>size</c> bytes to <c>topic</c> and prints the collected statistics.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var host = "192.168.33.12:9092";
            var topic = "test2";
            var count = 50000000;
            var size = 100;

            var prop = new Properties();
            // Use the single 'host' setting instead of repeating the address as a literal
            prop.setProperty("bootstrap.servers", host);
            prop.setProperty("acks", "1");
            prop.setProperty("buffer.memory", "67108864");
            prop.setProperty("batch.size", "8196");

            var producer = new KafkaProducer(prop, new ByteArraySerializer(), new ByteArraySerializer());
            var stats = new Stats(count, 5000, Console.WriteLine);
            try
            {
                // One payload of 'size' bytes, all 'a', reused for every send
                var payload = new byte[size];
                for (int i = 0; i < size; i++)
                    payload[i] = (byte)'a';

                var record = new ProducerRecord(topic, payload);

                for (int i = 0; i < count; i++)
                {
                    var sendStart = DateTimeExtensions.CurrentTimeMillis();
                    var cb = new StatsCallback { Action = stats.NextCompletion(sendStart, payload.Length, stats) };
                    producer.send(record, cb);
                }
            }
            finally
            {
                // Release the producer even if a send throws
                producer.close();
            }
            stats.PrintTotal();
        }
Example #5
0
        /// <summary>
        /// Stanford CoreNLP demo: annotates a sample text with the full pipeline and
        /// pretty-prints the result to the console.
        /// </summary>
        static void Main()
        {
            // Path to the folder with models extracted from `stanford-corenlp-3.7.0-models.jar`
            var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2016-10-31\models";

            // Text for processing
            var text = "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.";

            // Annotation pipeline configuration
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner,dcoref");
            props.setProperty("ner.useSUTime", "0");

            // We should change current directory, so StanfordCoreNLP could find all the model files automatically
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            StanfordCoreNLP pipeline;
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                // Put the working directory back even when model loading fails
                Directory.SetCurrentDirectory(curDir);
            }

            // Annotation
            var annotation = new Annotation(text);
            pipeline.annotate(annotation);

            // Result - Pretty Print (the using block disposes the stream; no explicit close needed)
            using (var stream = new ByteArrayOutputStream())
            {
                pipeline.prettyPrint(annotation, new PrintWriter(stream));
                Console.WriteLine(stream.toString());
            }
        }
Example #6
0
        /// <summary>
        /// Runs the CoreNLP demo by temporarily switching the current directory to
        /// <c>Config.JarRoot</c> while the pipeline is constructed, then prints the annotation.
        /// </summary>
        public void StanfordCoreNlpDemoThatChangeCurrentDirectory()
        {
            const string Text = "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.";

            // Annotation pipeline configuration
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            props.setProperty("sutime.binders", "0");

            // we should change current directory so StanfordCoreNLP could find all the model files
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(Config.JarRoot);
            edu.stanford.nlp.pipeline.StanfordCoreNLP pipeline;
            try
            {
                pipeline = new edu.stanford.nlp.pipeline.StanfordCoreNLP(props);
            }
            finally
            {
                // A failing model load must not leave the working directory changed
                Directory.SetCurrentDirectory(curDir);
            }

            // Annotation
            var annotation = new Annotation(Text);
            pipeline.annotate(annotation);

            // Result - Pretty Print
            using (var stream = new ByteArrayOutputStream())
            {
                pipeline.prettyPrint(annotation, new PrintWriter(stream));
                Console.WriteLine(stream.toString());
            }

            this.CustomAnnotationPrint(annotation);
        }
        /// <summary>
        /// Minimal demo of calling the Stanford Chinese Word Segmenter programmatically
        /// on a UTF-8 sample file (original demo by Christopher Manning).
        /// </summary>
        static void Main()
        {
            // Folder holding the segmenter models, and the sample input file
            var dataFolder = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-segmenter-2015-12-09\data";
            var inputFile = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-segmenter-2015-12-09\test.simp.utf8";

            // The sample file contains the following text:
            // 面对新世纪,世界各国人民的共同愿望是:继续发展人类以往创造的一切文明成果,克服20世纪困扰着人类的战争和贫
            // 困问题,推进和平与发展的崇高事业,创造一个美好的世界。

            // Outside the distribution home directory the dictionary and normalization
            // locations have to be configured explicitly, which is what happens below.
            var dictionaryFile = string.Concat(dataFolder, @"\dict-chris6.ser.gz");
            var classifierFile = string.Concat(dataFolder, @"\ctb.gz");

            // Segmenter loading properties
            var segmenterProps = new Properties();
            segmenterProps.setProperty("sighanCorporaDict", dataFolder);
            // CTBSegDocumentIteratorFactory reads the next two entries
            segmenterProps.setProperty("serDictionary", dictionaryFile);
            segmenterProps.setProperty("testFile", inputFile);
            segmenterProps.setProperty("inputEncoding", "UTF-8");
            segmenterProps.setProperty("sighanPostProcessing", "true");

            // Load the word segmenter and run it over the sample
            var classifier = new CRFClassifier(segmenterProps);
            classifier.loadClassifierNoExceptions(classifierFile, segmenterProps);
            classifier.classifyAndWriteAnswers(inputFile);
        }
Example #8
0
 /// <summary>
 /// Creates a generator that stores the given properties and writer; the
 /// <c>file</c> field is set to null.
 /// </summary>
 /// <param name="properties">Value stored in the <c>properties</c> field.</param>
 /// <param name="writer">Value stored in the <c>writer</c> field.</param>
 public Generator(Properties properties, PrintWriter writer)
 {
   // Decompiler rendering of the base-constructor call
   base.\u002Ector();
   // Unused alias of 'this' — presumably a decompiler artifact
   Generator generator = this;
   this.properties = properties;
   this.file = (File) null;
   this.writer = writer;
 }
Example #9
0
 /// <summary>
 /// Creates a generator that stores the given properties and file; the
 /// <c>writer</c> field is set to null.
 /// </summary>
 /// <param name="properties">Value stored in the <c>properties</c> field.</param>
 /// <param name="file">Value stored in the <c>file</c> field.</param>
 public Generator(Properties properties, File file)
 {
   // Decompiler rendering of the base-constructor call
   base.\u002Ector();
   // Unused alias of 'this' — presumably a decompiler artifact
   Generator generator = this;
   this.properties = properties;
   this.file = file;
   this.writer = (PrintWriter) null;
 }
        /// <summary>
        /// Builds the property set used to configure the annotation pipeline.
        /// Extend the "annotators" value to enable further annotators.
        /// </summary>
        /// <returns>A new <c>Properties</c> instance holding the pipeline settings.</returns>
        private Properties GetPipelineProperties()
        {
            var pipelineSettings = new Properties();

            // Disable the SUTime component of the NER annotator
            pipelineSettings.setProperty("ner.useSUTime", "0");

            // Annotators to run, in order
            pipelineSettings.setProperty(
                "annotators",
                "tokenize, ssplit, pos, lemma, parse, ner, entitymentions, sentiment");

            return pipelineSettings;
        }
Example #11
0
        /// <summary>
        /// Creates the sentiment pipeline (tokenize, ssplit, parse, sentiment) and stores it
        /// in <c>pipeline</c>.
        /// </summary>
        /// <remarks>
        /// The current directory is switched to <c>jarRoot</c> only while the pipeline is
        /// constructed and is restored afterwards.
        /// </remarks>
        public void InitializeNLP()
        {
            // Capture the caller's directory BEFORE changing it; capturing afterwards would
            // make the later restore a no-op.
            string curDir = Environment.CurrentDirectory;

            // Annotation pipeline configuration
            java.util.Properties props = new java.util.Properties();

            props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
            props.setProperty("ner.useSUTime", "0");

            // We should change current directory, so StanfordCoreNLP could find all the model files automatically
            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                Directory.SetCurrentDirectory(curDir);
            }
        }
        /// <summary>
        /// Builds a CoreNLP pipeline with explicit model paths under <c>modelsDir</c>,
        /// runs it over <c>text</c> and returns the resulting annotation.
        /// </summary>
        /// <returns>The annotation produced for <c>text</c>.</returns>
        private Annotation PrepareAnnotation()
        {
            var settings = new Properties();

            // Which annotators to run, and SUTime switches
            settings.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
            settings.setProperty("sutime.binders", "0");
            settings.setProperty("ner.useSUTime", "0");

            // Explicit model locations, all relative to modelsDir
            var sutimeRuleFiles = modelsDir + "sutime/defs.sutime.txt, " + modelsDir + "sutime/english.sutime.txt";
            settings.put("pos.model", modelsDir + "pos-tagger/english-left3words/english-left3words-distsim.tagger");
            settings.put("ner.model", modelsDir + "ner/english.conll.4class.distsim.crf.ser.gz");
            settings.put("parse.model", modelsDir + "lexparser/englishPCFG.ser.gz");
            settings.put("sutime.rules", sutimeRuleFiles);

            var nlp = new StanfordCoreNLP(settings);
            var result = new Annotation(text);
            nlp.annotate(result);
            return result;
        }
Example #13
0
        /// <summary>
        /// SUTime demo: tokenizes, sentence-splits and POS-tags a sample sentence, extracts
        /// time expressions relative to a fixed document date and prints them.
        /// </summary>
        private static void Main()
        {
            // Folder with the models extracted from `stanford-corenlp-3.6.0-models.jar`
            var jarRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2015-12-09\models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // POS tagger model
            var taggerModel = modelsDirectory +
                              @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger";
            var posTagger = new MaxentTagger(taggerModel);

            // SUTime settings: comma-separated rule files
            var ruleFiles = new[]
            {
                modelsDirectory + @"\sutime\defs.sutime.txt",
                modelsDirectory + @"\sutime\english.holidays.sutime.txt",
                modelsDirectory + @"\sutime\english.sutime.txt"
            };
            var sutimeProps = new Properties();
            sutimeProps.setProperty("sutime.rules", string.Join(",", ruleFiles));
            sutimeProps.setProperty("sutime.binders", "0");

            // Assemble the pipeline: tokenizer -> sentence splitter -> POS tagger -> SUTime
            var nlp = new AnnotationPipeline();
            nlp.addAnnotator(new TokenizerAnnotator(false));
            nlp.addAnnotator(new WordsToSentencesAnnotator(false));
            nlp.addAnnotator(new POSTaggerAnnotator(posTagger));
            nlp.addAnnotator(new TimeAnnotator("sutime", sutimeProps));

            // Sample text for time expression extraction
            var sample = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
            var document = new Annotation(sample);
            document.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
            nlp.annotate(document);

            Console.WriteLine("{0}\n", document.get(new CoreAnnotations.TextAnnotation().getClass()));

            var timexes = document.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
            foreach (CoreMap timex in timexes)
            {
                var timexTokens = timex.get(new CoreAnnotations.TokensAnnotation().getClass()) as List;
                var firstToken = timexTokens.get(0);
                var lastToken = timexTokens.get(timexTokens.size() - 1);
                var expression = timex.get(new TimeExpression.Annotation().getClass()) as TimeExpression;
                Console.WriteLine("{0} [from char offset {1} to {2}] --> {3}",
                                  timex, firstToken, lastToken, expression.getTemporal());
            }
        }
Example #14
0
        /// <summary>
        /// Loads <c>default.properties</c> from the application support path, if the file
        /// exists, and merges its entries into <c>defaults</c>.
        /// </summary>
        /// <remarks>Read failures are logged as warnings and otherwise ignored.</remarks>
        private void ApplyGlobalConfig()
        {
            var config = Path.Combine(PreferencesFactory.get().getProperty("application.support.path"),
                                      "default.properties");

            if (File.Exists(config))
            {
                try
                {
                    var properties = new java.util.Properties();
                    var stream = new FileInputStream(config);
                    try
                    {
                        properties.load(stream);
                    }
                    finally
                    {
                        // Properties.load leaves the stream open; release the file handle here
                        stream.close();
                    }
                    defaults.putAll(properties);
                }
                catch (Exception e)
                {
                    Log.warn($"Failure while reading {config}", e);
                }
            }
        }
        /// <summary>
        /// Creates a lemmatizer backed by a Stanford CoreNLP pipeline
        /// (tokenize, ssplit, pos, lemma, parse, ner) loaded from a fixed local models folder.
        /// </summary>
        public StanfordLemmatizer()
        {
            // Path to the folder with models extracted from `stanford-corenlp-3.6.0-models.jar`
            var jarRoot = @"C:\Work\NLP\Stanford\stanford-corenlp-full-2015-12-09\stanford-corenlp-3.6.0-models";
            _separator = Guid.NewGuid().ToString();

            // Annotation pipeline configuration
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner");
            props.setProperty("ner.useSUTime", "0");

            // We should change current directory, so StanfordCoreNLP could find all the model files automatically
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                _pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                // Restore the working directory even when model loading fails
                Directory.SetCurrentDirectory(curDir);
            }
        }
Example #16
0
        /// <summary>
        /// Loads <c>default.properties</c> from the support directory, if the file exists,
        /// and passes the loaded properties to <c>setDefaults</c>.
        /// </summary>
        /// <remarks>Read failures are logged as warnings and otherwise ignored.</remarks>
        private void ApplyGlobalConfig()
        {
            var config = Path.Combine(SupportDirectoryFinderFactory.get().find().getAbsolute(),
                                      "default.properties");

            if (File.Exists(config))
            {
                try
                {
                    var properties = new java.util.Properties();
                    var stream = new FileInputStream(config);
                    try
                    {
                        properties.load(stream);
                    }
                    finally
                    {
                        // Properties.load leaves the stream open; release the file handle here
                        stream.close();
                    }
                    this.setDefaults(properties);
                }
                catch (Exception e)
                {
                    Log.warn($"Failure while reading {config}", e);
                }
            }
        }
        /// <summary>
        /// Runs a tokenizer / sentence splitter / POS tagger / SUTime pipeline over
        /// <paramref name="text"/>, using <paramref name="currentDate"/> as the document date.
        /// </summary>
        /// <param name="text">Text to annotate.</param>
        /// <param name="currentDate">Reference date, passed to the pipeline as yyyy-MM-dd.</param>
        /// <returns>The annotation produced by the pipeline.</returns>
        private Annotation PrepareAnnotation(string text, DateTime currentDate)
        {
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));
            var tagger = new MaxentTagger(modelsDir + @"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
            pipeline.addAnnotator(new POSTaggerAnnotator(tagger));
            var sutimeRules = modelsDir + @"\sutime\defs.sutime.txt,"
                                       + modelsDir + @"\sutime\english.holidays.sutime.txt,"
                                       + modelsDir + @"\sutime\english.sutime.txt";
            var props = new Properties();
            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            // Format with the invariant culture: under a current culture whose default calendar
            // is not Gregorian, "yyyy" would otherwise produce a different year number.
            var docDate = currentDate.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture);

            var annotation = new Annotation(text);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), docDate);
            pipeline.annotate(annotation);
            return annotation;
        }
        /// <summary>
        /// Creates the sentiment pipeline (tokenize, ssplit, parse, sentiment) and stores it
        /// in <c>pipeline</c>.
        /// </summary>
        /// <remarks>
        /// When <c>useBuiltInSentenceSplitter</c> is false, every input is treated as a single
        /// sentence. The current directory is switched to <c>jarRoot</c> only while the
        /// pipeline is constructed and is restored afterwards.
        /// </remarks>
        public void Initialize()
        {
            var props = new java.util.Properties();

            props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");

            //if we're using an external sentence segmentation strategy, then this is how we're going to do it
            //https://stackoverflow.com/a/28017131
            if (!useBuiltInSentenceSplitter)
            {
                props.put("ssplit.isOneSentence", "true");
            }

            props.setProperty("sutime.binders", "0");

            // The current directory is process-global; remember it so it can be put back
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                Directory.SetCurrentDirectory(curDir);
            }
        }
Example #19
0
        /// <summary>
        /// Kafka consumer throughput benchmark: reads up to <c>count</c> records from
        /// <c>topic</c> and prints the achieved MB/sec and Msg/sec.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var host = "192.168.33.12:9092";
            var topic = "test2";
            var count = 50000000;

            var prop = new Properties();
            prop.put("bootstrap.servers", host);
            prop.put("group.id", "test3");
            prop.put("auto.offset.reset", "earliest");
            prop.put("enable.auto.commit", "true");
            prop.put("auto.commit.interval.ms", "1000");
            prop.put("socket.receive.buffer.bytes", (2*1024*1024).ToString());
            prop.put("fetch.message.max.bytes", (1024*1024).ToString());

            var c = new KafkaConsumer(prop, new ByteArrayDeserializer(), new ByteArrayDeserializer());
            try
            {
                var topics = new ArrayList(1);
                topics.add(topic);
                var time = DateTime.UtcNow;
                c.subscribe(topics);

                // 64-bit counter: tens of millions of records easily exceed int.MaxValue bytes
                long bytes = 0;
                var i = count;
                var recordCount = 0;
                while (i > 0)
                {
                    var r = c.poll(1000);
                    var records = r.records(topic);
                    for (var it = records.iterator(); it.hasNext() && i > 0; i--, recordCount++)
                    {
                        var rec = (ConsumerRecord)it.next();
                        var b = (byte[]) rec.value();
                        bytes += b.Length;
                    }
                    Console.WriteLine(recordCount);
                }
                var mb = bytes / 1024.0 / 1024.0;
                var seconds = (DateTime.UtcNow - time).TotalSeconds;
                Console.WriteLine($"{mb / seconds} MB/sec");
                Console.WriteLine($"{count / seconds} Msg/sec");
            }
            finally
            {
                // Release the consumer even if polling throws
                c.close();
            }
        }
Example #20
0
        //*************************************************/
        // METHODS
        //*************************************************/
        #region Methods

        /// <summary>
        /// Setup tagger including POS.
        /// </summary>
        /// <remarks>
        /// The process-wide current directory is switched to the models folder only while the
        /// pipeline is being constructed, and is restored afterwards even if construction throws.
        /// </remarks>
        private void SetupTagger()
        {
            PerformanceTester.StartMET("NLP");
            // Get path to Stanford NLP models
            var jarRoot = Path.Combine(Utility.GetResourcesFolder(), @"stanford-corenlp-3.9.2-models");

            // Turn off logging
            RedwoodConfiguration.current().clear().apply();
            // Set properties
            var props = new java.util.Properties();

            props.setProperty("annotators", "tokenize, ssplit, pos");

            // Remember the caller's working directory so it can be put back afterwards
            var curDir = Environment.CurrentDirectory;

            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                // Load Stanford NLP
                Tagger = new StanfordCoreNLP(props);
            }
            finally
            {
                // The current directory is process-global state; never leave it pointing at the models
                Directory.SetCurrentDirectory(curDir);
            }

            PerformanceTester.StopMET("NLP");
        }
Example #21
0
 /// <summary>
 /// Joins the non-empty <paramref name="values"/> with <paramref name="separator"/> and
 /// stores them under <paramref name="name"/>, placed in front of any value the property
 /// already has. Does nothing when <paramref name="values"/> is null or empty.
 /// </summary>
 /// <param name="properties">Property set to read from and write to.</param>
 /// <param name="name">Key of the property to update.</param>
 /// <param name="separator">Text placed between joined entries.</param>
 /// <param name="values">Entries to add; null and empty entries are skipped.</param>
 public static void appendProperty(Properties properties, string name, string separator, params string[] values)
 {
   if (values == null || values.Length == 0)
     return;

   string joined = "";
   foreach (string entry in values)
   {
     // Skip null and empty entries
     if (entry == null || String.instancehelper_length(entry) == 0)
       continue;

     // Add a separator only between entries, and only if one is not already there
     bool needsSeparator = String.instancehelper_length(joined) > 0
                           && !String.instancehelper_endsWith(joined, separator);
     if (needsSeparator)
       joined = new StringBuilder().append(joined).append(separator).toString();

     joined = new StringBuilder().append(joined).append(entry).toString();
   }

   // The existing value, if any, goes after the new entries
   string existing = properties.getProperty(name, "");
   if (String.instancehelper_length(existing) > 0)
     joined = new StringBuilder().append(joined).append(separator).toString();

   string combined = new StringBuilder().append(joined).append(existing).toString();
   properties.setProperty(name, combined);
 }
Example #22
0
        /// <summary>
        /// Runs SUTime with its default rule files over a sample sentence, relative to a
        /// fixed document date, and prints every detected time expression.
        /// </summary>
        public void SUTimeDefautTest()
        {
            // POS tagger model
            var taggerModel = Config.GetModel(@"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
            var posTagger = new MaxentTagger(taggerModel);

            // SUTime settings: comma-separated rule files
            var ruleList = Config.GetModel(@"sutime\defs.sutime.txt")
                           + "," + Config.GetModel(@"sutime\english.holidays.sutime.txt")
                           + "," + Config.GetModel(@"sutime\english.sutime.txt");
            var sutimeProps = new Properties();
            sutimeProps.setProperty("sutime.rules", ruleList);
            sutimeProps.setProperty("sutime.binders", "0");

            // Assemble the pipeline: tokenizer -> sentence splitter -> POS tagger -> SUTime
            var nlp = new AnnotationPipeline();
            nlp.addAnnotator(new PTBTokenizerAnnotator(false));
            nlp.addAnnotator(new WordsToSentencesAnnotator(false));
            nlp.addAnnotator(new POSTaggerAnnotator(posTagger));
            nlp.addAnnotator(new TimeAnnotator("sutime", sutimeProps));

            const string sample = "Three interesting dates are 18 Feb 1997, the 20th of july and 4 days from today.";
            var document = new Annotation(sample);
            document.set(new CoreAnnotations.DocDateAnnotation().getClass(), "2013-07-14");
            nlp.annotate(document);

            Console.WriteLine(document.get(new CoreAnnotations.TextAnnotation().getClass())+"\n");

            var timexes = (ArrayList)document.get(new TimeAnnotations.TimexAnnotations().getClass());
            foreach (CoreMap timex in timexes)
            {
                var timexTokens = (java.util.List)timex.get(new CoreAnnotations.TokensAnnotation().getClass());
                var firstToken = timexTokens.get(0);
                var lastToken = timexTokens.get(timexTokens.size() - 1);
                var expression = (TimeExpression)timex.get(new TimeExpression.Annotation().getClass());
                Console.WriteLine("{0} [from char offset '{1}' to '{2}'] --> {3}",
                    timex, firstToken, lastToken, expression.getTemporal());
            }
        }
        /// <summary>
        /// Loads <c>default.properties</c> from the support directory, if the file exists,
        /// and calls <c>setDefault</c> for every key it contains.
        /// </summary>
        /// <remarks>Read failures are logged as warnings and otherwise ignored.</remarks>
        private void ApplyGlobalConfig()
        {
            var config = Path.Combine(SupportDirectoryFinderFactory.get().find().getAbsolute(),
                                      "default.properties");

            if (File.Exists(config))
            {
                try
                {
                    var properties = new java.util.Properties();
                    var stream = new FileInputStream(config);
                    try
                    {
                        properties.load(stream);
                    }
                    finally
                    {
                        // Properties.load leaves the stream open; release the file handle here
                        stream.close();
                    }
                    foreach (var key in Utils.ConvertFromJavaList <String>(properties.keySet()))
                    {
                        setDefault(key, properties.getProperty(key));
                    }
                }
                catch (Exception e)
                {
                    Log.warn($"Failure while reading {config}", e);
                }
            }
        }
Example #24
0
        /// <summary>
        /// Command-line entry point: segments the file named by the single argument with
        /// the Stanford Chinese Word Segmenter.
        /// </summary>
        /// <param name="args">Exactly one element: the path of the file to segment.</param>
        static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.WriteLine("usage: StanfordSegmenter.Csharp.Samples.exe filename");
                return;
            }

            // Folder holding the segmenter models
            const string DataFolder = @"..\..\..\..\temp\stanford-segmenter-2013-06-20\data";
            var inputFile = args[0];

            var segmenterProps = new Properties();
            segmenterProps.setProperty("sighanCorporaDict", DataFolder);
            // Optional normalization settings ("NormalizationTable" -> data\norm.simp.utf8,
            // "normTableEncoding" -> UTF-8) are intentionally not set.
            // CTBSegDocumentIteratorFactory reads the next two entries
            segmenterProps.setProperty("serDictionary", DataFolder + @"\dict-chris6.ser.gz");
            segmenterProps.setProperty("testFile", inputFile);
            segmenterProps.setProperty("inputEncoding", "UTF-8");
            segmenterProps.setProperty("sighanPostProcessing", "true");

            var classifier = new CRFClassifier(segmenterProps);
            classifier.loadClassifierNoExceptions(DataFolder + @"\ctb.gz", segmenterProps);
            classifier.classifyAndWriteAnswers(inputFile);
        }
Example #25
0
 /// <summary>
 /// Loads server properties from <paramref name="file"/>. When the file is missing or
 /// cannot be read, a warning is logged and <c>generateNewProperties</c> is called.
 /// </summary>
 /// <param name="file">Properties file backing this manager.</param>
 public PropertyManager(File file)
 {
     serverProperties = new Properties();
     serverPropertiesFile = file;
     if (file.exists())
     {
         try
         {
             FileInputStream input = new FileInputStream(file);
             try
             {
                 serverProperties.load(input);
             }
             finally
             {
                 // Properties.load leaves the stream open; release the file handle here
                 input.close();
             }
         }
         catch (Exception exception)
         {
             logger.log(Level.WARNING, (new StringBuilder()).append("Failed to load ").append(file).toString(),
                        exception);
             generateNewProperties();
         }
     }
     else
     {
         logger.log(Level.WARNING, (new StringBuilder()).append(file).append(" does not exist").toString());
         generateNewProperties();
     }
 }
Example #26
0
 /// <summary>
 /// Builds the CoreNLP pipeline and stores it in <c>pipeline</c>. The current directory is
 /// switched to the mapped model location while loading and always restored afterwards.
 /// </summary>
 /// <param name="modelLocation">
 /// Optional model location; when non-empty it replaces the stored <c>_modelLocation</c>.
 /// </param>
 public static void Start(string modelLocation = null)
 {
     var originalDirectory = Environment.CurrentDirectory;

     bool hasOverride = !string.IsNullOrEmpty(modelLocation);
     if (hasOverride)
     {
         _modelLocation = modelLocation;
     }

     try
     {
         // Pipeline settings
         var settings = new Properties();
         settings.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
         settings.setProperty("sutime.binders", "false");
         settings.setProperty("ner.useSUTime", "false");

         // StanfordCoreNLP finds its model files relative to the current directory
         var modelDirectory = HostingEnvironment.MapPath(ModelLocation);
         Directory.SetCurrentDirectory(modelDirectory);
         pipeline = new StanfordCoreNLP(settings);
     }
     finally
     {
         Directory.SetCurrentDirectory(originalDirectory);
     }
 }
        /// <summary>
        /// Sends a plain-text mail through JavaMail. The send itself is synchronous; the
        /// returned task is already completed when this method returns.
        /// </summary>
        /// <param name="from">Sender address, also used as the sender's display name.</param>
        /// <param name="recipients">Recipient address, also used as the recipient's display name.</param>
        /// <param name="subject">Mail subject.</param>
        /// <param name="body">Plain-text mail body.</param>
        /// <returns>A completed task. Failures are thrown synchronously from this method.</returns>
        public Task SendMailAsync(string from, string recipients, string subject, string body)
        {
            // http://stackoverflow.com/questions/6606529/package-javax-mail-and-javax-mail-internet-do-not-exist
            // partial build of Java and Java.AppEngine are missing it?

            var props = new Properties();
            var session = javax.mail.Session.getDefaultInstance(props, null);
            // https://developers.google.com/appengine/docs/java/mail/usingjavamail

            // email
            var msg = new javax.mail.internet.MimeMessage(session);
            msg.setFrom(new javax.mail.internet.InternetAddress(from, from));
            msg.addRecipient(javax.mail.Message.RecipientType.TO,
                             new javax.mail.internet.InternetAddress(recipients, recipients));
            msg.setSubject(subject);
            msg.setText(body);

            javax.mail.Transport.send(msg);

            // Complete the task before returning it: a TaskCompletionSource that is never
            // completed would make every awaiting caller wait forever.
            var x = new TaskCompletionSource<object>();
            x.SetResult(null);
            return x.Task;
        }
        /// <summary>
        /// Train a Stanford NER model from a configuration file.
        /// </summary>
        /// <param name="prop">Path of the properties file, read as UTF-8.</param>
        /// <returns>
        /// <c>true</c> when training (and serialization, if a target is configured) completes;
        /// <c>false</c> when any exception occurs. The exception is written to the console.
        /// </returns>
        public bool Train(string prop)
        {
            try
            {
                java.util.Properties props = new java.util.Properties();
                InputStream st = new BufferedInputStream(new FileInputStream(prop));
                try
                {
                    InputStreamReader reader = new InputStreamReader(st, "utf-8");
                    props.load(reader);
                }
                finally
                {
                    // Properties.load leaves the stream open; closing the buffered stream
                    // also closes the underlying file stream
                    st.close();
                }

                _crfModel = new CRFClassifier(props);

                _crfModel.train();
                String serializeTo = _crfModel.flags.serializeTo;
                if (serializeTo != null)
                {
                    _crfModel.serializeClassifier(serializeTo);
                }

                return true;
            }
            catch (Exception e)
            {
                System.Console.WriteLine("Unable to train the Standford CRF model" + e.ToString());
                return false;
            }
        }
 /// <summary>
 /// Reads properties from <paramref name="in"/> and merges them into the table returned
 /// by <c>getConfiguration()</c>. An IOException is not propagated; it is logged as the
 /// warning "Unable to read configuration".
 /// </summary>
 /// <param name="in">Stream to read from; a null value raises NullPointerException.</param>
 public virtual void load(InputStream @in)
 {
   if (@in == null)
   {
     // Decompiler-generated helper call that precedes the throw
     Throwable.__\u003CsuppressFillInStackTrace\u003E();
     throw new NullPointerException();
   }
   else
   {
     IOException ioException;
     try
     {
       // Load into a temporary Properties first, then copy everything into the configuration
       BufferedInputStream bufferedInputStream = new BufferedInputStream(@in);
       Properties properties = new Properties();
       properties.load((InputStream) bufferedInputStream);
       ((Hashtable) this.getConfiguration()).putAll((Map) properties);
       // NOTE(review): close() is only reached on success; if load or putAll throws, the stream is not closed here
       bufferedInputStream.close();
       return;
     }
     catch (IOException ex)
     {
       // Decompiled exception mapping; the mapped exception is logged after the try block
       int num = 1;
       ioException = (IOException) ByteCodeHelper.MapException<IOException>((Exception) ex, (ByteCodeHelper.MapFlags) num);
     }
     Log.warn((object) "Unable to read configuration", (Exception) ioException);
   }
 }
Example #30
0
        /// <summary>
        /// Background-worker body: runs the Stanford CoreNLP sentiment pipeline over every
        /// <c>*.txt</c> file in the selected folder and writes two CSV files, one row per
        /// document and one row per sentence (the latter with a "_Sentences" suffix).
        /// </summary>
        /// <param name="sender">Source of the event (unused).</param>
        /// <param name="e">Event data; <c>e.Argument</c> is cast to <c>DictionaryData</c> and
        /// supplies the input folder and the output file location.</param>
        private void BgWorkerClean_DoWork(object sender, DoWorkEventArgs e)
        {
            DictionaryData DictData = (DictionaryData)e.Argument;

            //report what we're working on
            FilenameLabel.Invoke((MethodInvoker) delegate
            {
                FilenameLabel.Text = "Loading CoreNLP models... please wait...";
            });

            //largely taken from here: https://github.com/sergey-tihon/Stanford.NLP.NET/issues/39
            var jarRoot = @"stanford-corenlp-full-2018-02-27\";
            var props   = new java.util.Properties();

            props.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
            props.setProperty("sutime.binders", "0");

            // CoreNLP resolves its model files relative to the current directory, so switch
            // to the models folder only while the pipeline is built and always switch back;
            // otherwise the process-wide working directory stays changed for everything
            // that runs afterwards.
            StanfordCoreNLP pipeline;
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(Path.Combine(Path.GetDirectoryName(AppDomain.CurrentDomain.BaseDirectory), jarRoot));
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                Directory.SetCurrentDirectory(curDir);
            }

            //selects the text encoding based on user selection
            Encoding SelectedEncoding = null;

            this.Invoke((MethodInvoker) delegate()
            {
                SelectedEncoding = Encoding.GetEncoding(EncodingDropdown.SelectedItem.ToString());
            });

            //get the list of files
            var SearchDepth = SearchOption.TopDirectoryOnly;

            if (ScanSubfolderCheckbox.Checked)
            {
                SearchDepth = SearchOption.AllDirectories;
            }
            var files = Directory.EnumerateFiles(DictData.TextFileFolder, "*.txt", SearchDepth);

            // Numbers go into a comma-separated file, so they must never be formatted with
            // a culture that uses ',' as the decimal separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            //open up the document-level and sentence-level output files
            using (StreamWriter outputFile = new StreamWriter(new FileStream(DictData.OutputFileLocation, FileMode.Create), SelectedEncoding))
            using (StreamWriter outputFileSentences = new StreamWriter(new FileStream(AddSuffix(DictData.OutputFileLocation, "_Sentences"), FileMode.Create), SelectedEncoding))
            {
                //write the header rows
                outputFile.WriteLine("\"Filename\",\"Sentences\",\"Classification\",\"Classification_M\",\"Classification_SD\"");
                outputFileSentences.WriteLine("\"Filename\",\"SentNumber\",\"SentenceText\",\"Classification\",\"Class_Prob\",\"Class_Number\"");

                foreach (string fileName in files)
                {
                    string Filename_Clean = Path.GetFileName(fileName);

                    //report what we're working on
                    FilenameLabel.Invoke((MethodInvoker) delegate
                    {
                        FilenameLabel.Text = "Analyzing: " + Filename_Clean;
                    });

                    //read in the text file
                    string InputText = System.IO.File.ReadAllText(fileName, SelectedEncoding).Trim();

                    // ---- analyze text ----
                    var annotation = new edu.stanford.nlp.pipeline.Annotation(InputText);
                    pipeline.annotate(annotation);

                    List <double> SentimentValues = new List <double>();

                    var sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as ArrayList;

                    int SentenceCount = 0;

                    // A missing sentence list is treated as "no sentences" instead of crashing.
                    if (sentences != null)
                    {
                        foreach (CoreMap sentence in sentences)
                        {
                            SentenceCount++;
                            Tree tree = sentence.get(new SentimentCoreAnnotations.SentimentAnnotatedTree().getClass()) as Tree;

                            // Query the predicted class once and reuse it below.
                            int PredictedClass = RNNCoreAnnotations.getPredictedClass(tree);

                            //add this sentence to our overall list of sentiment scores
                            SentimentValues.Add(PredictedClass);

                            // ---- write sentence-level output ----
                            string[] OutputString_SentenceLevel = new string[6];

                            OutputString_SentenceLevel[0] = "\"" + Filename_Clean + "\"";
                            OutputString_SentenceLevel[1] = SentenceCount.ToString();
                            OutputString_SentenceLevel[2] = "\"" + sentence.ToString().Replace("\"", "\"\"") + "\"";
                            OutputString_SentenceLevel[3] = GetClassification((double)PredictedClass);
                            OutputString_SentenceLevel[4] = Convert.ToString(RNNCoreAnnotations.getPredictedClassProb(tree.label()), invariant);
                            OutputString_SentenceLevel[5] = PredictedClass.ToString();

                            outputFileSentences.WriteLine(String.Join(",", OutputString_SentenceLevel));
                        }
                    }

                    // ---- write file-level output ----
                    string[] OutputString = new string[5];
                    OutputString[0] = "\"" + Filename_Clean + "\"";
                    OutputString[1] = SentenceCount.ToString();

                    if (SentimentValues.Count > 0)
                    {
                        double Mean = SentimentValues.Average();
                        OutputString[2] = GetClassification(Mean);
                        OutputString[3] = Convert.ToString(Mean, invariant);
                        OutputString[4] = Convert.ToString(StandardDeviation(SentimentValues), invariant);
                    }
                    else
                    {
                        // Average() throws on an empty sequence; an empty or whitespace-only
                        // file still gets a row, with the statistics left blank.
                        OutputString[2] = "";
                        OutputString[3] = "";
                        OutputString[4] = "";
                    }

                    outputFile.WriteLine(String.Join(",", OutputString));
                }
            }
        }
 /// <summary>
 /// Creates a configuration backed by a fresh, empty <c>Properties</c> instance.
 /// </summary>
 public HierarchicalConfiguration()
   : base()
 {
   // The decompiler emitted the base-constructor call as a statement ("base..ctor()"),
   // which is not valid C#; it is expressed as a constructor initializer instead.
   this.configuration = new Properties();
 }
        /// <summary>
        /// Runs SUTime over <paramref name="text"/> and classifies every time expression found,
        /// storing the outcome in the <c>typeTime</c> / <c>valueTime</c> fields
        /// ("tPeriod", "tTrigger" or "tStamp") and echoing each value to the console.
        /// The reference date is fixed to 2015-10-10.
        /// </summary>
        /// <param name="text">Sentence to analyse; it is also stored in <c>sentenceInput</c>.</param>
        public void extractTime(string text)
        {
            sentenceInput = text;
            string presentDate = "2015-10-10";
            var jarRoot = Environment.CurrentDirectory + @"\stanford-corenlp-3.5.2-models";
            var modelsDirectory = jarRoot + @"\edu\stanford\nlp\models";

            // Annotation pipeline configuration
            var pipeline = new AnnotationPipeline();
            pipeline.addAnnotator(new TokenizerAnnotator(false));
            pipeline.addAnnotator(new WordsToSentencesAnnotator(false));

            // SUTime configuration
            var sutimeRules = modelsDirectory + @"\sutime\defs.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.holidays.sutime.txt,"
                              + modelsDirectory + @"\sutime\english.sutime.txt";

            var props = new Properties();

            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            props.setProperty("sutime.markTimeRanges", "true");
            props.setProperty("sutime.includeRange", "true");

            pipeline.addAnnotator(new TimeAnnotator("sutime", props));

            var annotation = new Annotation(text);
            annotation.set(new CoreAnnotations.DocDateAnnotation().getClass(), presentDate);
            pipeline.annotate(annotation);

            var timexAnnsAll = annotation.get(new TimeAnnotations.TimexAnnotations().getClass()) as ArrayList;
            if (timexAnnsAll == null)
            {
                // No time-expression annotation was produced; nothing to classify.
                return;
            }

            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            DateTime current = DateTime.ParseExact(presentDate, "yyyy-MM-dd", invariant);

            // Loading the tagger model is expensive, so it is created at most once per call
            // and only when a TIME/DATE expression actually needs it.
            MaxentTagger tagger = null;

            foreach (CoreMap cm in timexAnnsAll)
            {
                var time = cm.get(new TimeExpression.Annotation().getClass()) as TimeExpression;

                string typeTimex = time.getTemporal().getTimexType().toString();

                // Ordinal comparison: ToLower() is culture-sensitive and would turn "TIME"
                // into "tıme" under a Turkish culture, silently skipping the branch.
                if (string.Equals(typeTimex, "duration", StringComparison.OrdinalIgnoreCase))
                {
                    typeTime = "tPeriod";
                    valueTime = time.getTemporal().toISOString();
                    Console.WriteLine(valueTime);
                }

                if (string.Equals(typeTimex, "time", StringComparison.OrdinalIgnoreCase)
                    || string.Equals(typeTimex, "date", StringComparison.OrdinalIgnoreCase))
                {
                    // First word of the time expression; used to locate it in the sentence.
                    string mainword = time.getText().ToString().Split(' ')[0];

                    if (tagger == null)
                    {
                        tagger = new MaxentTagger(modelsDirectory + @"\pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger");
                    }

                    var sentences = MaxentTagger.tokenizeText(new StringReader(text));
                    var first = sentences.get(0) as ArrayList;
                    int size = first.size();

                    // Position of the last token equal to mainword, or -1 when absent.
                    int index = -1;
                    for (int i = 0; i < size; i++)
                    {
                        if (first.get(i).ToString() == mainword)
                        {
                            index = i;
                        }
                    }

                    var taggedSentence = tagger.tagSentence(first);

                    // The word/TAG token preceding the expression decides the category. When the
                    // expression opens the sentence or was not found there is no preceding
                    // token; an empty marker then falls through to the plain "tStamp" case
                    // instead of indexing the list with a negative position.
                    string checker = index >= 1 ? taggedSentence.get(index - 1).ToString() : string.Empty;

                    if (string.Equals(checker, "after/in", StringComparison.OrdinalIgnoreCase)
                        || string.Equals(checker, "since/in", StringComparison.OrdinalIgnoreCase))
                    {
                        typeTime = "tTrigger";
                        valueTime = "Start : " + time.getTemporal().toISOString();
                        Console.WriteLine(valueTime);
                    }
                    else if (string.Equals(checker, "before/in", StringComparison.OrdinalIgnoreCase))
                    {
                        if (typeTimex == "TIME")
                        {
                            typeTime = "tTrigger";
                            valueTime = "End : " + time.getTemporal().toISOString();
                            Console.WriteLine(valueTime);
                        }
                        else
                        {
                            // Only "yyyy" and "yyyy-MM" values are resolved to the first day of
                            // the period; anything else keeps the default DateTime, as before.
                            // NOTE(review): full "yyyy-MM-dd" dates therefore always take the
                            // "End :" branch below; confirm whether that is intended.
                            DateTime result = new DateTime();
                            string[] partsOfDate = time.getTemporal().toISOString().Split('-');

                            if (partsOfDate.Length == 1 || partsOfDate.Length == 2)
                            {
                                // Build the date from numbers rather than parsing "01-MM-yyyy",
                                // whose day/month order depends on the current culture.
                                int year;
                                int month = 1;
                                bool valid = int.TryParse(partsOfDate[0], System.Globalization.NumberStyles.None, invariant, out year)
                                             && year >= 1 && year <= 9999;

                                if (valid && partsOfDate.Length == 2)
                                {
                                    valid = int.TryParse(partsOfDate[1], System.Globalization.NumberStyles.None, invariant, out month)
                                            && month >= 1 && month <= 12;
                                }

                                if (valid)
                                {
                                    result = new DateTime(year, month, 1);
                                }
                            }

                            typeTime = "tTrigger";
                            if (DateTime.Compare(current, result) < 0)
                            {
                                valueTime = "Start now (" + presentDate + ") End :" + time.getTemporal().toString();
                            }
                            else
                            {
                                valueTime = "End : " + time.getTemporal().toString();
                            }
                            Console.WriteLine(valueTime);
                        }
                    }
                    else
                    {
                        typeTime = "tStamp";
                        valueTime = time.getTemporal().toISOString();
                        Console.WriteLine(valueTime);
                    }
                }
            }
        }
Example #33
0
        /// <summary>
        /// Demo: builds a CoreNLP pipeline with every model path set explicitly through
        /// <c>Config.GetModel</c>, annotates a sample sentence, pretty-prints the result to the
        /// console and then hands the annotation to <c>CustomAnnotationPrint</c>.
        /// </summary>
        public void StanfordCoreNlpDemoManualConfiguration()
        {
            Console.WriteLine(Environment.CurrentDirectory);
            const string sampleText = "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.";

            // Property name -> model path relative to the models folder, in the order applied.
            var modelSettings = new[]
            {
                new[] { "pos.model", @"pos-tagger\english-bidirectional\english-bidirectional-distsim.tagger" },
                new[] { "ner.model", @"ner\english.all.3class.distsim.crf.ser.gz" },
                new[] { "parse.model", @"lexparser\englishPCFG.ser.gz" },
                new[] { "dcoref.demonym", @"dcoref\demonyms.txt" },
                new[] { "dcoref.states", @"dcoref\state-abbreviations.txt" },
                new[] { "dcoref.animate", @"dcoref\animate.unigrams.txt" },
                new[] { "dcoref.inanimate", @"dcoref\inanimate.unigrams.txt" },
                new[] { "dcoref.male", @"dcoref\male.unigrams.txt" },
                new[] { "dcoref.neutral", @"dcoref\neutral.unigrams.txt" },
                new[] { "dcoref.female", @"dcoref\female.unigrams.txt" },
                new[] { "dcoref.plural", @"dcoref\plural.unigrams.txt" },
                new[] { "dcoref.singular", @"dcoref\singular.unigrams.txt" },
                new[] { "dcoref.countries", @"dcoref\countries" },
                new[] { "dcoref.extra.gender", @"dcoref\namegender.combine.txt" },
                new[] { "dcoref.states.provinces", @"dcoref\statesandprovinces" },
                new[] { "dcoref.singleton.predictor", @"dcoref\singleton.predictor.ser" },
                new[] { "dcoref.big.gender.number", @"dcoref\gender.data.gz" }
            };

            // Annotation pipeline configuration
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            foreach (var setting in modelSettings)
            {
                props.setProperty(setting[0], Config.GetModel(setting[1]));
            }

            // SUTime rule files are passed as one comma-separated list.
            string defsRules = Config.GetModel(@"sutime\defs.sutime.txt");
            string holidayRules = Config.GetModel(@"sutime\english.holidays.sutime.txt");
            string englishRules = Config.GetModel(@"sutime\english.sutime.txt");
            props.setProperty("sutime.rules", String.Join(",", new[] { defsRules, holidayRules, englishRules }));
            props.setProperty("sutime.binders", "0");

            var pipeline = new edu.stanford.nlp.pipeline.StanfordCoreNLP(props);

            // Annotation
            var annotation = new Annotation(sampleText);
            pipeline.annotate(annotation);

            // Result - pretty print through an in-memory stream
            using (var stream = new ByteArrayOutputStream())
            {
                var writer = new PrintWriter(stream);
                pipeline.prettyPrint(annotation, writer);
                Console.WriteLine(stream.toString());
            }

            this.CustomAnnotationPrint(annotation);
        }
Example #34
0
		/// <summary>
		/// Creates the AppDomain that hosts a servlet-backed web application and seeds it with
		/// the paths and settings read from the servlet configuration.
		/// </summary>
		/// <param name="config">Servlet configuration supplying the name, context and init parameters.</param>
		/// <returns>The newly created, configured AppDomain.</returns>
		private static AppDomain createServletDomain(ServletConfig config)
		{
			string rootPath = J2EEUtils.GetApplicationRealPath(config.getServletContext ());

			// Fall back to a generic name when the servlet has none.
			string name = config.getServletName() ?? "GH Application";

			AppDomainSetup domainSetup = new AppDomainSetup
			{
				ApplicationName = name,
				ConfigurationFile = Path.Combine (rootPath, "Web.config"),
				PrivateBinPath = Path.Combine (rootPath, "WEB-INF/lib")
			};

			AppDomain servletDomain = AppDomain.CreateDomain(name, null, domainSetup);

			servletDomain.SetData(IAppDomainConfig.APP_PHYS_DIR, J2EEUtils.GetApplicationPhysicalPath(config.getServletContext ()));
			servletDomain.SetData(IAppDomainConfig.WEB_APP_DIR, rootPath);
			servletDomain.SetData(IAppDomainConfig.SERVLET_CONFIG, config);

			//Set DataDirectory substitution string (http://blogs.msdn.com/dataaccess/archive/2005/10/28/486273.aspx)
			string dataDirectory = config.getServletContext ().getInitParameter ("DataDirectory") ?? "App_Data";

			if (!Path.IsPathRooted (dataDirectory)) {
				// A relative data directory is anchored at "root.folder" from appData.properties
				// when that resource exists, otherwise at the context's real path.
				string root;
				java.io.InputStream inputStream = config.getServletContext ().getResourceAsStream ("/WEB-INF/classes/appData.properties");
				if (inputStream == null) {
					root = config.getServletContext ().getRealPath ("/");
				}
				else {
					try {
						Properties props = new Properties ();
						props.load (inputStream);
						root = props.getProperty ("root.folder");
					}
					finally {
						inputStream.close ();
					}
				}

				dataDirectory = Path.Combine (root ?? String.Empty, dataDirectory);
			}

			// Make sure the directory string ends with a separator.
			char lastChar = dataDirectory [dataDirectory.Length - 1];
			if (lastChar != Path.DirectorySeparatorChar) {
				dataDirectory += Path.DirectorySeparatorChar;
			}

			servletDomain.SetData ("DataDirectory", dataDirectory);

			// Record a start time when the context has no real path.
			if (config.getServletContext ().getRealPath ("/") == null) {
				servletDomain.SetData(".appStartTime", DateTime.UtcNow);
			}

			// The BaseDir is the full path to the physical dir of the app
			// and allows the application to modify files in the case of
			// open deployment.
			string webApp_baseDir = config.getServletContext().getRealPath("");
			if (string.IsNullOrEmpty (webApp_baseDir)) {
				webApp_baseDir = rootPath;
			}
			servletDomain.SetData(IAppDomainConfig.APP_BASE_DIR , webApp_baseDir);
			Debug.WriteLine("Initialization of webapp " + webApp_baseDir);

			return servletDomain;
		}
        /// <summary>
        /// Lazily initializes the shared CoreNLP pipeline and the list of relevant
        /// part-of-speech tags. Does nothing when the pipeline already exists.
        /// </summary>
        /// <remarks>
        /// Reads the "NlpModulePath" (models folder) and "NlpFos" (comma-separated tags)
        /// application settings.
        /// </remarks>
        private void Initialize()
        {
            if (pipeline != null)
            {
                return;
            }

            var jarRoot = ConfigurationManager.AppSettings["NlpModulePath"];

            // Annotation pipeline configuration
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
            props.setProperty("sutime.binders", "0");
            props.setProperty("ner.useSUTime", "false");

            // Populate the tag list before the pipeline is published: the null check above
            // uses the pipeline as the "initialized" marker, so setting it first would leave
            // a permanently half-initialized state if reading the tag setting failed.
            NlpProvider.relevantPos = ConfigurationManager.AppSettings["NlpFos"].Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries).ToList();

            // We should change current directory, so StanfordCoreNLP could find all the model
            // files automatically. The previous directory is restored even when model loading
            // throws, because the working directory is process-wide state.
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                NlpProvider.pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                Directory.SetCurrentDirectory(curDir);
            }
        }
 /// <summary>
 /// Returns the runner preferences, creating them on first use with the defaults
 /// "loading" = "true" and "filterstack" = "true" and then calling <c>readPreferences()</c>.
 /// </summary>
 protected internal static Properties getPreferences()
 {
   // Already initialised: hand back the cached instance.
   if (BaseTestRunner.fPreferences != null)
   {
     return BaseTestRunner.fPreferences;
   }

   BaseTestRunner.fPreferences = new Properties();
   ((Hashtable) BaseTestRunner.fPreferences).put((object) "loading", (object) "true");
   ((Hashtable) BaseTestRunner.fPreferences).put((object) "filterstack", (object) "true");
   BaseTestRunner.readPreferences();

   // Re-read the field rather than a local copy, in case readPreferences() replaced it.
   return BaseTestRunner.fPreferences;
 }
 /// <summary>
 /// Replaces the cached runner preferences with <paramref name="preferences"/>.
 /// </summary>
 protected internal static void setPreferences(Properties preferences)
 {
   fPreferences = preferences;
 }
Example #38
0
 /// <summary>
 /// Stub: ignores both arguments and always returns null.
 /// </summary>
 /// <param name="arg0">Session properties (unused).</param>
 /// <param name="arg1">Authenticator (unused).</param>
 /// <returns>Always null.</returns>
 public static Session getDefaultInstance(Properties arg0, Authenticator arg1)
 {
     return null;
 }
        /// <summary>
        /// Analyses the text with CoreNLP, writes the result to the relay XML file and returns
        /// the keys that <c>ner</c> extracts from that file.
        /// </summary>
        /// <param name="sentence">Text to analyse.</param>
        /// <returns>The keys produced by <c>ner</c>, in the order it yields them.</returns>
        private List<string> nlp(string sentence)
        {
            List<string> return_key = new List<string>();
            string Relay_file = ".\\xml";
            string Relay_name = "Relay.xml";
            string Relay_path = Relay_file+ "\\" + Relay_name;

            // Path to the folder with models extracted from `stanford-corenlp-3.5.2-models.jar`
            var jarRoot = @"stanford-corenlp-3.5.2-models\";

            // Annotation pipeline configuration
            var props = new java.util.Properties();
            props.setProperty("ner.useSUTime", "false");
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            props.setProperty("sutime.binders", "0");

            // We should change current directory, so StanfordCoreNLP could find all the model
            // files automatically. The previous directory is restored even when model loading
            // throws, because the working directory is process-wide state.
            StanfordCoreNLP pipeline;
            var curDir = Environment.CurrentDirectory;
            System.IO.Directory.SetCurrentDirectory(jarRoot);
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                System.IO.Directory.SetCurrentDirectory(curDir);
            }

            // Annotation
            var annotation = new Annotation(sentence);
            pipeline.annotate(annotation);

            // Write the analysis result to Relay.xml; close the stream even if printing fails
            // so the file handle is not leaked.
            FileOutputStream os = new FileOutputStream(new File(Relay_file, Relay_name));
            try
            {
                pipeline.xmlPrint(annotation, os);
            }
            finally
            {
                os.close();
            }

            // Let ner assemble the individual words into meaningful keys
            foreach(string k in ner(Relay_path))
            {
                return_key.Add(k);
            }

            return return_key;
        }
Example #40
0
        /// <summary>
        /// Parses a text file into a word index.
        /// The file is tokenized twice: once line by line to learn each token's line number
        /// and column, and once as a whole (after the metadata end marker) to obtain sentences
        /// and lemmas. The two passes are joined by a running token counter.
        /// </summary>
        /// <param name="filename">File name, appended to <c>LUAttributes.filesPathFromRoot</c>.</param>
        /// <returns>
        /// A list holding the document metadata at index 0 and the "wordIndex" DataTable at
        /// index 1, or null when <c>getMetadata</c> returns null.
        /// </returns>
        public static System.Collections.ArrayList parse(string filename)
        {
            string fileLocation = LUAttributes.filesPathFromRoot + filename;

            //get metadata
            Document document = getMetadata(fileLocation);

            if (document == null)
            {
                return(null);
            }

            // Path to the folder with models extracted from `stanford-corenlp-3.9.2-models.jar`
            var jarRoot = Path.Combine(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..\\..\\"), @"Code\\stanford");

            var props = new java.util.Properties();

            props.setProperty("annotators", "tokenize, ssplit,pos, lemma");
            props.setProperty("ssplit.newlineIsSentenceBreak", "two");
            props.setProperty("tokenize.keepeol", "true");

            // We should change current directory, so StanfordCoreNLP could find all the model files automatically
            // NOTE(review): the directory is not restored if the constructor throws.
            var curDir = Environment.CurrentDirectory;

            Directory.SetCurrentDirectory(jarRoot);
            var pipeline = new StanfordCoreNLP(props);

            Directory.SetCurrentDirectory(curDir);

            // assign lines and columns
            // First pass: annotate every line on its own and record, per token, the line
            // number and the column (offset from the first token of that line).

            string[]   lines         = readAllLinesSkipMD(fileLocation);
            List <int> linenumbers   = new List <int>();
            List <int> columnnumbers = new List <int>();

            // Filled below but not read again within this method.
            List <CoreMap> blankSentencesList = new List <CoreMap>();

            java.util.ArrayList lineTokens;
            CoreLabel           firstTokenInLine;
            int firstTokenInLinePos = 0;

            int columnInLine = 0;
            int lineNumber   = 0;

            foreach (string line in lines)
            {
                Annotation annotation2 = new Annotation(line);
                pipeline.annotate(annotation2);
                lineTokens = (java.util.ArrayList)annotation2.get(typeof(CoreAnnotations.TokensAnnotation));

                if (line.Trim() != "")
                {
                    // assumes a non-blank line always yields at least one token - TODO confirm
                    firstTokenInLine    = (CoreLabel)lineTokens.get(0);
                    firstTokenInLinePos = firstTokenInLine.beginPosition();
                }
                else
                {
                    firstTokenInLinePos = 0;
                }
                foreach (CoreLabel token2 in lineTokens)
                {
                    columnInLine = token2.beginPosition() - firstTokenInLinePos;
                    token2.set(typeof(CoreAnnotations.LineNumberAnnotation), lineNumber);
                    blankSentencesList.Add(token2);
                    linenumbers.Add(lineNumber);
                    columnnumbers.Add(columnInLine);
                }
                lineNumber += 1;
            }


            // skip metadata
            // Second pass: annotate everything after the metadata end marker as one text.
            // NOTE(review): if the marker is absent IndexOf returns -1 and the Substring start
            // becomes mdEnd.Length + 1, silently dropping leading text - confirm callers
            // guarantee the marker is present.
            string mdEnd = LUAttributes.metadataEnd;
            string txt   = File.ReadAllText(Path.Combine(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..\\..\\"), fileLocation));

            txt = txt.Substring(txt.IndexOf(mdEnd) + mdEnd.Length + 2);
            var annotation = new Annotation(txt);

            pipeline.annotate(annotation);

            //build datatable
            DataTable wordIndex = new DataTable("wordIndex");

            wordIndex.Columns.Add(new DataColumn("value", typeof(string)));
            wordIndex.Columns.Add(new DataColumn("length", typeof(int)));
            wordIndex.Columns.Add(new DataColumn("isSymbol", typeof(int)));
            wordIndex.Columns.Add(new DataColumn("lemma", typeof(string)));
            wordIndex.Columns.Add(new DataColumn("chapter", typeof(int)));
            wordIndex.Columns.Add(new DataColumn("line", typeof(int)));
            wordIndex.Columns.Add(new DataColumn("column", typeof(int)));
            wordIndex.Columns.Add(new DataColumn("sentence", typeof(int)));
            wordIndex.Columns.Add(new DataColumn("wordOrdinal", typeof(int)));
            wordIndex.Columns.Add(new DataColumn("tokenOrdinal", typeof(int)));

            //extract words into datatable
            var       sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as java.util.ArrayList;
            string    word = "";
            int       indexInSentence = 0, indexOfWordInSentence = 0, chapter = 1, isAposSkip = 0, lastTokenLine = -1;
            int       sentenceCounter = 0;
            DataRow   dr;
            CoreLabel token;
            // Running token index into linenumbers / columnnumbers from the first pass.
            // presumably both passes tokenize identically, so the index lines up - verify.
            int       wordNumInText = 0;

            foreach (CoreMap sentence in sentences)
            {
                indexInSentence       = 0;
                indexOfWordInSentence = 0;

                var tokens = sentence.get(new CoreAnnotations.TokensAnnotation().getClass()) as java.util.ArrayList;

                for (int i = 0; i < tokens.size(); i++)
                {
                    token      = (CoreLabel)(tokens.get(i));
                    isAposSkip = 0;

                    //check if next token contains an apostrophe - also check if both current and next tokens are not symbols
                    if (i + 1 < tokens.size() && ((CoreLabel)tokens.get(i + 1)).word().IndexOf("'") >= 0 && !isSymbol(token.word()) && !isSymbol(((CoreLabel)tokens.get(i + 1)).word()))
                    {
                        //concat words
                        word = String.Concat(token.word(), ((CoreLabel)tokens.get(i + 1)).word());
                        //skip next word
                        // Both the loop index and the running token index advance past the
                        // merged token; isAposSkip lets the column lookup below step back to
                        // the first of the two tokens.
                        i++;
                        wordNumInText++;
                        isAposSkip = 1;
                    }
                    else
                    {
                        word = token.word();
                    }

                    //add newline rows
                    // One "\n" row is emitted for every line between the previous token's line
                    // and the current token's line.
                    while (lastTokenLine + 1 < linenumbers[wordNumInText])
                    {
                        lastTokenLine++;
                        dr                 = wordIndex.NewRow();
                        dr["value"]        = "\\n";
                        dr["length"]       = 2;
                        dr["isSymbol"]     = 1;
                        dr["lemma"]        = "nolemma";
                        dr["chapter"]      = chapter;
                        dr["line"]         = lastTokenLine;
                        dr["column"]       = 0;
                        dr["sentence"]     = -1;
                        dr["wordOrdinal"]  = -1;
                        dr["tokenOrdinal"] = 0;
                        wordIndex.Rows.Add(dr);
                    }

                    //check if new chapter
                    // Counters are set to -1 so that the increments at the end of this
                    // iteration / sentence bring them back to 0.
                    if (word == LUAttributes.chapterMark)
                    {
                        chapter++;
                        sentenceCounter = -1;
                        indexInSentence = -1;
                    }


                    dr                 = wordIndex.NewRow();
                    // Map tokenizer escapes back to plain quotes and parentheses.
                    word               = word.Replace("''", "\"").Replace("``", "\"").Replace("-LRB-", "(").Replace("-RRB-", ")");
                    dr["value"]        = word;
                    dr["length"]       = word.Length;
                    dr["isSymbol"]     = isSymbol(word);
                    dr["chapter"]      = chapter;
                    dr["line"]         = linenumbers[wordNumInText];
                    dr["column"]       = columnnumbers[wordNumInText - isAposSkip];
                    dr["sentence"]     = sentenceCounter;
                    dr["tokenOrdinal"] = indexInSentence;
                    if (!isSymbol(word))
                    {
                        dr["wordOrdinal"] = indexOfWordInSentence;
                        dr["lemma"]       = token.lemma().ToLower();
                    }
                    else
                    {
                        dr["wordOrdinal"] = -1;
                        dr["lemma"]       = "nolemma";
                    }
                    wordIndex.Rows.Add(dr);
                    lastTokenLine = linenumbers[wordNumInText];
                    wordNumInText++;
                    indexInSentence++;
                    if (!isSymbol(word))
                    {
                        indexOfWordInSentence++;
                    }
                }

                sentenceCounter++;
            }

            // Result layout: [0] document metadata, [1] word index table.
            System.Collections.ArrayList parsedDocList = new System.Collections.ArrayList();
            parsedDocList.Add(document);
            parsedDocList.Add(wordIndex);

            return(parsedDocList);
        }
Example #41
0
        /// <summary>
        /// Registers the default preference values for the Windows application on top of
        /// the defaults registered by the base implementation.
        /// </summary>
        /// <remarks>
        /// After the built-in defaults are registered, every entry of an optional
        /// <c>default.properties</c> file located in the roaming support directory is
        /// registered as well. A failure while reading that file is logged as a warning
        /// and does not propagate to the caller.
        /// </remarks>
        protected override void setDefaults()
        {
            base.setDefaults();

            // Well-known folders that the importer locations below are resolved against.
            var roamingAppData = Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData);
            var localAppData   = Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData);
            var commonAppData  = Environment.GetFolderPath(Environment.SpecialFolder.CommonApplicationData);

            this.setDefault("application.name", Application.ProductName);
            this.setDefault("application.datafolder.name", Application.ProductName);
            // The scheme is a machine-readable identifier, so lower-case it independently of the current culture.
            this.setDefault("oauth.handler.scheme",
                            String.Format("x-{0}-action",
                                          StringUtils.deleteWhitespace(Application.ProductName.ToLowerInvariant())));

            this.setDefault("application.version", ApplicationVersion);
            this.setDefault("application.revision", ApplicationRevision);
            this.setDefault("application.language", GetDefaultLanguage());
            this.setDefault("application.language.custom", false.ToString());
            this.setDefault("application.localization.enable", true.ToString());

            this.setDefault("update.feed.release", "https://version.cyberduck.io/windows/changelog.rss");
            this.setDefault("update.feed.beta", "https://version.cyberduck.io/windows/beta/changelog.rss");
            this.setDefault("update.feed.nightly", "https://version.cyberduck.io/windows/nightly/changelog.rss");

            // Importers
            this.setDefault("bookmark.import.winscp.location",
                            Path.Combine(roamingAppData, "WinSCP.ini"));
            this.setDefault("bookmark.import.filezilla.location",
                            Path.Combine(roamingAppData, "FileZilla", "sitemanager.xml"));
            this.setDefault("bookmark.import.smartftp.location",
                            Path.Combine(roamingAppData, "SmartFTP", "Client 2.0", "Favorites"));
            this.setDefault("bookmark.import.totalcommander.location",
                            Path.Combine(roamingAppData, "GHISLER", "wcx_ftp.ini"));
            this.setDefault("bookmark.import.flashfxp3.location",
                            Path.Combine(roamingAppData, "FlashFXP", "3", "Sites.dat"));
            this.setDefault("bookmark.import.flashfxp4.location",
                            Path.Combine(roamingAppData, "FlashFXP", "4", "Sites.dat"));
            this.setDefault("bookmark.import.flashfxp4.common.location",
                            Path.Combine(commonAppData, "FlashFXP", "4", "Sites.dat"));
            this.setDefault("bookmark.import.wsftp.location",
                            Path.Combine(roamingAppData, "Ipswitch", "WS_FTP", "Sites"));
            this.setDefault("bookmark.import.fireftp.location",
                            Path.Combine(roamingAppData, "Mozilla", "Firefox", "Profiles"));
            this.setDefault("bookmark.import.s3browser.location",
                            Path.Combine(roamingAppData, "S3Browser", "settings.ini"));
            this.setDefault("bookmark.import.crossftp.location", Path.Combine(HomeFolder, ".crossftp", "sites.xml"));
            this.setDefault("bookmark.import.cloudberry.s3.location",
                            Path.Combine(localAppData, "CloudBerry S3 Explorer for Amazon S3", "settings.list"));
            this.setDefault("bookmark.import.cloudberry.google.location",
                            Path.Combine(localAppData, "CloudBerry Explorer for Google Storage", "settings.list"));
            this.setDefault("bookmark.import.cloudberry.azure.location",
                            Path.Combine(localAppData, "CloudBerry Explorer for Azure Blob Storage", "settings.list"));
            this.setDefault("bookmark.import.expandrive3.location",
                            Path.Combine(localAppData, "ExpanDrive", "favorites.js"));
            this.setDefault("bookmark.import.expandrive4.location",
                            Path.Combine(localAppData, "ExpanDrive", "expandrive4.favorites.js"));
            this.setDefault("bookmark.import.expandrive5.location",
                            Path.Combine(localAppData, "ExpanDrive", "expandrive5.favorites.js"));
            this.setDefault("bookmark.import.expandrive6.location",
                            Path.Combine(localAppData, "ExpanDrive", "expandrive6.favorites.js"));
            this.setDefault("bookmark.import.netdrive2.location",
                            Path.Combine(roamingAppData, "NetDrive2", "drives.dat"));

            // Disable reminder for protocol handler registration
            this.setDefault("defaulthandler.reminder", false.ToString());

            this.setDefault("update.check.last", "0");
            this.setDefault("update.check.privilege", true.ToString());

            this.setDefault("queue.download.folder", DefaultDownloadPath);
            this.setDefault("queue.upload.permissions.default", true.ToString());

            this.setDefault("queue.dock.badge", true.ToString());

            this.setDefault("ssh.knownhosts",
                            Path.Combine(new RoamingSupportDirectoryFinder().find().getAbsolute(), "known_hosts"));
            this.setDefault("browser.enterkey.rename", false.ToString());
            this.setDefault("terminal.openssh.enable", true.ToString());
            this.setDefault("terminal.windowssubsystemlinux.enable", true.ToString());
            this.setDefault("terminal.command.ssh", Path.Combine(HomeFolder, "putty.exe"));
            this.setDefault("terminal.command.ssh.args", "-ssh {0} {1}@{2} -t -P {3} -m \"{4}\"");
            this.setDefault("terminal.command.openssh.args", "{1} {0}@{2} -t -p {3} \"cd '{4}'; $SHELL\"");

            this.setDefault("editor.bundleIdentifier", new SystemWatchEditorFactory.Notepad().getIdentifier());

            this.setDefault("notifications.timeout.milliseconds", "300");

            // Default browser toolbar set
            this.setDefault("browser.toolbar", true.ToString());
            this.setDefault("browser.toolbar.openconnection", true.ToString());
            this.setDefault("browser.toolbar.quickconnect", true.ToString());
            this.setDefault("browser.toolbar.action", true.ToString());
            this.setDefault("browser.toolbar.info", true.ToString());
            this.setDefault("browser.toolbar.refresh", true.ToString());
            this.setDefault("browser.toolbar.edit", true.ToString());
            this.setDefault("browser.toolbar.openinbrowser", false.ToString());
            this.setDefault("browser.toolbar.openinterminal", false.ToString());
            this.setDefault("browser.toolbar.newfolder", false.ToString());
            this.setDefault("browser.toolbar.delete", false.ToString());
            this.setDefault("browser.toolbar.download", false.ToString());
            this.setDefault("browser.toolbar.upload", true.ToString());
            this.setDefault("browser.toolbar.transfers", true.ToString());

            // Default transfer toolbar set
            this.setDefault("transfer.toolbar.resume", true.ToString());
            this.setDefault("transfer.toolbar.reload", true.ToString());
            this.setDefault("transfer.toolbar.stop", true.ToString());
            this.setDefault("transfer.toolbar.remove", true.ToString());
            this.setDefault("transfer.toolbar.cleanup", false.ToString());
            this.setDefault("transfer.toolbar.log", false.ToString());
            this.setDefault("transfer.toolbar.open", true.ToString());
            this.setDefault("transfer.toolbar.show", true.ToString());

            // Resolve symbolic links downloading target file instead. Cannot create symbolic links on FAT.
            this.setDefault("path.symboliclink.resolve", true.ToString());
            // Resolve local links uploading target file instead. Currently not supporting shortcuts on Windows.
            this.setDefault("local.symboliclink.resolve", true.ToString());

            this.setDefault("local.user.home", HomeFolder);
            this.setDefault("local.delimiter", "\\");
            this.setDefault("local.normalize.tilde", false.ToString());

            // SSL Keystore
            // Add mscapi security provider
            Security.addProvider(new SunMSCAPI());
            this.setDefault("connection.ssl.keystore.type", "Windows-MY");
            this.setDefault("connection.ssl.keystore.provider", "SunMSCAPI");

            // Override secure random strong algorithm. Outputs bytes from the Windows CryptGenRandom() API
            this.setDefault("connection.ssl.securerandom.algorithm", "Windows-PRNG");
            this.setDefault("connection.ssl.securerandom.provider", "SunMSCAPI");

            // Enable Integrated Windows Authentication
            this.setDefault("connection.proxy.windows.authentication.enable", true.ToString());

            this.setDefault("webdav.ntlm.environment", true.ToString());
            if (getBoolean("webdav.ntlm.environment"))
            {
                // NTLM Windows Domain
                try
                {
                    // Gets the network domain name associated with the current user
                    this.setDefault("webdav.ntlm.domain", Environment.UserDomainName);
                }
                catch (PlatformNotSupportedException)
                {
                    // Deliberately ignored: the operating system does not support retrieving
                    // the network domain name, so no default is registered for it.
                }
                catch (InvalidOperationException)
                {
                    // Deliberately ignored: the network domain name cannot be retrieved,
                    // so no default is registered for it.
                }
                try
                {
                    this.setDefault("webdav.ntlm.workstation", Environment.MachineName);
                }
                catch (InvalidOperationException)
                {
                    // Deliberately ignored: the name of this computer cannot be obtained,
                    // so no default is registered for it.
                }
            }
            if (Utils.IsRunningAsUWP)
            {
                // Running from Windows Store
                this.setDefault("update.check", $"{false}");
                this.setDefault("tmp.dir", ApplicationData.Current.TemporaryFolder.Path);
            }
            // Apply global configuration
            var config = Path.Combine(new RoamingSupportDirectoryFinder().find().getAbsolute(),
                                      "default.properties");

            if (File.Exists(config))
            {
                try
                {
                    var properties = new java.util.Properties();
                    var stream     = new FileInputStream(config);
                    try
                    {
                        properties.load(stream);
                    }
                    finally
                    {
                        // Always release the file handle, even when loading fails.
                        stream.close();
                    }
                    foreach (var key in Utils.ConvertFromJavaList <String>(properties.keySet()))
                    {
                        setDefault(key, properties.getProperty(key));
                    }
                }
                catch (Exception e)
                {
                    // Best effort: a broken configuration file must not prevent start-up.
                    Log.warn($"Failure while reading {config}", e);
                }
            }
        }