public SUTimePipeline(Properties props)
        {
            // Default pipeline: tokenize the text, split it into sentences,
            // and POS-tag it. The sutime annotator itself is created fresh
            // per query (its options differ between queries), which is
            // inexpensive, so it is not part of this pipeline.
            if (props.GetProperty("annotators") == null)
            {
                props.SetProperty("annotators", "tokenize, ssplit, pos");
            }
            // Replicate the tokenizer behavior of StanfordCoreNLP.
            props.SetProperty("tokenize.options", "invertible,ptb3Escaping=true");
            this.pipeline = new StanfordCoreNLP(props);
        }
        static ChinesePOSExtractor()
        {
            var properties = new java.util.Properties();

            // Annotator chain: Chinese word segmentation, sentence split, POS.
            properties.setProperty("annotators", "segment, ssplit, pos");

            // Word segmentation (Chinese has no whitespace word boundaries).
            properties.setProperty("customAnnotatorClass.segment", "edu.stanford.nlp.pipeline.ChineseSegmenterAnnotator");
            properties.setProperty("segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz");
            properties.setProperty("segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese");
            properties.setProperty("segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz");
            properties.setProperty("segment.sighanPostProcessing", "true");

            // Sentence split on Western and CJK terminators.
            properties.setProperty("ssplit.boundaryTokenRegex", "[.]|[!?]+|[。]|[!?]+");

            // Part-of-speech tagging.
            properties.setProperty("pos.model", "edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger");

            // Named-entity recognition.
            properties.setProperty("ner.model", "edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz");
            properties.setProperty("ner.applyNumericClassifiers", "false");
            properties.setProperty("ner.useSUTime", "false");

            // Constituency parsing.
            properties.setProperty("parse.model", "edu/stanford/nlp/models/lexparser/chineseFactored.ser.gz");

            pipeline = new StanfordCoreNLP(properties);
        }
        /// <summary>
        /// Builds the pipeline up front so the multi-second training and
        /// configuration cost is paid once. Parse would otherwise trigger this
        /// lazily on first use; front-loading lets callers reuse the instance
        /// (or wrap it in a singleton).
        /// </summary>
        public void TrainAndConfigure()
        {
            Pipeline = new PipelineConfigurator().GetPipeline();
            IsTrainedAndReady = true;
        }
示例#4
0
 public virtual void TestPrereqAnnotatorsBasic()
 {
     // "parse" pulls in tokenize, ssplit and pos.
     string parseChain = StanfordCoreNLP.EnsurePrerequisiteAnnotators(new string[] { "parse" }, new Properties());
     NUnit.Framework.Assert.AreEqual("tokenize,ssplit,pos,parse", parseChain);

     // "depparse" likewise needs tokenize, ssplit and pos.
     string depparseChain = StanfordCoreNLP.EnsurePrerequisiteAnnotators(new string[] { "depparse" }, new Properties());
     NUnit.Framework.Assert.AreEqual("tokenize,ssplit,pos,depparse", depparseChain);

     // Explicitly listing a prerequisite must not duplicate or reorder it.
     string depparseWithTokenize = StanfordCoreNLP.EnsurePrerequisiteAnnotators(new string[] { "depparse", "tokenize" }, new Properties());
     NUnit.Framework.Assert.AreEqual("tokenize,ssplit,pos,depparse", depparseWithTokenize);

     // "natlog" adds lemma and depparse to the chain.
     string natlogChain = StanfordCoreNLP.EnsurePrerequisiteAnnotators(new string[] { "natlog", "tokenize" }, new Properties());
     NUnit.Framework.Assert.AreEqual("tokenize,ssplit,pos,lemma,depparse,natlog", natlogChain);
 }
        /// <summary>Set the properties to the paths they appear at on the servlet.</summary>
        /// <remarks>
        /// Set the properties to the paths they appear at on the servlet.
        /// See build.xml for where these paths get copied.
        /// Builds the full OpenIE pipeline plus a dependency-free "backoff"
        /// pipeline that reuses the common model properties.
        /// </remarks>
        /// <exception cref="Javax.Servlet.ServletException">Thrown by the implementation</exception>
        public override void Init()
        {
            Properties commonProps = new _Properties_43();

            try
            {
                string dataDir = GetServletContext().GetRealPath("/WEB-INF/data");
                Runtime.SetProperty("de.jollyday.config", GetServletContext().GetRealPath("/WEB-INF/classes/holidays/jollyday.properties"));
                commonProps.SetProperty("pos.model", dataDir + "/english-left3words-distsim.tagger");
                commonProps.SetProperty("ner.model", dataDir + "/english.all.3class.distsim.crf.ser.gz," + dataDir + "/english.conll.4class.distsim.crf.ser.gz," + dataDir + "/english.muc.7class.distsim.crf.ser.gz");
                commonProps.SetProperty("depparse.model", dataDir + "/english_SD.gz");
                commonProps.SetProperty("parse.model", dataDir + "/englishPCFG.ser.gz");
                // BUGFIX: the SUTime holiday rules file is named
                // "english.holidays.sutime.txt"; the original misspelled it
                // "hollidays", so SUTime would fail to locate the rules file.
                commonProps.SetProperty("sutime.rules", dataDir + "/defs.sutime.txt," + dataDir + "/english.sutime.txt," + dataDir + "/english.holidays.sutime.txt");
                commonProps.SetProperty("openie.splitter.model", dataDir + "/clauseSplitterModel.ser.gz");
                commonProps.SetProperty("openie.affinity_models", dataDir);
            }
            catch (ArgumentNullException)
            {
                // Deliberate best-effort: outside a servlet container the
                // context paths are unavailable and defaults are used instead.
                log.Info("Could not load servlet context. Are you on the command line?");
            }
            if (this.pipeline == null)
            {
                Properties fullProps = new Properties(commonProps);
                fullProps.SetProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,ner,natlog,openie");
                this.pipeline = new StanfordCoreNLP(fullProps);
            }
            if (this.backoff == null)
            {
                // Backoff pipeline runs only the tail annotators; prerequisite
                // enforcement is disabled because earlier stages come from the
                // primary pipeline's output.
                Properties backoffProps = new Properties(commonProps);
                backoffProps.SetProperty("annotators", "parse,natlog,openie");
                backoffProps.SetProperty("enforceRequirements", "false");
                this.backoff = new StanfordCoreNLP(backoffProps);
            }
        }
        /// <summary>
        /// Builds a StanfordCoreNLP pipeline (tokenize/ssplit/pos/lemma/ner/
        /// parse/entitymentions) using model files under <c>_jarRoot</c>.
        /// </summary>
        /// <param name="jsonContentProvider">Stored for later use by this instance.</param>
        /// <exception cref="NullReferenceException">If the model directory is not configured.</exception>
        public StanfordCoreNLP LoadNlp(IContentProvider jsonContentProvider)
        {
            _jsonContentProvider = jsonContentProvider;

            // Annotation pipeline configuration
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, entitymentions");
            props.setProperty("ner.useSUTime", "0");

            if (_jarRoot.IsNullOrEmpty())
            {
                throw new NullReferenceException("ERROR: Core NLP Java model directory not specified. Set string JarRootDir.");
            }

            // StanfordCoreNLP resolves model files relative to the current
            // directory, so switch to the model root while constructing it.
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(_jarRoot);
            try
            {
                // UNCOMMENT ME TO SUPPRESS LOGGING OUTPUT
                //RedwoodConfiguration.empty().capture(java.lang.System.err).apply();
                return new StanfordCoreNLP(props);
                //RedwoodConfiguration.current().clear().apply();
            }
            finally
            {
                // BUGFIX: restore the original working directory even when
                // pipeline construction throws (e.g. a model file is missing);
                // the original left the process in _jarRoot on failure.
                Directory.SetCurrentDirectory(curDir);
            }
        }
示例#7
0
        /// <summary>
        /// Builds a tokenize/ssplit/pos/lemma/ner pipeline with SUTime rules,
        /// loading all models from the hard-coded C:\NLPModels\ folder.
        /// </summary>
        public NLPProcessor()
        {
            const string pathToCoreModels  = @"C:\NLPModels\";
            const string pathToTaggerModel = @"C:\NLPModels\english-left3words-distsim.tagger";
            const string pathToNerTagger   = @"C:\NLPModels\english.all.7class.distsim.crf.ser.gz";
            const string sutimeRules       =
                pathToCoreModels + @"sutime\defs.sutime.txt," +
                pathToCoreModels + @"sutime\english.holidays.sutime.txt," +
                pathToCoreModels + @"sutime\english.sutime.txt";

            // Annotation pipeline configuration
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
            props.setProperty("sutime.rules", sutimeRules);
            props.setProperty("sutime.binders", "0");
            props.setProperty("pos.model", pathToTaggerModel);
            props.setProperty("ner.model", pathToNerTagger);

            // StanfordCoreNLP finds model files relative to the current
            // directory, so switch there while constructing the pipeline.
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(pathToCoreModels);
            try
            {
                _pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                // BUGFIX: restore the working directory even if construction
                // throws; the original left the process in pathToCoreModels
                // on failure.
                Directory.SetCurrentDirectory(curDir);
            }
        }
        /// <summary>
        /// Loads the Spanish CoreNLP properties file from <c>_modelPath</c>,
        /// overrides a few settings and constructs the pipeline.
        /// </summary>
        /// <exception cref="SpanishCoreNLPSetupException">Wraps any setup failure.</exception>
        private void SetupCoreNLP()
        {
            try
            {
                var propsFile = System.IO.Path.Combine(_modelPath, "StanfordCoreNLP-spanish.properties");

                // Annotation pipeline configuration
                var props = new Properties();

                // BUGFIX: close the reader after loading; the original leaked
                // the underlying file handle.
                var reader = new FileReader(propsFile);
                try
                {
                    props.load(reader);
                }
                finally
                {
                    reader.close();
                }
                props.put("ner.useSUTime", "0");
                props.put("threads", "10");
                props.put("tokenize.verbose", "true");
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, depparse, kbp, coref,entitymentions, quote");

                // Models are resolved relative to the current directory.
                var curDir = Environment.CurrentDirectory;
                Directory.SetCurrentDirectory(_modelPath);
                try
                {
                    _pipeline = new StanfordCoreNLP(props);
                }
                finally
                {
                    // BUGFIX: restore the working directory even when pipeline
                    // construction throws; the original rethrew with the
                    // process still parked in _modelPath.
                    Directory.SetCurrentDirectory(curDir);
                }
            }
            catch (Exception e)
            {
                throw new SpanishCoreNLPSetupException(e.Message, e);
            }
        }
        /// <summary>
        /// Tokenizing text containing newlines (default, no NL handling) must
        /// yield exactly the expected words, via both Word() and the
        /// TextAnnotation accessor.
        /// </summary>
        public virtual void TestDefaultNoNLsPipeline()
        {
            string         t      = "Text with \n\n a new \nline.";
            IList <string> tWords = Arrays.AsList("Text", "with", "a", "new", "line", ".");
            Properties     props  = new Properties();

            props.SetProperty("annotators", "tokenize");
            Annotation      ann      = new Annotation(t);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

            pipeline.Annotate(ann);

            // BUGFIX: IEnumerator<T>.Current is undefined before the first
            // MoveNext() call (unlike a Java Iterator's next()), so the
            // original compared every token against an unpositioned iterator.
            // Advance the iterator inside the loop before reading Current.
            IEnumerator <string> it = tWords.GetEnumerator();
            foreach (CoreLabel word in ann.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                NUnit.Framework.Assert.IsTrue("Too many tokens in new CoreLabel usage", it.MoveNext());
                NUnit.Framework.Assert.AreEqual("Bung token in new CoreLabel usage", it.Current, word.Word());
            }
            NUnit.Framework.Assert.IsFalse("Too few tokens in new CoreLabel usage", it.MoveNext());

            IEnumerator <string> it2 = tWords.GetEnumerator();
            foreach (CoreLabel word_1 in ann.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                NUnit.Framework.Assert.IsTrue("Too many tokens in new CoreLabel usage", it2.MoveNext());
                NUnit.Framework.Assert.AreEqual("Bung token in new CoreLabel usage", it2.Current, word_1.Get(typeof(CoreAnnotations.TextAnnotation)));
            }
            NUnit.Framework.Assert.IsFalse("Too few tokens in new CoreLabel usage", it2.MoveNext());
        }
示例#10
0
        /// <summary>
        /// Re-runs CoreNLP NER over an already-tokenized document and re-types
        /// each entity mention with the most frequent token-level NER tag
        /// inside its extent span.
        /// </summary>
        private static void ModifyUsingCoreNLPNER(Annotation doc)
        {
            Properties nerProps = new Properties();
            nerProps.SetProperty("annotators", "pos, lemma, ner");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(nerProps, false);
            pipeline.Annotate(doc);

            foreach (ICoreMap sentence in doc.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                IList <EntityMention> mentions = sentence.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
                if (mentions == null)
                {
                    continue;
                }
                IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                foreach (EntityMention mention in mentions)
                {
                    // Count the NER tag of every token covered by the mention
                    // and pick the majority tag as the mention's new type.
                    Span extent = mention.GetExtent();
                    ICounter <string> tagCounts = new ClassicCounter <string>();
                    for (int tokenIndex = extent.Start(); tokenIndex < extent.End(); tokenIndex++)
                    {
                        tagCounts.IncrementCount(tokens[tokenIndex].Ner());
                    }
                    mention.SetType(Counters.Argmax(tagCounts));
                }
            }
        }
示例#11
0
        static void Main()
        {
            // Folder with models extracted from `stanford-corenlp-3.9.1-models.jar`.
            const string jarRoot = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-corenlp-full-2018-10-05\models";

            // Configure the annotation pipeline.
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner,dcoref");
            props.setProperty("ner.useSUTime", "0");

            // StanfordCoreNLP locates model files relative to the current
            // directory, so switch there while building the pipeline.
            var previousDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            var pipeline = new StanfordCoreNLP(props);
            Directory.SetCurrentDirectory(previousDir);

            // Annotate the sample text.
            var annotation = new Annotation("Kosgi Santosh sent an email to Stanford University. He didn't get a reply.");
            pipeline.annotate(annotation);

            // Pretty-print the result to the console.
            using (var buffer = new ByteArrayOutputStream())
            {
                pipeline.prettyPrint(annotation, new PrintWriter(buffer));
                Console.WriteLine(buffer.toString());
                buffer.close();
            }
        }
        /// <summary>
        /// Executes Sentiment and EntitiesMentioned analysis.
        /// </summary>
        public IOutcome<AnalysisResult> Analyze(StanfordCoreNLP pipeline, string text)
        {
            // Annotate the raw text.
            var doc = new Annotation(text);
            pipeline.annotate(doc);

            // Guard: nothing to analyze when sentence splitting found nothing.
            var sentences = doc.get(typeof(CoreAnnotations.SentencesAnnotation));
            if (sentences == null)
            {
                return Outcomes.Outcomes
                               .Failure<AnalysisResult>()
                               .WithMessage("No sentences detected.");
            }

            // Run both analyses and package the outcome.
            return Outcomes.Outcomes
                           .Success<AnalysisResult>()
                           .WithValue(new AnalysisResult()
                           {
                               Sentiment = GetSentiment((ArrayList)sentences),
                               MentionedEntities = GetMentions(doc)
                           });
        }
        /// <summary>
        /// Annotates the given text and dumps the top-level annotation plus
        /// per-token word/lemma/POS/NER details to the supplied writer.
        /// </summary>
        private static void RunPipeline(StanfordCoreNLP pipeline, string text, PrintWriter @out)
        {
            var annotation = new Annotation(text);
            pipeline.Annotate(annotation);

            // An Annotation is a Map; each analysis can be read individually.
            @out.Println();
            // toString() on an Annotation prints only its text;
            // toShorterString() shows what the annotation actually contains.
            @out.Println("The top level annotation");
            @out.Println(annotation.ToShorterString());

            foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                foreach (CoreLabel token in sentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
                {
                    // Word, lemma, POS, NE tag and normalized NE per token.
                    @out.Println("token: " + "word=" + token.Get(typeof(CoreAnnotations.TextAnnotation))
                                 + ", lemma=" + token.Get(typeof(CoreAnnotations.LemmaAnnotation))
                                 + ", pos=" + token.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation))
                                 + ", ne=" + token.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation))
                                 + ", normalized=" + token.Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation)));
                }
            }
            @out.Flush();
        }
示例#14
0
        } = false;                                             // I'd, don't, you're, ...



        /// <summary>
        /// Builds the tokenize/ssplit/pos/lemma pipeline and loads stopwords.
        /// Annotator order matters: lemma needs ssplit and pos, pos needs ssplit.
        /// </summary>
        protected void InitializeProcessor()
        {
            this.props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
            this.pipeline = new StanfordCoreNLP(this.props);
            this.InitializeStopwords();
        }
        /// <summary>
        /// Command-line driver: with -loadFile, deserializes a saved annotation
        /// and prints its short summary; with -file, annotates the file's text
        /// and serializes the result next to it as "&lt;file&gt;.ser".
        /// </summary>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Properties      props    = StringUtils.ArgsToProperties(args);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
            string          file     = props.GetProperty("file");
            string          loadFile = props.GetProperty("loadFile");

            if (loadFile != null && !loadFile.IsEmpty())
            {
                // Deserialize a previously saved annotation.
                Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer ser = new Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer(false, false);
                InputStream @is = new FileInputStream(loadFile);
                Pair <Annotation, InputStream> pair = ser.Read(@is);
                // NOTE(review): pair.second appears to be the same stream as
                // @is (or a wrapper over it), so the Close() here plus the
                // @is.Close() below likely double-closes — harmless for Java
                // streams but worth confirming.
                pair.second.Close();
                Annotation anno = pair.first;
                System.Console.Out.WriteLine(anno.ToShorterString(StringUtils.EmptyStringArray));
                @is.Close();
            }
            else
            {
                if (file != null && !file.Equals(string.Empty))
                {
                    // Annotate the file contents and serialize the result.
                    string     text = IOUtils.SlurpFile(file);
                    Annotation doc  = new Annotation(text);
                    pipeline.Annotate(doc);
                    Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer ser = new Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer(false, false);
                    // NOTE(review): System.IO.TextWriter is abstract and cannot
                    // be instantiated in standard .NET — presumably this is a
                    // ported Java PrintWriter or a project alias; verify before
                    // relying on this path. Write(...) returns the writer,
                    // which is closed immediately after writing.
                    TextWriter os = new TextWriter(new FileOutputStream(file + ".ser"));
                    ser.Write(doc, os).Close();
                    log.Info("Serialized annotation saved in " + file + ".ser");
                }
                else
                {
                    log.Info("usage: CustomAnnotationSerializer [-file file] [-loadFile file]");
                }
            }
        }
示例#16
0
        static void Main()
        {
            // Folder with models extracted from `stanford-corenlp-3.7.0-models.jar`.
            var modelRoot = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2016-10-31\models";

            // Sample input.
            var text = "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.";

            // Pipeline configuration.
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner,dcoref");
            props.setProperty("ner.useSUTime", "0");

            // Model files are resolved relative to the current directory;
            // switch there for construction, then switch back.
            var savedDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(modelRoot);
            var pipeline = new StanfordCoreNLP(props);
            Directory.SetCurrentDirectory(savedDir);

            // Run all annotators over the text.
            var annotation = new Annotation(text);
            pipeline.annotate(annotation);

            // Pretty-print the annotated document.
            using (var output = new ByteArrayOutputStream())
            {
                pipeline.prettyPrint(annotation, new PrintWriter(output));
                Console.WriteLine(output.toString());
                output.close();
            }
        }
 /// <summary>Annotate a document (which is usually just a sentence).</summary>
 public virtual void Annotate(StanfordCoreNLP pipeline, Annotation ann)
 {
     var sentences = ann.Get(typeof(CoreAnnotations.SentencesAnnotation));
     if (sentences == null)
     {
         // Never annotated before: run the full pipeline.
         pipeline.Annotate(ann);
         return;
     }
     if (sentences.Count != 1)
     {
         // Multi-sentence documents are left untouched.
         return;
     }
     // Single sentence: strip stale natlog / OpenIE / dependency results
     // before re-annotating so the pipeline recomputes them.
     ICoreMap sentence = sentences[0];
     foreach (CoreLabel token in sentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
     {
         token.Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
         token.Remove(typeof(NaturalLogicAnnotations.PolarityAnnotation));
     }
     sentence.Remove(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation));
     sentence.Remove(typeof(NaturalLogicAnnotations.EntailedSentencesAnnotation));
     sentence.Remove(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
     sentence.Remove(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
     sentence.Remove(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
     pipeline.Annotate(ann);
 }
示例#18
0
        /// <summary>
        /// Setup extended tagger that includes POS, lemma and entity analysis
        /// </summary>
        private void SetupExtendedTagger()
        {
            PerformanceTester.StartMET("NLP");
            // Get path to Stanford NLP models
            var jarRoot = Path.Combine(Utility.GetResourcesFolder(), @"stanford-corenlp-3.9.2-models");

            // Turn off logging
            RedwoodConfiguration.current().clear().apply();
            var props = new java.util.Properties();

            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner");
            // Makes Named Entity Recognition work in the library
            props.setProperty("ner.useSUTime", "0");
            props.put("ner.applyFineGrained", "0");
            props.put("ner.fine.regexner.mapping", jarRoot + @"\edu\stanford\nlp\models\kbp\english\");

            // BUGFIX: the original changed the process working directory and
            // never restored it (curDir was captured but unused), and computed
            // an unused modelsDirectory local. Restore the directory in a
            // finally so a failed model load cannot strand the process.
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                // Load Stanford NLP
                Tagger = new StanfordCoreNLP(props);
            }
            finally
            {
                Directory.SetCurrentDirectory(curDir);
            }
            PerformanceTester.StopMET("NLP");
        }
 /// <summary>
 /// Demo driver: annotates a fixed two-sentence text, runs the relation
 /// extractor over it and prints every relation mention found.
 /// </summary>
 public static void Main(string[] args)
 {
     try
     {
         Properties props = StringUtils.ArgsToProperties(args);
         props.SetProperty("annotators", "tokenize,ssplit,lemma,pos,parse,ner");
         // BUGFIX: the configured props were never handed to the pipeline
         // (the original called the parameterless constructor), so the
         // annotator list above had no effect on it.
         StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
         string          sentence = "Barack Obama lives in America. Obama works for the Federal Goverment.";
         Annotation      doc      = new Annotation(sentence);
         pipeline.Annotate(doc);
         Edu.Stanford.Nlp.Pipeline.RelationExtractorAnnotator r = new Edu.Stanford.Nlp.Pipeline.RelationExtractorAnnotator(props);
         r.Annotate(doc);
         foreach (ICoreMap s in doc.Get(typeof(CoreAnnotations.SentencesAnnotation)))
         {
             System.Console.Out.WriteLine("For sentence " + s.Get(typeof(CoreAnnotations.TextAnnotation)));
             IList <RelationMention> rls = s.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
             foreach (RelationMention rl in rls)
             {
                 System.Console.Out.WriteLine(rl.ToString());
             }
         }
     }
     catch (Exception e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
 }
        /// <summary>
        /// Chinese coreference demo: annotates a pre-segmented Chinese news
        /// text, prints all coref chains and per-sentence mentions, and
        /// reports the total running time.
        /// </summary>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            long startTime = Runtime.CurrentTimeMillis();

            // Pre-segmented (whitespace-delimited) Chinese input text.
            string text = "俄罗斯 航空 公司 一 名 官员 在 9号 说 , " + "米洛舍维奇 的 儿子 马可·米洛舍维奇 9号 早上 持 外交 护照 从 俄国 首都 莫斯科 搭机 飞往 中国 大陆 北京 , " + "可是 就 在 稍后 就 返回 莫斯科 。 " + "这 名 俄国 航空 公司 官员 说 马可 是 因为 护照 问题 而 在 北京 机场 被 中共 遣返 莫斯科 。 " + "北京 机场 方面 的 这 项 举动 清楚 显示 中共 有意 放弃 在 总统 大选 落败 的 前 南斯拉夫 总统 米洛舍维奇 , "
                          + "因此 他 在 南斯拉夫 受到 民众 厌恶 的 儿子 马可 才 会 在 北京 机场 被 中共 当局 送回 莫斯科 。 " + "马可 持 外交 护照 能够 顺利 搭机 离开 莫斯科 , 但是 却 在 北京 受阻 , 可 算是 踢到 了 铁板 。 " + "可是 这 项 消息 和 先前 外界 谣传 中共 当局 准备 提供 米洛舍维奇 和 他 的 家人 安全 庇护所 有 着 很 大 的 出入 ," + " 一般 认为 在 去年 米洛舍维奇 挥兵 攻打 科索沃 境内 阿尔巴尼亚 一 分离主义 分子 的 时候 , "
                          + "强力 反对 北约 组织 攻击 南斯拉夫 的 中共 , 会 全力 保护 米洛舍维奇 和 他 的 家人 及 亲信 。 " + "可是 从 9号 马可 被 送回 莫斯科 一 事 看 起来 , 中共 很 可能 会 放弃 米洛舍维奇 。";

            // NOTE: command-line args are deliberately replaced with the
            // bundled default Chinese coref properties file.
            args = new string[] { "-props", "edu/stanford/nlp/hcoref/properties/zh-coref-default.properties" };
            Properties props = StringUtils.ArgsToProperties(args);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

            Annotation document = new Annotation(text);
            pipeline.Annotate(document);

            // Dump the document-level coreference chains.
            System.Console.Out.WriteLine("---");
            System.Console.Out.WriteLine("coref chains");
            foreach (CorefChain chain in document.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation)).Values)
            {
                System.Console.Out.WriteLine("\t" + chain);
            }

            // Dump the mentions found in each sentence.
            foreach (ICoreMap sentence in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                System.Console.Out.WriteLine("---");
                System.Console.Out.WriteLine("mentions");
                foreach (Mention mention in sentence.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)))
                {
                    System.Console.Out.WriteLine("\t" + mention);
                }
            }

            long elapsedSeconds = (Runtime.CurrentTimeMillis() - startTime) / 1000;
            System.Console.Out.WriteLine("Running time " + elapsedSeconds / 60 + "min " + elapsedSeconds % 60 + "s");
        }
示例#21
0
 /// <summary>
 /// Lazily builds the parsing annotator: reuses an already-registered parse
 /// annotator when available, otherwise constructs a default ParserAnnotator;
 /// if the parser needs POS tags, wraps the shared tagger and the parser into
 /// a two-stage AnnotationPipeline. The result is cached in parserProcessor.
 /// </summary>
 private IAnnotator GetParser()
 {
     if (parserProcessor != null)
     {
         return parserProcessor;
     }
     IAnnotator parser = StanfordCoreNLP.GetExistingAnnotator("parse");
     if (parser == null)
     {
         // No shared parse annotator registered; build one from defaults.
         parser = new ParserAnnotator("coref.parse.md", new Properties());
     }
     if (parser == null)
     {
         // TODO: these assertions rule out the possibility of alternately named parse/pos annotators
         throw new AssertionError("Failed to get parser - this should not be possible");
     }
     if (parser.Requires().Contains(typeof(CoreAnnotations.PartOfSpeechAnnotation)))
     {
         // The parser needs POS tags: prepend the shared tagger.
         IAnnotator tagger = StanfordCoreNLP.GetExistingAnnotator("pos");
         if (tagger == null)
         {
             throw new AssertionError("Parser required tagger, but failed to find the pos annotator");
         }
         IList <IAnnotator> stages = Generics.NewArrayList();
         stages.Add(tagger);
         stages.Add(parser);
         parserProcessor = new AnnotationPipeline(stages);
     }
     else
     {
         parserProcessor = parser;
     }
     return parserProcessor;
 }
        //-------------------------------------------------------------------------------------- Stanford Core NLP -----------------------------------------
        //-- Better for Entity recognition

        /// <summary>
        /// Creates a StanfordCoreNLP pipeline with POS tagging, lemmatization,
        /// NER, parsing and coreference resolution, annotates the given text
        /// and pretty-prints the result to the debug output.
        /// </summary>
        public static void buildPipeline(string text)
        {//https://interviewbubble.com/getting-started-with-stanford-corenlp-a-stanford-corenlp-tutorial/
            // Path to the folder with models extracted from `stanford-corenlp-3.7.0-models.jar`
            var jarRoot = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-corenlp-full-2016-10-31\models";

            Properties props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");

            // BUGFIX: jarRoot was declared but never used, so StanfordCoreNLP
            // could not locate the extracted model files. Switch to the model
            // directory while constructing the pipeline (as the other demo
            // Mains in this file do) and restore the original directory even
            // if construction throws.
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            StanfordCoreNLP pipeline;
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                Directory.SetCurrentDirectory(curDir);
            }

            // create an empty Annotation just with the given text
            Annotation document = new Annotation(text);

            // run all Annotators on this text
            pipeline.annotate(document);

            // Result - Pretty Print
            using (var stream = new ByteArrayOutputStream())
            {
                pipeline.prettyPrint(document, new PrintWriter(stream));
                Debug.WriteLine(stream.toString());
                stream.close();
            }
        }
示例#23
0
        /// <summary>
        /// Builds a tokenize/ssplit/pos/lemma pipeline from the given model folder.
        /// </summary>
        /// <param name="jarRootPath">Path to the folder with models extracted from 'stanford-corenlp-3.5.2-models.jar'</param>
        /// <exception cref="DirectoryNotFoundException">If the model folder does not exist.</exception>
        public StanfordLemmatizer(string jarRootPath)
        {
            if (!Directory.Exists(jarRootPath))
            {
                string fullPath = Path.GetFullPath(jarRootPath);
                throw new DirectoryNotFoundException("Folder(s) extracted from 'stanford-corenlp-3.5.2-models.jar' was not found in path: . " +
                                                     "-->" + fullPath + "<--. " +
                                                     "Please make sure correct path is listed in .config file.");
            }

            // Set properties required for lemma
            java.util.Properties props = new java.util.Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
            props.setProperty("ner.useSUTime", "0");

            // StanfordCoreNLP resolves model files relative to the current
            // directory, so switch to the model root while constructing it.
            string curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRootPath);
            try
            {
                _pipeLine = new StanfordCoreNLP(props);
            }
            finally
            {
                // BUGFIX: restore the working directory even when pipeline
                // construction throws; the original left the process parked
                // in jarRootPath on failure.
                Directory.SetCurrentDirectory(curDir);
            }

            // Instantiate annotation
            _sentencesAnnotation = new CoreAnnotations.SentencesAnnotation();
            _tokensAnnotation    = new CoreAnnotations.TokensAnnotation();
            _lemmaAnnotation     = new CoreAnnotations.LemmaAnnotation();
        }
        /// <summary>
        /// Default tokenizer options keep hyphenated words intact (21 tokens);
        /// with splitHyphenated=true they are broken apart (27 tokens).
        /// </summary>
        public virtual void TestHyphens()
        {
            string test = "Hyphen-ated words should be split except when school-aged-children eat " + "anti-disestablishmentariansm for breakfast at the o-kay choral infront of some explor-o-toriums.";

            // Case 1: default tokenizer options.
            Properties defaultProps = new Properties();
            defaultProps.SetProperty("annotators", "tokenize");
            Annotation defaultAnn = new Annotation(test);
            StanfordCoreNLP defaultPipeline = new StanfordCoreNLP(defaultProps);
            defaultPipeline.Annotate(defaultAnn);
            IList <CoreLabel> defaultToks = defaultAnn.Get(typeof(CoreAnnotations.TokensAnnotation));
            NUnit.Framework.Assert.AreEqual(21, defaultToks.Count);

            // Case 2: splitHyphenated=true yields extra tokens.
            Properties splitProps = new Properties();
            splitProps.SetProperty("annotators", "tokenize");
            splitProps.SetProperty("tokenize.options", "splitHyphenated=true");
            Annotation splitAnn = new Annotation(test);
            StanfordCoreNLP splitPipeline = new StanfordCoreNLP(splitProps);
            splitPipeline.Annotate(splitAnn);
            IList <CoreLabel> splitToks = splitAnn.Get(typeof(CoreAnnotations.TokensAnnotation));
            NUnit.Framework.Assert.AreEqual(27, splitToks.Count);
        }
示例#25
0
        // static demo class
        /// <summary>
        /// Demo: compiles TokenSequencePatterns from a rules file, annotates an
        /// input text file, and prints the non-overlapping matches per sentence.
        /// Usage: rulesFile inputFile [outFile] (stdout when outFile is omitted).
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                System.Console.Error.WriteLine("TokensRegexMatcher rules file [outFile]");
                return;
            }
            string      rules = args[0];
            PrintWriter @out;

            if (args.Length > 2)
            {
                @out = new PrintWriter(args[2]);
            }
            else
            {
                @out = new PrintWriter(System.Console.Out);
            }
            StanfordCoreNLP pipeline   = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
            Annotation      annotation = new Annotation(IOUtils.SlurpFileNoExceptions(args[1]));

            pipeline.Annotate(annotation);
            // Load lines of file as TokenSequencePatterns
            IList <TokenSequencePattern> tokenSequencePatterns = new List <TokenSequencePattern>();

            foreach (string line in ObjectBank.GetLineIterator(rules))
            {
                TokenSequencePattern pattern = TokenSequencePattern.Compile(line);
                tokenSequencePatterns.Add(pattern);
            }
            // The matcher is loop-invariant: build it once, not once per sentence.
            MultiPatternMatcher <ICoreMap> multiMatcher = TokenSequencePattern.GetMultiPatternMatcher(tokenSequencePatterns);
            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            int i = 0;

            foreach (ICoreMap sentence in sentences)
            {
                IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                @out.Println("Sentence #" + ++i);
                @out.Print("  Tokens:");
                foreach (CoreLabel token in tokens)
                {
                    @out.Print(' ');
                    @out.Print(token.ToShortString("Text", "PartOfSpeech", "NamedEntityTag"));
                }
                @out.Println();
                IList <ISequenceMatchResult <ICoreMap> > answers = multiMatcher.FindNonOverlapping(tokens);
                int j = 0;
                foreach (ISequenceMatchResult <ICoreMap> matched in answers)
                {
                    @out.Println("  Match #" + ++j);
                    // Group 0 is the whole match; 1..GroupCount() are capture groups.
                    for (int k = 0; k <= matched.GroupCount(); k++)
                    {
                        @out.Println("    group " + k + " = " + matched.Group(k));
                    }
                }
            }
            @out.Flush();
        }
示例#26
0
        /// <summary>
        /// Configures a parser annotator from properties, all keyed by the
        /// <paramref name="annotatorName"/> prefix (e.g. "parse.model", "parse.maxlen").
        /// </summary>
        /// <param name="annotatorName">Property-name prefix used to look up this annotator's settings.</param>
        /// <param name="props">Pipeline configuration properties.</param>
        /// <exception cref="ArgumentException">If no parser model is specified.</exception>
        public ParserAnnotator(string annotatorName, Properties props)
        {
            // Falls back to the bundled default model when ".model" is absent.
            string model = props.GetProperty(annotatorName + ".model", LexicalizedParser.DefaultParserLoc);

            if (model == null)
            {
                throw new ArgumentException("No model specified for Parser annotator " + annotatorName);
            }
            this.Verbose = PropertiesUtils.GetBool(props, annotatorName + ".debug", false);
            string[] flags = ConvertFlagsToArray(props.GetProperty(annotatorName + ".flags"));
            this.parser            = LoadModel(model, Verbose, flags);
            // -1 means "unlimited" for both sentence length and parse time.
            this.maxSentenceLength = PropertiesUtils.GetInt(props, annotatorName + ".maxlen", -1);
            // Optional class name of a transform applied to each parse tree, loaded reflectively.
            string treeMapClass = props.GetProperty(annotatorName + ".treemap");

            if (treeMapClass == null)
            {
                this.treeMap = null;
            }
            else
            {
                this.treeMap = ReflectionLoading.LoadByReflection(treeMapClass, props);
            }
            this.maxParseTime = PropertiesUtils.GetLong(props, annotatorName + ".maxtime", -1);
            this.kBest        = PropertiesUtils.GetInt(props, annotatorName + ".kbest", 1);
            this.keepPunct    = PropertiesUtils.GetBool(props, annotatorName + ".keepPunct", true);
            string buildGraphsProperty = annotatorName + ".buildgraphs";

            // Dependency graphs can only be built when the language pack supports
            // basic dependencies; otherwise warn (if explicitly requested) and disable.
            if (!this.parser.GetTLPParams().SupportsBasicDependencies())
            {
                if (PropertiesUtils.GetBool(props, buildGraphsProperty))
                {
                    log.Info("WARNING: " + buildGraphsProperty + " set to true, but " + this.parser.GetTLPParams().GetType() + " does not support dependencies");
                }
                this.BuildGraphs = false;
            }
            else
            {
                this.BuildGraphs = PropertiesUtils.GetBool(props, buildGraphsProperty, true);
            }
            if (this.BuildGraphs)
            {
                // Set up the grammatical-structure factory, optionally filtering
                // punctuation out of the dependency graphs.
                bool generateOriginalDependencies = PropertiesUtils.GetBool(props, annotatorName + ".originalDependencies", false);
                parser.GetTLPParams().SetGenerateOriginalDependencies(generateOriginalDependencies);
                ITreebankLanguagePack tlp         = parser.GetTLPParams().TreebankLanguagePack();
                IPredicate <string>   punctFilter = this.keepPunct ? Filters.AcceptFilter() : tlp.PunctuationWordRejectFilter();
                this.gsf = tlp.GrammaticalStructureFactory(punctFilter, parser.GetTLPParams().TypedDependencyHeadFinder());
            }
            else
            {
                this.gsf = null;
            }
            // Per-annotator thread count falls back to the global "nthreads" setting.
            this.nThreads = PropertiesUtils.GetInt(props, annotatorName + ".nthreads", PropertiesUtils.GetInt(props, "nthreads", 1));
            bool usesBinary = StanfordCoreNLP.UsesBinaryTrees(props);

            this.saveBinaryTrees   = PropertiesUtils.GetBool(props, annotatorName + ".binaryTrees", usesBinary);
            this.noSquash          = PropertiesUtils.GetBool(props, annotatorName + ".nosquash", false);
            this.extraDependencies = MetaClass.Cast(props.GetProperty(annotatorName + ".extradependencies", "NONE"), typeof(GrammaticalStructure.Extras));
        }
示例#27
0
        /// <summary>A debugging method to try entity linking sentences from the console.</summary>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            Properties properties = StringUtils.ArgsToProperties(args);
            properties.SetProperty("annotators", "tokenize,ssplit,pos,lemma,ner,entitymentions,entitylink");

            // Construct the full pipeline up front, then read sentences interactively.
            StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
            IOUtils.Console("sentence> ", null);
        }
示例#28
0
 /// <summary>
 /// Lazily resolves the shared "parse" annotator from StanfordCoreNLP and caches it.
 /// </summary>
 public virtual IAnnotator GetParser()
 {
     if (parserProcessor != null)
     {
         return(parserProcessor);
     }
     parserProcessor = StanfordCoreNLP.GetExistingAnnotator("parse");
     System.Diagnostics.Debug.Assert((parserProcessor != null));
     return(parserProcessor);
 }
示例#29
0
 /// <summary>
 /// Builds the CoreNLP pipeline (tokenize, ssplit, pos, lemma).
 /// </summary>
 /// <param name="jarRoot">Folder containing the extracted CoreNLP model files.</param>
 public void StanfordCore(string jarRoot = @"..\..\models")
 {
     var props = new java.util.Properties();
     props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
     // props.setProperty("ner.useSUTime", "0"); 
     var curDir = Environment.CurrentDirectory;
     // Switch into the model folder so StanfordCoreNLP can locate its models;
     // restore the caller's working directory even if construction throws.
     Directory.SetCurrentDirectory(jarRoot);
     try
     {
         pipeline = new StanfordCoreNLP(props);
     }
     finally
     {
         Directory.SetCurrentDirectory(curDir);
     }
 }
示例#30
0
        /// <summary>
        /// Regression check: constructing a Sentence from an annotated CoreMap must not throw.
        /// </summary>
        public virtual void TestFromCoreMapCrashCheck()
        {
            StanfordCoreNLP pipeline = new StanfordCoreNLP(new _Properties_107());
            Annotation      document = new Annotation("This is a sentence.");
            pipeline.Annotate(document);

            ICoreMap firstSentence = document.Get(typeof(CoreAnnotations.SentencesAnnotation))[0];
            new Sentence(firstSentence);
        }
        /// <summary>A debugging method to try relation extraction from the console.</summary>
        /// <exception cref="System.IO.IOException">If any IO problem</exception>
        public static void Main(string[] args)
        {
            Properties properties = StringUtils.ArgsToProperties(args);

            // Full KBP stack plus caseless/cased regexner mappings.
            properties.SetProperty("annotators", "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,coref,kbp");
            properties.SetProperty("regexner.mapping", "ignorecase=true,validpospattern=^(NN|JJ).*,edu/stanford/nlp/models/kbp/regexner_caseless.tab;edu/stanford/nlp/models/kbp/regexner_cased.tab");

            StanfordCoreNLP pipeline = new StanfordCoreNLP(properties);
            IOUtils.Console("sentence> ", null);
        }
示例#32
0
        /// <summary>
        /// Runs the standard pipeline over <paramref name="text"/>, then layers
        /// hybrid coreference annotation on top, returning the annotated document.
        /// </summary>
        private static Annotation TestAnnoation(string text, string[] args)
        {
            Properties props = StringUtils.ArgsToProperties(args);
            Annotation doc   = new Annotation(text);

            StanfordCoreNLP corenlp = new StanfordCoreNLP(props);
            corenlp.Annotate(doc);

            var hybridCoref = new Edu.Stanford.Nlp.Pipeline.HybridCorefAnnotator(props);
            hybridCoref.Annotate(doc);
            return(doc);
        }
 /// <summary>Populates options from StanfordCoreNLP pipeline.</summary>
 public static AnnotationOutputter.Options GetOptions(StanfordCoreNLP pipeline)
 {
     var options = new AnnotationOutputter.Options();

     options.encoding               = pipeline.GetEncoding();
     options.pretty                 = pipeline.GetPrettyPrint();
     options.includeText            = pipeline.GetIncludeText();
     options.printSingletons        = pipeline.GetPrintSingletons();
     options.constituentTreePrinter = pipeline.GetConstituentTreePrinter();
     // Both beam fields mirror the pipeline's single beam printing option.
     options.beamPrintingOption = pipeline.GetBeamPrintingOption();
     options.relationsBeam      = pipeline.GetBeamPrintingOption();
     return(options);
 }
        /// <summary>
        /// Annotates <paramref name="source"/> and folds every parsed sentence
        /// into a single combined ProcessedEntity.
        /// </summary>
        public static ProcessedEntity Classify(this StanfordCoreNLP nlp, string source)
        {
            var document = new CoreDocument(source);
            nlp.annotate(document);

            // Fold sentence-level entities together, starting from an empty entity.
            var combined = new ProcessedEntity();
            foreach (var sentence in document.sentences().toArray().OfType <CoreSentence>())
            {
                combined = ProcessedEntity.Union(combined, new ParsedSentence(sentence).ToProcessedEntity());
            }
            return(combined);
        }
        /// <summary>
        /// Builds a CoreNLP pipeline with explicit model paths rooted at modelsDir
        /// and returns the annotation of the current text.
        /// </summary>
        private Annotation PrepareAnnotation()
        {
            var props = new Properties();

            // Point every annotator at its model files under modelsDir.
            props.put("pos.model", modelsDir + "pos-tagger/english-left3words/english-left3words-distsim.tagger");
            props.put("ner.model", modelsDir + "ner/english.conll.4class.distsim.crf.ser.gz");
            props.put("parse.model", modelsDir + "lexparser/englishPCFG.ser.gz");
            props.put("sutime.rules", modelsDir + "sutime/defs.sutime.txt, " + modelsDir + "sutime/english.sutime.txt");
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
            props.setProperty("sutime.binders", "0");
            props.setProperty("ner.useSUTime", "0");

            var corenlp    = new StanfordCoreNLP(props);
            var annotation = new Annotation(text);
            corenlp.annotate(annotation);
            return annotation;
        }
        /// <summary>
        /// Creates the StanfordCoreNLP instance, which annotates chunks of text. We then use the
        /// annotations to perform our analysis. The pipeline can be configured with various annotators;
        /// we use only the ones we need, for perf reasons.
        /// </summary>
        public StanfordCoreNLP GetPipeline()
        {
            var modelFolder = ConfigurationManager.AppSettings["CoreNLP.ModelDirectory"];

            ValidateModelFolder(modelFolder);

            //Switch the current directory momentarily so Pipeline can find the models. 
            //Wish there was a better way.
            var currentDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(modelFolder);
            try
            {
                var props = GetPipelineProperties();
                return new StanfordCoreNLP(props);
            }
            finally
            {
                //Switch it back, even if pipeline construction throws.
                Directory.SetCurrentDirectory(currentDir);
            }
        }
        /// <summary>
        /// Builds the lemmatization pipeline (tokenize, ssplit, pos, lemma, parse, ner)
        /// from a hard-coded extraction of the CoreNLP 3.6.0 models jar.
        /// </summary>
        public StanfordLemmatizer()
        {
            // Path to the folder with models extracted from `stanford-corenlp-3.6.0-models.jar`
            var jarRoot = @"C:\Work\NLP\Stanford\stanford-corenlp-full-2015-12-09\stanford-corenlp-3.6.0-models";
            _separator = Guid.NewGuid().ToString();

            // Text for processing
            // Annotation pipeline configuration
            var props = new Properties();
            //props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner,dcoref");
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner");
            props.setProperty("ner.useSUTime", "0");

            // We should change current directory, so StanfordCoreNLP could find all the model files automatically
            var curDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                _pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                // Restore the caller's working directory even if model loading throws.
                Directory.SetCurrentDirectory(curDir);
            }
        }
示例#38
0
文件: NLP.cs 项目: gbolinder/OTAP
 /// <summary>
 /// Initializes the shared CoreNLP pipeline, optionally overriding the model folder.
 /// </summary>
 /// <param name="modelLocation">Optional model folder; when null/empty the configured default is used.</param>
 public static void Start(string modelLocation = null)
 {
     var curDir = Environment.CurrentDirectory;
     if (!string.IsNullOrEmpty(modelLocation))
     {
         _modelLocation = modelLocation;
     }
     try
     {
         // Annotation pipeline configuration
         var props = new Properties();
         props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
         // sutime.binders is a binder *count*; every other snippet in this file
         // sets it to "0", and "false" is not a valid integer value.
         props.setProperty("sutime.binders", "0");
         props.setProperty("ner.useSUTime", "false");
         // We should change current directory, so StanfordCoreNLP could find all the model files automatically
         Directory.SetCurrentDirectory(HostingEnvironment.MapPath(ModelLocation));
         pipeline = new StanfordCoreNLP(props);
     }
     finally
     {
         Directory.SetCurrentDirectory(curDir);
     }
 }
        /// <summary>
        /// Initializes this instance: builds the shared pipeline (once) and loads
        /// the list of relevant parts of speech from configuration.
        /// </summary>
        private void Initialize()
        {
            if (pipeline == null)
            {
                // Todo: How to get this ourselves
                //var jarRoot = @"C:\Users\karlbuha\Documents\Visual Studio 2012\Projects\ServiceMe\RestServiceV1\NLPModules\";
                var jarRoot = ConfigurationManager.AppSettings["NlpModulePath"];
                //var jarRoot = @"F:\sitesroot\0\bin\NlpModules\";

                // Annotation pipeline configuration
                var props = new Properties();
                props.setProperty("annotators", "tokenize, ssplit, pos, lemma");
                props.setProperty("sutime.binders", "0");
                props.setProperty("ner.useSUTime", "false");

                // We should change current directory, so StanfordCoreNLP could find all the model files automatically
                var curDir = Environment.CurrentDirectory;
                Directory.SetCurrentDirectory(jarRoot);
                try
                {
                    NlpProvider.pipeline = new StanfordCoreNLP(props);
                }
                finally
                {
                    // Restore the working directory even if model loading fails.
                    Directory.SetCurrentDirectory(curDir);
                }

                NlpProvider.relevantPos = ConfigurationManager.AppSettings["NlpFos"].Split(new string[] { "," }, StringSplitOptions.RemoveEmptyEntries).ToList();
            }
        }
        // Analyzes the article with CoreNLP, dumps the result to Relay.xml,
        // and returns the keys extracted from it via ner().
        private List<string> nlp(string sentence)
        {
            List<string> return_key = new List<string>();
            string Relay_file = ".\\xml";
            string Relay_name = "Relay.xml";
            string Relay_path = Relay_file + "\\" + Relay_name;

            // Path to the folder with models extracted from `stanford-corenlp-3.4-models.jar`
            var jarRoot = @"stanford-corenlp-3.5.2-models\";

            // Annotation pipeline configuration
            var props = new java.util.Properties();
            props.setProperty("ner.useSUTime", "false");
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            props.setProperty("sutime.binders", "0");

            // Change the current directory so StanfordCoreNLP can find the model
            // files automatically; restore it even if construction throws.
            var curDir = Environment.CurrentDirectory;
            System.IO.Directory.SetCurrentDirectory(jarRoot);
            StanfordCoreNLP pipeline;
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                System.IO.Directory.SetCurrentDirectory(curDir);
            }

            // Annotation
            var annotation = new Annotation(sentence);
            pipeline.annotate(annotation);

            // Write the NLP analysis to Relay.xml; always close the stream.
            FileOutputStream os = new FileOutputStream(new File(Relay_file, Relay_name));
            try
            {
                pipeline.xmlPrint(annotation, os);
            }
            finally
            {
                os.close();
            }

            // Let ner() assemble the individual words into meaningful keys.
            return_key.AddRange(ner(Relay_path));

            return return_key;
        }