/// <summary>
        /// Executes Sentiment and EntitiesMentioned analysis.
        /// </summary>
        /// <param name="pipeline">Configured CoreNLP pipeline used for annotation.</param>
        /// <param name="text">Raw text to analyze.</param>
        /// <returns>Failure when no sentences were detected; otherwise the analysis result.</returns>
        public IOutcome<AnalysisResult> Analyze(StanfordCoreNLP pipeline, string text)
        {
            // Annotate the raw text with the supplied pipeline.
            var doc = new Annotation(text);
            pipeline.annotate(doc);

            // Sentence splitting must have produced output before we can proceed.
            object sentences = doc.get(typeof(CoreAnnotations.SentencesAnnotation));
            if (sentences == null)
            {
                return Outcomes.Outcomes
                               .Failure<AnalysisResult>()
                               .WithMessage("No sentences detected.");
            }

            // Run both analyses and package them into a single result.
            var analysis = new AnalysisResult()
            {
                Sentiment = GetSentiment((ArrayList)sentences),
                MentionedEntities = GetMentions(doc)
            };

            return Outcomes.Outcomes
                           .Success<AnalysisResult>()
                           .WithValue(analysis);
        }
예제 #2
0
        static void Main()
        {
            // Folder containing models extracted from `stanford-corenlp-3.9.1-models.jar`.
            var modelsRoot = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-corenlp-full-2018-10-05\models";

            // Sample input.
            var input = "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.";

            // Configure the annotation pipeline.
            var settings = new Properties();
            settings.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner,dcoref");
            settings.setProperty("ner.useSUTime", "0");

            // Temporarily switch the working directory so CoreNLP can locate its model files.
            var previousDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(modelsRoot);
            var nlp = new StanfordCoreNLP(settings);
            Directory.SetCurrentDirectory(previousDir);

            // Annotate the sample text.
            var annotated = new Annotation(input);
            nlp.annotate(annotated);

            // Pretty-print the result to the console.
            using (var buffer = new ByteArrayOutputStream())
            {
                nlp.prettyPrint(annotated, new PrintWriter(buffer));
                Console.WriteLine(buffer.toString());
                buffer.close();
            }
        }
        //-------------------------------------------------------------------------------------- Stanford Core NLP -----------------------------------------
        //-- Better for Entity recognition

        /// <summary>
        /// Builds a full CoreNLP pipeline (POS, lemma, NER, parse, dcoref),
        /// annotates <paramref name="text"/>, and pretty-prints the result to the
        /// debug output. Based on:
        /// https://interviewbubble.com/getting-started-with-stanford-corenlp-a-stanford-corenlp-tutorial/
        /// </summary>
        /// <param name="text">Raw text to process.</param>
        public static void buildPipeline(string text)
        {
            // NOTE: the original declared a `jarRoot` models path here but never used
            // it (the unused local has been removed); model resolution relies on the
            // current working directory.
            Properties props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

            // Create an empty Annotation just with the given text.
            Annotation document = new Annotation(text);

            // Run all annotators on this text.
            pipeline.annotate(document);

            // Pretty-print the processed document to the debug listener.
            using (var stream = new ByteArrayOutputStream())
            {
                pipeline.prettyPrint(document, new PrintWriter(stream));
                Debug.WriteLine(stream.toString());
                stream.close();
            }
        }
예제 #4
0
        /// <summary>
        /// Gets stem text: returns the lemma of the first token of the first
        /// sentence, or null when annotation fails for any reason.
        /// </summary>
        /// <param name="text">Text to stem</param>
        /// <returns>Text that is stemmed</returns>
        public string GetStemmedText(string text)
        {
            try
            {
                // Annotate the input with the shared pipeline.
                var annotated = new Annotation(text);
                _pipeLine.annotate(annotated);

                // First sentence of the document.
                var sentenceList = annotated.get(_sentencesAnnotation.getClass()) as ArrayList;
                var firstSentence = sentenceList.get(0) as CoreMap;

                // First token of that sentence.
                var tokenList = firstSentence.get(_tokensAnnotation.getClass()) as ArrayList;
                var firstToken = tokenList.get(0) as CoreLabel;

                // Its lemma is the stemmed form we return.
                return firstToken.get(_lemmaAnnotation.getClass()).ToString();
            }
            catch (Exception)
            {
                // Best effort: any failure (empty input, annotation error) yields null.
                return null;
            }
        }
        /// <summary>
        /// Sums SentiWordNet positive/negative scores over every token in the text.
        /// </summary>
        /// <param name="text">Input text to score.</param>
        /// <param name="posScoreSum">Accumulated positive score across all tokens.</param>
        /// <param name="negScoreSum">Accumulated negative score across all tokens.</param>
        public void analyseText(string text, out float posScoreSum, out float negScoreSum)
        {
            // Annotate the input with the configured pipeline.
            var annotated = new Annotation(text);
            thePipeline.annotate(annotated);

            posScoreSum = 0;
            negScoreSum = 0;

            var sentenceList = annotated.get(typeof(CoreAnnotations.SentencesAnnotation));
            foreach (Annotation sentence in sentenceList as ArrayList)
            {
                var tokenList = sentence.get(typeof(CoreAnnotations.TokensAnnotation));
                foreach (CoreLabel token in tokenList as ArrayList)
                {
                    string surface = token.get(typeof(CoreAnnotations.TextAnnotation)).ToString();
                    string posTag = token.get(typeof(CoreAnnotations.PartOfSpeechAnnotation)).ToString();
                    string lemma = token.get(typeof(CoreAnnotations.LemmaAnnotation)).ToString();

                    // Only tokens whose POS maps to a SentiWordNet category are scored.
                    var basicPOS = getBasicPOS(posTag);
                    if (basicPOS == null)
                    {
                        continue;
                    }

                    float posScore, negScore;
                    theSWNDictionary.GetScore(lemma, basicPOS, out posScore, out negScore);
                    posScoreSum += posScore;
                    negScoreSum += negScore;
                }
            }
        }
예제 #6
0
        static void Main()
        {
            // Models extracted from `stanford-corenlp-3.7.0-models.jar`.
            var modelsPath = @"..\..\..\..\paket-files\nlp.stanford.edu\stanford-corenlp-full-2016-10-31\models";

            // Input to process.
            var input = "Kosgi Santosh sent an email to Stanford University. He didn't get a reply.";

            // Pipeline configuration.
            var settings = new Properties();
            settings.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse, ner,dcoref");
            settings.setProperty("ner.useSUTime", "0");

            // CoreNLP resolves model files relative to the current directory,
            // so switch to the model root only while constructing the pipeline.
            var originalDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(modelsPath);
            var nlp = new StanfordCoreNLP(settings);
            Directory.SetCurrentDirectory(originalDir);

            // Annotate the input.
            var annotated = new Annotation(input);
            nlp.annotate(annotated);

            // Pretty-print the annotation to the console.
            using (var output = new ByteArrayOutputStream())
            {
                nlp.prettyPrint(annotated, new PrintWriter(output));
                Console.WriteLine(output.toString());
                output.close();
            }
        }
예제 #7
0
        /// <summary>
        /// Annotates the text and converts the CoreNLP output into the project's
        /// Document model (sentences with sentiment scores, tokens, named entities).
        /// Access to the pipeline is serialized via syncLock.
        /// </summary>
        /// <param name="text">Raw text to process.</param>
        /// <returns>A Document wrapping all sentences and tokens found.</returns>
        public Tvn.Cosine.Text.Nlp.Document Process(string text)
        {
            lock (syncLock)
            {
                var allSentences = new List <Sentence>();
                var allTokens    = new List <Token>();

                var annotated = new Annotation(text);
                pipeline.annotate(annotated);

                var stanfordSentences = (java.util.List)annotated.get(typeof(CoreAnnotations.SentencesAnnotation));
                if (stanfordSentences != null)
                {
                    for (int i = 0; i < stanfordSentences.size(); ++i)
                    {
                        var coreSentence   = (CoreMap)stanfordSentences.get(i);
                        var sentimentLabel = (string)coreSentence.get(typeof(SentimentClass));
                        var sentimentTree  = (Tree)coreSentence.get(typeof(SentimentAnnotatedTree));
                        var predictions    = RNNCoreAnnotations.getPredictions(sentimentTree).getMatrix().getData();
                        var sentenceTokens = getTokens((java.util.List)coreSentence.get(typeof(CoreAnnotations.TokensAnnotation)));
                        var entities       = getNamedEntities(sentenceTokens);

                        // Map every sentiment class index to its predicted score.
                        var scoreBySentiment = new Dictionary <Sentiment, double>();
                        for (uint cls = 0; cls < predictions.Length; ++cls)
                        {
                            scoreBySentiment[new Sentiment(cls, cls.ToString())] = predictions[cls];
                        }

                        allSentences.Add(new Sentence(coreSentence.ToString(), sentenceTokens, entities, new Sentiment(0, sentimentLabel), scoreBySentiment));
                        allTokens.AddRange(sentenceTokens);
                    }
                }
                return new Tvn.Cosine.Text.Nlp.Document(text, allSentences, allTokens);
            }
        }
예제 #8
0
        /// <summary>
        /// Wraps the text in an Annotation and runs the configured pipeline over it.
        /// </summary>
        /// <param name="text">Text to annotate.</param>
        /// <returns>The fully annotated document.</returns>
        public Annotation Annotate(string text)
        {
            var result = new Annotation(text);
            _pipeline.annotate(result);
            return result;
        }
        /// <summary>
        /// Extracts the quotes CoreNLP finds in <paramref name="data"/>.
        /// Malformed individual quotes are skipped; any other failure is wrapped
        /// in a SpanishCoreNLPQuoteException.
        /// </summary>
        /// <param name="data">Raw text to scan for quotes.</param>
        /// <returns>All quotes that could be converted to MCQuote.</returns>
        public async Task <List <MCQuote> > GetQuotes(string data)
        {
            List <MCQuote> quotes = new List <MCQuote>();

            try
            {
                var curDir = Environment.CurrentDirectory;
                Directory.SetCurrentDirectory(_modelPath);
                try
                {
                    CoreDocument doc = new CoreDocument(data);

                    _pipeline.annotate(doc);

                    for (int i = 0; i < doc.quotes().size(); i++)
                    {
                        try
                        {
                            CoreQuote q = (CoreQuote)doc.quotes().get(i);
                            quotes.Add(new MCQuote(q));
                        }
                        catch (Exception)
                        {
                            // Skip a single malformed quote; keep collecting the rest.
                        }
                    }
                }
                finally
                {
                    // Fixed: previously the working directory was only restored on
                    // success, leaving the process in _modelPath after a failure.
                    Directory.SetCurrentDirectory(curDir);
                }
            }
            catch (Exception e)
            {
                throw new SpanishCoreNLPQuoteException(e.Message, e);
            }
            return(quotes);
        }
예제 #10
0
        /// <summary>
        /// Stems the given word: returns the lemma of every token produced by the
        /// pipeline, or null when annotation fails.
        /// </summary>
        /// <param name="word">Word (or short text) to stem.</param>
        public List <string> Stem(string word)
        {
            if (pipeline == null)
            {
                Initial();
            }

            // Create an empty Annotation holding just the given text.
            var document = new Annotation(word);
            try
            {
                pipeline.annotate(document);
            }
            catch (Exception)
            {
                // Best effort: annotation failure yields null.
                return null;
            }

            var sentenceKey = new edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation().getClass();
            var tokenKey    = new edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation().getClass();
            var lemmaKey    = new edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation().getClass();

            // Collect the lemma of every token in every sentence found.
            var lemmas = new List <String>();
            foreach (CoreMap sentence in (ArrayList)document.get(sentenceKey))
            {
                foreach (CoreLabel token in (ArrayList)sentence.get(tokenKey))
                {
                    lemmas.Add((string)token.get(lemmaKey));
                }
            }
            return lemmas;
        }
        /// <summary>
        /// Accepts a document (text) file, lets CoreNLP process (annotate) it
        /// and returns an adapter that allows iterating the output
        ///
        /// also, the text file (normalized) is returned
        /// </summary>
        /// <param name="file">Text file to normalize and annotate.</param>
        /// <returns>returns null if the pipeline is not initialized</returns>
        public StanfordDocumentFacade Annotate(FileInfo file)
        {
            if (!this.Initialized)
            {
                return(null);
            }

            try {
                string data;
                NormalizeFile(file, out data);
                var annotation = new edu.stanford.nlp.pipeline.Annotation(data);
                _pipeLine.annotate(annotation);
                return(new StanfordDocumentFacade(annotation, data));
            } catch (Exception) {
                // Fixed: was `throw ex;`, which resets the stack trace.
                // A bare `throw;` rethrows the original exception intact.
                throw;
            }
        }
예제 #12
0
        /// <summary>
        /// Annotates the text and returns CoreNLP's JSON output parsed into a JObject.
        /// </summary>
        /// <param name="text">Text to process.</param>
        public static JObject ProcessText(string text)
        {
            var annotated = new Annotation(text);

            using (java.io.StringWriter jsonWriter = new java.io.StringWriter())
            {
                pipeline.annotate(annotated);
                pipeline.jsonPrint(annotated, jsonWriter);
                return JObject.Parse(jsonWriter.toString());
            }
        }
예제 #13
0
        /// <summary>
        /// Annotates <paramref name="source"/> and folds every parsed sentence
        /// into a single ProcessedEntity via ProcessedEntity.Union.
        /// </summary>
        /// <param name="nlp">Pipeline used for annotation.</param>
        /// <param name="source">Raw text to classify.</param>
        public static ProcessedEntity Classify(this StanfordCoreNLP nlp, string source)
        {
            CoreDocument document = new CoreDocument(source);
            nlp.annotate(document);

            // Start from an empty entity and merge each sentence's contribution.
            var result = new ProcessedEntity();
            foreach (var coreSentence in document.sentences().toArray().OfType <CoreSentence>())
            {
                result = ProcessedEntity.Union(result, new ParsedSentence(coreSentence).ToProcessedEntity());
            }
            return result;
        }
예제 #14
0
        /// <summary>
        /// Sentence-splits a paragraph with CoreNLP, then word-breaks each sentence.
        /// </summary>
        /// <param name="paragraph">Raw paragraph text.</param>
        /// <returns>Every word found, in document order.</returns>
        public string[] BreakIntoWords(string paragraph)
        {
            // create an empty Annotation just with the given text
            Annotation document = new Annotation(paragraph);

            // run all Annotators on this text
            _pipline.annotate(document);

            // these are all the sentences in this document
            // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
            object    obj       = document.get(new CoreAnnotations.SentencesAnnotation().getClass());
            ArrayList sentences = obj as ArrayList;

            // Presize: guess ~10 words per sentence.
            // Fixed: removed an unused loop counter the original declared here.
            List <string> words = new List <string>(sentences.size() * 10);

            foreach (CoreMap sentence in sentences)
            {
                words.AddRange(_wordBreaker.BreakParagraph(sentence.ToString()));
            }

            return(words.ToArray());
        }
예제 #15
0
        /// <summary>
        /// Tokenizes the document text and returns the lemma of every token;
        /// empty list when the pipeline produced no tokens.
        /// </summary>
        /// <param name="documentText">Raw document text.</param>
        public List <string> TokenizeAndLemmatize(string documentText)
        {
            var annotated = new Annotation(documentText);
            _pipeline.annotate(annotated);

            var tokenKey = ClassLiteral <CoreAnnotations.TokensAnnotation> .Value;
            var lemmaKey = ClassLiteral <CoreAnnotations.LemmaAnnotation> .Value;

            var lemmas = new List <string>();
            var tokenItems = annotated.get(tokenKey) as ArrayList;
            if (tokenItems != null)
            {
                foreach (var label in tokenItems.OfType <CoreLabel>())
                {
                    lemmas.Add((string)label.get(lemmaKey));
                }
            }
            return lemmas;
        }
예제 #16
0
        /// <summary>
        /// Reads an annotation from <paramref name="path"/>, runs the full pipeline
        /// (through dcoref) over it, and writes the XML result to
        /// "coreference_output.xml" in the original working directory.
        /// </summary>
        /// <param name="path">Path of the annotation file to read.</param>
        public void Analysis(string path)
        {
            var document = this.ReadAnnotation(path);

            Properties props = new Properties();
            // Fixed: the annotator list was previously set twice with the same value;
            // dead commented-out configuration has been removed.
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            props.setProperty("sutime.binders", "0");
            props.setProperty("ner.useSUTime", "false");

            var jarRoot = @"D:\Tesis2016\Jarvis\Lincoln\Models";
            var curDir  = Environment.CurrentDirectory;

            // Switch to the model root so CoreNLP can find its files; restore the
            // original directory even if construction or annotation throws.
            StanfordCoreNLP pipeline;
            System.IO.Directory.SetCurrentDirectory(jarRoot);
            try
            {
                pipeline = new StanfordCoreNLP(props);
                pipeline.annotate(document);
            }
            finally
            {
                System.IO.Directory.SetCurrentDirectory(curDir);
            }

            // Fixed: the output stream was never closed, leaking the file handle.
            using (FileOutputStream os = new FileOutputStream(new File("coreference_output.xml")))
            {
                pipeline.xmlPrint(document, os);
            }
        }
        /// <summary>
        /// Builds a pipeline (tokenize through parse, no coreference) with explicit
        /// model paths and returns the instance text fully annotated.
        /// </summary>
        private Annotation PrepareAnnotation()
        {
            // Annotator chain first, then the model file locations.
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
            props.setProperty("sutime.binders", "0");
            props.setProperty("ner.useSUTime", "0");
            props.put("pos.model", modelsDir + "pos-tagger/english-left3words/english-left3words-distsim.tagger");
            props.put("ner.model", modelsDir + "ner/english.conll.4class.distsim.crf.ser.gz");
            props.put("parse.model", modelsDir + "lexparser/englishPCFG.ser.gz");
            props.put("sutime.rules", modelsDir + "sutime/defs.sutime.txt, " + modelsDir + "sutime/english.sutime.txt");

            // Annotate the instance's text with a freshly built pipeline.
            var nlp = new StanfordCoreNLP(props);
            var annotated = new Annotation(text);
            nlp.annotate(annotated);
            return annotated;
        }
예제 #18
0
        /// <summary>
        /// Returns the sentiment class (0 = Very Negative … 4 = Very Positive) of
        /// the longest sentence in <paramref name="text"/>, or -1 when no sentence
        /// could be analyzed.
        /// </summary>
        /// <param name="text">Text to analyze.</param>
        public int SentiAnalysis(string text)
        {
            // Annotate the input so each sentence carries a sentiment tree.
            var annotation = new edu.stanford.nlp.pipeline.Annotation(text);
            pipeline.annotate(annotation);

            var sentences = annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()) as ArrayList;
            if (sentences == null)
            {
                return(-1);
            }

            // Fixed: the original returned the FIRST sentence's sentiment from inside
            // the loop, leaving the longest-sentence selection below as dead code.
            // It also pretty-printed to a discarded buffer, kept an empty try/catch,
            // and declared unused locals (sentimentText, NF) — all removed.
            int  mainSentiment = 0;
            int  longest       = 0;
            bool found         = false;

            foreach (CoreMap sentence in sentences)
            {
                Tree tree = (Tree)sentence.get(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree));

                int sentiment = edu.stanford.nlp.neural.rnn.RNNCoreAnnotations.getPredictedClass(tree);

                String partText = sentence.ToString();

                // Use the sentiment of the longest sentence as the overall result.
                if (partText.Length > longest)
                {
                    mainSentiment = sentiment;
                    longest       = partText.Length;
                    found         = true;
                }
            }

            return(found ? mainSentiment : -1);
        }
예제 #19
0
        /// <summary>
        /// Configures a tokenize-through-parse pipeline with explicit model paths
        /// and annotates the instance's text field.
        /// </summary>
        private Annotation PrepareAnnotation()
        {
            var settings = new Properties();

            // Model file locations.
            settings.put("pos.model", modelsDir + "pos-tagger/english-left3words/english-left3words-distsim.tagger");
            settings.put("ner.model", modelsDir + "ner/english.conll.4class.distsim.crf.ser.gz");
            settings.put("parse.model", modelsDir + "lexparser/englishPCFG.ser.gz");
            settings.put("sutime.rules", modelsDir + "sutime/defs.sutime.txt, " + modelsDir + "sutime/english.sutime.txt");

            // Annotator chain and SUTime switches.
            settings.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse");
            settings.setProperty("sutime.binders", "0");
            settings.setProperty("ner.useSUTime", "0");

            var nlp = new StanfordCoreNLP(settings);
            var annotated = new Annotation(text);
            nlp.annotate(annotated);
            return annotated;
        }
예제 #20
0
        /// <summary>
        /// Annotates the sentence, caching its tokens and the dependency graph of
        /// the first sentence on this instance.
        /// </summary>
        /// <param name="sentence">Sentence to parse.</param>
        public void Parse(string sentence)
        {
            if (pipeline == null)
            {
                Initial();
            }

            // Annotate and cache the token list.
            var context = new Annotation(sentence);
            pipeline.annotate(context);
            this.tokens = (ArrayList)context.get(tokenObj.getClass());

            // Keep only the first sentence's dependency graph.
            var sentenceList = (ArrayList)context.get(senObj.getClass());
            foreach (CoreMap first in sentenceList)
            {
                this.dependencies = (SemanticGraph)first.get(depObj.getClass());
                break;
            }
        }
예제 #21
0
        /// <summary>
        /// Tokenizes <paramref name="sequence"/> with the shared pipeline and
        /// returns the textual form of each token, in order.
        /// </summary>
        /// <param name="sequence">Text to tokenize; must not be null.</param>
        /// <exception cref="ArgumentNullException">When <paramref name="sequence"/> is null.</exception>
        public List <string> Tokenize(string sequence)
        {
            if (sequence == null)
            {
                // Fixed: was a bare `new Exception(...)`; ArgumentNullException is
                // more specific and still caught by existing catch (Exception) callers.
                throw new ArgumentNullException(nameof(sequence), "Sequence should not be null for tokenizer.");
            }
            if (pipeline == null)
            {
                Initial();
            }
            var document = new Annotation(sequence);

            pipeline.annotate(document);

            var tokens = (ArrayList)document.get(tokenObj.getClass());

            return((from CoreMap token in tokens select token.ToString()).ToList());
        }
예제 #22
0
        /// <summary>
        /// Annotates the query text and exposes the resulting sentence list;
        /// returns an empty analysis when the pipeline is not initialized.
        /// </summary>
        /// <param name="query">Query carrying the text to analyze.</param>
        private NlpAnalysis AnalyseText(NlpQuery query)
        {
            var analysis = new NlpAnalysis();

            if (!IsInitialized)
            {
                return analysis;
            }

            // Annotate the query text with the member pipeline.
            var annotated = new Annotation(query.Text);
            m_pipeline.annotate(annotated);

            var sentenceList = annotated.get(typeof(CoreAnnotations.SentencesAnnotation)) as ArrayList;
            if (sentenceList != null)
            {
                analysis.Sentences = sentenceList;
            }
            return analysis;
        }
        // Sample from https://stanfordnlp.github.io/CoreNLP/coref.html
        static void Main()
        {
            var jarRoot = @"..\..\..\..\data\paket-files\nlp.stanford.edu\stanford-corenlp-full-2018-02-27\models";

            Annotation doc = new Annotation("Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.");

            // Coreference pipeline configuration (SUTime disabled).
            var settings = new Properties();
            settings.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,mention,coref");
            settings.setProperty("ner.useSUTime", "0");

            // Construct the pipeline with the model root as the working directory.
            var originalDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(jarRoot);
            var nlp = new StanfordCoreNLP(settings);
            Directory.SetCurrentDirectory(originalDir);

            nlp.annotate(doc);

            var chainKey     = new CorefCoreAnnotations.CorefChainAnnotation().getClass();
            var sentencesKey = new CoreAnnotations.SentencesAnnotation().getClass();
            var mentionsKey  = new CorefCoreAnnotations.CorefMentionsAnnotation().getClass();

            // Print every coreference chain.
            Console.WriteLine("---");
            Console.WriteLine("coref chains");
            var chains = doc.get(chainKey) as Map;
            foreach (CorefChain chain in chains.values().toArray())
            {
                Console.WriteLine($"\t{chain}");
            }

            // Print the mentions found in each sentence.
            var sentenceList = doc.get(sentencesKey) as ArrayList;
            foreach (CoreMap sentence in sentenceList.toArray())
            {
                Console.WriteLine("---");
                Console.WriteLine("mentions");
                var mentions = sentence.get(mentionsKey) as ArrayList;
                foreach (Mention mention in mentions)
                {
                    Console.WriteLine("\t" + mention);
                }
            }
        }
예제 #24
0
        /// <summary>
        /// Extracts (word, POS-tag) pairs from <paramref name="text"/> and updates
        /// the running sentence and word counts.
        /// </summary>
        /// <param name="text">Input text; may be null or empty.</param>
        /// <param name="count">Accumulator for sentence/word counts.</param>
        /// <returns>POS entries for every word of length &lt;= 100.</returns>
        public static IEnumerable <POS> Extract(string text, ref NLPCount count)
        {
            var segment = new List <POS>();

            if (string.IsNullOrEmpty(text))
            {
                return(segment);
            }

            var document = new Annotation(text);

            pipeline.annotate(document);

            var sentencesAnnotation    = new SentencesAnnotation();
            var tokensAnnotation       = new TokensAnnotation();
            var textAnnotation         = new TextAnnotation();
            var partOfSpeechAnnotation = new PartOfSpeechAnnotation();

            java.util.ArrayList sentenceArrayList = (java.util.ArrayList)document.get(sentencesAnnotation.getClass());
            var sentences = sentenceArrayList.toArray();

            count.SentenceCount += sentences.Length;
            for (int i = 0; i < sentences.Length; i++)
            {
                var sentence   = (edu.stanford.nlp.util.CoreMap)sentences[i];
                var tokenArray = ((java.util.ArrayList)sentence.get(tokensAnnotation.getClass()));
                var tokens     = tokenArray.toArray();
                count.WordsPhraseCount += tokens.Length;
                for (int j = 0; j < tokens.Length; j++)
                {
                    var    coreLabel = (edu.stanford.nlp.ling.CoreLabel)tokens[j];
                    string posTag    = (string)coreLabel.get(partOfSpeechAnnotation.getClass());
                    string word      = (string)coreLabel.get(textAnnotation.getClass());
                    // Skip pathological "words" longer than 100 characters.
                    if (word.Length <= 100)
                    {
                        segment.Add(new POS()
                        {
                            Text = word, PosTag = posTag
                        });
                    }
                }
            }
            // Fixed: previously returned `segment.ToList()`, an unnecessary copy that
            // was also inconsistent with the empty-input path returning `segment`.
            return(segment);
        }
예제 #25
0
        /// <summary>
        /// Annotates the text and returns the predicted emotion of its first
        /// sentence (default value when no sentences were produced).
        /// </summary>
        /// <param name="text">Text to score.</param>
        public Emotion GetEmotion(string text)
        {
            var annotated = new Annotation(text);

            stanfordNLP.annotate(annotated);

            // Only the first sentence is scored.
            var sentenceArray = (annotated.get(sentencesAnnotationClassName) as ArrayList).toArray();
            if (sentenceArray.Length == 0)
            {
                return default(Emotion);
            }
            return ParseEmotion(sentenceArray[0]);

            Emotion ParseEmotion(object s)
            {
                var sentence     = s as Annotation;
                var sentenceTree = sentence.get(emotionAnnotationTreeClassName) as Tree;
                return new Emotion(RNNCoreAnnotations.getPredictedClass(sentenceTree));
            }
        }
예제 #26
0
        /// <summary>
        /// Sentence-splits <paramref name="text"/> with CoreNLP and returns each
        /// sentence's text; empty when no sentences are detected.
        /// </summary>
        /// <param name="text">Raw text to split.</param>
        public static IEnumerable <string> FindWords(string text)
        {
            // Path to the folder with models extracted from `stanford-corenlp-3.4-models.jar`
            var jarRoot = @"stanford-corenlp-3.4-models\";

            // Annotation pipeline configuration
            var props = new Properties();

            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            props.setProperty("sutime.binders", "0");

            // Change directory so CoreNLP finds its model files; restore it even if
            // pipeline construction throws (previously the restore could be skipped).
            var curDir = Environment.CurrentDirectory;
            StanfordCoreNLP pipeline;

            Directory.SetCurrentDirectory(jarRoot);
            try
            {
                pipeline = new StanfordCoreNLP(props);
            }
            finally
            {
                Directory.SetCurrentDirectory(curDir);
            }

            // Annotation
            var annotation = new Annotation(text);

            pipeline.annotate(annotation);

            // these are all the sentences in this document
            // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
            var sentences = annotation.get(typeof(CoreAnnotations.SentencesAnnotation));

            if (sentences == null)
            {
                return(Enumerable.Empty <string>());
            }

            var words = new List <string>();

            foreach (Annotation sentence in sentences as ArrayList)
            {
                words.Add(sentence.toString());
            }

            return(words);
        }
예제 #27
0
        /// <summary>
        /// Runs the statistical coreference pipeline over a fixed sample text and
        /// prints the coreference chains and per-sentence mentions.
        /// </summary>
        public void CorefTest()
        {
            Annotation doc = new Annotation("Barack Obama was born in Hawaii.  He is the president. Obama was elected in 2008.");

            // Coreference pipeline (SUTime disabled).
            var settings = new Properties();
            settings.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse,coref");
            settings.setProperty("ner.useSUTime", "false");

            // Build the pipeline with the model jar root as the working directory.
            var originalDir = Environment.CurrentDirectory;
            Directory.SetCurrentDirectory(Files.CoreNlp.JarRoot);
            var nlp = new StanfordCoreNLP(settings);
            Directory.SetCurrentDirectory(originalDir);

            nlp.annotate(doc);

            var chainKey     = new CorefCoreAnnotations.CorefChainAnnotation().getClass();
            var sentencesKey = new CoreAnnotations.SentencesAnnotation().getClass();
            var mentionsKey  = new CorefCoreAnnotations.CorefMentionsAnnotation().getClass();

            // Dump the coreference chains.
            Console.WriteLine("---");
            Console.WriteLine("coref chains");
            foreach (CorefChain chain in ((Map)doc.get(chainKey)).values().toArray())
            {
                Console.WriteLine($"\t{chain}");
            }

            // Dump the mentions of every sentence.
            foreach (CoreMap sentence in ((ArrayList)doc.get(sentencesKey)).toArray())
            {
                Console.WriteLine("---");
                Console.WriteLine("mentions");
                foreach (Mention mention in (ArrayList)sentence.get(mentionsKey))
                {
                    Console.WriteLine("\t" + mention);
                }
            }
        }
        /// <summary>
        /// Returns the polarity label ("Very Negative" … "Very Positive") of the
        /// last sentence of <paramref name="line"/>; "Very Negative" (score 0)
        /// when the input is null or empty.
        /// </summary>
        /// <param name="line">Text to score.</param>
        public static string findSentiment(String line)
        {
            // CoreNLP distribution root; models are resolved relative to it.
            var jarRoot = @"..\..\src\stanford-corenlp\";

            Properties prop = new Properties();
            prop.setProperty("annotators", "tokenize, ssplit, parse, sentiment");

            // Build the pipeline with the working directory pointed at the models.
            var curDir = Environment.CurrentDirectory;
            System.IO.Directory.SetCurrentDirectory(jarRoot);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(prop);
            System.IO.Directory.SetCurrentDirectory(curDir);

            String[] polarity = { "Very Negative", "Negative", "Neutral", "Positive", "Very Positive" };
            int      score    = 0;

            if (!string.IsNullOrEmpty(line))
            {
                Annotation annotation = new Annotation(line);
                pipeline.annotate(annotation);

                // The score of the last sentence examined wins.
                foreach (CoreMap sent in (dynamic)annotation.get(new CoreAnnotations.SentencesAnnotation().getClass()))
                {
                    Tree tree = (Tree)sent.get(new SentimentCoreAnnotations.AnnotatedTree().getClass());
                    score = RNNCoreAnnotations.getPredictedClass(tree);
                }
            }
            return polarity[score];
        }
예제 #29
0
        /// <summary>
        /// Demonstrates running the CoreNLP pipeline configured for Chinese text.
        /// The pipeline is built from the bundled Chinese properties file rather
        /// than from individually set annotator properties.
        /// </summary>
        public void StanfordCoreNLPForChinese()
        {
            // Text for processing
            var text = "王尼玛跑的很快.";

            // Configure the whole pipeline from the Chinese properties bundle.
            string props = "StanfordCoreNLP-chinese.properties";
            var pipeline = new StanfordCoreNLP(props);

            // Annotate the document.
            var annotation = new Annotation(text);
            pipeline.annotate(annotation);

            // Retrieve the sentence annotations; the value is not used further in
            // this demo, the lookup just shows how to access the result.
            var sentences = annotation.get(typeof(CoreAnnotations.SentencesAnnotation));
        }
예제 #30
0
        /// <summary>
        /// Annotates the user query with the supplied pipeline and returns the
        /// pretty-printed annotation result as a string.
        /// </summary>
        /// <param name="sent">Raw user query text.</param>
        /// <param name="pipeline1">Pre-configured CoreNLP pipeline to run.</param>
        /// <returns>The pretty-printed annotation output.</returns>
        public static String Parse_Data(String sent, StanfordCoreNLP pipeline1)
        {
            // Run the annotators over the raw query text.
            var annotation = new edu.stanford.nlp.pipeline.Annotation(sent);
            pipeline1.annotate(annotation);

            // Capture the pretty-printed result into an in-memory buffer.
            var buffer = new ByteArrayOutputStream();
            try
            {
                pipeline1.prettyPrint(annotation, new PrintWriter(buffer));
                System.Console.WriteLine(" it's stanford time ");
                return buffer.toString();
            }
            finally
            {
                buffer.close();
            }
        }
예제 #31
0
        /// <summary>
        /// Runs the shared <c>Pipeline</c> over <paramref name="content"/> and
        /// deserializes the resulting JSON into an <see cref="AnnotationObject"/>.
        /// </summary>
        /// <param name="content">Text to annotate.</param>
        /// <returns>The deserialized annotation result.</returns>
        public static AnnotationObject Annotate(string content)
        {
            var annotation = new Annotation(content);
            Pipeline.annotate(annotation);

            // Serialize the annotation as JSON into an in-memory buffer.
            using var stream = new ByteArrayOutputStream();
            Pipeline.jsonPrint(annotation, new PrintWriter(stream));

            // Strip newlines before handing the payload to Json.NET.
            string payload = stream.toString().Replace("\n", "");
            var result = Newtonsoft.Json.JsonConvert.DeserializeObject<AnnotationObject>(payload);

            stream.close();
            return result;
        }
예제 #32
0
        /// <summary>
        /// Annotates <paramref name="stringInput"/> with the given pipeline, feeds the
        /// JSON output into <paramref name="nlpResult"/>, and returns the populated result.
        /// </summary>
        /// <param name="pipeline">Pre-configured CoreNLP pipeline to run.</param>
        /// <param name="nlpResult">Result object to populate from the JSON output.</param>
        /// <param name="stringInput">Raw text to annotate.</param>
        /// <returns>The same <paramref name="nlpResult"/> instance, populated.</returns>
        public NlpResult DeserializeInput(StanfordCoreNLP pipeline, NlpResult nlpResult, string stringInput)
        {
            var annotation = new Annotation(stringInput);
            pipeline.annotate(annotation);

            // Capture the JSON output in an in-memory buffer and hand it to the
            // content provider; also echo it for debugging.
            using (var buffer = new ByteArrayOutputStream())
            {
                pipeline.jsonPrint(annotation, new PrintWriter(buffer));

                var json = buffer.toString();
                _jsonContentProvider.PopulateFromString(nlpResult, json);
                Debug.WriteLine(json);

                buffer.close();
            }

            return nlpResult;
        }
        /// <summary>
        /// Runs the CoreNLP pipeline over <paramref name="sentence2"/> and prints each
        /// detected sentence to the console.
        /// </summary>
        /// <param name="sentence2">Raw text to split into sentences.</param>
        /// <returns>Always null, preserving the original contract (splitting into a
        /// list is not implemented; callers rely on the console output only).</returns>
        private List <string> Split(string sentence2)
        {
            // Folder with the extracted CoreNLP model files.
            var jarRoot = "stanford-corenlp-full-2018-10-05/stanford-corenlp-3.9.2-models";

            // Annotation pipeline configuration.
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            props.setProperty("sutime.binders", "0");

            // Switch the working directory so StanfordCoreNLP can locate the model
            // files automatically, then restore it after construction.
            var curDir = Environment.CurrentDirectory;
            System.Console.WriteLine(curDir);
            Directory.SetCurrentDirectory(jarRoot);
            var pipeline = new StanfordCoreNLP(props);
            Directory.SetCurrentDirectory(curDir);

            // Annotate the input text.
            var annotation = new Annotation(sentence2);
            pipeline.annotate(annotation);

            // Guard against both a missing annotation and a failed cast; the old
            // code null-checked the raw object but could still NRE on a failed
            // `as ArrayList` in the foreach.
            var sentences = annotation.get(typeof(CoreAnnotations.SentencesAnnotation)) as ArrayList;
            if (sentences == null)
            {
                return null;
            }

            // Sentence entries are plain CoreMap (ArrayCoreMap) instances, not
            // Annotation objects — enumerating them as Annotation threw
            // InvalidCastException at runtime.
            foreach (CoreMap sentence in sentences)
            {
                System.Console.WriteLine(sentence);
            }
            return null;
        }
예제 #34
0
        //使用nlp將文章分析後回傳key
        /// <summary>
        /// Runs the CoreNLP pipeline over the given article text, writes the XML
        /// analysis to Relay.xml, and returns the meaningful keys assembled by
        /// <c>ner</c> from that file.
        /// </summary>
        /// <param name="sentence">Article text to analyze.</param>
        /// <returns>Keywords extracted from the named-entity pass.</returns>
        private List<string> nlp(string sentence)
        {
            List<string> return_key = new List<string>();
            string Relay_file = ".\\xml";
            string Relay_name = "Relay.xml";
            string Relay_path = Relay_file + "\\" + Relay_name;

            // Path to the folder with models extracted from `stanford-corenlp-3.4-models.jar`
            var jarRoot = @"stanford-corenlp-3.5.2-models\";

            // Annotation pipeline configuration.
            var props = new java.util.Properties();
            props.setProperty("ner.useSUTime", "false");
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
            props.setProperty("sutime.binders", "0");

            // Switch the working directory so StanfordCoreNLP can locate the model
            // files automatically, then restore it after construction.
            var curDir = Environment.CurrentDirectory;
            System.IO.Directory.SetCurrentDirectory(jarRoot);
            var pipeline = new StanfordCoreNLP(props);
            System.IO.Directory.SetCurrentDirectory(curDir);

            // Annotate the input text.
            var annotation = new Annotation(sentence);
            pipeline.annotate(annotation);

            // Write the analysis result to Relay.xml. Close the stream in a finally
            // block so the file handle is not leaked if xmlPrint throws — the old
            // code only closed on the success path.
            FileOutputStream os = new FileOutputStream(new File(Relay_file, Relay_name));
            try
            {
                pipeline.xmlPrint(annotation, os);
            }
            finally
            {
                os.close();
            }

            // Let ner() assemble the individual tokens into meaningful keys.
            foreach (string k in ner(Relay_path))
            {
                return_key.Add(k);
            }

            return return_key;
        }