Exemplos de código com StanfordCoreNLP.Annotate em C# (CSharp)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: OpenIEServlet.cs Projeto: zerouid/Stanford.CoreNLP.NET

 /// <summary>
 /// Runs <paramref name="pipeline"/> over <paramref name="ann"/> (usually a single sentence).
 /// </summary>
 /// <remarks>
 /// If the document has no sentence list yet, it is annotated directly. If it already holds
 /// exactly one sentence, the previously stored natural-logic and dependency annotations are
 /// removed from that sentence and its tokens before the pipeline runs again. A document that
 /// already holds any other number of sentences is left untouched.
 /// </remarks>
 /// <param name="pipeline">Pipeline used to annotate the document.</param>
 /// <param name="ann">Document to annotate in place.</param>
 public virtual void Annotate(StanfordCoreNLP pipeline, Annotation ann)
 {
     var existingSentences = ann.Get(typeof(CoreAnnotations.SentencesAnnotation));

     if (existingSentences == null)
     {
         // Not sentence-split yet: nothing to clean up, just run the pipeline.
         pipeline.Annotate(ann);
         return;
     }
     if (existingSentences.Count != 1)
     {
         return;
     }

     ICoreMap onlySentence = existingSentences[0];

     // Clear per-token natural-logic results.
     foreach (CoreLabel label in onlySentence.Get(typeof(CoreAnnotations.TokensAnnotation)))
     {
         label.Remove(typeof(NaturalLogicAnnotations.OperatorAnnotation));
         label.Remove(typeof(NaturalLogicAnnotations.PolarityAnnotation));
     }

     // Clear sentence-level extraction results and dependency graphs.
     onlySentence.Remove(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation));
     onlySentence.Remove(typeof(NaturalLogicAnnotations.EntailedSentencesAnnotation));
     onlySentence.Remove(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
     onlySentence.Remove(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
     onlySentence.Remove(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));

     pipeline.Annotate(ann);
 }

Exemplo n.º 2

0

Exibir arquivo

        // Test driver - run from your top javanlp directory so the relative properties path resolves.
        /// <summary>Builds a pipeline from the mood properties file and annotates three sample sentences.</summary>
        /// <param name="args">Unused.</param>
        public static void Main(string[] args)
        {
            Properties      moodProps    = StringUtils.PropFileToProperties("projects/core/src/edu/stanford/nlp/classify/mood.prop");
            StanfordCoreNLP moodPipeline = new StanfordCoreNLP(moodProps);

            // One sample per mood, annotated in this order.
            string[] samples =
            {
                "I am so glad this is awesome",
                "I am so gloomy and depressed",
                "I am so gloomy gloomy gloomy gloomy glad"
            };

            foreach (string sample in samples)
            {
                Annotation sampleAnnotation = new Annotation(sample);
                moodPipeline.Annotate(sampleAnnotation);
            }
        }

Exemplo n.º 3

0

Exibir arquivo

Arquivo: ChineseHcorefDemo.cs Projeto: awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Demo: annotates a fixed, pre-segmented Chinese news text with the pipeline configured by
        /// the zh-coref-default properties, then prints the coreference chains, the mentions of
        /// every sentence and the elapsed running time.
        /// </summary>
        /// <param name="args">Ignored; the demo always uses its own fixed "-props" arguments.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            long begunAt = Runtime.CurrentTimeMillis();

            string text = "俄罗斯 航空 公司 一 名 官员 在 ９号 说 ， "
                          + "米洛舍维奇 的 儿子 马可·米洛舍维奇 ９号 早上 持 外交 护照 从 俄国 首都 莫斯科 搭机 飞往 中国 大陆 北京 ， "
                          + "可是 就 在 稍后 就 返回 莫斯科 。 "
                          + "这 名 俄国 航空 公司 官员 说 马可 是 因为 护照 问题 而 在 北京 机场 被 中共 遣返 莫斯科 。 "
                          + "北京 机场 方面 的 这 项 举动 清楚 显示 中共 有意 放弃 在 总统 大选 落败 的 前 南斯拉夫 总统 米洛舍维奇 ， "
                          + "因此 他 在 南斯拉夫 受到 民众 厌恶 的 儿子 马可 才 会 在 北京 机场 被 中共 当局 送回 莫斯科 。 "
                          + "马可 持 外交 护照 能够 顺利 搭机 离开 莫斯科 ， 但是 却 在 北京 受阻 ， 可 算是 踢到 了 铁板 。 "
                          + "可是 这 项 消息 和 先前 外界 谣传 中共 当局 准备 提供 米洛舍维奇 和 他 的 家人 安全 庇护所 有 着 很 大 的 出入 ,"
                          + " 一般 认为 在 去年 米洛舍维奇 挥兵 攻打 科索沃 境内 阿尔巴尼亚 一 分离主义 分子 的 时候 ， "
                          + "强力 反对 北约 组织 攻击 南斯拉夫 的 中共 ， 会 全力 保护 米洛舍维奇 和 他 的 家人 及 亲信 。 "
                          + "可是 从 ９号 马可 被 送回 莫斯科 一 事 看 起来 ， 中共 很 可能 会 放弃 米洛舍维奇 。";

            // The command-line arguments are deliberately replaced by the demo configuration.
            string[] demoArgs = new string[] { "-props", "edu/stanford/nlp/hcoref/properties/zh-coref-default.properties" };

            Annotation      doc          = new Annotation(text);
            Properties      corefProps   = StringUtils.ArgsToProperties(demoArgs);
            StanfordCoreNLP corefRunner  = new StanfordCoreNLP(corefProps);

            corefRunner.Annotate(doc);

            System.Console.Out.WriteLine("---");
            System.Console.Out.WriteLine("coref chains");
            foreach (CorefChain chain in doc.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation)).Values)
            {
                System.Console.Out.WriteLine("\t" + chain);
            }

            foreach (ICoreMap sentenceMap in doc.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                System.Console.Out.WriteLine("---");
                System.Console.Out.WriteLine("mentions");
                foreach (Mention mention in sentenceMap.Get(typeof(CorefCoreAnnotations.CorefMentionsAnnotation)))
                {
                    System.Console.Out.WriteLine("\t" + mention);
                }
            }

            // Elapsed wall-clock time, truncated to whole seconds.
            long finishedAt     = Runtime.CurrentTimeMillis();
            long elapsedSeconds = (finishedAt - begunAt) / 1000;

            System.Console.Out.WriteLine("Running time " + elapsedSeconds / 60 + "min " + elapsedSeconds % 60 + "s");
        }

Exemplo n.º 4

0

Exibir arquivo

Arquivo: TokensRegexRetokenizeDemo.cs Projeto: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Annotates <paramref name="text"/> with <paramref name="pipeline"/> and writes a short
        /// rendering of the whole annotation followed by one line per token to <paramref name="out"/>.
        /// </summary>
        /// <param name="pipeline">Pipeline used to annotate the text.</param>
        /// <param name="text">Raw text to annotate.</param>
        /// <param name="out">Writer receiving the report; it is flushed but not closed.</param>
        private static void RunPipeline(StanfordCoreNLP pipeline, string text, PrintWriter @out)
        {
            Annotation doc = new Annotation(text);
            pipeline.Annotate(doc);

            // The individual analyses are stored on the annotation and read back by key below.
            @out.Println();
            // ToShorterString() gives an abbreviated view of what the annotation contains.
            @out.Println("The top level annotation");
            @out.Println(doc.ToShorterString());

            IList <ICoreMap> sentenceMaps = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));
            foreach (ICoreMap sentenceMap in sentenceMaps)
            {
                foreach (CoreLabel label in sentenceMap.Get(typeof(CoreAnnotations.TokensAnnotation)))
                {
                    // Word, lemma, part of speech, NE tag and normalized NE tag of this token.
                    string surface      = label.Get(typeof(CoreAnnotations.TextAnnotation));
                    string baseForm     = label.Get(typeof(CoreAnnotations.LemmaAnnotation));
                    string posTag       = label.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                    string entityTag    = label.Get(typeof(CoreAnnotations.NamedEntityTagAnnotation));
                    string normalizedNe = label.Get(typeof(CoreAnnotations.NormalizedNamedEntityTagAnnotation));

                    string tokenLine = "token: " + "word=" + surface + ", lemma=" + baseForm + ", pos=" + posTag + ", ne=" + entityTag + ", normalized=" + normalizedNe;
                    @out.Println(tokenLine);
                }
            }

            @out.Flush();
        }

Exemplo n.º 5

0

Exibir arquivo

Arquivo: TokenizerAnnotatorTest.cs Projeto: awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Checks that a tokenize-only pipeline with default options yields exactly the expected
        /// words for a text containing newlines, read both through <c>Word()</c> and through
        /// <c>CoreAnnotations.TextAnnotation</c>.
        /// </summary>
        public virtual void TestDefaultNoNLsPipeline()
        {
            string         t      = "Text with \n\n a new \nline.";
            IList <string> tWords = Arrays.AsList("Text", "with", "a", "new", "line", ".");
            Properties     props  = new Properties();

            props.SetProperty("annotators", "tokenize");
            Annotation      ann      = new Annotation(t);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

            pipeline.Annotate(ann);
            IEnumerator <string> it = tWords.GetEnumerator();

            foreach (CoreLabel word in ann.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                // An enumerator must be advanced with MoveNext() before Current is valid;
                // a false result here means the pipeline produced more tokens than expected.
                NUnit.Framework.Assert.IsTrue(it.MoveNext(), "Too many tokens in new CoreLabel usage");
                NUnit.Framework.Assert.AreEqual(it.Current, word.Word(), "Bung token in new CoreLabel usage");
            }
            // Any expected word left over means the pipeline produced too few tokens.
            NUnit.Framework.Assert.IsFalse(it.MoveNext(), "Too few tokens in new CoreLabel usage");
            IEnumerator <string> it2 = tWords.GetEnumerator();

            foreach (CoreLabel word_1 in ann.Get(typeof(CoreAnnotations.TokensAnnotation)))
            {
                NUnit.Framework.Assert.IsTrue(it2.MoveNext(), "Too many tokens in new CoreLabel usage");
                NUnit.Framework.Assert.AreEqual(it2.Current, word_1.Get(typeof(CoreAnnotations.TextAnnotation)), "Bung token in new CoreLabel usage");
            }
            NUnit.Framework.Assert.IsFalse(it2.MoveNext(), "Too few tokens in new CoreLabel usage");
        }

Exemplo n.º 6

0

Exibir arquivo

Arquivo: TokenizerAnnotatorTest.cs Projeto: awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Tokenizes the same hyphen-heavy sentence twice: once with default tokenizer options
        /// (21 tokens expected) and once with <c>splitHyphenated=true</c> (27 tokens expected).
        /// </summary>
        public virtual void TestHyphens()
        {
            string sample = "Hyphen-ated words should be split except when school-aged-children eat " + "anti-disestablishmentariansm for breakfast at the o-kay choral infront of some explor-o-toriums.";

            // Run 1: default tokenizer options.
            Properties defaultProps = new Properties();
            defaultProps.SetProperty("annotators", "tokenize");
            Annotation      defaultDoc      = new Annotation(sample);
            StanfordCoreNLP defaultPipeline = new StanfordCoreNLP(defaultProps);
            defaultPipeline.Annotate(defaultDoc);
            IList <CoreLabel> defaultTokens = defaultDoc.Get(typeof(CoreAnnotations.TokensAnnotation));
            NUnit.Framework.Assert.AreEqual(21, defaultTokens.Count);

            // Run 2: hyphenated words are additionally split.
            Properties splitProps = new Properties();
            splitProps.SetProperty("annotators", "tokenize");
            splitProps.SetProperty("tokenize.options", "splitHyphenated=true");
            Annotation      splitDoc      = new Annotation(sample);
            StanfordCoreNLP splitPipeline = new StanfordCoreNLP(splitProps);
            splitPipeline.Annotate(splitDoc);
            IList <CoreLabel> splitTokens = splitDoc.Get(typeof(CoreAnnotations.TokensAnnotation));
            NUnit.Framework.Assert.AreEqual(27, splitTokens.Count);
        }

Exemplo n.º 7

0

Exibir arquivo

Arquivo: RelationExtractorAnnotator.cs Projeto: zerouid/Stanford.CoreNLP.NET

 /// <summary>
 /// Demo: annotates a two-sentence sample text, runs the relation extractor over it and prints
 /// the relation mentions found for each sentence. Any exception is caught and its stack trace printed.
 /// </summary>
 /// <param name="args">Command-line arguments, converted to the properties given to the relation extractor.</param>
 public static void Main(string[] args)
 {
     try
     {
         Properties extractorProps = StringUtils.ArgsToProperties(args);
         extractorProps.SetProperty("annotators", "tokenize,ssplit,lemma,pos,parse,ner");

         // NOTE(review): the pipeline is built with the parameterless constructor, so the
         // properties above reach only the relation extractor - confirm this is intended.
         StanfordCoreNLP basePipeline = new StanfordCoreNLP();
         string          sampleText   = "Barack Obama lives in America. Obama works for the Federal Goverment.";
         Annotation      document     = new Annotation(sampleText);
         basePipeline.Annotate(document);

         Edu.Stanford.Nlp.Pipeline.RelationExtractorAnnotator extractor = new Edu.Stanford.Nlp.Pipeline.RelationExtractorAnnotator(extractorProps);
         extractor.Annotate(document);

         foreach (ICoreMap sentenceMap in document.Get(typeof(CoreAnnotations.SentencesAnnotation)))
         {
             System.Console.Out.WriteLine("For sentence " + sentenceMap.Get(typeof(CoreAnnotations.TextAnnotation)));
             IList <RelationMention> mentions = sentenceMap.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
             foreach (RelationMention mention in mentions)
             {
                 System.Console.Out.WriteLine(mention.ToString());
             }
         }
     }
     catch (Exception failure)
     {
         Sharpen.Runtime.PrintStackTrace(failure);
     }
 }

Exemplo n.º 8

0

Exibir arquivo

        /// <summary>
        /// Runs a "pos, lemma, ner" pipeline over <paramref name="doc"/> and then overwrites the type
        /// of every entity mention with the NER tag chosen by <c>Counters.Argmax</c> over the tags of
        /// the tokens inside the mention's extent.
        /// </summary>
        /// <param name="doc">Document whose entity mentions are re-typed in place.</param>
        private static void ModifyUsingCoreNLPNER(Annotation doc)
        {
            Properties nerProps = new Properties();
            nerProps.SetProperty("annotators", "pos, lemma, ner");

            StanfordCoreNLP nerPipeline = new StanfordCoreNLP(nerProps, false);
            nerPipeline.Annotate(doc);

            foreach (ICoreMap sentenceMap in doc.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                IList <EntityMention> mentions = sentenceMap.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
                if (mentions == null)
                {
                    // Sentence carries no entity mentions: nothing to re-type.
                    continue;
                }

                IList <CoreLabel> sentenceTokens = sentenceMap.Get(typeof(CoreAnnotations.TokensAnnotation));
                foreach (EntityMention mention in mentions)
                {
                    Span extent = mention.GetExtent();

                    // Tally the NER tag of each token in [Start, End) of the extent.
                    ICounter <string> tagCounts = new ClassicCounter <string>();
                    for (int pos = extent.Start(); pos < extent.End(); pos++)
                    {
                        tagCounts.IncrementCount(sentenceTokens[pos].Ner());
                    }

                    mention.SetType(Counters.Argmax(tagCounts));
                }
            }
        }

Exemplo n.º 9

0

Exibir arquivo

Arquivo: CustomAnnotationSerializer.cs Projeto: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Command-line entry point. With a non-empty <c>loadFile</c> property it reads a serialized
        /// annotation from that file and prints its short form. Otherwise, with a non-empty
        /// <c>file</c> property, it annotates the text of that file and writes the serialized
        /// result to <c>file + ".ser"</c>. With neither it logs a usage message.
        /// </summary>
        /// <param name="args">Command-line arguments, converted to properties.</param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            Properties      props    = StringUtils.ArgsToProperties(args);
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
            string          file     = props.GetProperty("file");
            string          loadFile = props.GetProperty("loadFile");

            if (!string.IsNullOrEmpty(loadFile))
            {
                Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer ser = new Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer(false, false);
                InputStream @is = new FileInputStream(loadFile);
                try
                {
                    Pair <Annotation, InputStream> pair = ser.Read(@is);
                    pair.second.Close();
                    Annotation anno = pair.first;
                    System.Console.Out.WriteLine(anno.ToShorterString(StringUtils.EmptyStringArray));
                }
                finally
                {
                    // Release the file handle even when reading or printing throws.
                    @is.Close();
                }
            }
            else
            {
                if (!string.IsNullOrEmpty(file))
                {
                    string     text = IOUtils.SlurpFile(file);
                    Annotation doc  = new Annotation(text);
                    pipeline.Annotate(doc);
                    Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer ser = new Edu.Stanford.Nlp.Pipeline.CustomAnnotationSerializer(false, false);
                    TextWriter os = new TextWriter(new FileOutputStream(file + ".ser"));
                    // Write returns the stream it wrote to; closing that one finishes the output.
                    ser.Write(doc, os).Close();
                    log.Info("Serialized annotation saved in " + file + ".ser");
                }
                else
                {
                    log.Info("usage: CustomAnnotationSerializer [-file file] [-loadFile file]");
                }
            }
        }

Exemplo n.º 10

0

Exibir arquivo

        // static demo class
        /// <summary>
        /// Demo: compiles every line of a rules file into a token sequence pattern, annotates a text
        /// file, and prints, per sentence, its tokens and all non-overlapping pattern matches with
        /// their groups.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the rules file and <c>args[1]</c> the text file to annotate; an optional
        /// <c>args[2]</c> names the output file. Without it the report goes to the console.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                System.Console.Error.WriteLine("TokensRegexMatcher rules file [outFile]");
                return;
            }
            string      rules = args[0];
            PrintWriter @out;

            if (args.Length > 2)
            {
                @out = new PrintWriter(args[2]);
            }
            else
            {
                @out = new PrintWriter(System.Console.Out);
            }
            StanfordCoreNLP pipeline   = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
            Annotation      annotation = new Annotation(IOUtils.SlurpFileNoExceptions(args[1]));

            pipeline.Annotate(annotation);
            // Load lines of file as TokenSequencePatterns
            IList <TokenSequencePattern> tokenSequencePatterns = new List <TokenSequencePattern>();

            foreach (string line in ObjectBank.GetLineIterator(rules))
            {
                TokenSequencePattern pattern = TokenSequencePattern.Compile(line);
                tokenSequencePatterns.Add(pattern);
            }

            // The pattern list no longer changes, so the combined matcher is built once
            // here instead of once per sentence.
            MultiPatternMatcher <ICoreMap> multiMatcher = TokenSequencePattern.GetMultiPatternMatcher(tokenSequencePatterns);

            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            int i = 0;

            foreach (ICoreMap sentence in sentences)
            {
                IList <CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                @out.Println("Sentence #" + ++i);
                @out.Print("  Tokens:");
                foreach (CoreLabel token in tokens)
                {
                    @out.Print(' ');
                    @out.Print(token.ToShortString("Text", "PartOfSpeech", "NamedEntityTag"));
                }
                @out.Println();
                IList <ISequenceMatchResult <ICoreMap> > answers = multiMatcher.FindNonOverlapping(tokens);
                int j = 0;
                foreach (ISequenceMatchResult <ICoreMap> matched in answers)
                {
                    @out.Println("  Match #" + ++j);
                    // Group indices run from 0 through GroupCount() inclusive.
                    for (int k = 0; k <= matched.GroupCount(); k++)
                    {
                        @out.Println("    group " + k + " = " + matched.Group(k));
                    }
                }
            }
            @out.Flush();
        }

Exemplo n.º 11

0

Exibir arquivo

        /// <summary>
        /// Smoke test: constructing a <c>Sentence</c> from the first sentence map of an annotated
        /// document must complete without throwing. Nothing is asserted beyond that.
        /// </summary>
        public virtual void TestFromCoreMapCrashCheck()
        {
            StanfordCoreNLP annotator = new StanfordCoreNLP(new _Properties_107());
            Annotation      document  = new Annotation("This is a sentence.");
            annotator.Annotate(document);

            ICoreMap firstSentence = document.Get(typeof(CoreAnnotations.SentencesAnnotation))[0];

            // The constructed object is intentionally discarded.
            new Sentence(firstSentence);
        }

Exemplo n.º 12

0

Exibir arquivo

        /// <summary>
        /// Annotates <paramref name="text"/> with a pipeline configured from <paramref name="args"/>
        /// and then applies a hybrid coreference annotator built from the same properties.
        /// </summary>
        /// <param name="text">Raw text to annotate.</param>
        /// <param name="args">Command-line style arguments converted into properties.</param>
        /// <returns>The annotated document.</returns>
        private static Annotation TestAnnoation(string text, string[] args)
        {
            Annotation annotated = new Annotation(text);
            Properties settings  = StringUtils.ArgsToProperties(args);

            // Base pipeline first, coreference second; both share the same settings.
            StanfordCoreNLP basePipeline = new StanfordCoreNLP(settings);
            basePipeline.Annotate(annotated);

            Edu.Stanford.Nlp.Pipeline.HybridCorefAnnotator corefAnnotator = new Edu.Stanford.Nlp.Pipeline.HybridCorefAnnotator(settings);
            corefAnnotator.Annotate(annotated);

            return annotated;
        }

Exemplo n.º 13

0

Exibir arquivo

Arquivo: CoNLLOutputterTest.cs Projeto: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Checks the default CoNLL output for a two-sentence text that was only tokenized and
        /// sentence-split: seven tab-separated columns per token, unset columns printed as "_",
        /// and a blank line after each sentence.
        /// </summary>
        public virtual void TestSimpleSentence()
        {
            Annotation      document  = new Annotation("CoNLL is neat. Better than XML.");
            StanfordCoreNLP annotator = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize, ssplit"));
            annotator.Annotate(document);

            string rendered = new CoNLLOutputter().Print(document);

            string wanted = "1\tCoNLL\t_\t_\t_\t_\t_\n"
                            + "2\tis\t_\t_\t_\t_\t_\n"
                            + "3\tneat\t_\t_\t_\t_\t_\n"
                            + "4\t.\t_\t_\t_\t_\t_\n"
                            + '\n'
                            + "1\tBetter\t_\t_\t_\t_\t_\n"
                            + "2\tthan\t_\t_\t_\t_\t_\n"
                            + "3\tXML\t_\t_\t_\t_\t_\n"
                            + "4\t.\t_\t_\t_\t_\t_\n"
                            + '\n';

            NUnit.Framework.Assert.AreEqual(wanted, rendered);
        }

Exemplo n.º 14

0

Exibir arquivo

Arquivo: CoNLLOutputterTest.cs Projeto: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Checks CoNLL output restricted to the "word,pos" columns for a two-sentence text that was
        /// only tokenized and sentence-split: the expected POS column is "_" for every token, and a
        /// blank line follows each sentence.
        /// </summary>
        public virtual void TestCustomSimpleSentence()
        {
            string          columns   = "word,pos";
            Annotation      document  = new Annotation("CoNLL is neat. Better than XML.");
            StanfordCoreNLP annotator = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize, ssplit", "outputFormatOptions", columns));
            annotator.Annotate(document);

            string rendered = new CoNLLOutputter(columns).Print(document);

            string wanted = "CoNLL\t_\n"
                            + "is\t_\n"
                            + "neat\t_\n"
                            + ".\t_\n"
                            + '\n'
                            + "Better\t_\n"
                            + "than\t_\n"
                            + "XML\t_\n"
                            + ".\t_\n"
                            + '\n';

            NUnit.Framework.Assert.AreEqual(wanted, rendered);
        }

Exemplo n.º 15

0

Exibir arquivo

        /// <summary>
        /// Reads <paramref name="filename"/> in the requested input format and returns one
        /// single-sentence <c>Annotation</c> per sentence (text input) or per tree (tree input).
        /// </summary>
        /// <param name="tokenizer">Pipeline used to annotate raw text; not used for tree input.</param>
        /// <param name="inputFormat">Whether the file holds raw text or trees.</param>
        /// <param name="filename">Path of the file to read.</param>
        /// <param name="filterUnknown">
        /// Tree input only: when true, trees are read with gold labels and then passed through
        /// <c>SentimentUtils.FilterUnknownRoots</c>; when false, they are loaded through a
        /// <c>MemoryTreebank</c>.
        /// </param>
        /// <returns>The list of per-sentence annotations.</returns>
        /// <exception cref="System.ArgumentException">The input format is not one of the handled values.</exception>
        public static IList <Annotation> GetAnnotations(StanfordCoreNLP tokenizer, SentimentPipeline.Input inputFormat, string filename, bool filterUnknown)
        {
            switch (inputFormat)
            {
            case SentimentPipeline.Input.Text:
            {
                // Annotate the whole file once, then wrap each sentence in its own Annotation.
                Annotation wholeFile = new Annotation(IOUtils.SlurpFileNoExceptions(filename));
                tokenizer.Annotate(wholeFile);

                IList <Annotation> perSentence = Generics.NewArrayList();
                foreach (ICoreMap sentenceMap in wholeFile.Get(typeof(CoreAnnotations.SentencesAnnotation)))
                {
                    Annotation wrapper = new Annotation(sentenceMap.Get(typeof(CoreAnnotations.TextAnnotation)));
                    wrapper.Set(typeof(CoreAnnotations.SentencesAnnotation), Java.Util.Collections.SingletonList(sentenceMap));
                    perSentence.Add(wrapper);
                }
                return perSentence;
            }

            case SentimentPipeline.Input.Trees:
            {
                IList <Tree> loadedTrees;
                if (!filterUnknown)
                {
                    MemoryTreebank bank = new MemoryTreebank("utf-8");
                    bank.LoadPath(filename, null);
                    loadedTrees = new List <Tree>(bank);
                }
                else
                {
                    loadedTrees = SentimentUtils.ReadTreesWithGoldLabels(filename);
                    loadedTrees = SentimentUtils.FilterUnknownRoots(loadedTrees);
                }

                IList <Annotation> perTree = Generics.NewArrayList();
                foreach (Tree parsed in loadedTrees)
                {
                    // The inner map carries the tree and the text of its yield; the outer
                    // annotation has empty text and holds that map as its only sentence.
                    ICoreMap treeSentence = new Annotation(SentenceUtils.ListToString(parsed.Yield()));
                    treeSentence.Set(typeof(TreeCoreAnnotations.TreeAnnotation), parsed);

                    IList <ICoreMap> single  = Java.Util.Collections.SingletonList(treeSentence);
                    Annotation       wrapper = new Annotation(string.Empty);
                    wrapper.Set(typeof(CoreAnnotations.SentencesAnnotation), single);
                    perTree.Add(wrapper);
                }
                return perTree;
            }

            default:
            {
                throw new ArgumentException("Unknown format " + inputFormat);
            }
            }
        }

Exemplo n.º 16

0

Exibir arquivo

Arquivo: SUTimePipeline.cs Projeto: awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Annotates <paramref name="sentence"/> with this object's pipeline and then with
        /// <paramref name="timeAnnotator"/>, attaching <paramref name="dateString"/> as the document
        /// date when it is neither null nor empty.
        /// </summary>
        /// <param name="sentence">Text to annotate.</param>
        /// <param name="dateString">Optional document date; ignored when null or empty.</param>
        /// <param name="timeAnnotator">Annotator applied after the pipeline has run.</param>
        /// <returns>The annotated text.</returns>
        public virtual Annotation Process(string sentence, string dateString, IAnnotator timeAnnotator)
        {
            log.Info("Processing text \"" + sentence + "\" with dateString = " + dateString);

            Annotation result     = new Annotation(sentence);
            bool       noDocDate  = dateString == null || dateString.IsEmpty();

            if (!noDocDate)
            {
                result.Set(typeof(CoreAnnotations.DocDateAnnotation), dateString);
            }

            // Pipeline first, then the time annotator on the same annotation.
            pipeline.Annotate(result);
            timeAnnotator.Annotate(result);
            return result;
        }

Exemplo n.º 17

0

Exibir arquivo

        /// <summary>
        /// Checks that a <c>Sentence</c> built from the first sentence map of an annotated document
        /// reports the document's text, "This" as its first word, and a length of 5.
        /// </summary>
        public virtual void TestFromCoreMapCorrectnessCheck()
        {
            StanfordCoreNLP annotator = new StanfordCoreNLP(new _Properties_119());
            Annotation      document  = new Annotation("This is a sentence.");
            annotator.Annotate(document);

            ICoreMap firstSentence = document.Get(typeof(CoreAnnotations.SentencesAnnotation))[0];
            Sentence wrapped       = new Sentence(firstSentence);

            // The document holds a single sentence, so its text is compared with the document text.
            NUnit.Framework.Assert.AreEqual(document.Get(typeof(CoreAnnotations.TextAnnotation)), wrapped.Text());
            NUnit.Framework.Assert.AreEqual("This", wrapped.Word(0));
            NUnit.Framework.Assert.AreEqual(5, wrapped.Length());
        }

Exemplo n.º 18

0

Exibir arquivo

        /// <summary>
        /// Tokenizes and sentence-splits <paramref name="text"/> as English and asserts that the
        /// resulting sentence list exists and has <paramref name="num_sentences"/> entries.
        /// </summary>
        /// <param name="text">Text to split.</param>
        /// <param name="num_sentences">Expected number of sentences.</param>
        private static void RunSentence(string text, int num_sentences)
        {
            Annotation document = new Annotation(text);
            Properties settings = PropertiesUtils.AsProperties("annotators", "tokenize,ssplit", "tokenize.language", "en");

            StanfordCoreNLP splitter = new StanfordCoreNLP(settings);
            splitter.Annotate(document);

            // Inspect what the sentence splitter produced.
            IList <ICoreMap> found = document.Get(typeof(CoreAnnotations.SentencesAnnotation));
            NUnit.Framework.Assert.IsNotNull(found);
            NUnit.Framework.Assert.AreEqual(num_sentences, found.Count);
        }

Exemplo n.º 19

0

Exibir arquivo

        /// <summary>
        /// Checks that a four-line text yields four sentences when the pipeline is configured with
        /// <c>ssplit.eolonly=true</c> and <c>tokenize.whitespace=true</c>.
        /// </summary>
        public virtual void TestSentenceSplitting()
        {
            string input = "Date :\n01/02/2012\nContent :\nSome words are here .\n";

            Properties settings = PropertiesUtils.AsProperties("annotators", "tokenize, ssplit", "ssplit.eolonly", "true", "tokenize.whitespace", "true");
            StanfordCoreNLP splitter = new StanfordCoreNLP(settings);

            Annotation document = new Annotation(input);
            splitter.Annotate(document);

            IList <ICoreMap> found = document.Get(typeof(CoreAnnotations.SentencesAnnotation));
            NUnit.Framework.Assert.AreEqual(4, found.Count);
        }

Exemplo n.º 20

0

Exibir arquivo

        /// <summary>
        /// With <c>ssplit.newlineIsSentenceBreak=two</c>, checks that a text containing one single
        /// newline and one double newline is split into 2 sentences and that the document carries
        /// 9 tokens.
        /// </summary>
        public virtual void TestTwoNewlineIsSentenceBreakSettings()
        {
            string input = "This is \none sentence\n\nThis is not another.";

            Properties settings = PropertiesUtils.AsProperties("annotators", "tokenize, ssplit", "ssplit.newlineIsSentenceBreak", "two");
            StanfordCoreNLP splitter = new StanfordCoreNLP(settings);

            Annotation document = new Annotation(input);
            splitter.Annotate(document);

            IList <ICoreMap> found = document.Get(typeof(CoreAnnotations.SentencesAnnotation));
            NUnit.Framework.Assert.AreEqual(2, found.Count);

            // Document-level token count (per the original test, this count does contain NL tokens).
            IList <CoreLabel> allTokens = document.Get(typeof(CoreAnnotations.TokensAnnotation));
            NUnit.Framework.Assert.AreEqual(9, allTokens.Count);
        }

Exemplo n.º 21

0

Exibir arquivo

        /// <summary>
        /// With the <c>tokenizeNLs</c> tokenizer option and default sentence-splitter settings,
        /// checks that a text containing a double newline is still a single sentence and that the
        /// document carries 13 tokens.
        /// </summary>
        public virtual void TestTokenizeNLsDoesntChangeSsplitResults()
        {
            string input = "This is one sentence\n\nThis is not another with default ssplit settings.";

            Properties settings = PropertiesUtils.AsProperties("annotators", "tokenize, ssplit", "tokenize.options", "tokenizeNLs");
            StanfordCoreNLP splitter = new StanfordCoreNLP(settings);

            Annotation document = new Annotation(input);
            splitter.Annotate(document);

            IList <ICoreMap> found = document.Get(typeof(CoreAnnotations.SentencesAnnotation));
            NUnit.Framework.Assert.AreEqual(1, found.Count);

            // Document-level token count (per the original test, this count does NOT contain NL tokens).
            IList <CoreLabel> allTokens = document.Get(typeof(CoreAnnotations.TokensAnnotation));
            NUnit.Framework.Assert.AreEqual(13, allTokens.Count);
        }

Exemplo n.º 22

0

Exibir arquivo

        /// <summary>
        /// For every Spanish dateline sample, checks that the pipeline (Spanish tokenizer, cleanxml,
        /// sentence splitter with a multi-token boundary regex) splits the text into exactly two
        /// sentences and that the first sentence's tokens match the expected dateline tokens.
        /// </summary>
        public virtual void TestSpanishDatelineSeparation()
        {
            // Boundary pattern handed to ssplit.boundaryMultiTokenRegex, built from two literals.
            string boundaryRegex = "/\\*NL\\*/ /\\p{Lu}[-\\p{L}]+/+ ( /,/  /[-\\p{L}]+/+ )? " + "( /,/ /[1-3]?[0-9]/ /\\p{Ll}{3,3}/ )? /=LRB=/ /\\p{Lu}\\p{L}+/ /=RRB=/ /--/";

            Properties settings = PropertiesUtils.AsProperties(
                "annotators", "tokenize, cleanxml, ssplit",
                "tokenize.language", "es",
                "tokenize.options", "tokenizeNLs,ptb3Escaping=true",
                "ssplit.newlineIsSentenceBreak", "two",
                "ssplit.boundaryMultiTokenRegex", boundaryRegex);
            StanfordCoreNLP splitter = new StanfordCoreNLP(settings);

            // The inputs and the expected first sentences are parallel arrays.
            NUnit.Framework.Assert.AreEqual(dateLineSpanishTexts.Length, dateLineSpanishTokens.Length, "Bad test data");

            for (int idx = 0; idx < dateLineSpanishTexts.Length; idx++)
            {
                Annotation document = new Annotation(dateLineSpanishTexts[idx]);
                splitter.Annotate(document);

                IList <ICoreMap> found = document.Get(typeof(CoreAnnotations.SentencesAnnotation));
                NUnit.Framework.Assert.AreEqual(2, found.Count, "For " + dateLineSpanishTexts[idx] + " annotation is " + document);

                IList <CoreLabel> datelineTokens = found[0].Get(typeof(CoreAnnotations.TokensAnnotation));
                string            datelineText   = SentenceUtils.ListToString(datelineTokens);
                NUnit.Framework.Assert.AreEqual(dateLineSpanishTokens[idx], datelineText, "Bad tokens in dateline");
            }
        }

Exemplo n.º 23

0

Exibir arquivo

        public virtual void TestKbpSectionMatching()
        {
            Properties props = PropertiesUtils.AsProperties("annotators", "tokenize,cleanxml,ssplit", "tokenize.language", "es", "tokenize.options", "tokenizeNLs,ptb3Escaping=true", "ssplit.newlineIsSentenceBreak", "two", "ssplit.tokenPatternsToDiscard"
                                                            , "\\n,\\*NL\\*", "ssplit.boundaryMultiTokenRegex", "/\\*NL\\*/ /\\p{Lu}[-\\p{L}]+/+ /,/ ( /[-\\p{L}]+/+ /,/ )? " + "/[1-3]?[0-9]/ /\\p{Ll}{3,5}/ /=LRB=/ /\\p{Lu}\\p{L}+/ /=RRB=/ /--/", "clean.xmltags", "headline|text|post", "clean.singlesentencetags"
                                                            , "HEADLINE|AUTHOR", "clean.sentenceendingtags", "TEXT|POST|QUOTE", "clean.turntags", "POST|QUOTE", "clean.speakertags", "AUTHOR", "clean.datetags", "DATE_TIME", "clean.doctypetags", "DOC", "clean.docAnnotations", "docID=doc[id]", "clean.sectiontags"
                                                            , "HEADLINE|POST", "clean.sectionAnnotations", "sectionID=post[id],sectionDate=post[datetime],author=post[author]", "clean.quotetags", "quote", "clean.quoteauthorattributes", "orig_author", "clean.tokenAnnotations", "link=a[href],speaker=post[author],speaker=quote[orig_author]"
                                                            );
            string document = "<doc id=\"SPA_DF_000389_20090909_G00A09SM4\">\n" + "<headline>\n" + "Problema para Activar Restaurar Sistema En Win Ue\n" + "</headline>\n" + "<post author=\"mysecondskin\" datetime=\"2009-09-09T00:00:00\" id=\"p1\">\n" +
                              "hola portalianos tengo un problemita,mi vieja tiene un pc en su casa y no tiene activado restaurar sistema ya que el pc tiene el xp ue v5,he tratado de arreglárselo pero no he podido dar con la solución y no he querido formatearle el pc porque tiene un sin numero de programas que me da paja reinstalar\n"
                              + "ojala alguien me pueda ayudar\n" + "vale socios\n" + "</post>\n" + "<post author=\"pajenri\" datetime=\"2009-09-09T00:00:00\" id=\"p2\">\n" + "<quote orig_author=\"mysecondskin\">\n" + "hola portalianos tengo un problemita,mi vieja tiene un pc en su casa y no tiene activado restaurar sistema ya que el pc tiene el xp ue v5,he tratado de arreglárselo pero no he podido dar con la solución y no he querido formatearle el pc porque tiene un sin numero de programas que me da paja reinstalar\n"
                              + "ojala alguien me pueda ayudar\n" + "vale socios\n" + "</quote>\n" + "\n" + "por lo que tengo entendido esa opcion en los win ue vienen eliminadas no desactivadas, asi que para activarla habria que reinstalar un xp limpio no tuneado. como dato es tipico en sistemas tuneados comos el win ue que suceda esto. el restaurador salva mas de lo que se cree. si toy equibocado con la info que alguien me corrija\n"
                              + "</post>\n" + "<post author=\"UnknownCnR\" datetime=\"2009-09-09T00:00:00\" id=\"p3\">\n" + "<a href=\"http://www.sendspace.com/file/54pxbl\">http://www.sendspace.com/file/54pxbl</a>\n" + "\n" + "Con este registro podras activarlo ;)\n"
                              + "</post>\n" + "<post author=\"mysecondskin\" datetime=\"2009-09-11T00:00:00\" id=\"p4\">\n" + "gracias pero de verdad esa solucion no sirve\n" + "</post>\n" + "</doc>\n";

            string[][] sections = new string[][] { new string[] { null, null, "Problema para Activar Restaurar Sistema En Win Ue\n" }, new string[] { "mysecondskin", "2009-09-09T00:00:00", "hola portalianos tengo un problemita , mi vieja tiene un pc en su casa y no tiene activado restaurar sistema ya que el pc tiene el xp ue v5 , he tratado de arreglárselo pero no he podido dar con la solución y no he querido formatearle el pc porque tiene un sin numero de programas que me da paja reinstalar ojala alguien me pueda ayudar vale socios\n" }, new string[] { "pajenri", "2009-09-09T00:00:00", "(QUOTING: mysecondskin) hola portalianos tengo un problemita , mi vieja tiene un pc en su casa y no tiene activado restaurar sistema ya que el pc tiene el xp ue v5 , he tratado de arreglárselo pero no he podido dar con la solución y no he querido formatearle el pc porque tiene un sin numero de programas que me da paja reinstalar ojala alguien me pueda ayudar vale socios\n"
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                + "por lo que tengo entendido esa opcion en los win ue vienen eliminadas no desactivadas , asi que para activarla habria que reinstalar un xp limpio no tuneado .\n" + "como dato es tipico en sistemas tuneados comos el win ue que suceda esto .\n"
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                + "el restaurador salva mas de lo que se cree .\n" + "si toy equibocado con la info que alguien me corrija\n" }, new string[] { "UnknownCnR", "2009-09-09T00:00:00", "http://www.sendspace.com/file/54pxbl\n" + "Con este registro podras activarlo ;=RRB=\n" }, new string[] { "mysecondskin", "2009-09-11T00:00:00", "gracias pero de verdad esa solucion no sirve\n" } };
            StanfordCoreNLP pipeline     = new StanfordCoreNLP(props);
            Annotation      testDocument = new Annotation(document);

            pipeline.Annotate(testDocument);
            // check the forum posts
            int num = 0;

            foreach (ICoreMap discussionForumPost in testDocument.Get(typeof(CoreAnnotations.SectionsAnnotation)))
            {
                NUnit.Framework.Assert.AreEqual(sections[num][0], discussionForumPost.Get(typeof(CoreAnnotations.AuthorAnnotation)));
                NUnit.Framework.Assert.AreEqual(sections[num][1], discussionForumPost.Get(typeof(CoreAnnotations.SectionDateAnnotation)));
                StringBuilder sb = new StringBuilder();
                foreach (ICoreMap sentence in discussionForumPost.Get(typeof(CoreAnnotations.SentencesAnnotation)))
                {
                    bool sentenceQuoted = (sentence.Get(typeof(CoreAnnotations.QuotedAnnotation)) != null) && sentence.Get(typeof(CoreAnnotations.QuotedAnnotation));
                    System.Console.Error.WriteLine("Sentence " + sentence + " quoted=" + sentenceQuoted);
                    string sentenceAuthor     = sentence.Get(typeof(CoreAnnotations.AuthorAnnotation));
                    string potentialQuoteText = sentenceQuoted ? "(QUOTING: " + sentenceAuthor + ") " : string.Empty;
                    sb.Append(potentialQuoteText);
                    sb.Append(sentence.Get(typeof(CoreAnnotations.TokensAnnotation)).Stream().Map(null).Collect(Collectors.Joining(" ")));
                    sb.Append('\n');
                }
                NUnit.Framework.Assert.AreEqual(sections[num][2], sb.ToString());
                num++;
            }
            NUnit.Framework.Assert.AreEqual(sections.Length, num, "Too few sections");
        }

Exemplo n.º 24

0

Exibir arquivo

Arquivo: OpenIEDemo.cs Projeto: awesomedotnetcore/Stanford.CoreNLP.NET

        // static main
        /// <summary>
        /// OpenIE demo entry point: annotates a text with a CoreNLP pipeline and, for each
        /// sentence, prints its text, its enhanced dependency graph, its relation triples
        /// and the parse tree of each clause produced by <c>OpenIE.ClausesInSentence</c>.
        /// </summary>
        /// <param name="args">
        /// When non-empty, <c>args[0]</c> is passed to <c>IOUtils.SlurpFile</c> and the result is
        /// annotated; otherwise a built-in two-sentence sample text is used.
        /// </param>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            // Create the Stanford CoreNLP pipeline
            Properties      props    = PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
            StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

            // Annotate an example document.
            string text = args.Length > 0
                ? IOUtils.SlurpFile(args[0])
                : "Obama was born in Hawaii. He is our president.";
            Annotation doc = new Annotation(text);

            pipeline.Annotate(doc);

            // Standalone OpenIE instance used only for clause splitting. It is created on
            // first use and then reused, instead of building a new one for every sentence.
            OpenIE clauseSplitter = null;

            // Loop over sentences in the document
            int sentNo = 0;

            foreach (ICoreMap sentence in doc.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                System.Console.Out.WriteLine("Sentence #" + ++sentNo + ": " + sentence.Get(typeof(CoreAnnotations.TextAnnotation)));
                // Print SemanticGraph
                System.Console.Out.WriteLine(sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation)).ToString(SemanticGraph.OutputFormat.List));
                // Get the OpenIE triples for the sentence
                ICollection <RelationTriple> triples = sentence.Get(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation));
                // Print the triples (confidence, then subject / relation / object lemma glosses, tab separated)
                foreach (RelationTriple triple in triples)
                {
                    System.Console.Out.WriteLine(triple.confidence + "\t" + triple.SubjectLemmaGloss() + "\t" + triple.RelationLemmaGloss() + "\t" + triple.ObjectLemmaGloss());
                }
                // Alternately, to only run e.g., the clause splitter:
                if (clauseSplitter == null)
                {
                    clauseSplitter = new OpenIE(props);
                }
                IList <SentenceFragment> clauses = clauseSplitter.ClausesInSentence(sentence);
                foreach (SentenceFragment clause in clauses)
                {
                    System.Console.Out.WriteLine(clause.parseTree.ToString(SemanticGraph.OutputFormat.List));
                }
                System.Console.Out.WriteLine();
            }
        }

Exemplo n.º 25

0

Exibir arquivo

        /// <summary>
        /// Runs a tokenize + ssplit pipeline configured with <c>tokenizeNLs</c> and
        /// <c>ssplit.newlineIsSentenceBreak = two</c> over a text containing a single and a
        /// double newline, then asserts the sentence count, the document token count and the
        /// tokens of the second sentence.
        /// </summary>
        public virtual void TestTwoNewlineIsSentenceBreakTokenizeNLs()
        {
            string     input  = "This is \none sentence\n\nThis is not another.";
            Properties config = PropertiesUtils.AsProperties(
                "annotators", "tokenize, ssplit",
                "tokenize.language", "en",
                "tokenize.options", "tokenizeNLs,invertible,ptb3Escaping=true",
                "ssplit.newlineIsSentenceBreak", "two");
            StanfordCoreNLP nlp       = new StanfordCoreNLP(config);
            Annotation      annotated = new Annotation(input);

            nlp.Annotate(annotated);

            // Exactly two sentences are expected.
            IList <ICoreMap> splitSentences = annotated.Get(typeof(CoreAnnotations.SentencesAnnotation));
            NUnit.Framework.Assert.AreEqual(2, splitSentences.Count);

            // The document-level token list is expected to hold 9 entries (newline tokens included).
            IList <CoreLabel> allTokens = annotated.Get(typeof(CoreAnnotations.TokensAnnotation));
            NUnit.Framework.Assert.AreEqual(9, allTokens.Count);

            // The second sentence must consist of the words after the blank line only.
            IList <CoreLabel> secondSentenceTokens = splitSentences[1].Get(typeof(CoreAnnotations.TokensAnnotation));
            string            secondSentenceText   = SentenceUtils.ListToString(secondSentenceTokens);
            NUnit.Framework.Assert.AreEqual("This is not another .", secondSentenceText, "Bad tokens in sentence");
        }

Exemplo n.º 26

0

Exibir arquivo

Arquivo: JSONOutputterTest.cs Projeto: awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Annotates a two-sentence text, renders it with <c>JSONOutputter.Print</c> and compares
        /// the result against a hand-written JSON document passed through <c>Indent</c>.
        /// </summary>
        public virtual void TestSimpleDocument()
        {
            Annotation      document = new Annotation("JSON is neat. Better than XML.");
            StanfordCoreNLP nlp      = new StanfordCoreNLP(new _Properties_88());

            nlp.Annotate(document);
            string rendered = new JSONOutputter().Print(document);

            // Expected output, one literal per JSON line; tabs mark the nesting depth.
            string template =
                "{\n"
                + "\t\"sentences\": [\n"
                // ---- sentence 0: "JSON is neat." ----
                + "\t\t{\n"
                + "\t\t\t\"index\": 0,\n"
                + "\t\t\t\"tokens\": [\n"
                + "\t\t\t\t{\n"
                + "\t\t\t\t\t\"index\": 1,\n"
                + "\t\t\t\t\t\"word\": \"JSON\",\n"
                + "\t\t\t\t\t\"originalText\": \"JSON\",\n"
                + "\t\t\t\t\t\"characterOffsetBegin\": 0,\n"
                + "\t\t\t\t\t\"characterOffsetEnd\": 4,\n"
                + "\t\t\t\t\t\"before\": \"\",\n"
                + "\t\t\t\t\t\"after\": \" \"\n"
                + "\t\t\t\t},\n"
                + "\t\t\t\t{\n"
                + "\t\t\t\t\t\"index\": 2,\n"
                + "\t\t\t\t\t\"word\": \"is\",\n"
                + "\t\t\t\t\t\"originalText\": \"is\",\n"
                + "\t\t\t\t\t\"characterOffsetBegin\": 5,\n"
                + "\t\t\t\t\t\"characterOffsetEnd\": 7,\n"
                + "\t\t\t\t\t\"before\": \" \",\n"
                + "\t\t\t\t\t\"after\": \" \"\n"
                + "\t\t\t\t},\n"
                + "\t\t\t\t{\n"
                + "\t\t\t\t\t\"index\": 3,\n"
                + "\t\t\t\t\t\"word\": \"neat\",\n"
                + "\t\t\t\t\t\"originalText\": \"neat\",\n"
                + "\t\t\t\t\t\"characterOffsetBegin\": 8,\n"
                + "\t\t\t\t\t\"characterOffsetEnd\": 12,\n"
                + "\t\t\t\t\t\"before\": \" \",\n"
                + "\t\t\t\t\t\"after\": \"\"\n"
                + "\t\t\t\t},\n"
                + "\t\t\t\t{\n"
                + "\t\t\t\t\t\"index\": 4,\n"
                + "\t\t\t\t\t\"word\": \".\",\n"
                + "\t\t\t\t\t\"originalText\": \".\",\n"
                + "\t\t\t\t\t\"characterOffsetBegin\": 12,\n"
                + "\t\t\t\t\t\"characterOffsetEnd\": 13,\n"
                + "\t\t\t\t\t\"before\": \"\",\n"
                + "\t\t\t\t\t\"after\": \" \"\n"
                + "\t\t\t\t}\n"
                + "\t\t\t]\n"
                + "\t\t},\n"
                // ---- sentence 1: "Better than XML." ----
                + "\t\t{\n"
                + "\t\t\t\"index\": 1,\n"
                + "\t\t\t\"tokens\": [\n"
                + "\t\t\t\t{\n"
                + "\t\t\t\t\t\"index\": 1,\n"
                + "\t\t\t\t\t\"word\": \"Better\",\n"
                + "\t\t\t\t\t\"originalText\": \"Better\",\n"
                + "\t\t\t\t\t\"characterOffsetBegin\": 14,\n"
                + "\t\t\t\t\t\"characterOffsetEnd\": 20,\n"
                + "\t\t\t\t\t\"before\": \" \",\n"
                + "\t\t\t\t\t\"after\": \" \"\n"
                + "\t\t\t\t},\n"
                + "\t\t\t\t{\n"
                + "\t\t\t\t\t\"index\": 2,\n"
                + "\t\t\t\t\t\"word\": \"than\",\n"
                + "\t\t\t\t\t\"originalText\": \"than\",\n"
                + "\t\t\t\t\t\"characterOffsetBegin\": 21,\n"
                + "\t\t\t\t\t\"characterOffsetEnd\": 25,\n"
                + "\t\t\t\t\t\"before\": \" \",\n"
                + "\t\t\t\t\t\"after\": \" \"\n"
                + "\t\t\t\t},\n"
                + "\t\t\t\t{\n"
                + "\t\t\t\t\t\"index\": 3,\n"
                + "\t\t\t\t\t\"word\": \"XML\",\n"
                + "\t\t\t\t\t\"originalText\": \"XML\",\n"
                + "\t\t\t\t\t\"characterOffsetBegin\": 26,\n"
                + "\t\t\t\t\t\"characterOffsetEnd\": 29,\n"
                + "\t\t\t\t\t\"before\": \" \",\n"
                + "\t\t\t\t\t\"after\": \"\"\n"
                + "\t\t\t\t},\n"
                + "\t\t\t\t{\n"
                + "\t\t\t\t\t\"index\": 4,\n"
                + "\t\t\t\t\t\"word\": \".\",\n"
                + "\t\t\t\t\t\"originalText\": \".\",\n"
                + "\t\t\t\t\t\"characterOffsetBegin\": 29,\n"
                + "\t\t\t\t\t\"characterOffsetEnd\": 30,\n"
                + "\t\t\t\t\t\"before\": \"\",\n"
                + "\t\t\t\t\t\"after\": \"\"\n"
                + "\t\t\t\t}\n"
                + "\t\t\t]\n"
                + "\t\t}\n"
                + "\t]\n"
                + "}";
            string wanted = Indent(template);

            NUnit.Framework.Assert.AreEqual(wanted, rendered);
        }

Exemplo n.º 27

0

Exibir arquivo

        /// <summary>
        /// Annotates <c>kbpSpanishDocument</c> with a Spanish tokenize + cleanxml + ssplit
        /// pipeline and checks every produced sentence against <c>kbpSpanishSentences</c>,
        /// then checks that the number of sentences matches as well.
        /// </summary>
        public virtual void TestKbpSpanishWorks()
        {
            Properties config = PropertiesUtils.AsProperties(
                "annotators", "tokenize, cleanxml, ssplit",
                "tokenize.language", "es",
                "tokenize.options", "tokenizeNLs,ptb3Escaping=true",
                "ssplit.newlineIsSentenceBreak", "two",
                "ssplit.tokenPatternsToDiscard", "\\n,\\*NL\\*",
                "ssplit.boundaryMultiTokenRegex", "/\\*NL\\*/ /\\p{Lu}[-\\p{L}]+/+ /,/ ( /[-\\p{L}]+/+ /,/ )? " + "/[1-3]?[0-9]/ /\\p{Ll}{3,5}/ /=LRB=/ /\\p{Lu}\\p{L}+/ /=RRB=/ /--/",
                "clean.xmltags", "headline|text|post",
                "clean.singlesentencetags", "HEADLINE|AUTHOR",
                "clean.sentenceendingtags", "TEXT|POST|QUOTE",
                "clean.turntags", "POST|QUOTE",
                "clean.speakertags", "AUTHOR",
                "clean.datetags", "DATE_TIME",
                "clean.doctypetags", "DOC",
                "clean.docAnnotations", "docID=doc[id]",
                "clean.sectiontags", "HEADLINE|POST",
                "clean.sectionAnnotations", "sectionID=post[id],sectionDate=post[datetime],author=post[author]",
                "clean.quotetags", "quote",
                "clean.quoteauthorattributes", "orig_author",
                "clean.tokenAnnotations", "link=a[href],speaker=post[author],speaker=quote[orig_author]");
            StanfordCoreNLP nlp       = new StanfordCoreNLP(config);
            Annotation      annotated = new Annotation(kbpSpanishDocument);

            nlp.Annotate(annotated);
            IList <ICoreMap> found = annotated.Get(typeof(CoreAnnotations.SentencesAnnotation));

            // Compare only the sentences present on both sides, so that a length mismatch is
            // reported by the final count assertion rather than by an index error here.
            int comparable = Math.Min(kbpSpanishSentences.Length, found.Count);
            for (int idx = 0; idx < comparable; idx++)
            {
                ICoreMap current = found[idx];
                string   joined  = SentenceUtils.ListToString(current.Get(typeof(CoreAnnotations.TokensAnnotation)));
                NUnit.Framework.Assert.AreEqual(kbpSpanishSentences[idx], joined, "Bad sentence #" + idx);
            }

            NUnit.Framework.Assert.AreEqual(kbpSpanishSentences.Length, found.Count, "Bad total number of sentences");
        }

Exemplo n.º 28

0

Exibir arquivo

        /// <summary>
        /// Sends an XML-wrapped sentence through a tokenize + ssplit + cleanxml pipeline, checks
        /// it with <c>CheckInvert</c> against the original text, and verifies via
        /// <c>CheckContext</c> the tag names associated with each of the first five tokens.
        /// </summary>
        public virtual void TestViaCoreNlp()
        {
            string     xmlText = " <xml>   <foo>       <bar>This sentence should  " + "   </bar>be invertible.   </foo>   </xml> ";
            Annotation wrapped = new Annotation(xmlText);
            // NOTE(review): these keys use "tokenizer."/"cleanxml." prefixes, not the
            // "tokenize."/"clean." forms seen in other tests - confirm they are honoured.
            Properties config = PropertiesUtils.AsProperties(
                "annotators", "tokenize, ssplit, cleanxml",
                "tokenizer.options", "invertible,ptb3Escaping=true",
                "cleanxml.xmltags", ".*",
                "cleanxml.sentenceendingtags", "p",
                "cleanxml.datetags", string.Empty,
                "cleanxml.allowflawedxml", "false");
            StanfordCoreNLP nlp = new StanfordCoreNLP(config);

            nlp.Annotate(wrapped);
            CheckInvert(wrapped, xmlText);
            IList <CoreLabel> labels = wrapped.Get(typeof(CoreAnnotations.TokensAnnotation));

            int position = 0;

            // Tokens 0..2 are checked against the xml > foo > bar context.
            while (position < 3)
            {
                CheckContext(labels[position], "xml", "foo", "bar");
                position++;
            }

            // Tokens 3..4 are checked against the xml > foo context only.
            while (position < 5)
            {
                CheckContext(labels[position], "xml", "foo");
                position++;
            }
        }

Exemplo n.º 29

0

Exibir arquivo

Arquivo: TokensRegexMatcherDemo.cs Projeto: zerouid/Stanford.CoreNLP.NET

        // static main only
        /// <summary>
        /// TokensRegex demo: annotates a short text, compiles two token-sequence patterns into
        /// a multi-pattern matcher and prints, per sentence, its tokens followed by every
        /// non-overlapping match with its <c>$who</c> and <c>$age</c> groups.
        /// </summary>
        /// <param name="args">Not used.</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            StanfordCoreNLP nlp   = new StanfordCoreNLP(PropertiesUtils.AsProperties("annotators", "tokenize,ssplit,pos,lemma,ner"));
            Annotation      input = new Annotation("Casey is 21. Sally Atkinson's age is 30.");

            nlp.Annotate(input);
            IList <ICoreMap> sentenceList = input.Get(typeof(CoreAnnotations.SentencesAnnotation));

            // Compile each pattern source, keeping the order in which they are listed.
            string[] patternSources = new string[]
            {
                "(?$who [ ner: PERSON]+ ) /is/ (?$age [ pos: CD ] )",
                "(?$who [ ner: PERSON]+ ) /'s/ /age/ /is/ (?$age [ pos: CD ] )"
            };
            IList <TokenSequencePattern> compiled = new List <TokenSequencePattern>();
            for (int p = 0; p < patternSources.Length; p++)
            {
                compiled.Add(TokenSequencePattern.Compile(patternSources[p]));
            }
            MultiPatternMatcher <ICoreMap> matcher = TokenSequencePattern.GetMultiPatternMatcher(compiled);

            int sentenceNumber = 0;

            foreach (ICoreMap current in sentenceList)
            {
                IList <CoreLabel> words = current.Get(typeof(CoreAnnotations.TokensAnnotation));

                sentenceNumber++;
                System.Console.Out.WriteLine("Sentence #" + sentenceNumber);

                // One line listing every token, each preceded by a single space.
                System.Console.Out.Write("  Tokens:");
                foreach (CoreLabel word in words)
                {
                    System.Console.Out.Write(" " + word.ToShortString("Text", "PartOfSpeech", "NamedEntityTag"));
                }
                System.Console.Out.WriteLine();

                IList <ISequenceMatchResult <ICoreMap> > hits = matcher.FindNonOverlapping(words);
                int matchNumber = 0;
                foreach (ISequenceMatchResult <ICoreMap> hit in hits)
                {
                    matchNumber++;
                    System.Console.Out.WriteLine("  Match #" + matchNumber);
                    System.Console.Out.WriteLine("    match: " + hit.Group(0));
                    System.Console.Out.WriteLine("      who: " + hit.Group("$who"));
                    System.Console.Out.WriteLine("      age: " + hit.Group("$age"));
                }
            }
        }

Exemplo n.º 30

0

Exibir arquivo

        /// <summary>
        /// For every entry of <c>dateLineTexts</c>, annotates the text with a pipeline whose
        /// <c>ssplit.boundaryMultiTokenRegex</c> describes dateline shapes, and asserts that the
        /// result has exactly two sentences, the first of which equals the matching entry of
        /// <c>dateLineTokens</c>.
        /// </summary>
        public virtual void TestDatelineSeparation()
        {
            Properties config = PropertiesUtils.AsProperties(
                "annotators", "tokenize, cleanxml, ssplit",
                "tokenize.language", "en",
                "ssplit.newlineIsSentenceBreak", "two",
                "ssplit.boundaryMultiTokenRegex", "( /\\*NL\\*/ /\\p{Lu}[-\\p{L}]+/+ /,/ ( /[-\\p{L}]+/+ /,/ )? "
                                                  + "/\\p{Lu}\\p{Ll}{2,5}\\.?/ /[1-3]?[0-9]/ /-LRB-/ /\\p{Lu}\\p{L}+/ /-RRB-/ /--/ | "
                                                  + "/\\*NL\\*/ /\\p{Lu}[-\\p{Lu}]+/+ ( /,/ /[-\\p{L}]+/+ )? /-/ )");
            StanfordCoreNLP nlp = new StanfordCoreNLP(config);

            // The two fixture arrays are parallel; refuse to run on mismatched data.
            NUnit.Framework.Assert.AreEqual(dateLineTexts.Length, dateLineTokens.Length, "Bad test data");

            int caseIndex = 0;
            while (caseIndex < dateLineTexts.Length)
            {
                Annotation annotated = new Annotation(dateLineTexts[caseIndex]);
                nlp.Annotate(annotated);

                // Each text must be split into the dateline plus one following sentence.
                IList <ICoreMap> found = annotated.Get(typeof(CoreAnnotations.SentencesAnnotation));
                NUnit.Framework.Assert.AreEqual(2, found.Count, "For " + dateLineTexts[caseIndex] + " annotation is " + annotated);

                IList <CoreLabel> datelineTokens = found[0].Get(typeof(CoreAnnotations.TokensAnnotation));
                string            datelineText   = SentenceUtils.ListToString(datelineTokens);
                NUnit.Framework.Assert.AreEqual(dateLineTokens[caseIndex], datelineText, "Bad tokens in dateline");

                caseIndex++;
            }
        }

Exemplos de StanfordCoreNLP.Annotate em C# (CSharp)