Пример #1
0
        /// <summary>
        /// Tokenizes and sentence-splits a four-sentence text, merges a range of the
        /// resulting sentences with <c>ChunkAnnotationUtils.MergeChunks</c>, and checks
        /// that the sentence list shrinks from four entries to two.
        /// </summary>
        /// <remarks>
        /// The count is re-read from the same list after the merge, so the test expects
        /// <c>MergeChunks</c> to modify the list it is given.
        /// </remarks>
        public virtual void TestMergeChunks()
        {
            // Create 4 sentences
            string text = "I have created sentence1.  And then sentence2.  Now sentence3. Finally sentence4.";
            IAnnotator tokenizer = new TokenizerAnnotator("en");
            IAnnotator ssplit = new WordsToSentencesAnnotator();
            Annotation annotation = new Annotation(text);

            // Sentence splitting works on tokens, so the tokenizer has to run first.
            tokenizer.Annotate(annotation);
            ssplit.Annotate(annotation);

            // Get sentences
            IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

            // NUnit's argument order is (expected, actual, message); the message goes last.
            NUnit.Framework.Assert.AreEqual(4, sentences.Count, "4 sentence expected");

            // Merge last 3 into one
            ChunkAnnotationUtils.MergeChunks(sentences, text, 1, 4);
            NUnit.Framework.Assert.AreEqual(2, sentences.Count, "2 sentence expected");
        }
        /// <summary>
        /// Tokenizes <c>text</c> and compares the produced tokens, in order, against
        /// <c>tokenWords</c>. The comparison is done twice: once reading each token via
        /// <c>CoreLabel.Word()</c> and once via <c>CoreAnnotations.TextAnnotation</c>.
        /// </summary>
        /// <remarks>
        /// <c>text</c> and <c>tokenWords</c> are declared outside this method.
        /// Each pass fails if a token differs from its expected word, if the tokenizer
        /// produces more tokens than there are expected words, or if expected words are
        /// left over once the tokens run out.
        /// </remarks>
        public virtual void TestNewVersion()
        {
            Annotation ann = new Annotation(text);
            IAnnotator annotator = new TokenizerAnnotator("en");

            annotator.Annotate(ann);

            // Pass 1: token text read through Word().
            using (IEnumerator<string> expected = tokenWords.GetEnumerator())
            {
                foreach (CoreLabel word in ann.Get(typeof(CoreAnnotations.TokensAnnotation)))
                {
                    // An enumerator starts positioned before the first element, so it must be
                    // advanced before Current is read; a failed advance means there are more
                    // tokens than expected words.
                    NUnit.Framework.Assert.IsTrue(expected.MoveNext(), "Too many tokens in new CoreLabel usage");
                    NUnit.Framework.Assert.AreEqual(expected.Current, word.Word(), "Bung token in new CoreLabel usage");
                }
                // Any expected word still pending here was never matched by a token.
                NUnit.Framework.Assert.IsFalse(expected.MoveNext(), "Too few tokens in new CoreLabel usage");
            }

            // Pass 2: same comparison, token text read through TextAnnotation.
            using (IEnumerator<string> expected = tokenWords.GetEnumerator())
            {
                foreach (CoreLabel word in ann.Get(typeof(CoreAnnotations.TokensAnnotation)))
                {
                    NUnit.Framework.Assert.IsTrue(expected.MoveNext(), "Too many tokens in new CoreLabel usage");
                    NUnit.Framework.Assert.AreEqual(expected.Current, word.Get(typeof(CoreAnnotations.TextAnnotation)), "Bung token in new CoreLabel usage");
                }
                NUnit.Framework.Assert.IsFalse(expected.MoveNext(), "Too few tokens in new CoreLabel usage");
            }
        }