/// <summary>
/// Tokenizes and sentence-splits a four-sentence string, then checks that
/// <c>ChunkAnnotationUtils.MergeChunks</c> called with the index arguments 1 and 4
/// leaves two entries in the sentence list.
/// </summary>
public virtual void TestMergeChunks()
{
    // Create 4 sentences
    string text = "I have created sentence1. And then sentence2. Now sentence3. Finally sentence4.";
    IAnnotator tokenizer = new TokenizerAnnotator("en");
    IAnnotator ssplit = new WordsToSentencesAnnotator();
    Annotation annotation = new Annotation(text);
    tokenizer.Annotate(annotation);
    ssplit.Annotate(annotation);

    // Get sentences
    IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
    // NUnit's classic assertions take (expected, actual, message); the message goes last.
    NUnit.Framework.Assert.AreEqual(4, sentences.Count, "4 sentence expected");

    // Merge last 3 into one
    // NOTE(review): the call mutates `sentences` in place as far as this test can tell
    // (the same list is re-counted below) - confirm against MergeChunks.
    ChunkAnnotationUtils.MergeChunks(sentences, text, 1, 4);
    NUnit.Framework.Assert.AreEqual(2, sentences.Count, "2 sentence expected");
}
/// <summary>
/// Tokenizes <c>text</c> and checks that the produced tokens match <c>tokenWords</c>
/// one-for-one, first through <c>CoreLabel.Word()</c> and then through the
/// <c>CoreAnnotations.TextAnnotation</c> key.
/// </summary>
public virtual void TestNewVersion()
{
    Annotation ann = new Annotation(text);
    IAnnotator annotator = new TokenizerAnnotator("en");
    annotator.Annotate(ann);

    // First pass: compare via the Word() accessor.
    using (IEnumerator<string> expectedWords = tokenWords.GetEnumerator())
    {
        foreach (CoreLabel word in ann.Get(typeof(CoreAnnotations.TokensAnnotation)))
        {
            // A .NET enumerator starts before the first element; it must be advanced
            // with MoveNext() before Current is meaningful.
            NUnit.Framework.Assert.IsTrue(expectedWords.MoveNext(), "Too many tokens in new CoreLabel usage");
            NUnit.Framework.Assert.AreEqual(expectedWords.Current, word.Word(), "Bung token in new CoreLabel usage");
        }
        // Every expected word must have been consumed by the loop above.
        NUnit.Framework.Assert.IsFalse(expectedWords.MoveNext(), "Too few tokens in new CoreLabel usage");
    }

    // Second pass: same comparison, reading the token text through the annotation key.
    using (IEnumerator<string> expectedTexts = tokenWords.GetEnumerator())
    {
        foreach (CoreLabel word_1 in ann.Get(typeof(CoreAnnotations.TokensAnnotation)))
        {
            NUnit.Framework.Assert.IsTrue(expectedTexts.MoveNext(), "Too many tokens in new CoreLabel usage");
            NUnit.Framework.Assert.AreEqual(expectedTexts.Current, word_1.Get(typeof(CoreAnnotations.TextAnnotation)), "Bung token in new CoreLabel usage");
        }
        NUnit.Framework.Assert.IsFalse(expectedTexts.MoveNext(), "Too few tokens in new CoreLabel usage");
    }
}