/// <summary>
/// Feeds <paramref name="input"/> through a <c>DocumentPreprocessor</c>, renders every
/// sentence it yields with <c>SentenceUtils.ListToString</c>, and asserts that the
/// rendered sentences match <paramref name="expected"/> one-for-one.
/// </summary>
/// <param name="input">Raw text to split into sentences.</param>
/// <param name="expected">Expected rendered sentences, in order.</param>
/// <param name="sentenceFinalPuncWords">
/// Optional override passed to <c>SetSentenceFinalPuncWords</c>; skipped when <c>null</c>.
/// </param>
/// <param name="whitespaceTokenize">
/// When <c>true</c>, the tokenizer factory is set to <c>null</c> and the sentence
/// delimiter to "\n" (presumably selecting whitespace tokenization with one sentence
/// per line -- confirm against DocumentPreprocessor).
/// </param>
private static void RunTest(string input, string[] expected, string[] sentenceFinalPuncWords, bool whitespaceTokenize)
{
    IList<string> results = new List<string>();
    DocumentPreprocessor document = new DocumentPreprocessor(new BufferedReader(new StringReader(input)));

    if (sentenceFinalPuncWords != null)
    {
        document.SetSentenceFinalPuncWords(sentenceFinalPuncWords);
    }

    if (whitespaceTokenize)
    {
        document.SetTokenizerFactory(null);
        document.SetSentenceDelimiter("\n");
    }

    foreach (IList<IHasWord> sentence in document)
    {
        results.Add(SentenceUtils.ListToString(sentence));
    }

    // Concatenating the list itself would print its type name in .NET, so the
    // contents are joined explicitly to keep the failure message useful.
    string countMessage = "Should be " + expected.Length + " sentences but got " + results.Count
        + ": [" + string.Join(", ", results) + "]";

    // NUnit's argument order is (expected, actual, message), unlike JUnit's
    // (message, expected, actual).
    NUnit.Framework.Assert.AreEqual(expected.Length, results.Count, countMessage);

    for (int i = 0; i < results.Count; ++i)
    {
        NUnit.Framework.Assert.AreEqual(expected[i], results[i], "Failed on sentence " + i);
    }
}
/// <summary>
/// Checks enumeration of a single whitespace-delimited line through a
/// <c>DocumentPreprocessor</c> configured with a <c>null</c> tokenizer factory and a
/// "\n" sentence delimiter: exactly one sentence is produced, its words match the
/// expected tokens, and the enumerator keeps reporting exhaustion afterwards.
/// </summary>
public virtual void TestPlainTextIterator()
{
    string test = "This is a one line test . \n";
    string[] expectedResults = new string[] { "This", "is", "a", "one", "line", "test", "." };

    DocumentPreprocessor document = new DocumentPreprocessor(new BufferedReader(new StringReader(test)));
    document.SetTokenizerFactory(null);
    document.SetSentenceDelimiter("\n");

    using (IEnumerator<IList<IHasWord>> iterator = document.GetEnumerator())
    {
        // MoveNext() advances the enumerator (unlike Java's hasNext()), so it is
        // called exactly once to reach the only sentence.
        NUnit.Framework.Assert.IsTrue(iterator.MoveNext());

        // We read Current twice because reading it should not eat any text.
        IList<IHasWord> words = iterator.Current;
        IList<IHasWord> wordsAgain = iterator.Current;
        NUnit.Framework.Assert.AreEqual(expectedResults.Length, words.Count);
        NUnit.Framework.Assert.AreEqual(expectedResults.Length, wordsAgain.Count);
        for (int i = 0; i < expectedResults.Length; ++i)
        {
            NUnit.Framework.Assert.AreEqual(expectedResults[i], words[i].Word());
            NUnit.Framework.Assert.AreEqual(expectedResults[i], wordsAgain[i].Word());
        }

        // We test twice to make sure we don't blow up on multiple calls.
        NUnit.Framework.Assert.IsFalse(iterator.MoveNext());
        NUnit.Framework.Assert.IsFalse(iterator.MoveNext());

        // NOTE(review): this expects the enumerator to throw NoSuchElementException
        // when Current is read past the end, as the original test did. .NET leaves
        // that behaviour undefined in general -- confirm against the enumerator
        // returned by DocumentPreprocessor.
        try
        {
            IList<IHasWord> pastEnd = iterator.Current;
            NUnit.Framework.Assert.Fail("iterator.next() should have blown up");
        }
        catch (NoSuchElementException)
        {
            // yay, this is what we want
        }

        // Just in case: the failed read must not revive the enumerator.
        NUnit.Framework.Assert.IsFalse(iterator.MoveNext());
    }
}