/// <summary>
/// Feeds a short English passage through a <c>TextPreprocessor</c> built from the
/// English tokenizer, stopword list, normalizer and Porter stemmer, and checks
/// that the returned terms match the expected sequence, in order.
/// </summary>
public void EnglishTextPreprocessor_Preprocess()
{
    // arrange
    var preprocessor = new TextPreprocessor(
        new EnglishSimpleTokenizer(),
        new EnglishStopwords(),
        new EnglishSimpleNormalizer(),
        new EnglishPorterStemmer());

    // Same text as before; split with compile-time concatenation for readability only.
    var text =
        "Jack London was born on January 12, 1876. " +
        "By age 30 London was internationally famous for his books " +
        "Call of the Wild (1903), The Sea Wolf (1904) " +
        "and other literary and journalistic accomplishments.";

    // Terms the pipeline is expected to produce, in output order.
    var expected = new[]
    {
        "jack",
        "london",
        "born",
        "januari",
        "ag",
        "london",
        "internation",
        "famou",
        "book",
        "wild",
        "sea",
        "wolf",
        "literari",
        "journalist",
        "accomplish",
    };

    // act
    var dict = preprocessor.Preprocess(text);

    // assert
    // Expected value goes first so a failure reports "expected X but was Y" the right way round.
    Assert.AreEqual(expected.Length, dict.Count);

    // The count check above guarantees every index below is in range.
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], dict[i], "Unexpected term at index " + i);
    }
}
/// <summary>
/// Checks that <c>pp.Preprocess</c> returns <c>testString</c> unchanged.
/// </summary>
/// <remarks>
/// NOTE(review): <c>pp</c> and <c>testString</c> are defined outside this method;
/// presumably <c>pp</c> is configured with an empty replacement list — confirm in the fixture setup.
/// </remarks>
public void NoReplacements()
{
    const string failureMessage = "Empty replacement list should not change string!";

    var actual = pp.Preprocess(testString);

    Assert.AreEqual(testString, actual, failureMessage);
}