Example #1
0
        // Feeds a short English passage through a TextPreprocessor built from the
        // English tokenizer, stopword list, normalizer and Porter stemmer, then checks
        // that the resulting terms match the expected sequence, in order.
        public void EnglishTextPreprocessor_Preprocess()
        {
            // arrange
            var preprocessor = new TextPreprocessor(
                new EnglishSimpleTokenizer(),
                new EnglishStopwords(),
                new EnglishSimpleNormalizer(),
                new EnglishPorterStemmer());
            var text =
                @"Jack London was born on January 12, 1876.  By age 30 London was internationally famous for his books
        Call of the Wild (1903), The Sea Wolf (1904) and other literary and journalistic accomplishments.";

            // Expected output terms, in order of appearance in the text.
            var expected = new[]
            {
                "jack",
                "london",
                "born",
                "januari",
                "ag",
                "london",
                "internation",
                "famou",
                "book",
                "wild",
                "sea",
                "wolf",
                "literari",
                "journalist",
                "accomplish",
            };

            // act
            var dict = preprocessor.Preprocess(text);

            // assert
            // Assert.AreEqual takes (expected, actual); keeping that order makes the
            // failure message report the right value as "expected".
            Assert.AreEqual(expected.Length, dict.Count);
            for (var i = 0; i < expected.Length; i++)
            {
                Assert.AreEqual(expected[i], dict[i], "Unexpected term at index " + i);
            }
        }
        // Checks that preprocessing leaves the test string unchanged.
        // NOTE(review): judging by the assertion message, `pp` is presumably set up
        // with an empty replacement list elsewhere in the fixture - confirm.
        public void NoReplacements()
        {
            // act
            var actual = pp.Preprocess(testString);

            // assert
            Assert.AreEqual(
                testString,
                actual,
                "Empty replacement list should not change string!");
        }