Пример #1
0
        /// <summary>
        /// Walks a <c>TagExtractor</c> created for the "abstract" tag through three cases
        /// (an accepted text, a text marked "too short", and a text under another tag)
        /// and checks <c>count()</c> after each one, plus the <c>n_words</c> totals of the
        /// <c>LangProfile</c> that is updated with the accepted text.
        /// </summary>
        /// <remarks>
        /// The second constructor argument (10) is presumably a minimum text length
        /// threshold, given that the 6-character input is labelled "too short";
        /// TODO confirm against TagExtractor.
        /// </remarks>
        public void testNormalScenario()
        {
            TagExtractor extractor = new TagExtractor("abstract", 10);

            // Assert.AreEqual takes (expected, actual); keeping that order makes a
            // failure message report the two values the right way round.
            Assert.AreEqual(0, extractor.count());

            LangProfile profile = new LangProfile("en");

            // normal: text under the target tag is counted and fed into the profile
            extractor.setTag("abstract");
            extractor.add("This is a sample text.");
            profile.update(extractor.closeTag());
            Assert.AreEqual(1, extractor.count());
            Assert.AreEqual(17, profile.n_words[0]);  // Thisisasampletext
            Assert.AreEqual(22, profile.n_words[1]);  // _T, Th, hi, ...
            Assert.AreEqual(17, profile.n_words[2]);  // _Th, Thi, his, ...

            // too short: the count must stay at 1
            extractor.setTag("abstract");
            extractor.add("sample");
            profile.update(extractor.closeTag());
            Assert.AreEqual(1, extractor.count());

            // other tags: text under a tag other than "abstract" must not be counted
            extractor.setTag("div");
            extractor.add("This is a sample text which is enough long.");
            profile.update(extractor.closeTag());
            Assert.AreEqual(1, extractor.count());
        }