public void ProcessTest()
        {
            // Arrange: one document made of three tokens, two of which share the
            // same text, plus a token whose text never appears in that document.
            var firstMama  = MockRepository.GenerateStub <IToken>();
            var myla       = MockRepository.GenerateStub <IToken>();
            var secondMama = MockRepository.GenerateStub <IToken>();
            var unseen     = MockRepository.GenerateStub <IToken>();

            firstMama.Stub(token => token.Text).Return("мама");
            myla.Stub(token => token.Text).Return("мыла");
            secondMama.Stub(token => token.Text).Return("мама");
            unseen.Stub(token => token.Text).Return("кот");

            var sut = new DocumentFrequencyProvider();

            // Act: the whole token sequence is submitted in a single ProcessText call.
            sut.ProcessText(new[] { firstMama, myla, secondMama });

            // Assert: a single call is expected to count as one document in the corpus,
            // a token present in it is reported for exactly one document,
            // and a token with unseen text is reported for none.
            Assert.AreEqual(1, sut.CorpusSize);
            Assert.AreEqual(1, sut.GeDocumentsWithTokenCount(firstMama));
            Assert.AreEqual(0, sut.GeDocumentsWithTokenCount(unseen));
        }
Пример #2
0
        /// <summary>
        /// Serializes the given frequency provider with a
        /// <see cref="DataContractSerializer"/> into the file named by
        /// <c>DfContainerFile</c>.
        /// </summary>
        /// <param name="frequencyProvider">The provider instance to write out.</param>
        private static void SaveFrequencies(DocumentFrequencyProvider frequencyProvider)
        {
            // FileMode.Create overwrites an existing file or creates a new one.
            using (FileStream output = new FileStream(DfContainerFile, FileMode.Create))
            {
                var writer = new DataContractSerializer(typeof(DocumentFrequencyProvider));
                writer.WriteObject(output, frequencyProvider);
            }
        }
Пример #3
0
        /// <summary>
        /// Entry point: reads article URLs from <c>ArticlesUrlsFile</c>, fetches and
        /// tokenizes each article, feeds the tokens to a
        /// <see cref="DocumentFrequencyProvider"/>, and finally saves that provider
        /// via <c>SaveFrequencies</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        private static void Main(string[] args)
        {
            // Wire up the article pipeline: HTML cleaning + loading -> article provider.
            var articles = new ArticleProvider(
                new HtmlCleaner(new[] { new TelegraphHtmlCleaner() }),
                new HtmlLoader());
            var frequencies = new DocumentFrequencyProvider();
            var splitter    = new Tokenizer();

            // The URL list is read in full up front, one URL per line, decoded as UTF-16
            // (Encoding.Unicode).
            foreach (string url in File.ReadAllLines(ArticlesUrlsFile, Encoding.Unicode))
            {
                IArticle             current       = articles.Get(url);
                IEnumerable <IToken> articleTokens = splitter.Tokenize(current.Text);

                // Each article is passed to the provider in its own ProcessText call.
                frequencies.ProcessText(articleTokens);
            }

            SaveFrequencies(frequencies);
        }
        /// <summary>
        /// Calls <c>ProcessText</c> on a fresh provider with a <c>null</c> token sequence.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method name suggests an exception is expected, but no
        /// expectation is visible in this snippet — confirm that an expected-exception
        /// attribute (or equivalent) is attached to this test.
        /// </remarks>
        public void ProcessTestNullTokensExc()
        {
            new DocumentFrequencyProvider().ProcessText(null);
        }
        /// <summary>
        /// Verifies that a newly constructed provider, which has processed no text,
        /// reports a corpus size of zero.
        /// </summary>
        public void EmptyProviderTest()
        {
            Assert.AreEqual(0, new DocumentFrequencyProvider().CorpusSize);
        }