/// <summary>
/// Processing one token sequence should count as a single document, report a
/// document count of 1 for a token that appeared in it (even if repeated),
/// and 0 for a token that never appeared.
/// </summary>
public void ProcessTest()
{
    var provider = new DocumentFrequencyProvider();

    var firstToken = MockRepository.GenerateStub<IToken>();
    firstToken.Stub(t => t.Text).Return("мама");

    var secondToken = MockRepository.GenerateStub<IToken>();
    secondToken.Stub(t => t.Text).Return("мыла");

    // Same text as firstToken: duplicates within one document must not
    // inflate the per-document count.
    var thirdToken = MockRepository.GenerateStub<IToken>();
    thirdToken.Stub(t => t.Text).Return("мама");

    provider.ProcessText(new[] { firstToken, secondToken, thirdToken });

    Assert.AreEqual(1, provider.CorpusSize);
    // NOTE(review): "GeDocumentsWithTokenCount" looks like a typo for
    // "GetDocumentsWithTokenCount" — the member is declared elsewhere, so the
    // name is kept as-is here.
    Assert.AreEqual(1, provider.GeDocumentsWithTokenCount(firstToken));

    var unseenToken = MockRepository.GenerateStub<IToken>();
    unseenToken.Stub(tt => tt.Text).Return("кот");

    Assert.AreEqual(0, provider.GeDocumentsWithTokenCount(unseenToken));
}
/// <summary>
/// Entry point: reads article URLs from <c>ArticlesUrlsFile</c>, downloads and
/// cleans each article, tokenizes its text, feeds the tokens into a
/// <see cref="DocumentFrequencyProvider"/>, and finally persists the
/// accumulated frequencies via <c>SaveFrequencies</c>.
/// </summary>
private static void Main(string[] args)
{
    var cleaner = new HtmlCleaner(new[] { new TelegraphHtmlCleaner() });
    var loader = new HtmlLoader();
    var articles = new ArticleProvider(cleaner, loader);
    var frequencies = new DocumentFrequencyProvider();
    var tokenizer = new Tokenizer();

    // The URL list file is expected to be UTF-16 ("Unicode") encoded.
    foreach (string url in File.ReadAllLines(ArticlesUrlsFile, Encoding.Unicode))
    {
        IArticle article = articles.Get(url);
        IEnumerable<IToken> tokens = tokenizer.Tokenize(article.Text);
        frequencies.ProcessText(tokens);
    }

    SaveFrequencies(frequencies);
}
/// <summary>
/// Passing a null token sequence to <c>ProcessText</c> is expected to throw;
/// the test name suggests an exception-expectation attribute is declared on it
/// (not visible in this chunk — confirm against the full file).
/// </summary>
public void ProcessTestNullTokensExc()
{
    var frequencyProvider = new DocumentFrequencyProvider();

    frequencyProvider.ProcessText(null);
}