public void ProcessTest()
{
    // One document containing the token "мама" twice and "мыла" once:
    // document frequency must count documents, not occurrences.
    var provider = new DocumentFrequencyProvider();

    var mamaToken = MockRepository.GenerateStub<IToken>();
    mamaToken.Stub(t => t.Text).Return("мама");

    var mylaToken = MockRepository.GenerateStub<IToken>();
    mylaToken.Stub(t => t.Text).Return("мыла");

    var mamaDuplicate = MockRepository.GenerateStub<IToken>();
    mamaDuplicate.Stub(t => t.Text).Return("мама");

    provider.ProcessText(new[] { mamaToken, mylaToken, mamaDuplicate });

    // A single call to ProcessText counts as one document in the corpus,
    // and the duplicated token still contributes a document frequency of 1.
    Assert.AreEqual(1, provider.CorpusSize);
    Assert.AreEqual(1, provider.GeDocumentsWithTokenCount(mamaToken));

    // A token the provider has never seen has a document frequency of 0.
    var unseenToken = MockRepository.GenerateStub<IToken>();
    unseenToken.Stub(t => t.Text).Return("кот");
    Assert.AreEqual(0, provider.GeDocumentsWithTokenCount(unseenToken));
}
// Serializes the accumulated document frequencies to DfContainerFile
// (overwriting any existing file) so they can be reloaded later without
// reprocessing the corpus.
private static void SaveFrequencies(DocumentFrequencyProvider frequencyProvider)
{
    var serializer = new DataContractSerializer(typeof(DocumentFrequencyProvider));

    // File.Create opens the file with FileMode.Create, truncating it if present.
    using (FileStream output = File.Create(DfContainerFile))
    {
        serializer.WriteObject(output, frequencyProvider);
    }
}
// Entry point: builds the article-processing pipeline, tokenizes every
// article listed in ArticlesUrlsFile, accumulates document frequencies,
// and persists the result via SaveFrequencies.
private static void Main(string[] args)
{
    // Pipeline wiring: loader fetches HTML, cleaner strips markup
    // (Telegraph-specific rules), tokenizer splits text into tokens.
    var loader = new HtmlLoader();
    var cleaner = new HtmlCleaner(new[] { new TelegraphHtmlCleaner() });
    var articleProvider = new ArticleProvider(cleaner, loader);
    var tokenizer = new Tokenizer();
    var frequencyProvider = new DocumentFrequencyProvider();

    // The URL list file is expected to be Unicode-encoded, one URL per line.
    string[] urls = File.ReadAllLines(ArticlesUrlsFile, Encoding.Unicode);
    foreach (string url in urls)
    {
        IArticle article = articleProvider.Get(url);
        IEnumerable<IToken> articleTokens = tokenizer.Tokenize(article.Text);

        // Each article counts as one document for frequency purposes.
        frequencyProvider.ProcessText(articleTokens);
    }

    SaveFrequencies(frequencyProvider);
}
public void ProcessTestNullTokensExc()
{
    // Passing null tokens must be rejected by ProcessText.
    // NOTE(review): the expected exception is presumably declared via an
    // attribute on this test (not visible in this chunk) — confirm.
    var provider = new DocumentFrequencyProvider();

    provider.ProcessText(null);
}
public void EmptyProviderTest()
{
    // A freshly constructed provider has processed no documents yet.
    var freshProvider = new DocumentFrequencyProvider();

    Assert.AreEqual(0, freshProvider.CorpusSize);
}