/// <summary>
/// Verifies that a crawl capped at a 2-second MaximumCrawlTime shuts down
/// well inside a 10-second grace window.
/// </summary>
public async Task MaximumCrawlTime()
{
    TestModule.SetupInMemoryStorage();

    // Measure wall-clock time of the whole crawl, including shutdown.
    Stopwatch stopwatch;
    using (var crawler = new Crawler(new Uri("http://ncrawler.codeplex.com"), new HtmlDocumentProcessor())
    {
        MaximumThreadCount = 10,
        MaximumCrawlDepth = 10,
        MaximumCrawlTime = TimeSpan.FromSeconds(2)
    })
    {
        stopwatch = Stopwatch.StartNew();
        await crawler.CrawlAsync();
        stopwatch.Stop();
    }

    // The 2s limit plus graceful-finish overhead must stay under 10s.
    Assert.Less(stopwatch.ElapsedMilliseconds, 10000);
}
/// <summary>
/// Reloads the entries for the currently selected directory and refreshes the
/// crawler results shown in the UI.
/// </summary>
/// <remarks>
/// Kept as <c>async void</c> because it appears to be invoked as a
/// fire-and-forget UI handler (changing the signature could break delegate
/// bindings — confirm against callers). The try/finally guarantees the
/// loading indicator is cleared even when the crawl faults; previously an
/// exception from CrawlAsync left the UI stuck in the loading state.
/// </remarks>
private async void RefreshDirectory()
{
    Entries[SelectedDirectory].Clear();
    CrawlerResults.Clear();
    SelectedCrawlerResult = null;
    Loading = Visibility.Visible;

    try
    {
        var result = await Crawler.CrawlAsync(SelectedDirectory);
        if (!String.IsNullOrEmpty(result.Error))
        {
            Error = result.Error;
            CrawlerResults.Clear();
        }
        else
        {
            Error = String.Empty;
            foreach (var item in result.Items)
            {
                Entries[SelectedDirectory].Add(item);
            }

            CrawlerResults.Clear();
            UpdateCrawlerResults(Entries[SelectedDirectory]);
        }
    }
    finally
    {
        // Always hide the busy indicator, even on failure.
        Loading = Visibility.Hidden;
    }
}
/// <summary>
/// Verifies that crawling an unresolvable host reports an error on the crawl
/// result instead of throwing.
/// </summary>
/// <remarks>
/// Changed from <c>async void</c> to <c>async Task</c> so the test runner can
/// await completion and observe assertion failures — exceptions escaping an
/// <c>async void</c> method are unobservable to the framework.
/// </remarks>
public async Task TestBadAddress()
{
    using (var crawler = new Crawler(baseAddress: "https://lost/fdsjaklfdjsalfdsajlkfjdlsaffdkla;sfjkld"))
    {
        // The crawler must echo back the configured base address verbatim.
        Assert.Equal("https://lost/fdsjaklfdjsalfdsajlkfjdlsaffdkla;sfjkld", crawler.BaseAddress);

        var result = await crawler.CrawlAsync();

        // Resolution of the bogus host fails, so an error must be reported.
        Assert.NotNull(result.Error);
    }
}
/// <summary>
/// Entry point: crawls http://www.se-radio.net/ to a nesting level of 2
/// (final bool argument false — presumably "do not follow other domains";
/// confirm against Crawler.CrawlAsync), storing output under C:\Temp, then
/// prints every discovered key/value pair.
/// </summary>
static async Task Main(string[] args)
{
    // Await directly instead of blocking with task.Wait()/task.Result,
    // which risks deadlocks on sync-context hosts and wraps failures in
    // AggregateException.
    var result = await Crawler.CrawlAsync("http://www.se-radio.net/", @"C:\Temp", 2, false);

    foreach (var uri in result)
    {
        Console.WriteLine(uri.Key + " - " + uri.Value);
    }

    Console.ReadLine();
}
/// <summary>
/// Test helper: runs <c>Crawler.CrawlAsync</c> against <paramref name="uri"/>
/// using the mocked loader and file system, a fresh parser factory, and a
/// hard-coded final argument of 10 (presumably a concurrency/limit setting —
/// confirm against Crawler.CrawlAsync).
/// </summary>
private async Task<IDictionary<string, string>> CrawlAsync(string uri, int nestedLevel, bool isNeedUploadOtherDomens)
{
    var parserFactory = new HtmlParserFactory();
    var pages = await Crawler.CrawlAsync(
        uri,
        nestedLevel,
        isNeedUploadOtherDomens,
        loaderMoq.Object,
        fileSystemMoq.Object,
        parserFactory,
        10);
    return pages;
}
/// <summary>
/// Fetches a known 100-word test page, crawls it from the first
/// &lt;div class="row"&gt; node, and verifies the crawler's word-frequency
/// statistics (max frequency 5, most frequent word "he", total 100 words).
/// </summary>
/// <remarks>
/// Changed from <c>async void</c> to <c>async Task</c> so the test framework
/// can await completion and report failures — exceptions escaping an
/// <c>async void</c> method are unobservable to the runner.
/// </remarks>
public async Task TestCrawlerAgainstActivePage()
{
    string html = null;

    // Fetch the page under the retry policy; non-2xx responses throw so the
    // policy can retry them.
    await retryPolicy.ExecuteAsync(async () =>
    {
        var response = await client.GetAsync("");
        if (!response.IsSuccessStatusCode)
        {
            throw new Exception($"FAILED WITH STATUS CODE: {response.StatusCode}");
        }

        html = await response.Content.ReadAsStringAsync();
    });

    // Parse the HTML and use the first <div class="row"> as the crawl root.
    var document = new HtmlDocument();
    document.LoadHtml(html);
    var endName = "noscript";
    var rootNode = document.DocumentNode.Descendants("div")
        .First(node => node.GetAttributeValue("class", "").Equals("row"));

    using (var crawler = new Crawler(baseAddress: "https://www.crawler-test.com/content/word_count_100_words"))
    {
        var result = await crawler.CrawlAsync(rootNode, endName);

        Assert.NotEmpty(crawler.WordDict);
        Assert.NotEmpty(crawler.MostFrequent);
        Assert.Null(result.Error);
        Assert.NotNull(result.Words);
        Assert.Equal(5, result.Words.Values.Max());

        // Total of all word counts across the page.
        var sum = crawler.WordDict.Values.Sum();

        // Find the most frequent word. Note: as in the original, the LAST
        // key matching the maximum count wins in case of ties.
        var maxCount = result.Words.Values.Max();
        var key = "";
        foreach (var entry in crawler.WordDict)
        {
            if (entry.Value == maxCount)
            {
                key = entry.Key;
            }
        }

        Assert.Equal("he", key);
        Assert.Equal(100, sum);
    }
}