Exemplo n.º 1
0
        /// <summary>
        /// Checks that a crawl configured with a two second <c>MaximumCrawlTime</c>
        /// finishes within a ten second wall-clock budget.
        /// </summary>
        public async Task MaximumCrawlTime()
        {
            // Upper bound, in milliseconds, that leaves room for a graceful finish.
            const int gracefulFinishBudgetMs = 10000;

            TestModule.SetupInMemoryStorage();

            // Arrange
            var startUri = new Uri("http://ncrawler.codeplex.com");
            var processor = new HtmlDocumentProcessor();
            var timer = new Stopwatch();

            using (var crawler = new Crawler(startUri, processor)
            {
                MaximumThreadCount = 10,
                MaximumCrawlDepth = 10,
                MaximumCrawlTime = TimeSpan.FromSeconds(2)
            })
            {
                // Act: time only the crawl itself, not the crawler construction.
                timer.Start();
                await crawler.CrawlAsync();
                timer.Stop();
            }

            // Assert
            Assert.Less(timer.ElapsedMilliseconds, gracefulFinishBudgetMs);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Re-crawls the currently selected directory, replaces its cached entries with
        /// the crawl output and refreshes the displayed crawler results.
        /// </summary>
        /// <remarks>
        /// On a crawl error the error text is stored in <c>Error</c> and the results stay
        /// empty; on success <c>Error</c> is reset to an empty string. The method remains
        /// <c>async void</c> so existing callers are unaffected; an exception thrown by the
        /// crawl therefore still propagates to the caller's context, but <c>Loading</c> is
        /// always reset first.
        /// </remarks>
        private async void RefreshDirectory()
        {
            // Capture the selection once: SelectedDirectory may change while the crawl is
            // awaited, and the results must land in the entry list of the directory that
            // was actually crawled.
            var directory = SelectedDirectory;
            var entries = Entries[directory];

            entries.Clear();
            CrawlerResults.Clear();
            SelectedCrawlerResult = null;

            Loading = Visibility.Visible;

            try
            {
                var result = await Crawler.CrawlAsync(directory);

                if (!String.IsNullOrEmpty(result.Error))
                {
                    Error = result.Error;
                    CrawlerResults.Clear();
                }
                else
                {
                    Error = String.Empty;
                    foreach (var item in result.Items)
                    {
                        entries.Add(item);
                    }

                    CrawlerResults.Clear();
                    UpdateCrawlerResults(entries);
                }
            }
            finally
            {
                // Reset the loading indicator even when the crawl throws, otherwise it
                // would stay visible indefinitely.
                Loading = Visibility.Hidden;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Verifies that a crawler created with a bad base address keeps that address
        /// unchanged and reports an error on the crawl result.
        /// </summary>
        /// <returns>A task the test runner awaits, so assertion failures are observed.</returns>
        public async Task TestBadAddress()
        {
            // Single source of truth for the address used both as input and as expectation.
            const string badAddress = "https://lost/fdsjaklfdjsalfdsajlkfjdlsaffdkla;sfjkld";

            using (var crawler = new Crawler(baseAddress: badAddress))
            {
                Assert.Equal(badAddress, crawler.BaseAddress);

                var result = await crawler.CrawlAsync();

                Assert.NotNull(result.Error);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Console entry point: crawls a fixed site, prints every returned entry as
        /// "key - value" and then waits for a line of input before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var crawl = Crawler.CrawlAsync("http://www.se-radio.net/", @"C:\Temp", 2, false);

            // Reading Result blocks until the crawl has completed.
            var crawled = crawl.Result;

            foreach (var entry in crawled)
            {
                var line = entry.Key + " - " + entry.Value;
                Console.WriteLine(line);
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
Exemplo n.º 5
0
 /// <summary>
 /// Test helper that forwards to <c>Crawler.CrawlAsync</c>, supplying the mocked loader,
 /// the mocked file system, a new <c>HtmlParserFactory</c> and the fixed trailing argument 10.
 /// </summary>
 /// <param name="uri">Passed through as the first argument of the crawl.</param>
 /// <param name="nestedLevel">Passed through as the second argument of the crawl.</param>
 /// <param name="isNeedUploadOtherDomens">Passed through as the third argument of the crawl.</param>
 /// <returns>The dictionary produced by <c>Crawler.CrawlAsync</c>.</returns>
 private async Task<IDictionary<string, string>> CrawlAsync(string uri, int nestedLevel, bool isNeedUploadOtherDomens)
 {
     var loader = loaderMoq.Object;
     var fileSystem = fileSystemMoq.Object;
     var parserFactory = new HtmlParserFactory();

     var crawled = await Crawler.CrawlAsync(
         uri,
         nestedLevel,
         isNeedUploadOtherDomens,
         loader,
         fileSystem,
         parserFactory,
         10);

     return crawled;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Downloads a live page through the retry policy, crawls its first
        /// <c>div class="row"</c> node and checks the resulting word statistics:
        /// 100 words in total, with "he" the most frequent at 5 occurrences.
        /// </summary>
        /// <returns>A task the test runner awaits, so assertion failures are observed.</returns>
        public async Task TestCrawlerAgainstActivePage()
        {
            string html = null;

            // Fetch the page content, retrying according to retryPolicy.
            await retryPolicy.ExecuteAsync(async () =>
            {
                // NOTE(review): the empty relative URI presumably resolves to the client's
                // configured base address — confirm against the fixture setup.
                var response = await client.GetAsync("");
                if (!response.IsSuccessStatusCode)
                {
                    throw new Exception($"FAILED WITH STATUS CODE: {response.StatusCode}");
                }

                html = await response.Content.ReadAsStringAsync();
            });

            // Parse the HTML and locate the node the crawl starts from.
            var document = new HtmlDocument();
            document.LoadHtml(html);

            var endName  = "noscript";
            var rootNode = document.DocumentNode.Descendants("div")
                           .First(node => node.GetAttributeValue("class", "").Equals("row"));

            using (var crawler = new Crawler(baseAddress: "https://www.crawler-test.com/content/word_count_100_words"))
            {
                var result = await crawler.CrawlAsync(rootNode, endName);

                Assert.NotEmpty(crawler.WordDict);
                Assert.NotEmpty(crawler.MostFrequent);
                Assert.Null(result.Error);
                Assert.NotNull(result.Words);

                // Compute the highest word count once instead of once per loop iteration.
                var maxCount = result.Words.Values.Max();
                Assert.Equal(5, maxCount);

                var sum = crawler.WordDict.Values.Sum();

                // Keep the last word whose count equals the maximum, as the loop is
                // order-preserving over WordDict.
                var key = "";
                foreach (var entry in crawler.WordDict)
                {
                    if (entry.Value == maxCount)
                    {
                        key = entry.Key;
                    }
                }

                Assert.Equal("he", key);
                Assert.Equal(100, sum);
            }
        }