示例#1
0
        /// <summary>
        /// Runs a crawl starting at <c>AddressCrawl</c>, reports the outcome on the console,
        /// and then writes the <c>Videos</c> collection to <c>PathSave</c> as indented JSON,
        /// ordered by <c>Id</c> descending.
        /// </summary>
        /// <param name="crawler">The crawler used to perform the crawl.</param>
        private static void RunAbot(Abot.Crawler.PoliteWebCrawler crawler)
        {
            // Crawl is synchronous: nothing below runs until the crawl has completed.
            var crawlResult = crawler.Crawl(new Uri(AddressCrawl));

            // ErrorException is only read when the result reports an error.
            string outcome = crawlResult.ErrorOccurred
                ? string.Format("Crawl of {0} completed with error: {1}",
                                crawlResult.RootUri.AbsoluteUri,
                                crawlResult.ErrorException.Message)
                : string.Format("Crawl of {0} completed without error.",
                                crawlResult.RootUri.AbsoluteUri);
            Console.WriteLine(outcome);

            System.Diagnostics.Debug.WriteLine(Videos.Count);
            System.Diagnostics.Debug.WriteLine("STOP");

            // The export runs whether or not the crawl reported an error.
            // Highest Id first; the ordering is evaluated lazily during serialization.
            var highestIdFirst = Videos.OrderByDescending(video => video.Id);
            var json = Newtonsoft.Json.JsonConvert.SerializeObject(
                highestIdFirst,
                Newtonsoft.Json.Formatting.Indented);
            System.IO.File.WriteAllText(PathSave, json);

            System.Diagnostics.Debug.Write($"EXPORT: {Videos.Count} Videos at {PathSave}");
        }
示例#2
0
        /// <summary>
        /// Builds a crawl configuration (100-second crawl timeout, 10 concurrent threads,
        /// at most 1000 pages, custom user agent), creates a <c>PoliteWebCrawler</c> from it,
        /// subscribes the page-crawl starting/completed handlers, and hands the crawler
        /// to <c>RunAbot</c>.
        /// </summary>
        private static void UseAbot()
        {
            var settings = new Abot.Poco.CrawlConfiguration
            {
                CrawlTimeoutSeconds  = 100,
                MaxConcurrentThreads = 10,
                MaxPagesToCrawl      = 1000,
                UserAgentString      = "abot v1.0 http://code.google.com/p/abot"
            };

            var politeCrawler = new Abot.Crawler.PoliteWebCrawler(settings);

            // Handlers are attached before the crawl is started by RunAbot.
            politeCrawler.PageCrawlStartingAsync  += Crawler_PageCrawlStartingAsync;
            politeCrawler.PageCrawlCompletedAsync += Crawler_PageCrawlCompletedAsync;

            RunAbot(politeCrawler);
        }