/// <summary>
/// Crawls the queued seeds one by one and records each crawled host in the graph.
/// </summary>
/// <remarks>
/// The graph is reconstructed from the configured output path and a queue is
/// built from it and the seeds. For every dequeued seed the policy and sitemap
/// are retrieved; a seed that already exists in the graph is skipped, otherwise
/// it is added as a vertex, crawled and marked as discovered. Any exception
/// raised while processing a seed is logged and the seed is marked as
/// do-not-process, after which the loop moves on to the next seed.
/// Per-seed housekeeping runs in a <c>finally</c> block; the two file-system
/// deletions there are best-effort so that a stray I/O error cannot abort the
/// crawl or skip persisting the graph and the site.
/// The loop stops early when cancellation has been requested.
/// </remarks>
public void Crawl()
{
    this.RaiseStatusEvent("Crawling started");
    this.RaiseProgressEvent(0.0);

    this.graph = Graph.Reconstruct(this.configuration.OutputPath);
    var queue = Crawler.CreateQueue(this.graph, this.seeds);

    while (queue.Count != 0)
    {
        var seed = queue.Dequeue().Value;

        if (this.cancellationToken.IsCancellationRequested)
        {
            Trace.TraceInformation("Crawl cancel requested");
            break;
        }

        var site = new Site(seed, this.configuration);

        try
        {
            string info = string.Format("Crawling {0}", seed.Host);
            Trace.TraceInformation(info);
            this.RaiseStatusEvent(info);

            var policy = this.RetrievePolicy(site);
            this.RetrieveSitemap(site, policy);

            if (this.graph.Exists(seed))
            {
                Trace.TraceInformation(string.Format("Seed {0} exists in the graph", seed.Host));

                // TO-DO: Rediscover in case the seed info is outdated
                continue;
            }

            var attributes = new Dictionary<string, string>()
            {
                { "robots", policy.IsRobots.ToString() },
                { "sitemap", policy.IsSitemap.ToString() }
            };

            this.graph.AddVertex(seed, attributes);
            this.Start(seed, site, policy);
            this.graph.MarkAsDiscovered(seed);

            info = string.Format("Crawling {0} completed", seed.Host);
            Trace.TraceInformation(info);
            this.RaiseStatusEvent(info);

            Crawler.UpdateQueue(this.graph, queue);
        }
        catch (Exception exception)
        {
            // Format the exception itself rather than only its Message so the
            // log keeps the exception type and stack trace; the type is what
            // distinguishes "not implemented yet" from a genuine bug.
            Trace.TraceError(string.Format("Failed to crawl {0}. \n{1}", seed.Host, exception));

            // Avoid crawling this host until a bug fix or
            // functionality implemented (should throw NotImplementedException)
            this.graph.MarkAsDoNotProcess(seed);
        }
        finally
        {
            // Runs for every seed, including the "already exists" early
            // continue and the failure path above.
            if (!this.configuration.SaveRobotsFile)
            {
                DeleteBestEffort(site.RobotsPath, File.Delete);
            }

            if (this.configuration.SerializeGraph)
            {
                // Saving vertices under separate paths
                this.graph.Persist(this.configuration.OutputPath, true);
            }

            // Removing the download path for html files, including its content
            if (site.Configuration.DeleteHtmlAfterScrape && Directory.Exists(site.HtmlDownloadPath))
            {
                DeleteBestEffort(site.HtmlDownloadPath, path => Directory.Delete(path, true));
            }

            site.Serialize();
        }
    }

    if (this.configuration.SerializeGraph)
    {
        // Persisting graph as a whole
        this.graph.Serialize(this.configuration.GraphFilePath);
    }

    this.RaiseStatusEvent("Crawling completed");
    this.RaiseProgressEvent(1.0);
}

/// <summary>
/// Runs a file-system deletion and logs, instead of propagating, the I/O and
/// access errors it may raise.
/// </summary>
/// <param name="path">Path handed to <paramref name="delete"/> and named in the log entry.</param>
/// <param name="delete">Deletion to perform on <paramref name="path"/>.</param>
private static void DeleteBestEffort(string path, Action<string> delete)
{
    try
    {
        delete(path);
    }
    catch (IOException exception)
    {
        // Also covers DirectoryNotFoundException, which derives from IOException.
        Trace.TraceWarning(string.Format("Failed to delete {0}. {1}", path, exception.Message));
    }
    catch (UnauthorizedAccessException exception)
    {
        Trace.TraceWarning(string.Format("Failed to delete {0}. {1}", path, exception.Message));
    }
}