/// <summary>
/// Downloads the resource at <c>this.uri</c> into a sub-folder of
/// <c>SpiderController.DownloadFolder</c> that mirrors the directory part of the URL path.
/// </summary>
/// <remarks>
/// The calling thread first sleeps for <c>SpiderController.IdleTime()</c>. The transfer itself
/// is started asynchronously; the outcome is logged from the completion callback, so
/// <c>Log.DownloadedFile</c> is only written once the transfer has actually succeeded and
/// failures are reported through <c>Log.WriteToLog</c> instead of being lost.
/// </remarks>
public void Download()
{
    Thread.Sleep(SpiderController.IdleTime());

    string filename = Path.GetFileName(this.uri.LocalPath);

    // Path.GetDirectoryName returns null for a root path, so fall back to an empty
    // relative directory instead of passing null on.
    string directoryPart = Path.GetDirectoryName(this.uri.AbsolutePath) ?? string.Empty;

    // Local folders are built with Windows-style separators, as before.
    string path = SpiderController.DownloadFolder + directoryPart.Replace("/", "\\");
    if (!path.EndsWith("\\", StringComparison.Ordinal))
    {
        path += "\\";
    }

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(path);

    string source = this.uri.AbsoluteUri;
    string target = path + filename;

    // The client must outlive this method: it is disposed from the completion
    // callback rather than by a using block that would end while the transfer
    // is still in flight.
    WebClient client = new WebClient();
    client.DownloadFileCompleted += (sender, e) =>
    {
        try
        {
            if (e.Cancelled)
            {
                Log.WriteToLog("Download cancelled", source);
            }
            else if (e.Error != null)
            {
                Log.WriteToLog("Download failed: " + e.Error.Message, source);
            }
            else
            {
                Log.DownloadedFile(source);
            }
        }
        finally
        {
            client.Dispose();
        }
    };

    try
    {
        client.DownloadFileAsync(this.uri, target);
    }
    catch
    {
        // The completion callback will not run if the request could not be started.
        client.Dispose();
        throw;
    }
}
/// <summary>
/// Drains <c>URLQueue</c>, launching a <c>FetchNewPage</c> thread for each dequeued URL whose
/// depth passes <c>SpiderController.ShouldContinue</c>, until the queue is empty or the
/// number of threads reaches <c>SpiderController.MaxThreads</c>. Finally calls
/// <c>threadManager.KillThread()</c>.
/// </summary>
private void LoadNextURL()
{
    while (threadManager.ThreadList.Count < SpiderController.MaxThreads)
    {
        Url url = default(Url);
        bool dequeued = false;

        // The emptiness check and the dequeue must happen under the same lock;
        // otherwise another thread can drain the queue in between.
        lock (this.URLQueue)
        {
            if (this.URLQueue.Count > 0)
            {
                url = this.URLQueue.Dequeue();
                dequeued = true;
            }
        }

        if (!dequeued)
        {
            // Nothing left to process: never hand a placeholder Url to a fetch thread.
            break;
        }

        if (SpiderController.ShouldContinue(url.depth))
        {
            Thread.Sleep(SpiderController.IdleTime());
            threadManager.LaunchThread(FetchNewPage, url);
        }
    }

    threadManager.KillThread();
}
/// <summary>
/// Decides what to do with a discovered URL: skip it (already seen, not whitelisted,
/// excluded domain, excluded file type), download it, or enqueue it for crawling.
/// Each outcome is written to the log.
/// </summary>
/// <param name="url">The URL to classify; its absolute URI, lower-cased, is the dedupe key.</param>
/// <remarks>
/// Every access to <c>UrlsSeen</c> happens under <c>lock (URLQueue)</c>, and the final
/// "not seen yet, so record it" step is a single atomic check-and-add, so two threads
/// cannot both claim the same URL.
/// </remarks>
private void HandleURL(Url url)
{
    // Invariant lower-casing keeps the dedupe key independent of the thread's culture.
    string link = url.uri.AbsoluteUri.ToLowerInvariant();

    bool alreadySeen;
    lock (this.URLQueue)
    {
        alreadySeen = this.UrlsSeen.Contains(link);
    }

    if (alreadySeen)
    {
        Log.SkippedThisQueuedURL(link);
    }
    else if (SpiderController.UseWhiteList == true && !SpiderController.IsWhiteListedDomain(url.uri.Authority))
    {
        Log.WriteToLog("URL domain not on whitelist", link);
    }
    else if (SpiderController.IsExcludedDomain(link))
    {
        Log.SkippedThisExcludedURL(link);
    }
    else if (SpiderController.IsExcludedFileType(link))
    {
        Log.SkippedThisExcludedFileType(link);
    }
    else
    {
        bool download = SpiderController.ShouldDownload(link);
        bool claimed = false;

        lock (this.URLQueue)
        {
            // Re-check under the lock: another thread may have recorded this URL
            // while the filters above were running.
            if (!this.UrlsSeen.Contains(link))
            {
                this.UrlsSeen.Add(link);
                claimed = true;

                if (!download)
                {
                    this.URLQueue.Enqueue(url);
                }
            }
        }

        if (!claimed)
        {
            Log.SkippedThisQueuedURL(link);
        }
        else if (download)
        {
            // Downloading sleeps and performs I/O, so it runs outside the lock.
            url.Download();
        }
        else
        {
            Log.EngueuedURL(link);
        }
    }
}