Exemplo n.º 1
0
        /// <summary>
        /// Downloads the resource at <c>this.uri</c> into a folder below
        /// <c>SpiderController.DownloadFolder</c> that mirrors the directory part of the URL path.
        /// </summary>
        /// <remarks>
        /// Sleeps for <c>SpiderController.IdleTime()</c> first, then blocks until the transfer has
        /// finished. A <see cref="WebException"/> raised by the transfer is written to the log
        /// instead of being propagated, so a failed download does not abort the caller.
        /// </remarks>
        public void Download()
        {
            Thread.Sleep(SpiderController.IdleTime());

            string filename = Path.GetFileName(this.uri.LocalPath);

            UriBuilder uri = new UriBuilder(this.uri.AbsoluteUri);

            // GetDirectoryName returns null for a root path; treat that as "no sub-folder"
            // rather than letting the null flow into the string manipulation below.
            string directory = Path.GetDirectoryName(uri.Path) ?? string.Empty;
            string path      = SpiderController.DownloadFolder + directory.Replace('/', '\\');

            if (!path.EndsWith("\\"))
            {
                path += "\\";
            }

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(path);

            using (WebClient client = new WebClient())
            {
                try
                {
                    // Synchronous on purpose: the client must stay alive until the transfer is
                    // complete, and the success entry must only be logged once the file exists.
                    client.DownloadFile(this.uri, path + filename);
                    Log.DownloadedFile(this.uri.AbsoluteUri);
                }
                catch (WebException ex)
                {
                    Log.WriteToLog("Download failed: " + ex.Message, this.uri.AbsoluteUri);
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Drains <c>URLQueue</c>, launching <c>FetchNewPage</c> through <c>threadManager</c> for
        /// each dequeued URL that <c>SpiderController.ShouldContinue</c> accepts.
        /// </summary>
        /// <remarks>
        /// Stops when the queue is empty or when <c>threadManager.ThreadList</c> has reached
        /// <c>SpiderController.MaxThreads</c>, then calls <c>threadManager.KillThread()</c>.
        /// </remarks>
        private void LoadNextURL()
        {
            while (threadManager.ThreadList.Count < SpiderController.MaxThreads)
            {
                Url url;

                // Test for emptiness and dequeue under the same lock so that a concurrent
                // consumer cannot empty the queue in between; when nothing is left we stop
                // instead of carrying on with a placeholder Url.
                lock (this.URLQueue)
                {
                    if (this.URLQueue.Count == 0)
                    {
                        break;
                    }

                    url = this.URLQueue.Dequeue();
                }

                if (SpiderController.ShouldContinue(url.depth))
                {
                    // Pause between launches for the configured idle time.
                    Thread.Sleep(SpiderController.IdleTime());
                    threadManager.LaunchThread(FetchNewPage, url);
                }
            }

            threadManager.KillThread();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Decides what to do with a discovered URL: skip it, download it, or queue it for crawling.
        /// </summary>
        /// <param name="url">The URL to classify; its <c>uri</c> must be an absolute URI.</param>
        /// <remarks>
        /// The checks run in this order: already seen, whitelist (when enabled), excluded domain,
        /// excluded file type, download candidate. Anything that passes all of them is enqueued.
        /// Only downloaded and enqueued URLs are recorded in <c>UrlsSeen</c>. Every access to
        /// <c>UrlsSeen</c> is made while holding the <c>URLQueue</c> lock.
        /// </remarks>
        private void HandleURL(Url url)
        {
            // Invariant lower-casing keeps the de-duplication key independent of the current
            // culture (e.g. Turkish dotted/dotless 'i').
            string link = url.uri.AbsoluteUri.ToLowerInvariant();

            bool alreadySeen;

            lock (this.URLQueue)
            {
                alreadySeen = this.UrlsSeen.Contains(link);
            }

            if (alreadySeen)
            {
                Log.SkippedThisQueuedURL(link);
            }
            else if (SpiderController.UseWhiteList == true && !SpiderController.IsWhiteListedDomain(url.uri.Authority))
            {
                Log.WriteToLog("URL domain not on whitelist", link);
            }
            else if (SpiderController.IsExcludedDomain(link))
            {
                Log.SkippedThisExcludedURL(link);
            }
            else if (SpiderController.IsExcludedFileType(link))
            {
                Log.SkippedThisExcludedFileType(link);
            }
            else if (SpiderController.ShouldDownload(link))
            {
                // Record the URL under the lock, but release it before the download so other
                // threads are not blocked for the duration of the transfer.
                lock (this.URLQueue)
                {
                    this.UrlsSeen.Add(link);
                }

                url.Download();
            }
            else
            {
                lock (this.URLQueue)
                {
                    this.UrlsSeen.Add(link);
                    this.URLQueue.Enqueue(url);
                }

                Log.EngueuedURL(link);
            }
        }