/// <summary>
/// Runs <paramref name="initialJob"/> and every follow-up job it produces, one at a
/// time, each with a freshly created <see cref="HttpClient"/>.
/// </summary>
/// <remarks>
/// For each job the download step (<c>ExecuteDownload</c>) is awaited first, then
/// <c>Execute</c> is called and the jobs it returns are pushed onto the pending stack.
/// Failures in <c>Execute</c> are reported through <c>FailedExecute</c>; failures in the
/// download step are reported through <c>FailedDownload</c>, except
/// <see cref="WebException"/>, which is rethrown. Because the method is
/// <c>async void</c>, a rethrown exception cannot be observed by awaiting the call.
/// </remarks>
/// <param name="initialJob">The first job to run.</param>
public async void TestJob(IThreadedWebClientJob initialJob)
        {
            // Pending work. This is a stack, so the most recently produced job runs next.
            Stack<IThreadedWebClientJob> pendingJobs = new Stack<IThreadedWebClientJob>();
            pendingJobs.Push(initialJob);

            while (pendingJobs.Count > 0)
            {
                IThreadedWebClientJob job = pendingJobs.Pop();

                HttpClientHandler handler = new HttpClientHandler()
                {
                    AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
                };

                // Disposing the client also disposes the handler it was constructed with.
                using (HttpClient client = new HttpClient(handler))
                {
                    // Add default headers to the client to simulate
                    // a real browser
                    ScraperHelper.AddHeadersToClient(client);

                    try
                    {
                        // Dispose the token source when the job is finished so the timer
                        // armed by CancelAfter is released instead of lingering per job.
                        using (CancellationTokenSource cancelToken = new CancellationTokenSource())
                        {
                            // One hour: the original used new TimeSpan(0, 1, 0, 0), whose
                            // arguments are (days, hours, minutes, seconds).
                            // NOTE(review): if one minute was intended, change this value.
                            cancelToken.CancelAfter(TimeSpan.FromHours(1));
                            await job.ExecuteDownload(client, cancelToken.Token);

                            try
                            {
                                List<IThreadedWebClientJob> newJobs = job.Execute();
                                foreach (IThreadedWebClientJob followUp in newJobs)
                                {
                                    pendingJobs.Push(followUp);
                                }
                            }
                            catch (Exception exp)
                            {
                                job.FailedExecute(exp);
                            }
                        }
                    }
                    // WebException may be a proxy error; let it propagate untouched
                    catch (WebException)
                    {
                        throw;
                    }
                    // Any other error is reported to the job as a failed download
                    catch (Exception exp)
                    {
                        job.FailedDownload(exp);
                    }
                }
            }
        }
// Example #2
// 0
        /// <summary>
        /// Worker loop for one download thread. Until <c>stopThread</c> is set it dequeues
        /// jobs, optionally attaches a proxy, runs each job's download step with a time
        /// limit, and places finished jobs on <c>doneJobQueue</c>.
        /// </summary>
        /// <remarks>
        /// The loop pauses while the done-queue is at <c>maxDoneQueue</c> and stays paused
        /// until that queue has drained to empty. A job that fails with
        /// <see cref="HttpRequestException"/> has its proxy reported through
        /// <c>HandleBadProxy</c> and is handed to <c>HandleBadJob</c>; any other failure is
        /// reported to the job via <c>FailedDownload</c> and then handed to
        /// <c>HandleBadJob</c>.
        /// </remarks>
        /// <param name="threadIndexObject">
        /// Boxed <see cref="int"/> index of this worker; used in log messages and passed to
        /// the status events.
        /// </param>
        private async void RunWebDownload(object threadIndexObject)
        {
            int threadIndex = (int)threadIndexObject;

            logger.Trace("Download Thread {0} started", threadIndex);

            // Run until asked to shut down
            while (!this.stopThread)
            {
                if (this.DownloaderThreadStatus != null)
                {
                    this.DownloaderThreadStatus(this, threadIndex, true);
                }

                // Done jobs queue full, halt download work
                if (this.doneJobQueue.Count >= this.maxDoneQueue)
                {
                    // Guarded like every other invocation of this event: this code runs
                    // outside the try block, so an unsubscribed event must not throw here.
                    if (this.DownloaderThreadJobChanged != null)
                    {
                        this.DownloaderThreadJobChanged(this, threadIndex, null);
                    }
                    this.waitingForEmpty = true;
                    await Task.Delay(50);

                    continue;
                }
                if (this.waitingForEmpty)
                {
                    if (this.DownloaderThreadJobChanged != null)
                    {
                        this.DownloaderThreadJobChanged(this, threadIndex, null);
                    }
                    // If the done jobs queue is empty, resume work
                    if (this.doneJobQueue.Count == 0)
                    {
                        this.waitingForEmpty = false;
                    }
                    await Task.Delay(50);

                    continue;
                }

                // A proxy is looked up for every job. When proxies are required but none
                // is available, the worker either shuts everything down or requeues the
                // job and retries, depending on dieOnProxiesLeft.
                WebProxyHolder        proxy;
                IThreadedWebClientJob job;
                try
                {
                    // Dequeue a job.
                    job = this.Dequeue();
                    if (job != null)
                    {
                        logger.Trace("Downloader {0} got a new job {1}", threadIndex, job);
                        lock (this.jobsInProcessLocker)
                        {
                            this.jobsInProcess++;
                            if (this.JobProcessingChanged != null)
                            {
                                this.JobProcessingChanged(this, this.jobsInProcess);
                            }
                        }

                        if (this.DownloaderThreadJobChanged != null)
                        {
                            this.DownloaderThreadJobChanged(this, threadIndex, job);
                        }

                        WebProxy webProxy = this.HandleAddProxy(out proxy);
                        if (webProxy == null && this.useProxies)
                        {
                            if (this.dieOnProxiesLeft)
                            {
                                logger.Error("Download {0} was unable to find any proxies, shutting down.", threadIndex);
                                // No proxies to use, and we have to use proxies, kill all downloads.
                                this.stopThread = true;
                                // Ensure worker is not stuck. Written under the same lock as
                                // every other update of the counter.
                                lock (this.jobsInProcessLocker)
                                {
                                    this.jobsInProcess = 0;
                                }
                                // NOTE(review): returning here skips the final
                                // DownloaderThreadStatus(..., false) notification below and
                                // drops the dequeued job - confirm this is intended.
                                return;
                            }
                            else
                            {
                                logger.Trace("Download {0} was unable to find any proxies, requeueing the job.", threadIndex);
                                // We just keep running until we get proxies again.
                                // Requeue the job
                                this.Enqueue(job);
                                // Count down the working jobs
                                lock (this.jobsInProcessLocker)
                                {
                                    this.jobsInProcess--;
                                    if (this.JobProcessingChanged != null)
                                    {
                                        this.JobProcessingChanged(this, this.jobsInProcess);
                                    }
                                }
                                await Task.Delay(50);

                                continue;
                            }
                        }

                        if (webProxy != null && this.useProxies)
                        {
                            logger.Trace("Downloader {0}, got proxy {1}:{2}", threadIndex, webProxy.Address.Host, webProxy.Address.Port);
                        }

                        try
                        {
                            bool run = true;
                            ITypedTask typedTask = job as ITypedTask;
                            if (typedTask != null)
                            {
                                logger.Trace("Downloader {0}'s job {1} requires verification.", threadIndex, job);
                                run = typedTask.Verify();
                            }

                            if (run)
                            {
                                // Build the default handler only when the job does not bring
                                // its own, so no handler is created and dropped undisposed.
                                HttpClientHandler handler;
                                IHttpClientHandlerTask handlerTask = job as IHttpClientHandlerTask;
                                if (handlerTask != null)
                                {
                                    logger.Trace("Downloader {0}'s job {1} have custom HttpClient.", threadIndex, job);
                                    handler = handlerTask.GetHttpClient(webProxy);
                                }
                                else
                                {
                                    handler = new HttpClientHandler()
                                    {
                                        UseCookies = false,
                                        Proxy      = webProxy,
                                        UseProxy   = this.useProxies,
                                        AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
                                    };
                                }

                                // Disposing the client also disposes the handler; disposing the
                                // token source releases the timer armed by CancelAfter.
                                using (HttpClient client = new HttpClient(handler))
                                using (CancellationTokenSource cancelToken = new CancellationTokenSource())
                                {
                                    // Add default headers to the client to simulate
                                    // a real browser
                                    ScraperHelper.AddHeadersToClient(client);

                                    logger.Trace("Downloader {0} is running job {1}.", threadIndex, job);
                                    // Default limit is 30 seconds
                                    TimeSpan timelimit = TimeSpan.FromSeconds(30);
                                    // Some jobs might require a bigger time limit
                                    IThreadedWebClientLongJob longJob = job as IThreadedWebClientLongJob;
                                    if (longJob != null)
                                    {
                                        logger.Debug("Fetching timelimit for downloader {0}'s job {1}.", threadIndex,
                                                     job);
                                        timelimit = longJob.GetTimeOut();
                                        logger.Debug("Downloader {0}'s job {1} have set a custome time limit to {2}.",
                                                     threadIndex, job, timelimit);
                                    }

                                    cancelToken.CancelAfter(timelimit);
                                    logger.Trace("Downloader {0} is executing job {1}.", threadIndex, job);
                                    await job.ExecuteDownload(client, cancelToken.Token);

                                    logger.Trace("Downloader {0} is done executing job {1}.", threadIndex, job);

                                    doneJobQueue.Enqueue(job);
                                    if (this.JobDoneInQueueChanged != null)
                                    {
                                        this.JobDoneInQueueChanged(this, this.doneJobQueue.Count);
                                    }

                                    // Vote up good proxy, if it has bad votes
                                    if (proxy != null && this.badProxy.ContainsKey(proxy))
                                    {
                                        logger.Trace("Proxy {0} was good, up voting it.", proxy);
                                        if (this.badProxy[proxy] > 0)
                                        {
                                            this.badProxy[proxy]--;
                                        }
                                    }
                                }
                            }
                            else
                            {
                                logger.Trace("Downloader {0}'s job {1} did not verify, reenqueueing the job.", threadIndex, job);
                                // Requeue the job
                                this.Enqueue(job);
                                await Task.Delay(10);
                            }
                        }
                        // HttpRequestException may be a proxy error
                        catch (HttpRequestException exp)
                        {
                            logger.Warn(exp, "Got web exception while executing downloader {0}'s job {1}.", threadIndex, job);
                            // Handle bad proxy voting
                            this.HandleBadProxy(threadIndex, proxy);
                            // Requeue the job
                            this.HandleBadJob(job, threadIndex);
                        }
                        // Any other error
                        catch (Exception exp)
                        {
                            logger.Error(exp, "Got unknown exception while executing downloader {0}'s job {1}.", threadIndex, job);
                            try
                            {
                                logger.Trace(exp, "Downloader {0} is running failed download for job {1}: {2}", threadIndex, job, exp.Message);
                                job.FailedDownload(exp);
                            }
                            catch (Exception failedDownloadError)
                            {
                                // Nothing more can be done for this job, but record the
                                // failure instead of hiding it.
                                logger.Error(failedDownloadError, "Downloader {0}'s job {1} threw while handling its failed download.", threadIndex, job);
                            }
                            this.HandleBadJob(job, threadIndex);
                        }

                        // NOTE(review): this decrement is skipped when an exception escapes
                        // to the outer catch blocks (e.g. from HandleAddProxy or
                        // HandleBadJob) - confirm whether the counter can drift.
                        lock (this.jobsInProcessLocker)
                        {
                            this.jobsInProcess--;
                            if (this.JobProcessingChanged != null)
                            {
                                this.JobProcessingChanged(this, this.jobsInProcess);
                            }
                        }
                    }
                    else
                    {
                        // Currently no jobs to do
                        if (this.DownloaderThreadJobChanged != null)
                        {
                            this.DownloaderThreadJobChanged(this, threadIndex, null);
                        }
                        await Task.Delay(100);
                    }
                }
                catch (HttpRequestException exp)
                {
                    // Treated as "no good proxies left": stop this worker
                    logger.Warn(exp, "Downloader {0} is stopping after a web exception outside job execution.", threadIndex);
                    break;
                }
                catch (Exception exp)
                {
                    // Keep the worker alive, but make the failure visible
                    logger.Error(exp, "Downloader {0} hit an unexpected error in its work loop.", threadIndex);
                }
            }
            // Download thread shutdown
            if (this.DownloaderThreadStatus != null)
            {
                this.DownloaderThreadStatus(this, threadIndex, false);
            }
        }