コード例 #1
0
    /// <summary>
    /// Completion callback for a data download. Raises <c>DownloadCompleted</c> when the
    /// request finished without cancellation or error, raises <c>DownloadFailed</c>
    /// otherwise, and finally calls <c>TriggerJob</c>.
    /// </summary>
    /// <param name="sender">The object that reported the completion; not used here.</param>
    /// <param name="e">
    /// Completion details: the cancellation flag, the error (if any), the requested URI
    /// and the downloaded payload.
    /// </param>
    private void DownloadDataCallback(object sender, DownloadDataCompletedEventArgs e)
    {
        try
        {
            // Success means the request was neither cancelled nor ended with an error.
            if (!e.Cancelled && e.Error == null)
            {
                // NOTE(review): `result` is lower-case while the other members read from `e`
                // are PascalCase — confirm against the event-args declaration.
                var args = new DownloadedEventArgs {
                    uri = e.Uri, data = (byte[])e.result
                };
                DownloadCompleted(this, args);
            }
            else
            {
                var args = new DownloadFailedEventArgs {
                    uri = e.Uri, error = e.Error
                };
                DownloadFailed(this, args);
            }
        }
        catch (Exception err)
        {
            // Anything thrown while building the arguments or while running the handlers
            // above is reported through DownloadFailed instead of escaping the callback.
            // NOTE(review): if DownloadCompleted/DownloadFailed are events, invoking them
            // directly throws when nobody is subscribed — consider `?.Invoke`.
            var args = new DownloadFailedEventArgs {
                uri = e.Uri, error = err
            };
            DownloadFailed(this, args);
        }

        // Runs after either outcome has been reported.
        TriggerJob();
    }
コード例 #2
0
        /// <summary>
        /// Runs the crawl: enqueues <c>InitialUrl</c>, then repeatedly takes URLs from
        /// <c>pendingUrls</c> and starts a download for each, raising
        /// <c>BeforeDownload</c>, <c>Downloading</c> and <c>Downloaded</c> along the way.
        /// When no work is left it raises <c>CrawlerCompleted</c> with the success and
        /// failure counts.
        /// </summary>
        /// <returns>
        /// A task that completes after all started downloads have finished and
        /// <c>CrawlerCompleted</c> has been raised.
        /// </returns>
        public async Task Crawl()
        {
            List<Task> downloadTask = new List<Task>();

            pendingUrls.Enqueue(InitialUrl);

            while (pendingUrls.Count > 0)
            {
                // DownloadingUrls is mutated under urlLock by the continuations below,
                // so take the same lock to read its size.
                int downloadingCount;
                lock (urlLock)
                {
                    downloadingCount = DownloadingUrls.Count;
                }

                // Stop starting new downloads once the overall budget is exceeded.
                // NOTE(review): '>' lets the total reach MaxCount + 1 before stopping —
                // confirm whether '>=' was intended.
                if (CompletedUrls.Count + downloadingCount > MaxCount)
                {
                    break;
                }

                // Limit concurrency. The limit is checked BEFORE dequeuing so that a URL
                // is never taken off the queue and then dropped while waiting for a slot.
                if (MaxParallel > 0 && downloadingCount >= MaxParallel)
                {
                    await Task.Delay(100);

                    continue;
                }

                if (!pendingUrls.TryDequeue(out var current))
                {
                    continue;
                }

                // Download-starting event; subscribers may veto this URL.
                var downEvent = new BeforeDownloadEventArgs()
                {
                    Url       = current,
                    Cancelled = false
                };
                BeforeDownload?.Invoke(this, downEvent);
                if (!downEvent.Cancelled)
                {
                    lock (urlLock)
                    {
                        DownloadingUrls.Add(current);
                    }

                    // NOTE(review): the second argument looks like a sequential name for
                    // the downloaded item — confirm against DownLoad's declaration.
                    var down = DownLoad(current, nameIndex++.ToString())
                               .ContinueWith(task =>
                    {
                        // A faulted OR cancelled download is a failure. Reading Result on
                        // a cancelled task would throw, so it must not reach the success path.
                        if (task.Status != TaskStatus.RanToCompletion)
                        {
                            FailedUrls.Add(current);
                            lock (urlLock)
                            {
                                DownloadingUrls.Remove(current);
                            }
                            return;
                        }

                        // Record completion before leaving the downloading set so the
                        // combined count checked by the loop never dips in between.
                        CompletedUrls.Add(current);

                        lock (urlLock)
                        {
                            DownloadingUrls.Remove(current);
                        }

                        var ev = new DownloadedEventArgs()
                        {
                            Html             = task.Result,
                            OverrideUrlParse = false,
                            Url              = current
                        };
                        Downloaded?.Invoke(this, ev);

                        // A subscriber may override the default URL parsing by supplying
                        // the next URLs itself.
                        if (ev.OverrideUrlParse && ev.NextUrls != null)
                        {
                            foreach (var u in ev.NextUrls)
                            {
                                AddPending(u);
                            }
                        }
                        else
                        {
                            // Default behaviour: parse the page and enqueue the links found.
                            Parse(task.Result, current);
                        }
                    });
                    downloadTask.Add(down);
                    Downloading?.Invoke(this, new DownloadingEventArgs()
                    {
                        Url = current
                    });
                }

                // Nothing is queued right now, but downloads still in flight may add more
                // URLs. Wait for them one at a time until either new work shows up or
                // nothing is running. The Count guard also keeps Task.WhenAny from being
                // called with an empty list, which throws ArgumentException.
                while (pendingUrls.Count <= 0 && downloadTask.Count > 0)
                {
                    await Task.WhenAny(downloadTask);

                    // Completed tasks (including faulted continuations) are pruned here
                    // and are therefore not observed by the final WhenAll.
                    downloadTask.RemoveAll(t => t.IsCompleted);
                }
            }

            await Task.WhenAll(downloadTask);

            this.CrawlerCompleted?.Invoke(this, new CrawlerCompletedEventArgs()
            {
                FailedCount  = FailedUrls.Count,
                SuccessCount = CompletedUrls.Count
            });
        }