Esempio n. 1
0
        /// <summary>
        /// Handles completion of a single download: updates the grid row for the URL and,
        /// on success, either parses the page for new URLs or stores the binary payload.
        /// </summary>
        /// <param name="sender">The <c>HtmlDownloader</c> that raised the event; its <c>Url</c> identifies the grid row.</param>
        /// <param name="e">Completion details: cancel flag, exception, result and result type.</param>
        void download_OnDownloadCompleted(object sender, DownloadCompletedEventArgs e)
        {
            // Direct cast: a wrong sender type fails with a descriptive InvalidCastException
            // instead of an anonymous NullReferenceException on the next member access.
            string url = ((HtmlDownloader)sender).Url;

            if (e.Canceled)
            {
                UpdateGridView(url, SpiderState.取消);
                return;
            }

            if (e.Exception != null)
            {
                UpdateGridView(url, SpiderState.失败);
                // The URL and the message are passed as separate format arguments. The previous
                // code appended the URL to the format string and supplied only one argument for
                // two placeholders, which made string.Format throw a FormatException.
                MyConsole.AppendLine(string.Format("下载Url:{0}失败,异常原因:{1}", url, e.Exception.Message));
                return;
            }

            if (e.ResultType == typeof(String))
            {
                // Text result: extract links from the page and queue them.
                IHtmlParser   parser  = ((IHtmlSpider)AttachControl).ParserEngin;
                List <string> newUrls = parser.ParseUrl(e.Result.ToString(), _baseForlder, url);
                UrlManager.AddNewUrls(url, newUrls.ToArray());
            }
            else
            {
                // Non-text result: persist the bytes as a file named after the URL's last segment.
                // A completion may carry no byte[] payload (the result is null); in that case
                // there is nothing to write, and calling Save would fail on the null buffer.
                // NOTE(review): the file is still counted as downloaded when no payload is
                // present - confirm this matches how _downloadFileCount is reported.
                byte[] content = e.Result as byte[];
                if (content != null)
                {
                    ContentManger.Save(txtBasePath.Text, content, SaveType.文件, Path.GetFileName(url));
                }
                _downloadFileCount++;
            }

            UpdateGridView(url, SpiderState.已完成);
        }
Esempio n. 2
0
        /// <summary>
        /// Saves downloaded content using the requested save type.
        /// </summary>
        /// <remarks>
        /// Only <c>SaveType.文件</c> performs any work: the buffer is written to
        /// <paramref name="forlder"/> combined with <paramref name="fileName"/>, replacing any
        /// existing file. All other save types are currently no-ops.
        /// </remarks>
        /// <param name="forlder">Root directory the file is written under.</param>
        /// <param name="buffer">Content bytes to write.</param>
        /// <param name="saveType">How the content should be stored.</param>
        /// <param name="fileName">Name of the file, relative to <paramref name="forlder"/>.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="buffer"/> is null and <paramref name="saveType"/> is <c>SaveType.文件</c>.
        /// </exception>
        public static void Save(string forlder, byte[] buffer, SaveType saveType, string fileName)
        {
            switch (saveType)
            {
            case SaveType.文本内容:
                break;

            case SaveType.文件:
                // Validate before touching the file system: FileMode.Create truncates an existing
                // file, so failing afterwards on a null buffer would leave an empty file behind.
                if (buffer == null)
                {
                    throw new ArgumentNullException("buffer");
                }

                string targetPath = Path.Combine(forlder, fileName);
                MyConsole.AppendLine(string.Format("正在写入{0}到本地目录{1},写入时间:{2}", fileName, targetPath, DateTime.Now));
                CheckForlder(targetPath);
                using (FileStream output = new FileStream(targetPath, FileMode.Create))
                {
                    output.Write(buffer, 0, buffer.Length);
                }
                break;

            case SaveType.表格:
                break;

            case SaveType.数据库:
                break;

            default:
                break;
            }
        }
Esempio n. 3
0
 /// <summary>
 /// Forwards the client's file-download completion to <c>OnDownloadCompleted</c> subscribers.
 /// </summary>
 /// <remarks>
 /// The forwarded arguments carry a null result and a null result type; only the cancel
 /// flag and the error are passed through. An exception thrown by a subscriber is logged
 /// and reported through <c>OnDownloadErrored</c>.
 /// </remarks>
 /// <param name="sender">The object that raised the completion event.</param>
 /// <param name="e">Completion details supplied by the client.</param>
 private void _client_DownloadFileCompleted(object sender, System.ComponentModel.AsyncCompletedEventArgs e)
 {
     try
     {
         // Snapshot the delegate so an unsubscribe between the null check and the call
         // cannot cause a NullReferenceException.
         var completed = OnDownloadCompleted;
         if (completed != null)
         {
             completed(this, new DownloadCompletedEventArgs(null, null, e.Cancelled, e.Error));
         }
     }
     catch (Exception ex)
     {
         MyConsole.AppendLine(string.Format("处理下载完成异常:{0}!时间:{1}", ex.Message, DateTime.Now));

         var errored = OnDownloadErrored;
         if (errored != null)
         {
             // NOTE(review): the completed event is raised with `this` as sender but the
             // errored event forwards the original sender - confirm this is intentional.
             errored(sender, new DownloadErroredEventArgs(_url, ex));
         }
     }
 }
Esempio n. 4
0
 /// <summary>
 /// Forwards the client's data-download completion to <c>OnDownloadCompleted</c> subscribers.
 /// </summary>
 /// <remarks>
 /// On success the downloaded bytes and their runtime type are forwarded. When the download
 /// was cancelled or failed, a null result and null result type are forwarded together with
 /// the cancel flag and the error. An exception thrown by a subscriber is logged and
 /// reported through <c>OnDownloadErrored</c>.
 /// </remarks>
 /// <param name="sender">The object that raised the completion event.</param>
 /// <param name="e">Completion details supplied by the client.</param>
 private void _client_DownloadDataCompleted(object sender, DownloadDataCompletedEventArgs e)
 {
     try
     {
         // Snapshot the delegate so an unsubscribe between the null check and the call
         // cannot cause a NullReferenceException.
         var completed = OnDownloadCompleted;
         if (completed != null)
         {
             // e.Result throws when the operation was cancelled or ended with an error, so it
             // may only be read after a successful download. Reading it unconditionally sent
             // every cancelled or failed download to the catch block below, and subscribers
             // never received the Canceled / Exception notification.
             bool succeeded = !e.Cancelled && e.Error == null;
             byte[] result = succeeded ? e.Result : null;
             Type resultType = result != null ? result.GetType() : null;

             completed(this, new DownloadCompletedEventArgs(result, resultType, e.Cancelled, e.Error));
         }
     }
     catch (Exception ex)
     {
         MyConsole.AppendLine(string.Format("处理下载完成异常:{0}!时间:{1}", ex.Message, DateTime.Now));

         var errored = OnDownloadErrored;
         if (errored != null)
         {
             // NOTE(review): the completed event is raised with `this` as sender but the
             // errored event forwards the original sender - confirm this is intentional.
             errored(sender, new DownloadErroredEventArgs(_url, ex));
         }
     }
 }
Esempio n. 5
0
        /// <summary>
        /// Starts a crawl: validates the inputs, seeds <c>UrlManager</c> with the root URLs and
        /// launches the configured number of background worker threads.
        /// </summary>
        /// <remarks>
        /// Nothing is started (and the controls stay enabled) when the base folder is empty,
        /// no root URL is available, or the count limit is not a non-negative integer.
        /// </remarks>
        /// <param name="sender">The control that raised the click.</param>
        /// <param name="e">Unused event data.</param>
        private void btnSpider_Click(object sender, EventArgs e)
        {
            _baseForlder = txtBasePath.Text;

            IHtmlSpider spider  = AttachControl as IHtmlSpider;
            string[]    rootUrl = spider != null ? spider.RootUrls : null;
            if (string.IsNullOrEmpty(_baseForlder) || rootUrl == null || rootUrl.Length == 0)
            {
                return;
            }

            // Validate the limit before disabling the UI: a parse failure after
            // SetControlState(false) would leave the controls disabled with no crawl running.
            // A negative limit is rejected because the workers compare _totalCount > _limitedCount.
            int limitedCount;
            if (!int.TryParse(txtNum.Text, out limitedCount) || limitedCount < 0)
            {
                MyConsole.AppendLine(string.Format("爬取数量限制无效:{0}", txtNum.Text));
                return;
            }

            SetControlState(false);

            MyConsole.AppendLine("爬取程序正在启动...");
            // Collect parameters.
            MyConsole.AppendLine("开始收集爬虫需要的参数>>>");

            _limitedCount = limitedCount;
            pnlRight.Controls.Add(_chart); // show the chart control
            MyConsole.AppendSign();
            MyConsole.AppendLine("开始爬取..");

            _startTime              = DateTime.Now; // record when crawling started
            UrlManager.BaseForlder  = _baseForlder;
            UrlManager.LimitedCount = _limitedCount;

            UrlManager.AddNewUrls(rootUrl[0], rootUrl);  // seed with the root URLs

            // Start one worker thread per configured spider thread.
            var threadCount = txtThreadNum.Value;
            for (int i = 0; i < threadCount; i++)
            {
                Thread spiderThead = new Thread(new ThreadStart(SpiderWorkerLoop));
                spiderThead.Name         = "SpiderThead" + i;
                spiderThead.IsBackground = true;
                spiderThead.Start();
            }
        }

        /// <summary>
        /// Body of one crawler thread: repeatedly takes a URL from <c>UrlManager</c> and starts
        /// an asynchronous download for it. Never returns; it runs on a background thread.
        /// </summary>
        private void SpiderWorkerLoop()
        {
            // Pause between polls while there is nothing to do, so idle workers do not
            // spin at full CPU.
            const int idleDelayMs = 50;

            while (true)
            {
                bool limitReached = _limitedCount != 0 && _totalCount > _limitedCount;
                if (limitReached || !UrlManager.HasUrl)
                {
                    Thread.Sleep(idleDelayMs);
                    continue;
                }

                string url = UrlManager.PopOneUrl();
                if (string.IsNullOrEmpty(url))
                {
                    // Several workers run this loop, so another one may have taken the last URL
                    // between the HasUrl check and the pop.
                    // NOTE(review): assumes PopOneUrl yields null/empty in that case - confirm.
                    Thread.Sleep(idleDelayMs);
                    continue;
                }

                try
                {
                    // Create the downloader for this URL and wire up its events.
                    HtmlDownloader download       = new HtmlDownloader(url, Properties.Resources.FileFilter, Encoding.UTF8);
                    download.OnDownloadChanged   += download_OnDownloadChanged;
                    download.OnDownloadCompleted += download_OnDownloadCompleted;
                    download.OnDownloadErrored   += download_OnDownloadErrored;

                    if (download.HasResponse)
                    {
                        MyConsole.AppendLine(string.Format("开始爬取Url:{0},时间:{1}", url, DateTime.Now));
                        download.DownloadAsync();
                        UpdateGridView(url, SpiderState.爬取中);
                    }
                    else
                    {
                        MyConsole.AppendLine(string.Format("爬取Url:{0}失败,异常原因:远程链接失败,时间:{1}", url, DateTime.Now));
                        UpdateGridView(url, SpiderState.失败);
                    }
                }
                catch (Exception ex)
                {
                    MyConsole.AppendLine(string.Format("爬取Url:{0}失败,异常原因:{1},时间:{2}", url, ex.Message, DateTime.Now));
                    UpdateGridView(url, SpiderState.失败);
                }
            }
        }