/// <summary>
/// Saves downloaded content according to the requested save type.
/// Only <c>SaveType.文件</c> performs any work in this method; every other
/// save type returns without doing anything.
/// </summary>
/// <param name="forlder">Root directory the file is written under.</param>
/// <param name="buffer">Bytes to write.</param>
/// <param name="saveType">How the content should be saved.</param>
/// <param name="fileName">File name, combined with <paramref name="forlder"/> to form the target path.</param>
public static void Save(string forlder, byte[] buffer, SaveType saveType, string fileName)
{
    // Text, table and database save types have no implementation here.
    if (saveType != SaveType.文件)
    {
        return;
    }

    string targetPath = Path.Combine(forlder, fileName);
    string logLine = string.Format("正在写入{0}到本地目录{1},写入时间:{2}", fileName, targetPath, DateTime.Now);
    MyConsole.AppendLine(logLine);

    // CheckForlder is defined elsewhere; presumably it makes sure the target
    // directory exists before the file is created - TODO confirm.
    CheckForlder(targetPath);

    // FileMode.Create overwrites an existing file of the same name.
    FileStream output = new FileStream(targetPath, FileMode.Create);
    try
    {
        output.Write(buffer, 0, buffer.Length);
    }
    finally
    {
        output.Dispose();
    }
}
/// <summary>
/// Handles completion of a file download by raising <c>OnDownloadCompleted</c>
/// with no result payload (result and result type are both null) and the
/// cancelled/error state taken from <paramref name="e"/>.
/// Any exception thrown while notifying subscribers is logged and forwarded
/// through <c>OnDownloadErrored</c>.
/// </summary>
/// <param name="sender">Object that raised the completion event.</param>
/// <param name="e">Completion data carrying the cancelled flag and error.</param>
private void _client_DownloadFileCompleted(object sender, System.ComponentModel.AsyncCompletedEventArgs e)
{
    try
    {
        if (OnDownloadCompleted == null)
        {
            return;
        }

        // A file download has no in-memory result, so only the status is passed on.
        DownloadCompletedEventArgs completedArgs = new DownloadCompletedEventArgs(null, null, e.Cancelled, e.Error);
        OnDownloadCompleted(this, completedArgs);
    }
    catch (Exception ex)
    {
        string logLine = string.Format("处理下载完成异常:{0}!时间:{1}", ex.Message, DateTime.Now);
        MyConsole.AppendLine(logLine);

        if (OnDownloadErrored == null)
        {
            return;
        }

        OnDownloadErrored(sender, new DownloadErroredEventArgs(_url, ex));
    }
}
/// <summary>
/// Handles completion of an in-memory download by raising
/// <c>OnDownloadCompleted</c> with the downloaded data, its runtime type and
/// the cancelled/error state taken from <paramref name="e"/>.
/// When the operation failed or was cancelled, the result and result type are
/// passed as null, matching what the file-download completion handler passes.
/// Any exception thrown while notifying subscribers is logged and forwarded
/// through <c>OnDownloadErrored</c>.
/// </summary>
/// <param name="sender">Object that raised the completion event.</param>
/// <param name="e">Completion data carrying the result, cancelled flag and error.</param>
private void _client_DownloadDataCompleted(object sender, DownloadDataCompletedEventArgs e)
{
    try
    {
        if (OnDownloadCompleted != null)
        {
            // Reading e.Result throws when the operation was cancelled or
            // failed, so the status must be checked before touching it.
            // Otherwise subscribers never see e.Cancelled / e.Error and the
            // real failure is reported as a generic wrapper exception.
            bool failed = e.Cancelled || e.Error != null;
            byte[] result = failed ? null : e.Result;
            Type resultType = result == null ? null : result.GetType();

            OnDownloadCompleted(this, new DownloadCompletedEventArgs(result, resultType, e.Cancelled, e.Error));
        }
    }
    catch (Exception ex)
    {
        MyConsole.AppendLine(string.Format("处理下载完成异常:{0}!时间:{1}", ex.Message, DateTime.Now));
        if (OnDownloadErrored != null)
        {
            OnDownloadErrored(sender, new DownloadErroredEventArgs(_url, ex));
        }
    }
}
/// <summary>
/// Starts a crawl: reads the base folder, root URLs and download limit from
/// the UI, seeds <c>UrlManager</c> with the root URLs, and starts one
/// background worker thread per unit of <c>txtThreadNum.Value</c>.
/// Returns without starting anything when the base folder or root URLs are
/// missing, or when the limit text is not a valid integer.
/// </summary>
/// <param name="sender">Control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void btnSpider_Click(object sender, EventArgs e)
{
    _baseForlder = txtBasePath.Text;

    // The attached control may not implement IHtmlSpider; dereferencing the
    // result of an unchecked `as` cast would throw NullReferenceException.
    IHtmlSpider spider = AttachControl as IHtmlSpider;
    string[] rootUrl = spider == null ? null : spider.RootUrls;
    if (string.IsNullOrEmpty(_baseForlder) || rootUrl == null || rootUrl.Length == 0)
    {
        return;
    }

    // Validate the limit before any UI state is changed, so a typo in the
    // text box cannot throw after SetControlState(false) has already run.
    int limitedCount;
    if (!int.TryParse(txtNum.Text, out limitedCount))
    {
        MyConsole.AppendLine(string.Format("爬取数量必须是整数,当前输入:{0}", txtNum.Text));
        return;
    }

    SetControlState(false);
    MyConsole.AppendLine("爬取程序正在启动...");

    // Collect parameters.
    MyConsole.AppendLine("开始收集爬虫需要的参数>>>");
    _limitedCount = limitedCount;
    pnlRight.Controls.Add(_chart); // Show the chart/report control.
    MyConsole.AppendSign();
    MyConsole.AppendLine("开始爬取..");
    _startTime = DateTime.Now; // Record when the crawl started.

    UrlManager.BaseForlder = _baseForlder;
    UrlManager.LimitedCount = _limitedCount;
    UrlManager.AddNewUrls(rootUrl[0], rootUrl); // Add the root addresses.

    // Start the configured number of crawler threads.
    for (int i = 0; i < txtThreadNum.Value; i++)
    {
        Thread spiderThead = new Thread(new ThreadStart(SpiderWorkerLoop));
        spiderThead.Name = "SpiderThead" + i;
        spiderThead.IsBackground = true;
        spiderThead.Start();
    }
}

/// <summary>
/// Body of one crawler thread: repeatedly takes a URL from <c>UrlManager</c>
/// and starts an asynchronous download for it. The loop never exits on its
/// own; the threads are background threads.
/// </summary>
private void SpiderWorkerLoop()
{
    // Pause between polls when there is nothing to do, instead of spinning
    // in a tight loop on every worker thread.
    const int IdlePollMilliseconds = 50;

    while (true)
    {
        // A limit of 0 means "no limit"; otherwise stop taking URLs once the
        // total count has passed the limit.
        if (_limitedCount != 0 && _totalCount > _limitedCount)
        {
            Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        if (!UrlManager.HasUrl)
        {
            Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        string url = UrlManager.PopOneUrl();

        // Several workers run this loop, so another thread may have taken the
        // last URL between the HasUrl check and PopOneUrl.
        // NOTE(review): assumes PopOneUrl returns null/empty rather than
        // throwing when the queue is empty - confirm against UrlManager.
        if (string.IsNullOrEmpty(url))
        {
            Thread.Sleep(IdlePollMilliseconds);
            continue;
        }

        try
        {
            // Initialize the downloader for this URL.
            HtmlDownloader download = new HtmlDownloader(url, Properties.Resources.FileFilter, Encoding.UTF8);
            download.OnDownloadChanged += download_OnDownloadChanged;
            download.OnDownloadCompleted += download_OnDownloadCompleted;
            download.OnDownloadErrored += download_OnDownloadErrored;

            if (download.HasResponse)
            {
                MyConsole.AppendLine(string.Format("开始爬取Url:{0},时间:{1}", url, DateTime.Now));
                download.DownloadAsync();
                UpdateGridView(url, SpiderState.爬取中);
            }
            else
            {
                MyConsole.AppendLine(string.Format("爬取Url:{0}失败,异常原因:远程链接失败,时间:{1}", url, DateTime.Now));
                UpdateGridView(url, SpiderState.失败);
            }
        }
        catch (Exception ex)
        {
            MyConsole.AppendLine(string.Format("爬取Url:{0}失败,异常原因:{1},时间:{2}", url, ex.Message, DateTime.Now));
            UpdateGridView(url, SpiderState.失败);
        }
    }
}