/// <summary>
/// Starts a byte-level page crawl using a crawler instance taken from the type pool.
/// </summary>
/// <param name="webClient">Web client the crawler will use.</param>
/// <param name="onCrawlData">
/// Callback that receives the crawl result. It is invoked with <c>null</c> directly from
/// this method when no crawler instance could be created.
/// </param>
/// <param name="TmphRequest">URI request information.</param>
public static void Crawl(TmphWebClient webClient, Action<byte[]> onCrawlData, TmphRequest TmphRequest)
{
    TmphDataCrawler crawler = TmphTypePool<TmphDataCrawler>.Pop();
    if (crawler == null)
    {
        // The pool had nothing to hand out, so build a fresh instance. If even that
        // fails, log the error and report failure to the caller right away.
        try
        {
            crawler = new TmphDataCrawler();
        }
        catch (Exception error)
        {
            TmphLog.Default.Add(error, null, false);
            onCrawlData(null);
            return;
        }
    }

    // Hand the crawler everything it needs before kicking it off.
    crawler.TmphRequest = TmphRequest;
    crawler.onCrawlData = onCrawlData;
    crawler.webClient = webClient;
    crawler.crawl();
}
/// <summary>
/// Logs a crawl failure when the request asks for errors to be reported.
/// </summary>
/// <param name="error">Exception that caused the failure.</param>
/// <param name="TmphRequest">Request information; its flags decide whether and how to log.</param>
private void onError(Exception error, TmphRequest TmphRequest)
{
    if (!TmphRequest.IsErrorOut)
    {
        // Error output is disabled for this request: stay silent.
        return;
    }

    // The absolute URI is only included in the message when the request opts in;
    // otherwise the message is just the fixed suffix.
    string uriText = TmphRequest.IsErrorOutUri ? TmphRequest.Uri.AbsoluteUri : null;
    TmphLog.Default.Add(error, uriText + " 抓取失败", !TmphRequest.IsErrorOutUri);
}
/// <summary>
/// Decompresses downloaded data when a compression stream is configured on this client.
/// </summary>
/// <param name="data">Raw downloaded data.</param>
/// <param name="TmphRequest">Request information, used for error reporting.</param>
/// <returns>
/// The decompressed bytes; the original <paramref name="data"/> unchanged when no compression
/// stream is set; or <c>null</c> when decompression throws.
/// </returns>
private byte[] deCompress(byte[] data, TmphRequest TmphRequest)
{
    // Take a local snapshot of the field so the null check and the use see the same object.
    TmphStream decompressor = this.compressionStream;
    if (decompressor == null)
    {
        return data;
    }

    try
    {
        return decompressor.GetDeCompress(data).ToArray();
    }
    catch (Exception error)
    {
        onError(error, TmphRequest);
        return null;
    }
}
/// <summary>
/// Crawls a page through the callback-based <c>CrawlData</c> overload and delivers the
/// result as HTML text via <paramref name="onCrawlHtml"/>.
/// </summary>
/// <param name="onCrawlHtml">Callback that receives the page HTML.</param>
/// <param name="TmphRequest">URI request information.</param>
/// <param name="encoding">Page encoding passed on to the HTML crawler helper.</param>
public void CrawlHtml(Action<string> onCrawlHtml, TmphRequest TmphRequest, Encoding encoding)
{
    // The helper object carries the state needed to turn the raw bytes into HTML;
    // its onCrawlData method is what the byte-level crawl calls back into.
    TmphHtmlCrawler htmlCrawler = new TmphHtmlCrawler();
    htmlCrawler.WebClient = this;
    htmlCrawler.OnCrawlHtml = onCrawlHtml;
    htmlCrawler.Encoding = encoding;

    CrawlData(htmlCrawler.onCrawlData, TmphRequest);
}
/// <summary>
/// Crawls a page with the blocking <c>CrawlData</c> overload and converts the bytes to HTML text.
/// </summary>
/// <param name="TmphRequest">URI request information.</param>
/// <param name="encoding">Page encoding; <c>TextEncoding</c> is used when this is <c>null</c>.</param>
/// <returns>
/// The page HTML as produced by <c>TmphChineseEncoder.ToString</c>
/// (the original documentation states <c>null</c> is returned on failure).
/// </returns>
public string CrawlHtml(TmphRequest TmphRequest, Encoding encoding)
{
    // Fetch first, then resolve the encoding, matching the original evaluation order.
    byte[] pageBytes = CrawlData(TmphRequest);
    Encoding pageEncoding = encoding ?? TextEncoding;
    return TmphChineseEncoder.ToString(pageBytes, pageEncoding);
}
/// <summary>
/// Crawls the raw bytes of a page, reporting the result through a callback.
/// Delegates to <c>TmphDataCrawler.Crawl</c> with this client.
/// </summary>
/// <param name="onCrawlData">Callback that receives the crawl result.</param>
/// <param name="TmphRequest">URI request information.</param>
public void CrawlData(Action<byte[]> onCrawlData, TmphRequest TmphRequest)
{
    TmphDataCrawler.Crawl(
        this,
        onCrawlData,
        TmphRequest);
}
/// <summary>
/// Downloads the raw bytes of a page, blocking until the request completes.
/// </summary>
/// <remarks>
/// Adds the User-Agent and Referer headers, then issues a download when the request has
/// no form data, or a POST upload of the form values otherwise. The response is passed
/// through <c>deCompress</c> before being returned.
/// </remarks>
/// <param name="TmphRequest">URI request information.</param>
/// <returns>
/// The (possibly decompressed) response bytes, or <c>null</c> when the request has no URI
/// or any step fails.
/// </returns>
public byte[] CrawlData(TmphRequest TmphRequest)
{
    if (TmphRequest.Uri == null)
    {
        return null;
    }

    try
    {
        Headers.Add(TmphHeader.UserAgent, UserAgent);

        // Fall back to the target URI itself when the request carries no explicit referer.
        string referer = string.IsNullOrEmpty(TmphRequest.RefererUrl)
            ? TmphRequest.Uri.AbsoluteUri
            : TmphRequest.RefererUrl;
        Headers.Add(TmphHeader.Referer, referer);

        byte[] response;
        if (TmphRequest.Form == null)
        {
            response = DownloadData(TmphRequest.Uri);
        }
        else
        {
            response = UploadValues(TmphRequest.Uri, Web.TmphHttp.TmphMethodType.POST.ToString(), TmphRequest.Form);
        }

        return deCompress(response, TmphRequest);
    }
    catch (Exception error)
    {
        onError(error, TmphRequest);
    }

    return null;
}
/// <summary>
/// Downloads a page and saves it to a file.
/// </summary>
/// <remarks>
/// Adds the User-Agent and Referer headers before downloading. Failures are reported
/// through the shared <c>onError</c> helper, the same path <c>CrawlData</c> uses, so the
/// logging rules (whether to log, and whether to include the URI) live in one place.
/// </remarks>
/// <param name="TmphRequest">URI request information.</param>
/// <param name="fileName">Name of the file to save to.</param>
/// <returns>
/// <c>true</c> when the download completed without throwing; <c>false</c> when the request
/// has no URI, <paramref name="fileName"/> is <c>null</c>, or the download fails.
/// </returns>
public bool SaveFile(TmphRequest TmphRequest, string fileName)
{
    if (TmphRequest.Uri == null || fileName == null)
    {
        return false;
    }

    try
    {
        Headers.Add(TmphHeader.UserAgent, UserAgent);

        // Fall back to the target URI itself when the request carries no explicit referer.
        string referer = string.IsNullOrEmpty(TmphRequest.RefererUrl)
            ? TmphRequest.Uri.AbsoluteUri
            : TmphRequest.RefererUrl;
        Headers.Add(TmphHeader.Referer, referer);

        DownloadFile(TmphRequest.Uri, fileName);
        return true;
    }
    catch (Exception error)
    {
        // Previously an inline copy of onError's body; delegate to keep both in sync.
        onError(error, TmphRequest);
    }

    return false;
}