/// <summary>
/// Applies the navigation rules to every starting URL and collects the resulting page addresses.
/// </summary>
/// <remarks>
/// For each starting URL and each rule in <c>_urlItem.NavigationRules</c>:
/// 1. If the rule is terminal, the starting URL itself is added.
/// 2. Otherwise the page is requested, the rule is applied to the returned HTML and every
///    extracted link is converted to an absolute URL before being added.
/// <c>onSingleComplete</c> is raised once per added URL. An exception thrown while handling a
/// non-terminal rule is reported through <c>OnAppendSingileLog</c> and processing continues
/// with the next rule.
/// Change marker: Wang Ya 201204244.
/// </remarks>
/// <param name="startingUrl">Starting addresses.</param>
/// <returns>Navigation addresses.</returns>
private StringCollection ParseNavigationRuleItem(StringCollection startingUrl)
{
    StringCollection urls = new StringCollection();

    foreach (string u in startingUrl)
    {
        foreach (NavigationRule rule in _urlItem.NavigationRules)
        {
            if (rule.Terminal)
            {
                // A terminal page is added to the navigation addresses as-is.
                urls.Add(u);
                if (onSingleComplete != null)
                {
                    this.onSingleComplete(this, u); // Raise the "one URL added" event.
                }
                continue;
            }

            try
            {
                HttpHelper http = new HttpHelper();

                // Change marker 20120601 Wang Ya: set the HTTP request encoding when resolving navigation addresses.
                http._encoding = Encoding.GetEncoding(_urlItem.UrlEncoding);

                // Send the HTTP request and extract the navigation links from the returned HTML.
                string htmlText = http.RequestResult(u);
                StringCollection navUrlItem = ParseNavigationRuleHtmlText(rule, htmlText);

                foreach (string r in navUrlItem)
                {
                    /*
                     * Relative links such as /html/gndy/jddy/20120425/37418.html must be made absolute.
                     * A link counts as absolute only when it STARTS with http:// or https://; a plain
                     * Contains("http://") check misclassifies https links and links that merely carry
                     * a URL inside their query string.
                     */
                    string path = r;
                    bool isAbsolute =
                        r.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                        r.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

                    if (r.Length > 0 && !isAbsolute)
                    {
                        Uri baseUri = http.WebResponse.ResponseUri;
                        Uri resolved;

                        if (r.StartsWith("//", StringComparison.Ordinal))
                        {
                            // Protocol-relative link: only the scheme of the current page is missing.
                            path = baseUri.Scheme + ":" + r;
                        }
                        else if (r[0] == '/')
                        {
                            // Host-relative link: keep the link text untouched and prepend the scheme
                            // and host of the page that was actually served.
                            path = baseUri.Scheme + "://" + baseUri.Authority + r;
                        }
                        else if (Uri.TryCreate(baseUri, r, out resolved))
                        {
                            // Document-relative link (e.g. "page2.html", "../x.html"): resolve it
                            // against the address of the page it was found on.
                            path = resolved.AbsoluteUri;
                        }
                        else
                        {
                            // Could not be resolved; fall back to treating it as host-relative.
                            path = baseUri.Scheme + "://" + baseUri.Authority + "/" + r;
                        }
                    }

                    urls.Add(path);
                    if (onSingleComplete != null)
                    {
                        this.onSingleComplete(this, path); // Raise the "one URL added" event.
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and carry on with the remaining rules/URLs.
                if (OnAppendSingileLog != null)
                {
                    OnAppendSingileLog(this, new LogEventArgs(ex.Message));
                }
            }
        }
    }

    return urls;
}
/// <summary>
/// Releases resources: saves the collected results, then clears the references held by this object.
/// </summary>
public void Dispose()
{
    // Save the collection results first; the fields are cleared only afterwards.
    SaveResult();

    _HttpHelper = null;
    _Results = null;
    _TaskConfig = null;
}
/// <summary>
/// Constructor: creates the HTTP helper using the configured URL encoding and
/// creates the timer whose callback is <c>Start</c>.
/// </summary>
public TaskUnit()
{
    // NOTE(review): _TaskConfig is dereferenced here; presumably it is already
    // initialized when this constructor body runs - confirm against the field declaration.
    Encoding urlEncoding = Encoding.GetEncoding(_TaskConfig.UrlListManager.UrlEncoding);
    _HttpHelper = new HttpHelper(urlEncoding);

    // Both the due time and the period are Timeout.Infinite, so the timer is created idle.
    TimerCallback callback = new TimerCallback(Start);
    time = new Timer(callback, "", Timeout.Infinite, Timeout.Infinite);
}
/// <summary>
/// Applies the navigation rules to every starting URL and collects the resulting page addresses.
/// </summary>
/// <remarks>
/// For each starting URL and each rule in <c>_urlItem.NavigationRules</c>:
/// 1. If the rule is terminal, the starting URL itself is added.
/// 2. Otherwise the page is requested, the rule is applied to the returned HTML and every
///    extracted link is converted to an absolute URL before being added.
/// <c>onSingleComplete</c> is raised once per added URL. An exception thrown while handling a
/// non-terminal rule is reported through <c>OnAppendSingileLog</c> and processing continues
/// with the next rule.
/// Change marker: Wang Ya 201204244.
/// </remarks>
/// <param name="startingUrl">Starting addresses.</param>
/// <returns>Navigation addresses.</returns>
private StringCollection ParseNavigationRuleItem(StringCollection startingUrl)
{
    StringCollection urls = new StringCollection();

    foreach (string u in startingUrl)
    {
        foreach (NavigationRule rule in _urlItem.NavigationRules)
        {
            if (rule.Terminal)
            {
                // A terminal page is added to the navigation addresses as-is.
                urls.Add(u);
                if (onSingleComplete != null)
                {
                    this.onSingleComplete(this, u); // Raise the "one URL added" event.
                }
                continue;
            }

            try
            {
                HttpHelper http = new HttpHelper();

                // Change marker 20120601 Wang Ya: set the HTTP request encoding when resolving navigation addresses.
                http._encoding = Encoding.GetEncoding(_urlItem.UrlEncoding);

                // Send the HTTP request and extract the navigation links from the returned HTML.
                string htmlText = http.RequestResult(u);
                StringCollection navUrlItem = ParseNavigationRuleHtmlText(rule, htmlText);

                foreach (string r in navUrlItem)
                {
                    /*
                     * Relative links such as /html/gndy/jddy/20120425/37418.html must be made absolute.
                     * A link counts as absolute only when it STARTS with http:// or https://; a plain
                     * Contains("http://") check misclassifies https links and links that merely carry
                     * a URL inside their query string.
                     */
                    string path = r;
                    bool isAbsolute =
                        r.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                        r.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

                    if (r.Length > 0 && !isAbsolute)
                    {
                        Uri baseUri = http.WebResponse.ResponseUri;
                        Uri resolved;

                        if (r.StartsWith("//", StringComparison.Ordinal))
                        {
                            // Protocol-relative link: only the scheme of the current page is missing.
                            path = baseUri.Scheme + ":" + r;
                        }
                        else if (r[0] == '/')
                        {
                            // Host-relative link: keep the link text untouched and prepend the scheme
                            // and host of the page that was actually served.
                            path = baseUri.Scheme + "://" + baseUri.Authority + r;
                        }
                        else if (Uri.TryCreate(baseUri, r, out resolved))
                        {
                            // Document-relative link (e.g. "page2.html", "../x.html"): resolve it
                            // against the address of the page it was found on.
                            path = resolved.AbsoluteUri;
                        }
                        else
                        {
                            // Could not be resolved; fall back to treating it as host-relative.
                            path = baseUri.Scheme + "://" + baseUri.Authority + "/" + r;
                        }
                    }

                    urls.Add(path);
                    if (onSingleComplete != null)
                    {
                        this.onSingleComplete(this, path); // Raise the "one URL added" event.
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and carry on with the remaining rules/URLs.
                if (OnAppendSingileLog != null)
                {
                    OnAppendSingileLog(this, new LogEventArgs(ex.Message));
                }
            }
        }
    }

    return urls;
}