Пример #1
0
        /// <summary>
        /// Expands the starting URLs into navigation URLs by applying every navigation rule
        /// of the current URL item to every starting URL.
        /// </summary>
        /// <remarks>
        /// For a terminal rule the starting URL itself is added. For any other rule the page is
        /// requested, the rule is applied to the returned HTML and every extracted link is added,
        /// after relative links have been resolved against the address the response came from.
        /// Each added URL raises <c>onSingleComplete</c>. An exception raised while handling one
        /// URL/rule pair is reported through <c>OnAppendSingileLog</c> and does not stop the
        /// remaining pairs from being processed.
        /// </remarks>
        /// <param name="startingUrl">Starting URLs; <c>null</c> is treated as an empty collection.</param>
        /// <returns>The collected navigation URLs, in discovery order.</returns>
        private StringCollection ParseNavigationRuleItem(StringCollection startingUrl)
        {
            StringCollection urls = new StringCollection();
            if (startingUrl == null)
            {
                return urls;
            }

            foreach (string u in startingUrl)
            {
                foreach (NavigationRule rule in _urlItem.NavigationRules)
                {
                    if (rule.Terminal)
                    {
                        // A terminal rule means the starting URL already is a final page address.
                        urls.Add(u);
                        if (onSingleComplete != null)
                        {
                            this.onSingleComplete(this, u);         // report the newly added URL
                        }
                        continue;
                    }

                    try
                    {
                        HttpHelper http = new HttpHelper();
                        // Request the page using the encoding configured for this URL item.
                        http._encoding = Encoding.GetEncoding(_urlItem.UrlEncoding);
                        string htmlText = http.RequestResult(u);
                        StringCollection navUrlItem = ParseNavigationRuleHtmlText(rule, htmlText);
                        foreach (string r in navUrlItem)
                        {
                            // Only a link that *starts* with an http/https scheme is absolute; a link that
                            // merely contains "http://" somewhere (e.g. in its query string) is not.
                            bool isAbsolute = r.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                           || r.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

                            // Empty links are passed through unchanged, as before.
                            string path = r;
                            if (!isAbsolute && r.Length > 0)
                            {
                                // The response URI is read only when a link needs resolving, so pages
                                // that yield absolute links only never touch http.WebResponse.
                                path = ResolveNavigationUrl(http.WebResponse.ResponseUri, r);
                            }

                            urls.Add(path);
                            if (onSingleComplete != null)
                            {
                                this.onSingleComplete(this, path);  // report the newly added URL
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: log the failure and carry on with the next rule/URL.
                        if (OnAppendSingileLog != null)
                        {
                            OnAppendSingileLog(this, new LogEventArgs(ex.Message));
                        }
                    }
                }
            }

            return urls;
        }

        /// <summary>
        /// Turns a non-absolute link found on a page into an absolute URL.
        /// </summary>
        /// <param name="baseUri">Address the page was actually served from.</param>
        /// <param name="relativeUrl">Non-empty link that does not start with http:// or https://.</param>
        /// <returns>The absolute URL for <paramref name="relativeUrl"/>.</returns>
        private static string ResolveNavigationUrl(Uri baseUri, string relativeUrl)
        {
            // Root-relative link such as /html/gndy/jddy/20120425/37418.html: keep the text as-is
            // and prepend scheme and host of the response (scheme is no longer hard-coded to http).
            if (relativeUrl[0] == '/' && !relativeUrl.StartsWith("//", StringComparison.Ordinal))
            {
                return baseUri.Scheme + "://" + baseUri.Authority + relativeUrl;
            }

            // Everything else (page2.html, ../x, ?page=2, //host/x, ...) follows the normal URI
            // resolution rules. AbsoluteUri returns the escaped form of the resolved address.
            Uri resolved;
            if (Uri.TryCreate(baseUri, relativeUrl, out resolved))
            {
                return resolved.AbsoluteUri;
            }

            // Not resolvable as a URI: fall back to the historical "prefix the host" behaviour.
            return "http://" + baseUri.Authority + relativeUrl;
        }
Пример #2
0
        /// <summary>
        /// Set by the first call to <see cref="Dispose"/> so that later calls do nothing.
        /// </summary>
        private bool _IsDisposed;

        /// <summary>
        /// Saves the collected results and releases the references held by this instance.
        /// </summary>
        /// <remarks>
        /// Safe to call more than once: only the first call saves the results and clears the
        /// fields. An exception thrown by <c>SaveResult</c> still propagates to the caller, but
        /// the references are released regardless.
        /// </remarks>
        public void Dispose() {
            if (this._IsDisposed) {
                return;
            }
            this._IsDisposed = true;

            try {
                // Persist the collected results before the state below is dropped.
                SaveResult();
            }
            finally {
                this._HttpHelper = null;
                this._Results = null;
                this._TaskConfig = null;
            }
        }
Пример #3
0
 /// <summary>
 /// Creates a task unit: builds its HTTP helper with the URL encoding taken from the task
 /// configuration and creates its timer.
 /// </summary>
 public TaskUnit() {
     // NOTE(review): this dereferences _TaskConfig inside the constructor, so it relies on the
     // field already being assigned (e.g. by a field initializer) - confirm, otherwise this throws.
     Encoding urlEncoding = Encoding.GetEncoding(this._TaskConfig.UrlListManager.UrlEncoding);
     this._HttpHelper = new HttpHelper(urlEncoding);

     // Both the due time and the period are Timeout.Infinite; the callback is Start and the
     // state object is an empty string.
     TimerCallback startCallback = new TimerCallback(Start);
     time = new Timer(startCallback, "", Timeout.Infinite, Timeout.Infinite);
 }
Пример #4
0
        /// <summary>
        /// Expands the starting URLs into navigation URLs by applying every navigation rule
        /// of the current URL item to every starting URL.
        /// </summary>
        /// <remarks>
        /// For a terminal rule the starting URL itself is added. For any other rule the page is
        /// requested, the rule is applied to the returned HTML and every extracted link is added,
        /// after relative links have been resolved against the address the response came from.
        /// Each added URL raises <c>onSingleComplete</c>. An exception raised while handling one
        /// URL/rule pair is reported through <c>OnAppendSingileLog</c> and does not stop the
        /// remaining pairs from being processed.
        /// </remarks>
        /// <param name="startingUrl">Starting URLs; <c>null</c> is treated as an empty collection.</param>
        /// <returns>The collected navigation URLs, in discovery order.</returns>
        private StringCollection ParseNavigationRuleItem(StringCollection startingUrl)
        {
            StringCollection urls = new StringCollection();

            if (startingUrl == null)
            {
                return urls;
            }

            foreach (string u in startingUrl)
            {
                foreach (NavigationRule rule in _urlItem.NavigationRules)
                {
                    if (rule.Terminal)
                    {
                        // A terminal rule means the starting URL already is a final page address.
                        urls.Add(u);
                        if (onSingleComplete != null)
                        {
                            this.onSingleComplete(this, u);         // report the newly added URL
                        }
                        continue;
                    }

                    try
                    {
                        HttpHelper http = new HttpHelper();

                        // Request the page using the encoding configured for this URL item.
                        http._encoding = Encoding.GetEncoding(_urlItem.UrlEncoding);
                        string           htmlText   = http.RequestResult(u);
                        StringCollection navUrlItem = ParseNavigationRuleHtmlText(rule, htmlText);
                        foreach (string r in navUrlItem)
                        {
                            // Only a link that *starts* with an http/https scheme is absolute; a link that
                            // merely contains "http://" somewhere (e.g. in its query string) is not.
                            bool isAbsolute = r.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                           || r.StartsWith("https://", StringComparison.OrdinalIgnoreCase);

                            // Empty links are passed through unchanged, as before.
                            string path = r;
                            if (!isAbsolute && r.Length > 0)
                            {
                                // The response URI is read only when a link needs resolving, so pages
                                // that yield absolute links only never touch http.WebResponse.
                                path = ResolveNavigationUrl(http.WebResponse.ResponseUri, r);
                            }

                            urls.Add(path);
                            if (onSingleComplete != null)
                            {
                                this.onSingleComplete(this, path);  // report the newly added URL
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: log the failure and carry on with the next rule/URL.
                        if (OnAppendSingileLog != null)
                        {
                            OnAppendSingileLog(this, new LogEventArgs(ex.Message));
                        }
                    }
                }
            }

            return urls;
        }

        /// <summary>
        /// Turns a non-absolute link found on a page into an absolute URL.
        /// </summary>
        /// <param name="baseUri">Address the page was actually served from.</param>
        /// <param name="relativeUrl">Non-empty link that does not start with http:// or https://.</param>
        /// <returns>The absolute URL for <paramref name="relativeUrl"/>.</returns>
        private static string ResolveNavigationUrl(Uri baseUri, string relativeUrl)
        {
            // Root-relative link such as /html/gndy/jddy/20120425/37418.html: keep the text as-is
            // and prepend scheme and host of the response (scheme is no longer hard-coded to http).
            if (relativeUrl[0] == '/' && !relativeUrl.StartsWith("//", StringComparison.Ordinal))
            {
                return baseUri.Scheme + "://" + baseUri.Authority + relativeUrl;
            }

            // Everything else (page2.html, ../x, ?page=2, //host/x, ...) follows the normal URI
            // resolution rules. AbsoluteUri returns the escaped form of the resolved address.
            Uri resolved;
            if (Uri.TryCreate(baseUri, relativeUrl, out resolved))
            {
                return resolved.AbsoluteUri;
            }

            // Not resolvable as a URI: fall back to the historical "prefix the host" behaviour.
            return "http://" + baseUri.Authority + relativeUrl;
        }