Exemplo n.º 1
0
        public static Object search(Object o)
        {
            try
            {
                Hashtable param = (Hashtable)o;

                EngineEntity engine = (EngineEntity)param["engine"];

                engine.Status = "处理中";

                String searchKey = HttpUtility.UrlEncode((String)param["searchKey"], Encoding.UTF8);

                IEngine search = engine.Search;

                Dictionary <String, String> urlMap = new Dictionary <String, String>();

                Int32 timeout = Convert.ToInt32(ConfigService.get("parse.request.timeout")) * 1000;

                String html = String.Empty;

                Int32 unParseURL = 0;

                //出错50次算结束,连续5次没抓到邮箱算结束,大于设置最大邮箱数算结束
                while (!search.isEndPage(html, engine) && engine.PageCnt <= engine.MaxPageCnt && engine.ErrorCnt < 50 && unParseURL < 5)
                {
                    String url = search.getPageUrl(html, engine, searchKey);

                    try
                    {
                        html = WebClientUtils.getHTML(url, timeout);

                        if (!String.Empty.Equals(html))
                        {
                            if (search.addUrl(CommonUtils.getHTMLinkArray(url, html), engine) == 0)
                            {
                                unParseURL++;
                            }
                            else
                            {
                                unParseURL = 0;
                            }
                        }

                        engine.PageCnt = engine.PageCnt + 1;
                    }
                    catch (Exception ex)
                    {
                        engine.ErrorCnt = engine.ErrorCnt + 1;
                        search.error(engine, ex);
                        html = String.Empty;
                    }
                }

                engine.Status = "处理结束";
            }
            catch { }

            return(null);
        }
Exemplo n.º 2
0
        public static Object run(Object o)
        {
            try
            {
                Hashtable param = o as Hashtable;

                String url = Convert.ToString(param["url"]);

                try
                {
                    String keyword = (String)param["type"];

                    parseService.changeStatus(url, Constant.ParseStatus.START);

                    //如果RUNTIME的状态为STOP,则不再解析
                    if (!Constant.RunStatus.STOP.Equals(runtimeSerivce.get(url).RunStatus))
                    {
                        String html = WebClientUtils.getHTML(url, Convert.ToInt32(ConfigService.get("parse.request.timeout")) * 1000);

                        if (!"".Equals(html))
                        {
                            spiderURL(html, url, keyword);
                            spiderMail(html, keyword, url);
                        }
                    }

                    parseService.changeStatus(url, Constant.ParseStatus.SUCCESS);
                }
                catch
                {
                    parseService.changeStatus(url, Constant.ParseStatus.EXCEPTION);
                }
            }
            catch (Exception ex) { logger.Error(ex.Message, ex); }

            return(null);
        }