/// <summary>
            /// Requests <c>Url</c> through a new <c>HttpRoutine</c> and, when <c>Download</c> is set,
            /// passes the fetched page to <c>get_links</c> with depth <c>Depth + 1</c>.
            /// </summary>
            /// <remarks>
            /// While the request runs and <c>Download</c> is false, the global
            /// <c>Web.Default.MaxDownloadedFileLength</c> setting is temporarily set to 0.
            /// The previous value is always restored, even if the request throws.
            /// On failure: an unacceptable content type is ignored; a 404 is recorded via
            /// <c>FileWriter</c> together with the parent link URL; anything else is escalated.
            /// </remarks>
            /// <param name="bc">Bot cycle forwarded to <c>get_links</c>.</param>
            /// <exception cref="ProcessorException">
            /// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be fetched for a reason
            /// other than an unacceptable content type or a 404 response.
            /// </exception>
            public override void PROCESSOR(BotCycle bc)
            {
                int savedMaxLength = Bot.Properties.Web.Default.MaxDownloadedFileLength;
                HttpRoutine hr;
                bool        rc;

                try
                {
                    if (!Download)
                    {
                        // NOTE(review): a limit of 0 presumably stops GetPage from downloading the body,
                        // so the link is only checked for availability - confirm against HttpRoutine.
                        Bot.Properties.Web.Default.MaxDownloadedFileLength = 0;
                    }
                    hr = new HttpRoutine();
                    rc = hr.GetPage(Url);
                }
                finally
                {
                    // The setting is global: restore it on every path so a failed request
                    // cannot leave the limit stuck at 0 for subsequent requests.
                    Bot.Properties.Web.Default.MaxDownloadedFileLength = savedMaxLength;
                }

                if (!rc)
                {
                    if (hr.Status == WebRoutineStatus.UNACCEPTABLE_CONTENT_TYPE)
                    {
                        return;
                    }
                    // HWResponse may be absent when the request failed before any response arrived;
                    // such a failure is escalated below instead of raising NullReferenceException.
                    if (hr.HWResponse != null && hr.HWResponse.StatusCode == System.Net.HttpStatusCode.NotFound)
                    {
                        FileWriter.This.WriteLine(ParentLink.Url, Url);
                    }
                    else
                    {
                        throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
                    }
                    return;
                }
                if (Download)
                {
                    get_links(Depth + 1, hr, bc);
                }
            }
// Example #2
// 0
        /// <summary>
        /// Sends HTTP data using an <c>HttpRoutine</c> instance obtained from the object pool.
        /// </summary>
        /// <param name="url">URL forwarded to <c>HttpRoutine.SendData</c>.</param>
        /// <param name="callBack">Callback forwarded to <c>HttpRoutine.SendData</c>.</param>
        /// <param name="isPost">Forwarded unchanged; presumably selects POST over GET (confirm in <c>HttpRoutine</c>). Defaults to <c>false</c>.</param>
        /// <param name="dic">Optional key/value data forwarded unchanged. Defaults to <c>null</c>.</param>
        public void SendData(string url, HttpSendDataCallBack callBack, bool isPost = false, Dictionary <string, object> dic = null)
        {
            Debug.Log("从池中获取Http访问器");

            // Spawn a pooled routine and delegate the actual request to it.
            GameEntry.Pool.SpawnClassObject <HttpRoutine>().SendData(url, callBack, isPost, dic);
        }
        /// <summary>
        /// Extracts the links from the page held by <paramref name="hr"/> and queues them
        /// on <paramref name="bc"/> as new <c>Link</c> items at depth <paramref name="depth2"/>.
        /// </summary>
        /// <remarks>
        /// Nothing is queued when <paramref name="depth2"/> exceeds <c>MaxDownloadLinkDepth</c>, or when
        /// the per-domain counter has already reached <c>MaxPageCountPerSite</c> (a value of -1 or
        /// lower disables that limit). Spiderable links are queued with <c>download</c> set only while
        /// <paramref name="depth2"/> is below <c>MaxDownloadLinkDepth</c>; links outside the domain are
        /// always queued with <c>download: false</c> and are not counted.
        /// NOTE(review): the counter is read through the <c>domain2page_count</c> indexer; presumably the
        /// domain has been registered beforehand - confirm against the caller.
        /// </remarks>
        /// <param name="depth2">Depth assigned to the queued links.</param>
        /// <param name="hr">Routine whose <c>ResponseUrl</c> and <c>HtmlResult</c> are parsed.</param>
        /// <param name="bc">Bot cycle that receives the new links.</param>
        public static void get_links(int depth2, HttpRoutine hr, BotCycle bc)
        {
            var maxDepth = Bot.Properties.Spider.Default.MaxDownloadLinkDepth;
            if (depth2 > maxDepth)
            {
                return;
            }

            string domain = Spider.GetDomain(hr.ResponseUrl);
            int pagesSeen = domain2page_count[domain];

            var maxPagesPerSite = Bot.Properties.Spider.Default.MaxPageCountPerSite;
            bool siteLimited = maxPagesPerSite > -1;
            if (siteLimited && pagesSeen >= maxPagesPerSite)
            {
                return;
            }

            AgileSpider spider = new AgileSpider(hr.ResponseUrl, hr.HtmlResult);
            List <WebLink> pageLinks = spider.GetWebLinks(
                WebLinkType.Anchor
                | WebLinkType.Area
                | WebLinkType.Form
                | WebLinkType.MetaTag
                | WebLinkType.Frame
                | WebLinkType.Image
                | WebLinkType.Javascript);
            List <WebLink> foreignLinks;
            List <WebLink> spiderableLinks = Spider.GetSpiderableLinks(spider.BaseUri, pageLinks, out foreignLinks);

            // Links found on a page at the deepest allowed level are queued but not downloaded.
            bool download = depth2 < maxDepth;

            foreach (WebLink link in spiderableLinks)
            {
                bc.Add(new Link(url: link.Url, depth: depth2, download: download));
                pagesSeen++;
                if (siteLimited && maxPagesPerSite <= pagesSeen)
                {
                    Log.Warning(domain + " reached MaxPageCountPerSite: " + maxPagesPerSite.ToString());
                    break;
                }
            }
            domain2page_count[domain] = pagesSeen;

            foreach (WebLink link in foreignLinks)
            {
                bc.Add(new Link(url: link.Url, depth: depth2, download: false));
            }
        }
// Example #4
// 0
 /// <summary>
 /// Assigns a newly constructed <c>HttpRoutine</c> to <c>HR</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably invoked by the bot framework at the start of each cycle - confirm
 /// against the base class.
 /// </remarks>
 public override void CycleBeginning()
 {
     // An earlier IeRoutine-based (browser) setup was disabled here; only the HTTP routine is created.
     HR = new HttpRoutine();
 }