/// <summary>
/// Requests <c>Url</c> and, when <c>Download</c> is set, queues the links found on the page
/// at <c>Depth + 1</c>.
/// </summary>
/// <remarks>
/// When <c>Download</c> is false the global <c>MaxDownloadedFileLength</c> setting is set to 0
/// for the duration of the request. The previous value is always restored, even if the
/// request throws.
/// </remarks>
/// <param name="bc">Bot cycle passed on to <c>get_links</c> to receive the discovered links.</param>
/// <exception cref="ProcessorException">
/// Thrown with <c>RESTORE_AS_NEW</c> when the page could not be fetched and the failure is
/// neither an unacceptable content type nor an HTTP 404.
/// </exception>
public override void PROCESSOR(BotCycle bc)
{
    int savedMaxLength = Bot.Properties.Web.Default.MaxDownloadedFileLength;
    HttpRoutine hr;
    bool rc;
    try
    {
        if (!Download)
        {
            Bot.Properties.Web.Default.MaxDownloadedFileLength = 0;
        }
        hr = new HttpRoutine();
        rc = hr.GetPage(Url);
    }
    finally
    {
        // The setting is process-wide; restore it no matter how the request ended,
        // otherwise an exception here would leave it stuck at 0.
        Bot.Properties.Web.Default.MaxDownloadedFileLength = savedMaxLength;
    }

    if (rc)
    {
        if (Download)
        {
            get_links(Depth + 1, hr, bc);
        }
        return;
    }

    if (hr.Status == WebRoutineStatus.UNACCEPTABLE_CONTENT_TYPE)
    {
        return;
    }

    // NOTE(review): HWResponse is assumed to be null when no HTTP response was received
    // (e.g. connection failure) - confirm against HttpRoutine. In that case fall through
    // to the generic failure below instead of crashing with a NullReferenceException.
    if (hr.HWResponse != null && hr.HWResponse.StatusCode == System.Net.HttpStatusCode.NotFound)
    {
        // Record the referring page together with the URL that returned 404.
        FileWriter.This.WriteLine(ParentLink.Url, Url);
        return;
    }

    throw new ProcessorException(ProcessorExceptionType.RESTORE_AS_NEW, "Could not get: " + Url);
}
/// <summary>
/// Sends HTTP data by taking an <c>HttpRoutine</c> from the object pool and forwarding
/// all arguments to its <c>SendData</c> method.
/// </summary>
/// <param name="url">Target URL, passed through unchanged.</param>
/// <param name="callBack">Callback handed to the routine (presumably invoked with the result; confirm in HttpRoutine).</param>
/// <param name="isPost">Forwarded flag; defaults to <c>false</c> (presumably selects POST over GET; confirm in HttpRoutine).</param>
/// <param name="dic">Optional key/value payload forwarded to the routine; defaults to <c>null</c>.</param>
public void SendData(string url, HttpSendDataCallBack callBack, bool isPost = false, Dictionary<string, object> dic = null)
{
    // Log text is Chinese for "getting an Http accessor from the pool".
    Debug.Log("从池中获取Http访问器");

    GameEntry.Pool.SpawnClassObject<HttpRoutine>().SendData(url, callBack, isPost, dic);
}
/// <summary>
/// Extracts the links from a fetched page and adds them to the bot cycle as new
/// <c>Link</c> items, respecting the configured depth and per-site page limits.
/// </summary>
/// <remarks>
/// Does nothing when <paramref name="depth2"/> exceeds <c>MaxDownloadLinkDepth</c> or when the
/// page's domain has already reached <c>MaxPageCountPerSite</c> (a value below 0 disables
/// that limit). Links at exactly the maximum depth are added with <c>download: false</c>,
/// as are all links that <c>Spider.GetSpiderableLinks</c> reports as beyond the domain.
/// </remarks>
/// <param name="depth2">Depth assigned to the links added by this call.</param>
/// <param name="hr">Routine holding the fetched page (<c>ResponseUrl</c>, <c>HtmlResult</c>).</param>
/// <param name="bc">Bot cycle that receives the new links.</param>
public static void get_links(int depth2, HttpRoutine hr, BotCycle bc)
{
    int maxDepth = Bot.Properties.Spider.Default.MaxDownloadLinkDepth;
    if (depth2 > maxDepth)
    {
        return;
    }

    int maxPagesPerSite = Bot.Properties.Spider.Default.MaxPageCountPerSite;
    bool pageLimitEnabled = maxPagesPerSite > -1;

    string domain = Spider.GetDomain(hr.ResponseUrl);

    // A domain seen for the first time (e.g. reached through a redirect) has no entry yet;
    // treat it as zero pages instead of throwing KeyNotFoundException.
    // NOTE(review): assumes domain2page_count is a Dictionary<string, int> - confirm.
    int page_count;
    if (!domain2page_count.TryGetValue(domain, out page_count))
    {
        page_count = 0;
    }

    if (pageLimitEnabled && page_count >= maxPagesPerSite)
    {
        return;
    }

    AgileSpider ags = new AgileSpider(hr.ResponseUrl, hr.HtmlResult);
    List<WebLink> wls = ags.GetWebLinks(
        WebLinkType.Anchor | WebLinkType.Area | WebLinkType.Form | WebLinkType.MetaTag
        | WebLinkType.Frame | WebLinkType.Image | WebLinkType.Javascript);

    List<WebLink> beyond_domain_web_links;
    wls = Spider.GetSpiderableLinks(ags.BaseUri, wls, out beyond_domain_web_links);

    // Links sitting on the last allowed level are queued but flagged as not to be downloaded.
    bool download = depth2 < maxDepth;

    foreach (WebLink wl in wls)
    {
        bc.Add(new Link(url: wl.Url, depth: depth2, download: download));
        page_count++;
        if (pageLimitEnabled && maxPagesPerSite <= page_count)
        {
            Log.Warning(domain + " reached MaxPageCountPerSite: " + maxPagesPerSite.ToString());
            break;
        }
    }
    domain2page_count[domain] = page_count;

    // Off-domain links are queued without download and do not count against the site limit.
    foreach (WebLink wl in beyond_domain_web_links)
    {
        bc.Add(new Link(url: wl.Url, depth: depth2, download: false));
    }
}
/// <summary>
/// Called at the start of a cycle; assigns a new <c>HttpRoutine</c> to <c>HR</c>.
/// </summary>
public override void CycleBeginning()
{
    // Disabled browser-based alternative, kept for reference:
    //IR = new IeRoutine(((IeRoutineBotThreadControl)BotThreadControl.GetInstanceForThisThread()).Browser);
    //IR.UseCache = false;

    HR = new HttpRoutine();
}