/// <summary>
/// Extracts links from the current downloaded page (hr) and enqueues them for spidering,
/// honoring the per-site page limit and the maximum download depth.
/// </summary>
/// <param name="depth2">Link depth to assign to the newly found links.</param>
/// <param name="queue">Queue of the current item; the new per-domain queue is positioned after it.</param>
public void get_links(int depth2, string queue)
{
    if (depth2 > BotWeb.Settings.Spider.MaxDownloadLinkDepth)
        return;

    string domain = Spider.GetDomain(hr.ResponseUrl);
    var domain2page_count = ((CustomSession)Session).domain2page_count;
    // TryGetValue avoids a KeyNotFoundException the first time a domain is seen;
    // the bare indexer read assumed the key was always pre-populated. Missing => 0 pages so far.
    if (!domain2page_count.TryGetValue(domain, out int page_count))
        page_count = 0;

    int max_page_count = BotWeb.Settings.Spider.MaxPageCountPerSite; // hoisted: read once instead of four times
    if (max_page_count > -1 && page_count >= max_page_count)
        return;

    string queue2 = domain + "-" + depth2.ToString();
    if (depth2 > 1)
        Session.SetInputItemQueuePositionAfterQueue(queue2, queue); // by default queue name is item type name but it can be different if needed

    AgileSpider ags = new AgileSpider(hr.ResponseUrl, hr.HtmlResult);
    List<WebLink> wls = ags.GetWebLinks(WebLinkType.Anchor | WebLinkType.Area | WebLinkType.Form | WebLinkType.MetaTag | WebLinkType.Frame | WebLinkType.Image | WebLinkType.Javascript);
    List<WebLink> beyond_domain_web_links;
    wls = Spider.GetSpiderableLinks(ags.BaseUri, wls, out beyond_domain_web_links);

    // Links found at the maximum depth are still enqueued but must not be downloaded.
    bool download = depth2 < BotWeb.Settings.Spider.MaxDownloadLinkDepth;

    foreach (WebLink wl in wls)
    {
        Add(queue2, new Link(url: wl.Url, depth: depth2, download: download));
        page_count++;
        if (max_page_count > -1 && max_page_count <= page_count)
        {
            Log.Warning(domain + " reached MaxPageCountPerSite: " + max_page_count.ToString());
            break;
        }
    }
    domain2page_count[domain] = page_count;

    // Off-domain links are recorded but never downloaded; they do not count toward the page limit.
    foreach (WebLink wl in beyond_domain_web_links)
        Add(queue2, new Link(url: wl.Url, depth: depth2, download: false)); // by default queue name is item type name but it can be different if needed
}
/// <summary>
/// Extracts links from the current downloaded page (hr) and enqueues them for spidering,
/// honoring the per-site page limit and the maximum download depth.
/// </summary>
/// <param name="depth2">Link depth to assign to the newly found links.</param>
public void get_links(int depth2)
{
    if (depth2 > Bot.Properties.Spider.Default.MaxDownloadLinkDepth)
        return;

    string domain = Spider.GetDomain(hr.ResponseUrl);
    // TryGetValue avoids a KeyNotFoundException the first time a domain is seen;
    // the bare indexer read assumed the key was always pre-populated. Missing => 0 pages so far.
    if (!domain2page_count.TryGetValue(domain, out int page_count))
        page_count = 0;

    int max_page_count = Bot.Properties.Spider.Default.MaxPageCountPerSite; // hoisted: read once instead of four times
    if (max_page_count > -1 && page_count >= max_page_count)
        return;

    AgileSpider ags = new AgileSpider(hr.ResponseUrl, hr.HtmlResult);
    List<WebLink> wls = ags.GetWebLinks(WebLinkType.Anchor | WebLinkType.Area | WebLinkType.Form | WebLinkType.MetaTag | WebLinkType.Frame | WebLinkType.Image | WebLinkType.Javascript);
    List<WebLink> beyond_domain_web_links;
    wls = Spider.GetSpiderableLinks(ags.BaseUri, wls, out beyond_domain_web_links);

    // Links found at the maximum depth are still enqueued but must not be downloaded.
    bool download = depth2 < Bot.Properties.Spider.Default.MaxDownloadLinkDepth;

    foreach (WebLink wl in wls)
    {
        BotCycle.Add(new Link(url: wl.Url, depth: depth2, download: download));
        page_count++;
        if (max_page_count > -1 && max_page_count <= page_count)
        {
            Log.Warning(domain + " reached MaxPageCountPerSite: " + max_page_count.ToString());
            break;
        }
    }
    domain2page_count[domain] = page_count;

    // Off-domain links are recorded but never downloaded; they do not count toward the page limit.
    foreach (WebLink wl in beyond_domain_web_links)
        BotCycle.Add(new Link(url: wl.Url, depth: depth2, download: false));
}