/// <summary>
/// Fetches <paramref name="request"/> through <c>RequestUtility.GetWebText</c> and, when the
/// response came back without an error message and with code "200", records the URL as found
/// and forwards the request to the response handler.
/// </summary>
/// <param name="request">The request to fetch; its <c>Response</c> is inspected after the fetch.</param>
/// <returns>
/// <c>true</c> when the response has no error message and its code equals "200";
/// otherwise <c>false</c>.
/// </returns>
private static bool CheckRequest(Request request)
{
    RequestUtility.GetWebText(request);

    // The error message is checked first; the response code is only read when no error was reported.
    bool failed = !String.IsNullOrEmpty(request.Response.ErrorMessage);
    if (failed || !request.Response.Code.Equals("200"))
    {
        return false;
    }

    AppContext.Found.Add(request.Url);

    // Make sure the URL has a (possibly empty) port list registered.
    // Historical note from the original author: this Add "threw an object not set" on 6/24.
    bool alreadyTracked = AppContext.PortsFound.ContainsKey(request.Url);
    if (!alreadyTracked)
    {
        AppContext.PortsFound.Add(request.Url, new List<int>());
    }

    _responseHandler.Invoke(request);
    return true;
}
/// <summary>
/// Crawls the page described by <paramref name="request"/> and then, recursively, every link
/// the link parser reports as good on that page.
/// </summary>
/// <remarks>
/// Depth is measured by the recursion <paramref name="step"/>, not by the number of path
/// segments in the URL. The page is added to <c>CrawlerContext.Pages</c> before it is fetched.
/// A page whose response reports an error is not parsed for links and is not added to
/// <c>CrawlerContext.ExhaustedURL</c>. A failure while crawling one link is written to the
/// trace output and does not stop the remaining links from being crawled.
/// </remarks>
/// <param name="request">The page to fetch; its <c>Response</c> is read after <c>RequestUtility.GetWebText</c>.</param>
/// <param name="step">Current recursion step; nothing is crawled once it exceeds <c>CrawlerContext.Depth</c>.</param>
private static void CrawlPage(IRequest request, int step)
{
    if (step > CrawlerContext.Depth)
    {
        return;
    }

    // In single-page mode nothing beyond step 2 is visited.
    if (CrawlerContext.SinglePage && step > 2)
    {
        return;
    }

    Uri tempUri = new Uri(request.Url);
    string tempDomain = DomainUtility.GetDomainFromUrl(tempUri);
    if (CrawlerContext.IgnoreDirectory.Count != 0 && IgnoreDirectory(request.Url, tempDomain))
    {
        return;
    }

    if (PageHasBeenCrawled(request))
    {
        return;
    }

    CrawlerContext.Pages.Add(request);
    RequestUtility.GetWebText(request);
    _pageCounter.Invoke();

    if (request.Response.Error)
    {
        return;
    }

    _responseHandler.Invoke(request);

    LinkParser linkParser = new LinkParser();
    linkParser.ParseLinksAgility(request.Response.Body, request.Url);

    // Light mode clears the body once the links have been extracted from it.
    if (CrawlerContext.LightMode)
    {
        request.Response.Body = "";
    }

    foreach (IRequest link in linkParser.GoodUrls)
    {
        // Wait (without timeout) on the pause event before following each link.
        CrawlerContext.PauseEvent.WaitOne(Timeout.Infinite);

        try
        {
            CrawlPage(link, step + 1);
        }
        catch (Exception ex)
        {
            // Best effort: one bad link must not abort the rest of the crawl, but the
            // failure is reported instead of being discarded. URLs are passed as format
            // arguments so that braces inside them cannot corrupt the format string.
            System.Diagnostics.Trace.TraceWarning(
                "Crawl of {0} (linked from {1}) failed: {2}",
                link != null ? link.Url : "<null>",
                request.Url,
                ex.Message);
        }
    }

    CrawlerContext.ExhaustedURL.Add(request.Url);
}
/// <summary>
/// Extracts the links from <paramref name="body"/>, keeps those that point at one of the
/// <paramref name="socialDomains"/>, requests each candidate over HTTPS and returns the ones
/// that qualify according to <paramref name="returnOnlyNone200"/>.
/// </summary>
/// <param name="body">Page content handed to the link parser.</param>
/// <param name="url">URL of the page being scanned; stored as the value of every returned entry.</param>
/// <param name="userNames">
/// When non-empty, a candidate is only considered if it contains one of these names
/// (case-insensitive) and is not listed in <c>Ignore</c>. When empty, every social-media
/// link is considered and <c>Ignore</c> is not consulted.
/// </param>
/// <param name="socialDomains">Domains passed to <c>SocialDomainUtility.CheckIfSocialMediaSite</c>.</param>
/// <param name="returnOnlyNone200">
/// When <c>true</c> (the default) only candidates whose response code is not "200", or whose
/// request URL contains "buymethat", are returned. When <c>false</c> every requested candidate
/// is returned.
/// </param>
/// <returns>
/// A dictionary keyed by the candidate URL (query string and protocol removed) whose value is
/// <paramref name="url"/>.
/// </returns>
public static Dictionary<string, string> Find(string body, string url, List<string> userNames, List<DomainData> socialDomains, bool returnOnlyNone200 = true)
{
    Dictionary<string, string> foundUrls = new Dictionary<string, string>();

    LinkParser parser = new LinkParser();
    parser.ParseLinksAgility(body, url, true);

    foreach (Request foundUrl in parser.GoodUrls)
    {
        // Candidates are compared without their query string and protocol.
        string foundURL = DomainUtility.StripProtocol(foundUrl.Url.Split('?')[0]);

        if (!SocialDomainUtility.CheckIfSocialMediaSite(foundURL, socialDomains))
        {
            continue;
        }

        if (foundUrls.ContainsKey(foundURL))
        {
            continue;
        }

        if (userNames.Count != 0)
        {
            // Ordinal, case-insensitive match; stops at the first matching name so a URL that
            // matches several names is requested only once.
            bool mentionsUser = userNames.Exists(
                userName => foundURL.IndexOf(userName, System.StringComparison.OrdinalIgnoreCase) >= 0);
            if (!mentionsUser)
            {
                continue;
            }

            // NOTE(review): Ignore is only applied when user names are supplied, as in the
            // original code; presumably its entries are lower-case - confirm.
            if (Ignore.Contains(foundURL.ToLowerInvariant()))
            {
                continue;
            }
        }

        Request request = new Request(DomainUtility.EnsureHTTPS(foundURL));
        RequestUtility.GetWebText(request);

        // NOTE(review): "buymethat" in the request URL is treated like a non-200 answer;
        // presumably a marker for a parked/for-sale landing page - confirm.
        bool isLive = request.Response.Code.Equals("200") && !request.Url.Contains("buymethat");
        if (!isLive || !returnOnlyNone200)
        {
            foundUrls.Add(foundURL, url);
        }
    }

    return foundUrls;
}