/// <summary>
/// Converts a list of href strings into <see cref="HttpUrl"/> instances.
/// </summary>
/// <param name="sourceHttpUrl">URL that relative paths are resolved against.</param>
/// <param name="paths">Href strings to convert.</param>
/// <returns>
/// One <see cref="HttpUrl"/> per entry classified as a full URL or a relative path,
/// in input order; entries of any other href type are skipped.
/// </returns>
private IList<HttpUrl> GetHttpUrls(HttpUrl sourceHttpUrl, IList<string> paths)
{
    var resolved = new List<HttpUrl>();

    foreach (var candidate in paths)
    {
        switch (UrlParser.GetHrefType(candidate))
        {
            case HrefType.FullUrl:
                // Already absolute: parse it as-is.
                resolved.Add(UrlParser.GetHttpUrl(candidate));
                break;

            case HrefType.RelativePath:
                // Relative: resolve against the source URL.
                resolved.Add(UrlParser.GetHttpUrl(sourceHttpUrl, candidate));
                break;
        }
    }

    return resolved;
}
/// <summary>
/// Probes the source URL for every relative path listed in the configured dictionary files
/// and returns the URLs that the browser loads with a success status code.
/// </summary>
/// <param name="sourcehttpUrl">Base URL each dictionary entry is resolved against.</param>
/// <param name="errorStrings">
/// Passed through to the browser's <c>LoadWebsite</c> call unchanged
/// (presumably used to detect or collect error pages; confirm against the browser implementation).
/// </param>
/// <returns>The resolved URLs whose response reported <c>IsSuccessStatusCode</c>.</returns>
public IList<HttpUrl> Find(HttpUrl sourcehttpUrl, List<string> errorStrings)
{
    var httpUrls = new List<HttpUrl>();

    foreach (var filePath in this.dictionaryFilePaths)
    {
        // Dispose the reader deterministically so the file handle is released
        // even when a request inside the loop throws.
        using (var file = new StreamReader(filePath))
        {
            string relativeFilePath;
            while ((relativeFilePath = file.ReadLine()) != null)
            {
                // Each line of the dictionary file is treated as a path below the site root.
                var httpUrl = UrlParser.GetHttpUrl(sourcehttpUrl, "/" + relativeFilePath);
                var response = this.browser.LoadWebsite(httpUrl.FullUrl, errorStrings);
                if (response.IsSuccessStatusCode)
                {
                    httpUrls.Add(httpUrl);
                }
            }
        }
    }

    return httpUrls;
}
/// <summary>
/// Scans the document at <paramref name="httpUrl"/>, records its comments and every href it
/// contains in <paramref name="scanResult"/>, and recurses into each HTTP link found.
/// </summary>
/// <param name="httpUrl">URL to scan.</param>
/// <param name="targetHosts">
/// Host suffixes that limit the scan; a URL is only processed when its host ends with one of them.
/// </param>
/// <param name="scanResult">Accumulator for visited URLs and all findings.</param>
private void RecursiveScan(HttpUrl httpUrl, IList<string> targetHosts, ScanResult scanResult)
{
    // Host names are not linguistic text, so compare ordinally instead of with the current culture.
    if (!targetHosts.Any(x => httpUrl.Host.EndsWith(x, StringComparison.Ordinal)))
    {
        return;
    }

    // The visited check and the registration must happen atomically, because this method
    // is re-entered concurrently from the Parallel.ForEach below.
    lock (this.httpUrlLock)
    {
        var storedHttpUrl = scanResult.GetHttpUrls().SingleOrDefault(x => x.Equals(httpUrl));
        if (storedHttpUrl != null)
        {
            // Already registered: hand the query string over to AddQueryString and stop here.
            this.AddQueryString(httpUrl, storedHttpUrl.QueryString);
            return;
        }

        scanResult.AddHttpUrl(httpUrl);
    }

    Console.WriteLine($"Scanning URL {httpUrl.FullUrl}");

    var node = this.GetDocumentNode(httpUrl);
    if (node.OuterLength <= 0)
    {
        // Empty document: nothing to extract.
        return;
    }

    var hrefs = node.GetHrefs();
    scanResult.AddComments(httpUrl, node.GetComments());

    Parallel.ForEach(hrefs, href =>
    {
        switch (UrlParser.GetHrefType(href))
        {
            case HrefType.Anchor:
                // In-page anchors are ignored.
                break;

            case HrefType.BrowserUrl:
                scanResult.AddBrowserUrl(httpUrl, href);
                break;

            case HrefType.DataUrl:
                scanResult.AddDataUrl(httpUrl, href);
                break;

            case HrefType.FtpUrl:
            {
                var host = UrlParser.GetHostFromUrl(href);
                var path = UrlParser.GetPathFromUrl(href);
                var fileName = UrlParser.GetFileNameFromUrl(href);
                scanResult.AddFtpUrl(httpUrl, new FtpUrl(host, path, fileName));
                break;
            }

            case HrefType.Javascript:
                scanResult.AddJavascriptUrl(httpUrl, href);
                break;

            case HrefType.MailAddress:
                scanResult.AddMailAddress(httpUrl, href);
                break;

            case HrefType.UnknownUrl:
                scanResult.AddUnkownUrl(httpUrl, href);
                break;

            case HrefType.FullUrl:
                // Absolute link: follow it directly.
                this.RecursiveScan(UrlParser.GetHttpUrl(href), targetHosts, scanResult);
                break;

            case HrefType.RelativePath:
                // Relative link: resolve against the page being scanned, then follow it.
                this.RecursiveScan(UrlParser.GetHttpUrl(httpUrl, href), targetHosts, scanResult);
                break;
        }
    });
}