/// <summary>
/// Collects candidate URLs from a response and appends them to the shared
/// <c>ScrapedUrls</c> list.
/// </summary>
/// <remarks>
/// The text that is scanned depends on the response type:
/// HTML responses contribute their comments and JavaScript blocks, JSON
/// responses contribute every parameter value returned by
/// <c>FormatPlugin.GetJsonParameters</c>, and JavaScript responses contribute
/// the whole body. Independently of that, a redirect response has its body
/// parsed as HTML and its links resolved against the request.
/// URLs are de-duplicated within a single call; the shared list itself is
/// not checked for entries added by earlier calls.
/// </remarks>
/// <param name="Req">Request that produced <paramref name="Res"/>; used to resolve relative URLs.</param>
/// <param name="Res">Response whose content is scanned.</param>
public static void ScrapeUrls(Request Req, Response Res)
{
    List<string> LocalScrapedUrls = new List<string>();

    if (Res.IsHtml)
    {
        foreach (string Comment in Res.Html.Comments)
        {
            AddDistinctUrls(LocalScrapedUrls, ScrapeUrls(Req, Comment));
        }
        foreach (string Script in Res.Html.GetJavaScript())
        {
            AddDistinctUrls(LocalScrapedUrls, ScrapeUrls(Req, Script));
        }
    }
    else if (Res.IsJson)
    {
        FormatParameters JsonParams = FormatPlugin.GetJsonParameters(Res);
        for (int i = 0; i < JsonParams.Count; i++)
        {
            AddDistinctUrls(LocalScrapedUrls, ScrapeUrls(Req, JsonParams.GetValue(i)));
        }
    }
    else if (Res.IsJavaScript)
    {
        AddDistinctUrls(LocalScrapedUrls, ScrapeUrls(Req, Res.BodyString));
    }

    if (Res.IsRedirect)
    {
        try
        {
            HTML ResHtml = new HTML(Res.BodyString);
            foreach (string Link in ResHtml.Links)
            {
                string FullUrl = Req.RelativeUrlToAbsoluteUrl(Link);
                // A link back to the request itself is not a new URL.
                if (!FullUrl.Equals(Req.FullUrl) && !LocalScrapedUrls.Contains(FullUrl))
                {
                    LocalScrapedUrls.Add(FullUrl);
                }
            }
        }
        catch
        {
            // Deliberate best effort: a redirect body that cannot be parsed, or a
            // link that cannot be resolved, must not discard the URLs found above.
        }
    }

    // Nothing found: do not take the shared lock at all.
    if (LocalScrapedUrls.Count == 0)
    {
        return;
    }

    lock (ScrapedUrls)
    {
        ScrapedUrls.AddRange(LocalScrapedUrls);
    }
}

/// <summary>
/// Appends to <paramref name="Target"/> every entry of <paramref name="Found"/>
/// that it does not already contain, preserving discovery order.
/// </summary>
private static void AddDistinctUrls(List<string> Target, List<string> Found)
{
    foreach (string Url in Found)
    {
        if (!Target.Contains(Url))
        {
            Target.Add(Url);
        }
    }
}
/// <summary>
/// Scans free-form text for URL candidates and returns the distinct absolute
/// URLs that pass validation, in the order they were first found.
/// </summary>
/// <remarks>
/// Two passes are made over <paramref name="Text"/>:
/// one looks for each entry of <c>UrlsStartMarkers</c> and reads forward from
/// the marker; the other looks for each entry of <c>UrlExtensionsToScrape</c>
/// and reads both backward and forward from the extension.
/// Every candidate is resolved against <paramref name="Req"/> and kept only
/// if it passes <c>TryCollectScrapedUrl</c>.
/// </remarks>
/// <param name="Req">Request used to turn relative candidates into absolute URLs.</param>
/// <param name="Text">Text to scan. Null or empty text yields an empty list.</param>
/// <returns>A new list of distinct absolute URLs; never null.</returns>
public static List<string> ScrapeUrls(Request Req, string Text)
{
    List<string> Urls = new List<string>();
    if (string.IsNullOrEmpty(Text))
    {
        return Urls;
    }

    // Pass 1: candidates that begin with a known URL start marker.
    foreach (string UrlStartMarker in UrlsStartMarkers)
    {
        int Pointer = 0;
        while (Pointer < Text.Length)
        {
            int UrlStartIndex = Text.IndexOf(UrlStartMarker, Pointer);
            if (UrlStartIndex < 0)
            {
                break;
            }

            string Quote = GetStartQuote(Text, UrlStartIndex);
            string UrlValue = ReadTillEndOfUrl(Quote, Text, UrlStartIndex);

            // Accepted: skip the whole URL. Rejected: skip just this marker.
            int Advance;
            if (TryCollectScrapedUrl(Req, UrlValue, Urls) && UrlValue != null)
            {
                Advance = UrlValue.Length;
            }
            else
            {
                Advance = UrlStartMarker.Length;
            }

            // Always move past the match that was just examined. Advancing from
            // the match index (not the old cursor) avoids re-validating the same
            // candidate; the minimum step of 1 guarantees the loop terminates.
            if (Advance < 1)
            {
                Advance = 1;
            }
            Pointer = UrlStartIndex + Advance;
        }
    }

    // Pass 2: candidates that contain a known file extension.
    foreach (string FileExt in UrlExtensionsToScrape)
    {
        int Pointer = 0;
        while (Pointer < Text.Length)
        {
            int ExtensionStartIndex = Text.IndexOf(FileExt, Pointer);
            if (ExtensionStartIndex < 0)
            {
                break;
            }

            // The URL is rebuilt from the part before the extension and the
            // part from the extension onward.
            string UrlStartPart = ReadTillStartOfUrl(Text, ExtensionStartIndex - 1);
            string Quote = GetStartQuote(Text, ExtensionStartIndex - UrlStartPart.Length);
            string UrlEndPart = ReadTillEndOfUrl(Quote, Text, ExtensionStartIndex);
            string Url = string.Concat(UrlStartPart, UrlEndPart);

            int Advance;
            if (TryCollectScrapedUrl(Req, Url, Urls) && UrlEndPart != null)
            {
                Advance = UrlEndPart.Length;
            }
            else
            {
                Advance = FileExt.Length;
            }

            // Same forward-progress rule as in pass 1.
            if (Advance < 1)
            {
                Advance = 1;
            }
            Pointer = ExtensionStartIndex + Advance;
        }
    }

    return Urls;
}

/// <summary>
/// Resolves <paramref name="Candidate"/> against <paramref name="Req"/> and,
/// if the result is acceptable, makes sure it is present in <paramref name="Urls"/>.
/// </summary>
/// <returns>
/// True when the resolved URL was accepted (whether newly added or already
/// present); false when it was rejected or any step threw.
/// </returns>
private static bool TryCollectScrapedUrl(Request Req, string Candidate, List<string> Urls)
{
    try
    {
        string FullUrl = Req.RelativeUrlToAbsoluteUrl(Candidate);
        if (Tools.HasInvalidUrlCharacters(FullUrl) || FullUrl.Equals(Req.FullUrl))
        {
            return false;
        }

        // Constructed only for its side effect: if this throws, the candidate is
        // rejected by the catch below.
        // NOTE(review): presumably the constructor throws on an unparsable URL — confirm.
        new Request(FullUrl);

        if (!Urls.Contains(FullUrl))
        {
            Urls.Add(FullUrl);
        }
        return true;
    }
    catch
    {
        // Deliberate best effort: most text fragments are not real URLs, so a
        // failure here simply means "not a URL".
        return false;
    }
}