예제 #1
0
        /// <summary>
        /// Collects URLs referenced by a response and appends them to <c>ScrapedUrls</c>.
        /// </summary>
        /// <remarks>
        /// What is scanned depends on the response:
        /// HTML responses have their comments and the scripts returned by
        /// <c>Res.Html.GetJavaScript()</c> scanned; JSON responses have every parameter
        /// value scanned; JavaScript responses have the whole body scanned. Independently
        /// of the content type, a redirect response has its body parsed as HTML and the
        /// links found there are collected.
        /// Results are gathered in a local list first so that the lock on
        /// <c>ScrapedUrls</c> is only held for the final append.
        /// </remarks>
        /// <param name="Req">Request that produced <paramref name="Res"/>; used to resolve relative URLs.</param>
        /// <param name="Res">Response whose content is scanned.</param>
        public static void ScrapeUrls(Request Req, Response Res)
        {
            List <string> LocalScrapedUrls = new List <string>();

            if (Res.IsHtml)
            {
                foreach (string Comment in Res.Html.Comments)
                {
                    LocalScrapedUrls.AddRange(ScrapeUrls(Req, Comment));
                }
                foreach (string Script in Res.Html.GetJavaScript())
                {
                    LocalScrapedUrls.AddRange(ScrapeUrls(Req, Script));
                }
            }
            else if (Res.IsJson)
            {
                FormatParameters JsonParams = FormatPlugin.GetJsonParameters(Res);
                for (int i = 0; i < JsonParams.Count; i++)
                {
                    LocalScrapedUrls.AddRange(ScrapeUrls(Req, JsonParams.GetValue(i)));
                }
            }
            else if (Res.IsJavaScript)
            {
                LocalScrapedUrls.AddRange(ScrapeUrls(Req, Res.BodyString));
            }

            if (Res.IsRedirect)
            {
                try
                {
                    HTML ResHtml = new HTML(Res.BodyString);
                    foreach (string Link in ResHtml.Links)
                    {
                        // Resolve each link in its own try block: previously a single link
                        // that failed to resolve aborted the loop and silently dropped every
                        // link that came after it.
                        try
                        {
                            string FullUrl = Req.RelativeUrlToAbsoluteUrl(Link);
                            if (!FullUrl.Equals(Req.FullUrl) && !LocalScrapedUrls.Contains(FullUrl))
                            {
                                LocalScrapedUrls.Add(FullUrl);
                            }
                        }
                        catch
                        {
                            // Best effort: skip only the link that could not be resolved.
                        }
                    }
                }
                catch
                {
                    // Best effort: a redirect body that cannot be parsed as HTML
                    // simply contributes no links.
                }
            }

            lock (ScrapedUrls)
            {
                ScrapedUrls.AddRange(LocalScrapedUrls);
            }
        }
예제 #2
0
        /// <summary>
        /// Scans free text for URL candidates and returns the distinct absolute URLs found.
        /// </summary>
        /// <remarks>
        /// Two passes are made over <paramref name="Text"/>:
        /// first every occurrence of each entry in <c>UrlsStartMarkers</c> is treated as
        /// the start of a URL; then every occurrence of each entry in
        /// <c>UrlExtensionsToScrape</c> is treated as part of a URL whose beginning is
        /// recovered by reading backwards. Each candidate is resolved against
        /// <paramref name="Req"/>; candidates that fail to resolve, contain invalid URL
        /// characters, or equal the request's own URL are ignored.
        /// </remarks>
        /// <param name="Req">Request used to turn relative candidates into absolute URLs.</param>
        /// <param name="Text">Text to scan. Null or empty text yields an empty list.</param>
        /// <returns>Absolute URLs in order of discovery, without duplicates.</returns>
        public static List <string> ScrapeUrls(Request Req, string Text)
        {
            List <string> Urls = new List <string>();

            // Nothing to scan; also avoids a NullReferenceException on Text.Length below.
            if (string.IsNullOrEmpty(Text))
            {
                return Urls;
            }

            foreach (string UrlStartMarker in UrlsStartMarkers)
            {
                // An empty marker matches at every position and carries no information.
                if (string.IsNullOrEmpty(UrlStartMarker))
                {
                    continue;
                }

                int Pointer = 0;
                while (Pointer < Text.Length)
                {
                    // Markers are literal tokens, so compare ordinally rather than by culture.
                    int UrlStartIndex = Text.IndexOf(UrlStartMarker, Pointer, System.StringComparison.Ordinal);
                    if (UrlStartIndex < 0)
                    {
                        break;
                    }

                    string Quote    = GetStartQuote(Text, UrlStartIndex);
                    string UrlValue = ReadTillEndOfUrl(Quote, Text, UrlStartIndex);

                    // Always move past the current match so the same candidate is never
                    // evaluated twice and the loop is guaranteed to terminate.
                    int NextPointer = UrlStartIndex + 1;
                    if (TryAddScrapedUrl(Req, UrlValue, Urls))
                    {
                        // Accepted: skip the whole URL that was just consumed.
                        int ConsumedEnd = UrlStartIndex + (UrlValue == null ? 0 : UrlValue.Length);
                        if (ConsumedEnd > NextPointer)
                        {
                            NextPointer = ConsumedEnd;
                        }
                    }
                    Pointer = NextPointer;
                }
            }

            foreach (string FileExt in UrlExtensionsToScrape)
            {
                if (string.IsNullOrEmpty(FileExt))
                {
                    continue;
                }

                int Pointer = 0;
                while (Pointer < Text.Length)
                {
                    int ExtensionStartIndex = Text.IndexOf(FileExt, Pointer, System.StringComparison.Ordinal);
                    if (ExtensionStartIndex < 0)
                    {
                        break;
                    }

                    // The extension sits in the middle/end of the URL: read backwards for the
                    // leading part, forwards for the trailing part, then join the two.
                    string UrlStartPart = ReadTillStartOfUrl(Text, ExtensionStartIndex - 1);
                    string Quote        = GetStartQuote(Text, ExtensionStartIndex - UrlStartPart.Length);
                    string UrlEndPart   = ReadTillEndOfUrl(Quote, Text, ExtensionStartIndex);
                    string Url          = string.Concat(UrlStartPart, UrlEndPart);

                    int NextPointer = ExtensionStartIndex + 1;
                    if (TryAddScrapedUrl(Req, Url, Urls))
                    {
                        int ConsumedEnd = ExtensionStartIndex + (UrlEndPart == null ? 0 : UrlEndPart.Length);
                        if (ConsumedEnd > NextPointer)
                        {
                            NextPointer = ConsumedEnd;
                        }
                    }
                    Pointer = NextPointer;
                }
            }

            return Urls;
        }

        /// <summary>
        /// Resolves one URL candidate against the request and records it if it is acceptable.
        /// </summary>
        /// <param name="Req">Request used to resolve the candidate and to exclude its own URL.</param>
        /// <param name="Candidate">Raw URL text extracted from the scanned content.</param>
        /// <param name="Urls">Result list; the resolved URL is appended when not already present.</param>
        /// <returns>
        /// True when the candidate resolved to an acceptable URL (whether newly added or
        /// already present); false when it was rejected or any step threw.
        /// </returns>
        private static bool TryAddScrapedUrl(Request Req, string Candidate, List <string> Urls)
        {
            try
            {
                string FullUrl = Req.RelativeUrlToAbsoluteUrl(Candidate);
                if (Tools.HasInvalidUrlCharacters(FullUrl) || FullUrl.Equals(Req.FullUrl))
                {
                    return false;
                }

                // Kept from the original code, where the instance is never used afterwards.
                // NOTE(review): presumably acts as a validity check because the constructor
                // throws on an unusable URL - confirm against Request.
                Request TestReq = new Request(FullUrl);

                if (!Urls.Contains(FullUrl))
                {
                    Urls.Add(FullUrl);
                }
                return true;
            }
            catch
            {
                // Best effort: a candidate that cannot be resolved or parsed is not a URL.
                return false;
            }
        }