Beispiel #1
0
        /// <summary>
        /// Scans <paramref name="httpUrl"/>, records what is found on it in
        /// <paramref name="scanResult"/>, and recursively scans every full or relative
        /// link found on the page.
        /// </summary>
        /// <remarks>
        /// The method returns without doing anything when the host of
        /// <paramref name="httpUrl"/> does not end with any entry of
        /// <paramref name="targetHosts"/>, or when an equal URL is already stored in
        /// <paramref name="scanResult"/> (in that case only <c>AddQueryString</c> is called).
        /// Hrefs of a page are processed with <see cref="Parallel.ForEach{TSource}(IEnumerable{TSource}, Action{TSource})"/>,
        /// so the <c>scanResult.Add*</c> methods are invoked from several threads.
        /// NOTE(review): this assumes those methods are thread-safe - confirm against ScanResult.
        /// </remarks>
        /// <param name="httpUrl">The URL to scan.</param>
        /// <param name="targetHosts">Host suffixes that are in scope for the scan.</param>
        /// <param name="scanResult">Accumulator that receives URLs, comments and other findings.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown by <c>SingleOrDefault</c> when <paramref name="scanResult"/> already contains
        /// more than one URL equal to <paramref name="httpUrl"/>.
        /// </exception>
        private void RecursiveScan(HttpUrl httpUrl, IList <string> targetHosts, ScanResult scanResult)
        {
            // Host names are case-insensitive identifiers, so compare ordinally and ignore case
            // instead of relying on the current culture.
            // NOTE(review): a plain suffix match also accepts e.g. "notexample.com" for the
            // target "example.com" - confirm whether a dot boundary should be required.
            if (!targetHosts.Any(x => httpUrl.Host.EndsWith(x, StringComparison.OrdinalIgnoreCase)))
            {
                return;
            }

            // The "already stored?" check and the insert must be atomic; otherwise two threads
            // could both decide to scan the same URL.
            lock (this.httpUrlLock)
            {
                var storedHttpUrl = scanResult.GetHttpUrls().SingleOrDefault(x => x.Equals(httpUrl));

                if (storedHttpUrl != null)
                {
                    // NOTE(review): presumably merges the query string of the new URL with the
                    // stored one; the argument direction cannot be verified from here - confirm
                    // against AddQueryString.
                    this.AddQueryString(httpUrl, storedHttpUrl.QueryString);
                    return;
                }

                scanResult.AddHttpUrl(httpUrl);
            }

            Console.WriteLine($"Scanning URL {httpUrl.FullUrl}");

            var node = this.GetDocumentNode(httpUrl);

            // Nothing to analyse when no document was obtained or the document is empty.
            if (node == null || node.OuterLength <= 0)
            {
                return;
            }

            var hrefs = node.GetHrefs();

            scanResult.AddComments(httpUrl, node.GetComments());

            Parallel.ForEach(hrefs, href =>
            {
                switch (UrlParser.GetHrefType(href))
                {
                case HrefType.Anchor:
                    // Anchors are neither recorded nor followed.
                    break;

                case HrefType.BrowserUrl:
                    scanResult.AddBrowserUrl(httpUrl, href);
                    break;

                case HrefType.DataUrl:
                    scanResult.AddDataUrl(httpUrl, href);
                    break;

                case HrefType.FtpUrl:
                {
                    var host     = UrlParser.GetHostFromUrl(href);
                    var path     = UrlParser.GetPathFromUrl(href);
                    var fileName = UrlParser.GetFileNameFromUrl(href);
                    scanResult.AddFtpUrl(httpUrl, new FtpUrl(host, path, fileName));
                    break;
                }

                case HrefType.Javascript:
                    scanResult.AddJavascriptUrl(httpUrl, href);
                    break;

                case HrefType.MailAddress:
                    scanResult.AddMailAddress(httpUrl, href);
                    break;

                case HrefType.UnknownUrl:
                    scanResult.AddUnkownUrl(httpUrl, href);
                    break;

                case HrefType.FullUrl:
                    // Absolute link: parse it on its own and follow it.
                    this.RecursiveScan(UrlParser.GetHttpUrl(href), targetHosts, scanResult);
                    break;

                case HrefType.RelativePath:
                    // Relative link: resolve it against the page currently being scanned.
                    this.RecursiveScan(UrlParser.GetHttpUrl(httpUrl, href), targetHosts, scanResult);
                    break;
                }
            });
        }