コード例 #1
0
        /// <summary>
        /// Sends <paramref name="Req"/> and, when the response is HTML and the next
        /// depth level is still allowed, queues the follow-up requests derived from it
        /// (redirects, link clicks, form submissions, the parent directory and,
        /// optionally, guessed file and directory names) at <c>Depth + 1</c>.
        /// </summary>
        /// <param name="Req">
        /// Request to crawl. Its source, cookies and (when configured) the User-Agent
        /// and special header are set on it before it is sent.
        /// </param>
        /// <param name="Depth">
        /// Depth of this request; nothing is done when it exceeds <c>MaxDepth</c>.
        /// </param>
        /// <param name="Scraped">
        /// When true the request is recorded as crawled even if <c>IsA404</c> flagged
        /// its response. Within this method, requests taken from the response are
        /// queued with true and guessed directory/file requests with false.
        /// </param>
        void Crawl(Request Req, int Depth, bool Scraped)
        {
            // Entry guards, evaluated left to right exactly as the individual checks were.
            if (Stopped || Depth > MaxDepth || WasCrawled(Req) || !CanCrawl(Req))
            {
                return;
            }

            // Record the signature of this request before it is sent.
            // NOTE(review): presumably WasCrawled consults PageSignatures - confirm.
            lock (PageSignatures)
            {
                PageSignatures.Add(GetPageSignature(Req));
            }

            Req.Source = RequestSource.Probe;
            Req.SetCookie(Cookies);
            if (UserAgent.Length > 0)
            {
                Req.Headers.Set("User-Agent", UserAgent);
            }
            // SpecialHeader is read as a name/value pair: [0] is the header name, [1] its value.
            if (SpecialHeader[0] != null)
            {
                Req.Headers.Set(SpecialHeader[0], SpecialHeader[1]);
            }

            // Stopped is re-checked on both sides of the send.
            if (Stopped)
            {
                return;
            }
            Response Res = Req.Send();
            if (Stopped)
            {
                return;
            }

            Cookies.Add(Req, Res);
            bool Is404File = IsA404(Req, Res);

            // Only HTML responses are processed further, and only when the next depth
            // level is still within MaxDepth. Returning here also skips the
            // CrawledRequests / site-map recording at the end of this method.
            if (!Res.IsHtml || Depth + 1 > MaxDepth)
            {
                return;
            }

            int NextDepth = Depth + 1;

            // Requests derived from the response itself are queued as scraped (true).
            foreach (Request Redirect in GetRedirects(Req, Res))
            {
                AddToCrawlQueue(Redirect, NextDepth, true);
            }

            foreach (Request LinkClick in GetLinkClicks(Req, Res))
            {
                AddToCrawlQueue(LinkClick, NextDepth, true);
            }

            foreach (Request FormSubmission in GetFormSubmissions(Req, Res))
            {
                AddToCrawlQueue(FormSubmission, NextDepth, true);
            }

            // Body-less GET for the directory of the current URL; it is queued itself
            // (unless the URL already ends in '/') and is the base for the guesses below.
            Request DirCheck = Req.GetClone();
            DirCheck.Method = "GET";
            DirCheck.Body.RemoveAll();
            DirCheck.Url = DirCheck.UrlDir;

            // URLs are not linguistic text, so compare ordinally rather than with the
            // current culture's rules.
            if (!Req.Url.EndsWith("/", System.StringComparison.Ordinal))
            {
                AddToCrawlQueue(DirCheck, NextDepth, false);
            }

            // Guessed names are only tried when enabled and the current response was
            // not classified as a 404; they are queued as not scraped (false).
            if (PerformDirAndFileGuessing && !Is404File)
            {
                foreach (string File in FileNamesToCheck)
                {
                    Request FileCheck = DirCheck.GetClone();
                    FileCheck.Url = FileCheck.Url + File;
                    AddToCrawlQueue(FileCheck, NextDepth, false);
                }

                foreach (string Dir in DirNamesToCheck)
                {
                    Request DirectoryCheck = DirCheck.GetClone();
                    DirectoryCheck.Url = DirectoryCheck.Url + Dir + "/";
                    AddToCrawlQueue(DirectoryCheck, NextDepth, false);
                }
            }

            if (Stopped)
            {
                return;
            }

            // Record the request unless it was a guess whose response looked like a 404.
            if (Scraped || !Is404File)
            {
                lock (CrawledRequests)
                {
                    CrawledRequests.Enqueue(Req);
                }
                IronUpdater.AddToSiteMap(Req);
            }
        }
コード例 #2
0
ファイル: CookieStore.cs プロジェクト: war-and-code/IronWASP
 /// <summary>
 /// Forwards the request/response pair to <c>StaticCookieStore.Add</c>.
 /// </summary>
 /// <param name="Req">Request passed through unchanged as the first argument.</param>
 /// <param name="Res">Response passed through unchanged as the second argument.</param>
 public static void AddToStore(Request Req, Response Res)
 {
     // NOTE(review): presumably this records cookies set by Res for Req's target - confirm against StaticCookieStore.Add.
     StaticCookieStore.Add(Req, Res);
 }