Exemple #1
0
        /// <summary>
        /// Executes <paramref name="request"/> via <c>RequestUtility.GetWebText</c> and, when the
        /// response code is "200", records the URL in <c>AppContext.Found</c>, makes sure
        /// <c>AppContext.PortsFound</c> has an entry for it, and invokes the response handler.
        /// </summary>
        /// <param name="request">The request to execute and inspect.</param>
        /// <returns>
        /// <c>true</c> when the response has no error message and its code is "200";
        /// otherwise <c>false</c>.
        /// </returns>
        private static bool CheckRequest(Request request)
        {
            RequestUtility.GetWebText(request);

            // A missing response or a reported error message both count as a failed check.
            if (request.Response == null || !String.IsNullOrEmpty(request.Response.ErrorMessage))
            {
                return false;
            }

            // The static String.Equals tolerates a null Code, unlike Code.Equals("200").
            if (!String.Equals(request.Response.Code, "200"))
            {
                return false;
            }

            AppContext.Found.Add(request.Url);
            if (!AppContext.PortsFound.ContainsKey(request.Url))
            {
                // NOTE(review): an "object not set" (null reference) error was reported on this
                // line on 6/24 - presumably PortsFound was not initialised yet; confirm at the owner.
                AppContext.PortsFound.Add(request.Url, new List<int>());
            }

            _responseHandler.Invoke(request);

            return true;
        }
Exemple #2
0
        /// <summary>
        /// Crawls <paramref name="request"/> and then, recursively, every link reported by
        /// <c>LinkParser.GoodUrls</c> for the downloaded body.
        /// </summary>
        /// <remarks>
        /// Nothing is crawled when <paramref name="step"/> exceeds <c>CrawlerContext.Depth</c>,
        /// when <c>CrawlerContext.SinglePage</c> is set and <paramref name="step"/> is greater than 2,
        /// when <c>IgnoreDirectory</c> returns true for the URL, or when
        /// <c>PageHasBeenCrawled</c> returns true for the request.
        /// A failure while crawling a child link is traced and does not stop the remaining links.
        /// </remarks>
        /// <param name="request">The page to fetch; its <c>Url</c> must be a valid absolute URI.</param>
        /// <param name="step">Current recursion level; each child link is crawled with <c>step + 1</c>.</param>
        private static void CrawlPage(IRequest request, int step)
        {
            if (step > CrawlerContext.Depth)
            {
                return;
            }

            if (CrawlerContext.SinglePage && step > 2)
            {
                return;
            }

            // Built unconditionally, as before, so an invalid URL still throws at this point.
            Uri    tempUri    = new Uri(request.Url);
            string tempDomain = DomainUtility.GetDomainFromUrl(tempUri);

            if (CrawlerContext.IgnoreDirectory.Count != 0 && IgnoreDirectory(request.Url, tempDomain))
            {
                return;
            }

            if (PageHasBeenCrawled(request))
            {
                return;
            }

            CrawlerContext.Pages.Add(request);

            RequestUtility.GetWebText(request);

            // The page counter is notified for every fetched page, including failed ones.
            _pageCounter.Invoke();
            if (request.Response.Error)
            {
                return;
            }

            _responseHandler.Invoke(request);

            LinkParser linkParser = new LinkParser();

            linkParser.ParseLinksAgility(request.Response.Body, request.Url);

            if (CrawlerContext.LightMode)
            {
                // Light mode drops the body once the links have been extracted from it.
                request.Response.Body = "";
            }

            foreach (IRequest link in linkParser.GoodUrls)
            {
                // Blocks here until the pause event is signalled.
                CrawlerContext.PauseEvent.WaitOne(Timeout.Infinite);
                try
                {
                    CrawlPage(link, step + 1);
                }
                catch (Exception exc)
                {
                    // Best effort: one broken link must not abort the crawl, but the failure
                    // is recorded instead of being silently discarded.
                    System.Diagnostics.Trace.TraceWarning(
                        "Failed to crawl {0} (on page at url {1}) - {2}",
                        link.Url,
                        request.Url,
                        exc.Message);
                }
            }

            CrawlerContext.ExhaustedURL.Add(request.Url);
        }
Exemple #3
0
        /// <summary>
        /// Extracts the links of <paramref name="body"/>, keeps those that
        /// <c>SocialDomainUtility.CheckIfSocialMediaSite</c> accepts, requests each of them over HTTPS
        /// and collects the ones whose response code is not "200" (or every one of them when
        /// <paramref name="returnOnlyNone200"/> is <c>false</c>).
        /// </summary>
        /// <param name="body">Page content handed to <c>LinkParser.ParseLinksAgility</c>.</param>
        /// <param name="url">URL of the page; stored as the value of every returned entry.</param>
        /// <param name="userNames">
        /// When non-empty, only links containing one of these names (case-insensitive) and not
        /// listed in <c>Ignore</c> are checked. A null or empty list disables this filter.
        /// </param>
        /// <param name="socialDomains">Domains passed to <c>SocialDomainUtility.CheckIfSocialMediaSite</c>.</param>
        /// <param name="returnOnlyNone200">
        /// <c>true</c> to return only links that did not answer "200" (or whose final URL contains
        /// "buymethat"); <c>false</c> to return every checked link.
        /// </param>
        /// <returns>
        /// A dictionary keyed by the found link (protocol and query string stripped) whose value is
        /// <paramref name="url"/>.
        /// </returns>
        public static Dictionary <string, string> Find(string body, string url, List <string> userNames, List <DomainData> socialDomains, bool returnOnlyNone200 = true)
        {
            Dictionary<string, string> foundUrls = new Dictionary<string, string>();
            LinkParser parser = new LinkParser();

            parser.ParseLinksAgility(body, url, true);

            bool filterByUserName = userNames != null && userNames.Count != 0;

            foreach (Request foundUrl in parser.GoodUrls)
            {
                // Drop the query string, then the protocol, so equal links share one key.
                string foundURL = DomainUtility.StripProtocol(foundUrl.Url.Split('?')[0]);

                if (!SocialDomainUtility.CheckIfSocialMediaSite(foundURL, socialDomains))
                {
                    continue;
                }

                if (foundUrls.ContainsKey(foundURL))
                {
                    continue;
                }

                if (filterByUserName)
                {
                    if (!ContainsAnyUserName(foundURL, userNames))
                    {
                        continue;
                    }

                    // The ignore list only applies when filtering by user name.
                    if (Ignore.Contains(foundURL.ToLowerInvariant()))
                    {
                        continue;
                    }
                }

                // The link is requested exactly once, even when several user names match it.
                bool flagged = IsNon200OrFlagged(foundURL);
                if (flagged || !returnOnlyNone200)
                {
                    foundUrls.Add(foundURL, url);
                }
            }

            return foundUrls;
        }

        /// <summary>Returns true when <paramref name="candidate"/> contains any of the user names, ignoring case.</summary>
        private static bool ContainsAnyUserName(string candidate, List<string> userNames)
        {
            foreach (string userName in userNames)
            {
                if (userName != null && candidate.IndexOf(userName, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Requests <paramref name="foundURL"/> over HTTPS and returns true when the response code is
        /// not "200" or the request URL contains "buymethat".
        /// </summary>
        private static bool IsNon200OrFlagged(string foundURL)
        {
            Request request = new Request(DomainUtility.EnsureHTTPS(foundURL));
            RequestUtility.GetWebText(request);

            // A missing response or a null code is treated like any other non-200 answer.
            bool isOk = request.Response != null && String.Equals(request.Response.Code, "200");

            // NOTE(review): "buymethat" presumably marks a parked / for-sale landing page - confirm.
            return !isOk || request.Url.Contains("buymethat");
        }