Example #1
0
        /// <summary>
        /// Requests every seed URL, then walks the collected results in order,
        /// requesting links that have not been requested yet until
        /// <c>RecursionLimit</c> results have been collected. Finally, every link
        /// of every result is matched against the collected results and flagged
        /// as retrieved and, when the matching result carries an error, as broken.
        /// </summary>
        /// <remarks>
        /// All seeds are always requested, even when their number exceeds
        /// <c>RecursionLimit</c>; the limit only bounds the link-following phase.
        /// </remarks>
        public void Crawl()
        {
            this.HttpRequestResults = new List<HttpRequestResult>();

            foreach (Uri seed in Seeds)
            {
                HttpRequestResult seedResult = BrowserToTest.Get(seed);
                seedResult.Parse();
                HttpRequestResults.Add(seedResult);
            }

            // The list grows while it is being walked, so an index loop is
            // required here; a foreach would fail on the modified collection.
            for (int i = 0; i < HttpRequestResults.Count && HttpRequestResults.Count < RecursionLimit; i++)
            {
                var page = HttpRequestResults[i];
                if (page.Links == null || !IsOnCrawlableHost(page))
                {
                    continue;
                }

                foreach (Link link in page.Links)
                {
                    // Links with Ex set are skipped (presumably Ex holds an
                    // error raised while parsing the link - confirm).
                    if (link.Ex != null)
                    {
                        continue;
                    }

                    bool alreadyRequested = HttpRequestResults.Any(known => known.Equals(link));
                    if (!alreadyRequested && HttpRequestResults.Count < RecursionLimit)
                    {
                        var fetched = BrowserToTest.Get(link.AbsoluteUri);
                        fetched.Parse();
                        HttpRequestResults.Add(fetched);
                    }
                }
            }

            foreach (var source in HttpRequestResults)
            {
                if (source.Links == null)
                {
                    continue;
                }

                foreach (var link in source.Links)
                {
                    foreach (var target in HttpRequestResults)
                    {
                        if (target.Equals(link) && IsOnCrawlableHost(source))
                        {
                            link.WasRetrieved = true;
                            link.IsBroken     = target.Error != null;
                            break;
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Tells whether the host of the given result is in the set returned by
        /// <c>GetSetOfCrawlableHosts()</c>. The host is taken from
        /// <c>ResultUrl</c> when it is set and from <c>RequestUrl</c> otherwise.
        /// </summary>
        /// <param name="result">The request result to check.</param>
        /// <returns>
        /// true when the host is crawlable; false when it is not or when the
        /// result has neither a result URL nor a request URL.
        /// </returns>
        private bool IsOnCrawlableHost(HttpRequestResult result)
        {
            Uri effectiveUrl = result.ResultUrl ?? result.RequestUrl;
            if (effectiveUrl == null)
            {
                return false;
            }

            return GetSetOfCrawlableHosts().Contains(effectiveUrl.Host);
        }
Example #2
0
 /// <summary>
 /// Creates a parser bound to the given request result.
 /// </summary>
 /// <param name="HttpRequestResult">
 /// The request result to keep on this parser's <c>HttpRequestResult</c>
 /// member; it is stored as-is, without validation.
 /// </param>
 public HtmlParser(HttpRequestResult HttpRequestResult)
 {
     this.HttpRequestResult = HttpRequestResult;
 }
 /// <summary>
 /// Determines whether the given request result was requested for this
 /// instance's <c>AbsoluteUri</c>. Used to link the set of
 /// HttpRequestResults with the Links of each HttpRequestResult, so that
 /// pages containing broken links can be reported.
 /// </summary>
 /// <param name="obj">
 /// The request result whose <c>RequestUrl</c> is compared; may be null.
 /// </param>
 /// <returns>
 /// true when <c>AbsoluteUri</c> is not null and equals
 /// <c>obj.RequestUrl</c>; false otherwise, including when
 /// <paramref name="obj"/> is null.
 /// </returns>
 public bool Equals(HttpRequestResult obj)
 {
     // A null argument can never match; without this guard the access to
     // obj.RequestUrl below would throw a NullReferenceException.
     if (object.ReferenceEquals(obj, null))
     {
         return false;
     }

     return AbsoluteUri != null && AbsoluteUri.Equals(obj.RequestUrl);
 }
Example #4
0
        /// <summary>
        /// Returns the result of requesting <paramref name="url"/>. A result
        /// already stored in <c>HttpRequestResults</c> for the same request URL
        /// is returned as-is; otherwise the URL is requested with this
        /// instance's settings (redirect handling, user agent, accept headers,
        /// credentials) and a new result is built.
        /// </summary>
        /// <param name="url">The URL to request.</param>
        /// <returns>
        /// The stored or newly created result. Request failures are not thrown;
        /// they are reported through the result's <c>Error</c> member. Only
        /// results of requests that completed without an exception are added to
        /// <c>HttpRequestResults</c>, so failed URLs are requested again on the
        /// next call.
        /// </returns>
        public HttpRequestResult Get(Uri url)
        {
            HttpRequestResult cached = HttpRequestResults.FirstOrDefault(known => known.RequestUrl.Equals(url));
            if (cached != null)
            {
                return cached;
            }

            var results = new HttpRequestResult
            {
                RequestUrl  = url,
                Start       = DateTime.Now,
                BrowserUsed = this
            };

            StreamReader streamReader = null;
            WebResponse  response     = null;
            try
            {
                var request = (HttpWebRequest)WebRequest.Create(url);
                request.MaximumAutomaticRedirections = MaximumAutomaticRedirections;
                request.AllowAutoRedirect            = AllowAutoRedirect;
                request.UserAgent = UserAgent;
                request.Accept    = Accept;
                request.Headers.Add("Accept-Charset", AcceptCharset);
                request.Headers.Add("Accept-Language", AcceptLanguage);
                request.Credentials = this.Credentials;

                response     = request.GetResponse();
                streamReader = new StreamReader(response.GetResponseStream());

                // The body is always read in full, but only kept for CSS and
                // HTML responses.
                string content = streamReader.ReadToEnd();

                results.ContentType = response.ContentType;
                if (results.IsCss || results.IsHtml)
                {
                    results.Content = content;
                }

                // Address is the URL that actually answered, which differs
                // from the requested one after redirects.
                results.ResultUrl = request.Address;

                HttpRequestResults.Add(results);
            }
            catch (WebException exception)
            {
                var error = new HttpValidationError()
                {
                    AbsoluteUri = url,
                    Error       = exception,
                    Message     = exception.Message
                };

                // Safe cast: an invalid cast thrown inside this catch block
                // would escape Get instead of being reported on the result.
                var httpResponse = exception.Response as HttpWebResponse;
                if (exception.Status == WebExceptionStatus.ProtocolError && httpResponse != null)
                {
                    error.HttpCode = (int)httpResponse.StatusCode;
                }

                results.Error = error;

                // The response attached to the exception holds the underlying
                // connection until it is closed; release it so that many
                // failing URLs do not exhaust the connection pool.
                if (exception.Response != null)
                {
                    try { exception.Response.Close(); }
                    catch { /* best-effort cleanup */ }
                }
            }
            catch (Exception exception)
            {
                results.Error = new HttpValidationError()
                {
                    AbsoluteUri = url,
                    Error       = exception,
                    Message     = exception.Message
                };
            }
            finally
            {
                results.End = DateTime.Now;

                // Cleanup is best-effort: a failure while closing must not
                // replace the outcome already recorded on the result.
                if (streamReader != null)
                {
                    try { streamReader.Close(); }
                    catch { /* best-effort cleanup */ }
                }
                if (response != null)
                {
                    try { response.Close(); }
                    catch { /* best-effort cleanup */ }
                }
            }

            return results;
        }
Example #5
0
 /// <summary>
 /// Creates a parser bound to the given request result.
 /// </summary>
 /// <param name="HttpRequestResult">
 /// The request result to keep on this parser's <c>HttpRequestResult</c>
 /// member; it is stored as-is, without validation.
 /// </param>
 public CssParser(HttpRequestResult HttpRequestResult)
 {
     this.HttpRequestResult = HttpRequestResult;
 }