Esempio n. 1
0
        /// <summary>
        /// Crawls the site rooted at <paramref name="url"/>: probes the URL, downloads and parses
        /// the site's robots.txt, runs the crawler, and writes the completed URLs to the CSV
        /// file at <c>outputFilePath</c>.
        /// </summary>
        /// <param name="url">URL of the site to crawl.</param>
        private static void ProcessUrl(string url)
        {
            content = new bo.Web.WebPageContent();
            // NOTE(review): the request is built before the URL is validated, as in the original;
            // confirm SetRequestFromURL tolerates malformed input.
            content.SetRequestFromURL(url);

            // Guard: stop when the URL is rejected or no response could be obtained for it.
            if (!bo.Helpers.WebHelper.IsUrlValid(url) || !content.TryGetResponse())
            {
                // Report the full message (previously the bare URL was printed and the
                // composed message was discarded).
                string info = "Invalid URL: " + url;
                Console.WriteLine(info);
                log.Info(info);
                return;
            }

            // Now we can get the robots file
            Uri baseUri   = new Uri(url);
            Uri robotsUri = new Uri(bo.Helpers.WebHelper.CombineUrl(baseUri, "/robots.txt"));
            content = new bo.Web.WebPageContent();
            content.SetRequestFromURL(robotsUri.AbsoluteUri);

            // When the robots.txt request yields no response, an empty string is parsed instead.
            string responseString = string.Empty;
            if (content.TryGetResponse())
            {
                responseString = bo.Helpers.WebHelper.ResponseToString(content.response);
            }

            bo.Web.RobotsTxt robots = bo.Helpers.RobotsHelper.ParseRobotsTxt(responseString);
            bo.Web.PageCrawlDetail pageDetails = new bo.Web.PageCrawlDetail(baseUri);

            try
            {
                // Start the crawling and block until it has finished.
                bo.Crawler.CrawlerProcessing crw = new bo.Crawler.CrawlerProcessing(NoOfThreads, SleepTime, baseUri, robots);
                crw.Crawl(pageDetails).Wait();
                bo.Helpers.FileHelper.PageDetailToCSV(crw.CrawlList.UrlsCompleted, outputFilePath);
            }
            catch (AggregateException e)
            {
                // Flatten nested aggregates and print each exception itself. Printing only
                // ex.InnerException loses the actual error whenever it has no inner exception.
                foreach (var ex in e.Flatten().InnerExceptions)
                {
                    Console.WriteLine(ex);
                }

                // Keep the console open so the errors can be read.
                Console.ReadLine();
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Checks that <c>WebPageContent.TryGetResponse</c> returns true, first for a mocked
        /// request whose response reports HTTP 200, then for a request built from a local URL.
        /// </summary>
        public void ValidHttpResponse()
        {
            // Scenario 1: mocked request returning a mocked response with status OK.
            var responseMock = new Moq.Mock<HttpWebResponse>();
            responseMock.Setup(r => r.StatusCode).Returns(HttpStatusCode.OK);

            var requestMock = new Moq.Mock<HttpWebRequest>();
            requestMock.Setup(r => r.GetResponse()).Returns(responseMock.Object);

            var mockedContent = new bo.Web.WebPageContent();
            mockedContent.request = requestMock.Object;

            Assert.IsTrue(mockedContent.TryGetResponse());

            // Scenario 2: request created from a URL.
            // NOTE(review): presumably needs something answering on localhost:4174 - confirm.
            // Alternative target used previously: http://www.tyre-shopper.co.uk/robots.txt
            var urlContent = new bo.Web.WebPageContent();
            urlContent.SetRequestFromURL("http://localhost:4174/");

            Assert.IsTrue(urlContent.TryGetResponse());
        }