/// <summary>
/// Verifies that <c>PageCrawlDetail.LoadUris</c> extracts the expected number of links
/// from two saved tyre-shopper.co.uk pages (home and about-us).
/// </summary>
public void GetPageCrawlerDetailTest()
{
    // Fix: removed two WebPageContent locals that were allocated but never used,
    // and collapsed the duplicated arrange/act/assert sequence into one helper.
    AssertAllLinksCount("tyre-shopper.co.uk.home.html", "http://www.tyre-shopper.co.uk", 37);
    AssertAllLinksCount("tyre-shopper.co.uk.aboutUs.html", "http://www.tyre-shopper.co.uk/about-us/", 22);
}

/// <summary>
/// Loads a saved HTML fixture, crawls it as if it came from <paramref name="pageUrl"/>,
/// and asserts the total number of links found.
/// </summary>
/// <param name="fileName">Name of the HTML fixture under the unit-test data folder.</param>
/// <param name="pageUrl">The live URL the fixture represents; used to resolve relative links.</param>
/// <param name="expectedLinkCount">Expected size of <c>AllLinks</c> after <c>LoadUris</c>.</param>
private void AssertAllLinksCount(string fileName, string pageUrl, int expectedLinkCount)
{
    string path = GetUnitTestDataFilePath(fileName);
    string page = GetFileContents(fileName);

    bo.Web.PageCrawlDetail pageCrawlDetail = new bo.Web.PageCrawlDetail(new Uri(path));
    pageCrawlDetail.PageUri = new Uri(pageUrl);
    pageCrawlDetail.LoadContent(page, HttpStatusCode.OK);
    pageCrawlDetail.LoadUris();

    // AreEqual reports the actual count on failure, unlike IsTrue(count == n).
    Assert.AreEqual(expectedLinkCount, pageCrawlDetail.AllLinks.Count);
}
/// <summary>
/// Verifies that <c>WebPageContent.TryGetResponse</c> reports failure when the
/// underlying request yields a 404 (Not Found) response.
/// </summary>
public void InvalidHttpResponse()
{
    // Arrange: a mocked response pinned to 404, wired into a mocked request.
    var notFoundResponse = new Moq.Mock<HttpWebResponse>();
    notFoundResponse.Setup(r => r.StatusCode).Returns(HttpStatusCode.NotFound);

    var requestMock = new Moq.Mock<HttpWebRequest>();
    requestMock.Setup(r => r.GetResponse()).Returns(notFoundResponse.Object);

    var pageContent = new bo.Web.WebPageContent { request = requestMock.Object };

    // Act + Assert: a non-success status must make TryGetResponse return false.
    Assert.IsFalse(pageContent.TryGetResponse());
}
/// <summary>
/// Validates <paramref name="url"/>, fetches and parses its /robots.txt, then crawls
/// the site starting from that URL and writes the completed-page details to CSV.
/// </summary>
/// <param name="url">Absolute URL of the site to crawl.</param>
private static void ProcessUrl(string url)
{
    // First probe the URL itself before bothering with robots.txt.
    content = new bo.Web.WebPageContent();
    content.SetRequestFromURL(url);
    if (bo.Helpers.WebHelper.IsUrlValid(url) && content.TryGetResponse())
    {
        // Fetch the site's robots.txt; a missing/failed fetch falls through
        // to parsing an empty string (i.e. no crawl restrictions).
        Uri baseUri = new Uri(url);
        Uri robotsUri = new Uri(bo.Helpers.WebHelper.CombineUrl(baseUri, "/robots.txt"));
        content = new bo.Web.WebPageContent();
        content.SetRequestFromURL(robotsUri.AbsoluteUri);
        string responseString = string.Empty;
        if (content.TryGetResponse())
        {
            responseString = bo.Helpers.WebHelper.ResponseToString(content.response);
        }
        bo.Web.RobotsTxt robots = bo.Helpers.RobotsHelper.ParseRobotsTxt(responseString);

        bo.Web.PageCrawlDetail pageDetails = new bo.Web.PageCrawlDetail(baseUri);
        try
        {
            // Start the crawl and block until it completes, then export results.
            bo.Crawler.CrawlerProcessing crw = new bo.Crawler.CrawlerProcessing(NoOfThreads, SleepTime, baseUri, robots);
            crw.Crawl(pageDetails).Wait();
            bo.Helpers.FileHelper.PageDetailToCSV(crw.CrawlList.UrlsCompleted, outputFilePath);
        }
        catch (AggregateException e)
        {
            // Wait() wraps task failures in AggregateException; surface each cause.
            foreach (var ex in e.InnerExceptions)
            {
                Console.WriteLine(ex.InnerException);
            }
            // Keeps the console window open so the operator can read the errors.
            Console.ReadLine();
        }
    }
    else
    {
        // Bug fix: the composed message was built but the bare url was printed/logged,
        // so the output never said why the url appeared. Emit the message instead.
        string info = "Invalid URL: " + url;
        Console.WriteLine(info);
        log.Info(info);
    }
}
/// <summary>
/// Verifies that <c>WebPageContent.TryGetResponse</c> succeeds for a 200 (OK) response,
/// first against a mocked request and then against a real local endpoint.
/// </summary>
public void ValidHttpResponse()
{
    // --- Part 1: mocked request pinned to 200 OK ---
    var okResponse = new Moq.Mock<HttpWebResponse>();
    okResponse.Setup(r => r.StatusCode).Returns(HttpStatusCode.OK);

    var requestMock = new Moq.Mock<HttpWebRequest>();
    requestMock.Setup(r => r.GetResponse()).Returns(okResponse.Object);

    var pageContent = new bo.Web.WebPageContent { request = requestMock.Object };
    Assert.IsTrue(pageContent.TryGetResponse());

    // --- Part 2: real request against a local server ---
    // NOTE(review): this half requires a server listening on localhost:4174;
    // it will fail when run without that environment — confirm this is intended.
    pageContent = new bo.Web.WebPageContent();
    pageContent.SetRequestFromURL("http://localhost:4174/");
    Assert.IsTrue(pageContent.TryGetResponse());
}