/// <summary>
/// Verifies that Crawler.ExtractImageUrls returns the src attribute value of
/// every img tag in the downloaded HTML, in document order, ignoring other tags.
/// </summary>
public void ExtractImageUrls_Should_Return_Collection_Of_Image_Src_Content()
{
    // Arrange: stub the downloader so the test never touches the network.
    var mock = new Mock<IHtmlDownloader>();
    mock.Setup(h => h.DownloadHtml(It.IsAny<string>()))
        .Returns(
            "<html>" +
            "<img src=\"nakov.png\"/>" +
            "<span>Hello</span>" +
            "<img src=\"courses/inner/background.jpeg\"/>" +
            "</html>");

    var crawler = new Crawler(mock.Object);

    // The two img src values embedded in the stubbed HTML above.
    var expectedImageUrls = new[]
    {
        "nakov.png",
        "courses/inner/background.jpeg"
    };

    // Act
    var imageUrls = crawler.ExtractImageUrls(string.Empty)
        .ToList();

    // Assert
    CollectionAssert.AreEqual(expectedImageUrls, imageUrls);
}
/// <summary>
/// Entry point: crawls a fixed news page and prints each extracted image URL
/// prefixed with its zero-based position.
/// </summary>
static void Main()
{
    var crawler = new Crawler();
    var urls = crawler.ExtractImageUrls("http://clubz.bg/");

    var index = 0;
    foreach (var imageUrl in urls)
    {
        Console.WriteLine("{0, -3}: {1}", index++, imageUrl);
    }
}
/// <summary>
/// Entry point: reads URLs from the console in an endless loop and crawls
/// each one. An empty URL aborts with an exception.
/// </summary>
private static void Main()
{
    crawler = new Crawler();
    while (true)
    {
        string url = Console.ReadLine();

        // Console.ReadLine returns null once the input stream is closed;
        // treat that the same as an empty line so RunCrawl never gets null.
        // ArgumentException is the conventional type for an invalid value
        // (AggregateException is reserved for aggregated task faults).
        if (string.IsNullOrEmpty(url))
        {
            throw new ArgumentException("URL cannot be empty");
        }

        RunCrawl(url);
    }
}
/// <summary>
/// Resolves an IHtmlProvider from the given Ninject kernel, crawls a fixed
/// news page, and prints every extracted image URL with its zero-based position.
/// </summary>
private static void RunCrawler(StandardKernel kernel)
{
    var htmlProvider = kernel.Get<IHtmlProvider>();
    var crawler = new Crawler(htmlProvider);

    var position = 0;
    foreach (var imageUrl in crawler.ExtractImageUrls("http://dariknews.bg/"))
    {
        Console.WriteLine("{0, -3}: {1}", position, imageUrl);
        position++;
    }
}
/// <summary>
/// Verifies that Crawler.ExtractImageUrls yields the expected img src values.
/// NOTE(review): this variant constructs the crawler without a stubbed
/// downloader, so the expected URLs depend on what the default crawler
/// fetches for an empty URL — confirm against the Crawler implementation.
/// </summary>
public void ExtractImageUrls_Should_Return_Collection_Of_Image_Src_Content()
{
    // Arrange
    var crawler = new Crawler();
    var expectedImageUrls = new[]
    {
        "nakov.png",
        "courses/inner/background.jpeg"
    };

    // Act
    var imageUrls = crawler.ExtractImageUrls(string.Empty).ToList();

    // Assert
    CollectionAssert.AreEqual(expectedImageUrls, imageUrls);
}
/// <summary>
/// Crawls the NASA SSE grid page for this instance's (_lat, _lon) coordinate
/// and saves the response under the data folder, retrying until the download
/// succeeds. Skips the download when the target file already exists.
/// </summary>
public void Do()
{
    // Stopwatch is the reliable way to measure elapsed time
    // (DateTime.Now subtraction drifts if the system clock changes).
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // The output file is named after the coordinate pair; build the path once.
    string fileName = _lat + "," + _lon + ".html";
    string filePath = Path.Combine("data", fileName);

    // Create the data folder; this is a no-op if it already exists.
    Directory.CreateDirectory("data");

    // If the file is already there, this coordinate was crawled before.
    if (File.Exists(filePath))
    {
        Console.WriteLine("已存在文件:" + fileName);
        return;
    }

    // Number of attempts for this download (the first attempt counts as 1).
    int count = 1;

    // Build the request URL; the query-string pattern was copied from the
    // browser's address bar while navigating the NASA site.
    string link = "https://eosweb.larc.nasa.gov/cgi-bin/sse/grid.cgi?&num=182092&lat=" + _lat +
                  "&hgt=100&submit=Submit&veg=17&sitelev=&email=&p=grid_id&step=2&lon=" + _lon;

    Crawler crawler = new Crawler(link, filePath);

    // Keep retrying until the crawl reports success.
    while (!crawler.Crawl())
    {
        ++count;
        Console.WriteLine("第" + count + "次尝试抓取" + fileName);
    }

    // Report total elapsed time and the number of attempts.
    Console.WriteLine("耗时" + stopwatch.Elapsed + ",尝试" + count + "次");
}