Example #1
        public void ExtractImageUrls_Should_Return_Collection_Of_Image_Src_Content()
        {
            // Arrange
            var mock = new Mock<IHtmlDownloader>();
            mock.Setup(h => h.DownloadHtml(It.IsAny<string>()))
                .Returns(
                    "<html>" + "<img src=\"nakov.png\"/>" + "<span>Hello</span>"
                    + "<img src=\"courses/inner/background.jpeg\"/>" + "</html>");

            // To make the mock throw for a null URL instead:
            // mock.Setup(h => h.DownloadHtml(It.Is((string url) => url == null)))
            //     .Throws(new ArgumentNullException());

            // Alternative: use a hand-rolled stub instead of Moq.
            // var fakeHtmlDownloader = new FakeHtmlDownloader();
            var crawler = new Crawler(mock.Object);

            var expectedImageUrls = new[]
            {
                // The src values that should be extracted from the mocked HTML above
                "nakov.png",
                "courses/inner/background.jpeg"
            };

            // Act
            var imageUrls = crawler.ExtractImageUrls(string.Empty)
                .ToList();

            
            // Assert
            CollectionAssert.AreEqual(expectedImageUrls, imageUrls);
        }
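
The mock setup above implies a downloader contract with a single DownloadHtml(string) member, and the commented-out line mentions a hand-rolled FakeHtmlDownloader. A minimal sketch of both follows; the original definitions are not shown in these examples, so the exact member names are an assumption inferred from the test.

// Downloader contract inferred from the mock: one method that returns the page HTML.
public interface IHtmlDownloader
{
    string DownloadHtml(string url);
}

// A hand-rolled stub in the spirit of the FakeHtmlDownloader mentioned above
// (illustrative only; the original class is not part of this example).
public class FakeHtmlDownloader : IHtmlDownloader
{
    public string DownloadHtml(string url)
    {
        return "<html>"
             + "<img src=\"nakov.png\"/>"
             + "<span>Hello</span>"
             + "<img src=\"courses/inner/background.jpeg\"/>"
             + "</html>";
    }
}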
Example #2
        static void Main()
        {
            var crawler = new Crawler();
            var urls = crawler.ExtractImageUrls("http://clubz.bg/");

            int count = 0;
            foreach (var url in urls)
            {
                Console.WriteLine("{0, -3}: {1}", count, url);
                count++;
            }
        }
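
The Crawler implementation itself is not included in these examples. As a rough sketch of what ExtractImageUrls might look like, assuming the IHtmlDownloader dependency injected in Example #1 and a simple regular expression over <img> tags (the regex and field names are illustrative, not the original code):

using System.Collections.Generic;
using System.Text.RegularExpressions;

public class Crawler
{
    private readonly IHtmlDownloader downloader;

    public Crawler(IHtmlDownloader downloader)
    {
        this.downloader = downloader;
    }

    public IEnumerable<string> ExtractImageUrls(string url)
    {
        // Download the page and yield the src attribute of every <img> tag.
        string html = this.downloader.DownloadHtml(url);
        foreach (Match match in Regex.Matches(html, "<img[^>]*src=\"([^\"]+)\""))
        {
            yield return match.Groups[1].Value;
        }
    }
}

The parameterless constructor used in Examples #2 and #5 presumably wires up a default HTML downloader internally; that wiring is omitted from the sketch.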
Example #3
        private static void Main()
        {
            // "crawler" is presumably a class-level field used by the RunCrawl helper (not shown in this snippet).
            crawler = new Crawler();
            while (true)
            {
                string url = Console.ReadLine();
                if (url == string.Empty)
                {
                    throw new ArgumentException("URL cannot be empty");
                }

                RunCrawl(url);
            }
        }
        private static void RunCrawler(StandardKernel kernel)
        {
            var htmlProvider = kernel.Get<IHtmlProvider>();
            var crawler = new Crawler(htmlProvider);
            var urls = crawler.ExtractImageUrls("http://dariknews.bg/");

            int count = 0;
            foreach (var url in urls)
            {
                Console.WriteLine("{0, -3}: {1}", count, url);
                count++;
            }
        }
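
RunCrawler expects a StandardKernel that can already resolve IHtmlProvider. A minimal Ninject composition root could look like the following; HtmlProvider is a placeholder name, since the binding code is not shown in this example.

using Ninject;

private static void Main()
{
    var kernel = new StandardKernel();
    // HtmlProvider stands in for whatever concrete class implements IHtmlProvider.
    kernel.Bind<IHtmlProvider>().To<HtmlProvider>();
    RunCrawler(kernel);
}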
Example #5
        public void ExtractImageUrls_Should_Return_Collection_Of_Image_Src_Content()
        {
            // Arrange
            var crawler = new Crawler();

            var expectedImageUrls = new[]
            {
                // The src values the crawler should extract
                "nakov.png",
                "courses/inner/background.jpeg"
            };

            // Act
            var imageUrls = crawler.ExtractImageUrls(string.Empty)
                .ToList();

            
            // Assert
            CollectionAssert.AreEqual(expectedImageUrls, imageUrls);
        }
        // Runs one crawl task.
        public void Do()
        {
            // Record the start time of the task.
            DateTime dt = DateTime.Now;
            // Build the output file name.
            string fileName = _lat + "," + _lon + ".html";

            // Fixes an earlier bug: the program creates the data folder itself; if it already exists, this is a no-op.
            DirectoryInfo dir = new DirectoryInfo("data\\");
            dir.Create();

            // If the file already exists, this point has already been crawled, so cancel the task.
            if ((new FileInfo("data\\" + fileName)).Exists)
            {
                Console.WriteLine("File already exists: " + fileName);
                return;
            }

            // Track how many attempts were made to fetch this data.
            int count = 1;
            // Build the request URL; nothing clever here, just the pattern observed in the browser address bar on the NASA page.
            string link = "https://eosweb.larc.nasa.gov/cgi-bin/sse/grid.cgi?&num=182092&lat=" + _lat + "&hgt=100&submit=Submit&veg=17&sitelev=&email=&p=grid_id&step=2&lon=" + _lon;
            // Create a new crawler instance.
            Crawler crawler = new Crawler(link, "data\\" + fileName);

            // Keep retrying the crawl until it succeeds.
            while (!(crawler.Crawl()))
            {
                ++count;
                Console.WriteLine("Attempt " + count + " to fetch " + fileName);
            }

            // Compute and display the total elapsed time.
            TimeSpan ts = DateTime.Now - dt;
            Console.WriteLine("Elapsed " + ts + ", attempts: " + count);
        }
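
Do() relies on a different Crawler shape than the earlier examples: one constructed with a URL and a target file path, whose Crawl() returns false on failure so the caller can retry. That class is not shown here; the following is only an illustrative sketch of such a crawler using WebClient.

using System.IO;
using System.Net;

public class Crawler
{
    private readonly string url;
    private readonly string filePath;

    public Crawler(string url, string filePath)
    {
        this.url = url;
        this.filePath = filePath;
    }

    // Download the page and save it to the target file; report failure instead of throwing,
    // so the caller's retry loop can simply call Crawl() again.
    public bool Crawl()
    {
        try
        {
            using (var client = new WebClient())
            {
                File.WriteAllText(this.filePath, client.DownloadString(this.url));
            }

            return true;
        }
        catch (WebException)
        {
            return false;
        }
    }
}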