Esempio n. 1
0
 public void initSeeds(string[] seeds)
 {
     foreach (var seed in seeds)
     {
         LinkQueue.addUnVisitedUrl(seed);
     }
 }
Esempio n. 2
0
        public void spyding(string[] seeds)
        {
            var filter = new Filter((url) =>
            {
                return(url.StartsWith("http://www.baidu.com"));
            });

            //初始化爬虫种子
            this.initSeeds(seeds);
            while ((!LinkQueue.getUnVisitedUrl().isEmpty()) && LinkQueue.visitedUrlNum < 1000)
            {
                string visitUrl = LinkQueue.dequeueUnVisitedUrl();
                Console.WriteLine($"开始爬取:{visitUrl}");
                if (visitUrl == null)
                {
                    Console.WriteLine("visitUrl为空,跳出当前循环");
                    continue;
                }
                new DownloadFile(visitUrl);
                LinkQueue.addVisitedUrl(visitUrl);
                var links = HtmlParser.extractLinks(visitUrl);

                Console.WriteLine($"抓取链接");
                foreach (var link in links)
                {
                    var fixLink = link.StartsWith("//") ? "http:" + link : link;
                    LinkQueue.addUnVisitedUrl(fixLink);
                }
            }
        }