Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the page at <paramref name="BaiduImgUrl"/> and collects the value of
        /// every <c>"objURL":"..."</c> entry found in the returned text.
        /// </summary>
        /// <param name="BaiduImgUrl">Address passed to <c>HTMLService.DownloadUrl</c>.</param>
        /// <param name="imgUrls">
        /// Receives a new list holding the captured <c>url</c> group of each match,
        /// in the order the matches occur in the downloaded text.
        /// </param>
        public static void CrawlBaiduImg(string BaiduImgUrl, out List <string> imgUrls)
        {
            // Lazy capture between the quotes that follow the "objURL" key.
            const string objUrlPattern = @"""objURL"":""(?<url>.*?)""";

            var pageSource = HTMLService.DownloadUrl(BaiduImgUrl);

            // The output list is created only after the download has returned.
            imgUrls = new List <string>();

            // Walk the matches one at a time, front to back.
            for (Match hit = Regex.Match(pageSource, objUrlPattern); hit.Success; hit = hit.NextMatch())
            {
                imgUrls.Add(hit.Groups["url"].Value);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Downloads the page referenced by <c>project.ImgInputData.Url</c>, passes it through
        /// the project's HTML-code queue, and parses one dequeued page for links and images.
        /// </summary>
        /// <param name="project">Supplies the input URL and the queues that are updated.</param>
        /// <param name="urls">
        /// Receives the result of <c>HTMLService.Parse</c>; every entry is also enqueued
        /// on <c>project.URLData.HTMLUrls</c>.
        /// </param>
        /// <param name="imgUrls">Receives the result of <c>ImgParseService.Parse</c>.</param>
        public static void DefaultCrawl(Project project, out List <string> urls, out List <string> imgUrls)
        {
            // Download the HTML of the current URL and store it on the queue.
            project.HTMLData.HTMLCodes.Enqueue(HTMLService.DownloadUrl(project.ImgInputData.Url));

            // NOTE(review): if HTMLCodes is a FIFO queue that already holds entries, the item
            // taken here is not the page that was just downloaded - confirm this is intended.
            string pageSource = project.HTMLData.HTMLCodes.Dequeue();

            // Extract the URLs through which this page links to further pages.
            urls = HTMLService.Parse(pageSource);
            for (int i = 0; i < urls.Count; i++)
            {
                // Placeholder for link filtering: currently every link is accepted.
                project.URLData.HTMLUrls.Enqueue(urls[i]);
            }

            // Extract the image resources found on this page.
            imgUrls = ImgParseService.Parse(pageSource);
        }