Esempio n. 1
0
 /// <summary>
 /// Fetches the page at <c>url.url</c> as UTF-8 text, writes it to a file named after the
 /// current value of <c>count</c>, stores the text in <c>url.html</c>, invokes
 /// <c>crawlerstopped</c> with the entry, and finally hands the text to <c>Parsse</c>.
 /// </summary>
 /// <param name="url">Entry whose <c>url</c> is downloaded and whose <c>html</c> is filled in.</param>
 /// <remarks>
 /// Best effort: no exception leaves this method. Failures are reported through
 /// <see cref="System.Diagnostics.Trace"/> instead of being dropped silently.
 /// </remarks>
 public void Download(url_info url)
 {
     try
     {
         string html;

         // WebClient is IDisposable; release it as soon as the text has been fetched.
         using (WebClient webClient = new WebClient())
         {
             webClient.Encoding = Encoding.UTF8;
             html = webClient.DownloadString(url.url);
         }

         // NOTE(review): count is read here without synchronization. If it changes while
         // downloads are running, two downloads can end up writing the same file - confirm.
         string fileName = count.ToString();
         File.WriteAllText(fileName, html, Encoding.UTF8);

         url.html = html;
         crawlerstopped(this, url);
         Parsse(html, url.url); // parse the page and add newly found links
     }
     catch (Exception ex)
     {
         // Keep the original no-throw contract, but leave a trace of what went wrong so a
         // failed download can be diagnosed.
         System.Diagnostics.Trace.TraceWarning(
             "Download of '{0}' failed: {1}",
             url == null ? "(null)" : url.url,
             ex);
     }
 }
Esempio n. 2
0
        // Event of delegate type crawlevent. Per the original author's note it is meant to hand
        // the url back to the window once its download has completed; nothing in the visible
        // code raises it - TODO confirm where it is fired.
        public event crawlevent crawer;                 // create the event; the url is returned to the window after download completes
        /// <summary>
        /// Seeds <c>urls1</c> with <c>urlstart</c>, reduces <c>urlstart</c> to the part matched by
        /// the host pattern, and then starts one thread running <c>Download</c> for every entry
        /// that is not yet marked <c>processing</c>.
        /// </summary>
        /// <remarks>
        /// Downloads are started while <c>count</c> is at most 20; once the cap is exceeded the
        /// method returns. Previously it kept spinning forever without starting anything.
        /// Already-started downloads are not waited for.
        /// </remarks>
        public void Crawl()
        {
            url_info url_Info = new url_info()
            {
                url = urlstart, processing = false, html = ""
            };

            urls1.Add(url_Info);

            // Keep only the leading "<host>/" portion of the start url.
            // If nothing matches, Match.Value is the empty string.
            string str = @"(www\.){0,1}.*?\..*?/";
            Regex  r   = new Regex(str);
            Match  m   = r.Match(urlstart);

            urlstart = m.Value;

            // Pause between passes that found nothing to start, so waiting for new entries
            // does not burn a full core.
            const int idleDelayMilliseconds = 50;

            while (count <= 20)
            {
                bool startedAny = false;

                // NOTE(review): if download threads add entries to urls1 while this pass is
                // enumerating it, the enumeration can throw - confirm how urls1 is synchronized.
                foreach (var url in urls1)
                {
                    if (url.processing)
                    {
                        continue;
                    }
                    if (count > 20)
                    {
                        break;
                    }

                    url.processing = true;

                    // Capture a variable that is local to this iteration. Capturing one shared
                    // across iterations lets a thread that starts late download a different entry.
                    url_info target = url;
                    var t = new Thread(() => Download(target));
                    t.Start();
                    count++;
                    startedAny = true;
                }

                if (!startedAny)
                {
                    Thread.Sleep(idleDelayMilliseconds);
                }
            }
        }