Example #1
 /// <summary>
 /// Records a downloaded page in the result grid. Safe to call from a worker
 /// thread: the BindingSource mutation is marshalled onto the UI thread.
 /// </summary>
 /// <param name="crawler">The crawler that downloaded the page (currently unused).</param>
 /// <param name="url">State of the URL that was just downloaded.</param>
 private void PageDownloaded(Crawler crawler, urlstates url)
 {
     // The original held a lock around Invoke(), which can deadlock: a worker
     // blocks inside Invoke while the UI thread waits on the same lock.
     // Because every mutation of resultBindingSource is funnelled through the
     // UI thread here, that thread already serializes the adds and no lock is
     // needed. Computing Index inside the action also fixes a race where
     // Count was read off the UI thread.
     Action action = () =>
     {
         var pageInfo = new { Index = resultBindingSource.Count + 1, URL = url.url };
         resultBindingSource.Add(pageInfo);
     };

     if (this.InvokeRequired)
     {
         this.Invoke(action);
     }
     else
     {
         action();
     }
 }
Example #2
 /// <summary>
 /// Downloads the page at <paramref name="url"/>, saves the HTML to a file
 /// named after the current download count, notifies the UI, and parses the
 /// page for further links.
 /// </summary>
 /// <param name="url">URL state to process; its html member is filled in on success.</param>
 public void Process(urlstates url)
 {
     try
     {
         // NOTE(review): WebClient is obsolete; HttpClient is the modern
         // replacement, but that would require an async rewrite of callers.
         // The using statement fixes the original IDisposable leak.
         using (WebClient webClient = new WebClient())
         {
             webClient.Encoding = Encoding.UTF8;
             string html = webClient.DownloadString(url.url);

             // Persist the raw HTML; the file name is the crawler's running
             // download count (no extension, relative to the working dir).
             string fileName = count.ToString();
             File.WriteAllText(fileName, html, Encoding.UTF8);

             url.html = html;
             PageDownloaded(this, url);
             Parse(html, url.url); // Parse the page and enqueue newly found links.
         }
     }
     catch (Exception)
     {
         // Best-effort crawl: a failed download is deliberately skipped.
         // TODO(review): log the failure so dead links are at least visible.
     }
 }
Example #3
        /// <summary>
        /// Seeds the URL queue with the start URL, derives the site prefix used
        /// to keep the crawl on the starting host, then dispatches one worker
        /// thread per pending URL until more than 20 downloads have started.
        /// </summary>
        public void Crawl()
        {
            urlstates surl = new urlstates()
            {
                url = startUrl, processing = false, html = ""
            };

            urls.Add(surl);

            // Extract the host prefix (e.g. "www.example.com/") from the start
            // URL; used elsewhere to restrict the crawl to the starting site.
            string str = @"(www\.){0,1}.*?\..*?/";
            Regex  r   = new Regex(str);
            Match  m   = r.Match(startUrl);

            startWith = m.Value;

            // The original looped `while (true)` forever: the `count > 20`
            // break only exited the inner foreach. Terminate the outer loop
            // on the same condition so Crawl() actually returns.
            while (count <= 20)
            {
                // Snapshot the queue: worker threads (via Parse) append new
                // links to `urls`, and enumerating the live collection while
                // it is modified throws InvalidOperationException.
                // NOTE(review): the copy itself still races with concurrent
                // Add calls; a concurrent collection would be the full fix.
                foreach (var url in new List<urlstates>(urls))
                {
                    if (url.processing)
                    {
                        continue;
                    }
                    if (count > 20)
                    {
                        break;
                    }

                    url.processing = true;

                    // Capture a per-iteration local: the original captured a
                    // variable shared across iterations, so a late-starting
                    // thread could process a different URL than intended.
                    // (The dead `current == null` check after an unconditional
                    // assignment has been removed.)
                    urlstates current = url;
                    var t = new Thread(() => Process(current));
                    t.Start();
                    count++;
                }
            }
        }