/// <summary>
/// Downloads the page at <paramref name="link"/> synchronously, adds every
/// extracted same-host URL that is in neither <c>NewUrls</c> nor <c>OldUrls</c>
/// to <c>NewUrls</c>, and then passes the page content to <c>GetInfoOnPage</c>.
/// </summary>
/// <param name="link">Address of the page to download.</param>
/// <remarks>
/// A <see cref="System.Net.WebException"/> is reported on the console and the whole
/// operation is retried, but only a bounded number of times. Once the attempts are
/// exhausted the method returns without processing the page.
/// </remarks>
static void AccessingUrl(string link)
{
    // Upper bound on attempts. Retrying by unbounded recursion would overflow the
    // stack for a URL that never becomes reachable.
    const int maxAttempts = 3;

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            using (WebClient webClient = new WebClient())
            {
                webClient.Encoding = System.Text.Encoding.UTF8;
                string content = webClient.DownloadString(link);

                List<string> urls = LinkExtractor.ExtractUrlSameHost(content, link);
                foreach (string url in urls)
                {
                    if (!NewUrls.Contains(url) && !OldUrls.Contains(url))
                    {
                        NewUrls.Add(url);
                    }
                }

                GetInfoOnPage(content, link);
            }

            return;
        }
        catch (System.Net.WebException ex)
        {
            // ex.Response is frequently null and otherwise only prints a type name;
            // the exception message is what describes the failure.
            Console.WriteLine("Error: " + ex.Message);

            if (attempt < maxAttempts)
            {
                Console.WriteLine("Try again...");
            }
        }
    }
}
/// <summary>
/// Starts a background task that downloads <paramref name="address"/> asynchronously.
/// When the download completes, the result is processed by
/// <see cref="ProcessCheckedDownload"/>, which may start the download of the next
/// entry of <c>NewUrls</c>.
/// </summary>
/// <param name="address">Address of the page to download.</param>
private void AccessingWithCheck(Uri address)
{
    new Task(() =>
    {
        // Deliberately not wrapped in `using`: DownloadStringAsync returns before the
        // download finishes, so a `using` block would dispose the client while the
        // request is still in flight. The completion handler disposes it instead.
        WebClient webClient = new WebClient();
        webClient.Encoding = System.Text.Encoding.UTF8;

        webClient.DownloadStringCompleted += (sender, e) =>
        {
            try
            {
                ProcessCheckedDownload(address, e);
            }
            finally
            {
                webClient.Dispose();
            }
        };

        try
        {
            webClient.DownloadStringAsync(address);
        }
        catch
        {
            // The completion handler will not run if starting the request threw.
            webClient.Dispose();
            throw;
        }
    }).Start();
}

/// <summary>
/// Handles a finished download started by <see cref="AccessingWithCheck"/>: records
/// the address in <c>OldUrls</c>, adds newly found links under <c>Host</c> to
/// <c>NewUrls</c>, raises <c>ResultEvent</c>, and continues with the first remaining
/// entry of <c>NewUrls</c>. On a failed download only <c>ResultEvent</c> is raised
/// (with the error) and no further download is started.
/// </summary>
/// <param name="address">Address that was downloaded.</param>
/// <param name="e">Completion data of the download.</param>
private void ProcessCheckedDownload(Uri address, DownloadStringCompletedEventArgs e)
{
    var handler = ResultEvent;

    if (e.Error != null)
    {
        // Guarded like the success path: there may be no subscribers.
        if (handler != null)
        {
            handler.Invoke(new ResultEventArgs(e.Error));
        }

        return;
    }

    string result = e.Result;
    string current = address.ToString();

    if ((NewUrls.Count == 0) || (NewUrls.Contains(current) && !OldUrls.Contains(current)))
    {
        OldUrls.Add(current);
    }

    List<string> urls = LinkExtractor.ExtractUrl(result);
    foreach (string url in urls)
    {
        // Links come from downloaded page content, so they may be relative or
        // malformed; skip those rather than letting `new Uri` throw.
        Uri parsed;
        if (!Uri.TryCreate(url, UriKind.Absolute, out parsed) || !Host.IsBaseOf(parsed))
        {
            continue;
        }

        // Paths without a file extension get a trailing slash so that
        // "http://host/dir" and "http://host/dir/" are stored as one entry.
        string strUrl = url;
        if (!strUrl.EndsWith("/", StringComparison.Ordinal) && !Path.HasExtension(parsed.AbsolutePath))
        {
            strUrl = url + "/";
        }

        if (!NewUrls.Contains(strUrl) && !OldUrls.Contains(strUrl))
        {
            NewUrls.Add(strUrl);
        }
    }

    // Drop the head of the list. The list is empty when it was empty on entry and
    // the page produced no new links, so indexing it unguarded would throw.
    // NOTE(review): when the list was empty on entry, the head removed here is the
    // first link just discovered, not the address just downloaded - confirm intent.
    if (NewUrls.Count > 0)
    {
        NewUrls.Remove(NewUrls[0]);
    }

    if (handler != null)
    {
        handler.Invoke(new ResultEventArgs(current, result, urls, NewUrls, OldUrls));
    }

    if (NewUrls.Count > 0)
    {
        AccessingWithCheck(new Uri(NewUrls[0]));
    }
}
/// <summary>
/// Starts a background task that downloads <paramref name="address"/> asynchronously.
/// On success the address is added to <c>OldUrls</c>, and every extracted same-host
/// link is written to the console and added to <c>NewUrls</c> unless it is already
/// in <c>NewUrls</c> or <c>OldUrls</c>. A failed download is ignored.
/// </summary>
/// <param name="address">Address of the page to download.</param>
private void AccessingPageWithoutCheck(Uri address)
{
    new Task(() =>
    {
        // Deliberately not wrapped in `using`: DownloadStringAsync returns before the
        // download finishes, so a `using` block would dispose the client while the
        // request is still in flight. The completion handler disposes it instead.
        WebClient webClient = new WebClient();
        webClient.Encoding = System.Text.Encoding.UTF8;

        webClient.DownloadStringCompleted += (sender, e) =>
        {
            try
            {
                if (e.Error != null)
                {
                    // Failed downloads are not reported by this method.
                    return;
                }

                string current = address.ToString();
                OldUrls.Add(current);

                List<string> urls = LinkExtractor.ExtractUrlSameHost(e.Result, current);
                foreach (string url in urls)
                {
                    // Links come from downloaded page content; skip any that cannot
                    // be parsed rather than letting `new Uri` throw.
                    Uri parsed;
                    if (!Uri.TryCreate(url, UriKind.Absolute, out parsed))
                    {
                        continue;
                    }

                    // Paths without a file extension get a trailing slash so that
                    // "http://host/dir" and "http://host/dir/" are stored as one entry.
                    string strUrl = url;
                    if (!strUrl.EndsWith("/", StringComparison.Ordinal) && !Path.HasExtension(parsed.AbsolutePath))
                    {
                        strUrl = url + "/";
                    }

                    if (!NewUrls.Contains(strUrl) && !OldUrls.Contains(strUrl))
                    {
                        NewUrls.Add(strUrl);
                    }

                    Console.WriteLine(strUrl);
                }
            }
            finally
            {
                webClient.Dispose();
            }
        };

        try
        {
            webClient.DownloadStringAsync(address);
        }
        catch
        {
            // The completion handler will not run if starting the request threw.
            webClient.Dispose();
            throw;
        }
    }).Start();
}