Esempio n. 1
0
 /// <summary>
 /// Downloads the page at <paramref name="link"/>, queues every extracted same-host URL that
 /// is in neither <c>NewUrls</c> nor <c>OldUrls</c>, and then hands the page content to
 /// <c>GetInfoOnPage</c>.
 /// </summary>
 /// <remarks>
 /// A <see cref="System.Net.WebException"/> triggers a retry, but only up to a fixed number of
 /// attempts. Retrying is done with a loop rather than by recursion, so a URL that keeps
 /// failing can neither loop forever nor exhaust the stack. After the last failed attempt the
 /// method returns without throwing.
 /// </remarks>
 /// <param name="link">Address of the page to download.</param>
 static void AccessingUrl(string link)
 {
     // Upper bound on download attempts for a single link.
     const int MaxAttempts = 3;

     for (int attempt = 1; attempt <= MaxAttempts; attempt++)
     {
         try
         {
             using (WebClient webClient = new WebClient())
             {
                 webClient.Encoding = System.Text.Encoding.UTF8;
                 string content = webClient.DownloadString(link);
                 List<string> urls = LinkExtractor.ExtractUrlSameHost(content, link);
                 foreach (var url in urls)
                 {
                     // Queue only URLs that are not already pending and not already visited.
                     if (!NewUrls.Contains(url) && !OldUrls.Contains(url))
                     {
                         NewUrls.Add(url);
                     }
                 }
                 GetInfoOnPage(content, link);
             }

             // Success: no further attempts needed.
             return;
         }
         catch (System.Net.WebException ex)
         {
             // ex.Response is null for non-protocol failures (DNS, timeout, ...), so report
             // the exception message, which is always available.
             Console.WriteLine("Error: " + ex.Message);
             if (attempt < MaxAttempts)
             {
                 Console.WriteLine("Try again...");
             }
         }
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Starts an asynchronous download of <paramref name="address"/>. When it completes, the
        /// address is recorded in <c>OldUrls</c>, every extracted link that lies under
        /// <c>Host</c> is queued in <c>NewUrls</c>, <c>ResultEvent</c> is raised, and the crawl
        /// continues with the next queued URL.
        /// </summary>
        /// <remarks>
        /// The method returns immediately; all work happens in the download-completed callback.
        /// A failed download is reported through <c>ResultEvent</c> with the error and does not
        /// continue the crawl.
        /// </remarks>
        /// <param name="address">Address of the page to download.</param>
        private void AccessingWithCheck(Uri address)
        {
            // NOTE(review): the Task wrapper is kept from the original code; presumably it keeps
            // the completion callback off the caller's synchronization context - confirm.
            new Task(() =>
            {
                // Deliberately not in a using block: DownloadStringAsync returns before the
                // download finishes, so the client is disposed by the completion handler.
                WebClient webClient = new WebClient();
                try
                {
                    webClient.Encoding = System.Text.Encoding.UTF8;
                    webClient.DownloadStringCompleted +=
                        (sender, e) =>
                    {
                        try
                        {
                            if (e.Error != null)
                            {
                                // Guard against having no subscribers, as the success path does.
                                var errorHandler = ResultEvent;
                                if (errorHandler != null)
                                {
                                    errorHandler.Invoke(new ResultEventArgs(e.Error));
                                }
                                return;
                            }

                            string result = e.Result;
                            string visited = address.ToString();
                            if ((NewUrls.Count == 0) || (NewUrls.Contains(visited) && !OldUrls.Contains(visited)))
                            {
                                OldUrls.Add(visited);
                            }

                            List<string> urls = LinkExtractor.ExtractUrl(result);
                            foreach (var url in urls)
                            {
                                // Skip links that are not absolute URIs (they used to throw
                                // UriFormatException here) and links outside the crawled host.
                                Uri parsed;
                                if (!Uri.TryCreate(url, UriKind.Absolute, out parsed) || !Host.IsBaseOf(parsed))
                                {
                                    continue;
                                }

                                // Append a trailing slash to extension-less paths so that
                                // "/dir" and "/dir/" are queued as the same entry.
                                string strUrl = url;
                                if (!strUrl.EndsWith("/", StringComparison.Ordinal) && !Path.HasExtension(parsed.AbsolutePath))
                                {
                                    strUrl = url + "/";
                                }

                                if (!NewUrls.Contains(strUrl) && !OldUrls.Contains(strUrl))
                                {
                                    NewUrls.Add(strUrl);
                                }
                            }

                            // Drop the head of the queue; the queue can be empty when the page
                            // produced no same-site links, so check before removing.
                            if (NewUrls.Count > 0)
                            {
                                NewUrls.RemoveAt(0);
                            }

                            var handler = ResultEvent;
                            if (handler != null)
                            {
                                handler.Invoke(new ResultEventArgs(address.ToString(), result, urls, NewUrls, OldUrls));
                            }

                            if (NewUrls.Count > 0)
                            {
                                AccessingWithCheck(new Uri(NewUrls[0]));
                            }
                        }
                        finally
                        {
                            // The download is over, so the client can be released now.
                            webClient.Dispose();
                        }
                    };
                    webClient.DownloadStringAsync(address);
                }
                catch
                {
                    // The download never started, so the completion handler will not run.
                    webClient.Dispose();
                    throw;
                }
            }).Start();
        }
Esempio n. 3
0
 /// <summary>
 /// Starts an asynchronous download of <paramref name="address"/>. When it completes, the
 /// address is added to <c>OldUrls</c>, every extracted same-host URL that is in neither
 /// <c>NewUrls</c> nor <c>OldUrls</c> is queued in <c>NewUrls</c>, and each processed URL is
 /// written to the console.
 /// </summary>
 /// <remarks>
 /// The method returns immediately; all work happens in the download-completed callback.
 /// A failed download is reported on the console and changes neither collection.
 /// </remarks>
 /// <param name="address">Address of the page to download.</param>
 private void AccessingPageWithoutCheck(Uri address)
 {
     // NOTE(review): the Task wrapper is kept from the original code; presumably it keeps the
     // completion callback off the caller's synchronization context - confirm.
     new Task(() =>
     {
         // Deliberately not in a using block: DownloadStringAsync returns before the download
         // finishes, so the client is disposed by the completion handler.
         WebClient webClient = new WebClient();
         try
         {
             webClient.Encoding = System.Text.Encoding.UTF8;
             webClient.DownloadStringCompleted +=
                 (sender, e) =>
             {
                 try
                 {
                     if (e.Error != null)
                     {
                         // Report the failure instead of dropping it silently.
                         Console.WriteLine("Error: " + e.Error.Message);
                         return;
                     }

                     string visited = address.ToString();
                     OldUrls.Add(visited);
                     List<string> urls = LinkExtractor.ExtractUrlSameHost(e.Result, visited);
                     foreach (var url in urls)
                     {
                         string strUrl = url;
                         if (!strUrl.EndsWith("/", StringComparison.Ordinal))
                         {
                             // A link that is not an absolute URI used to throw
                             // UriFormatException here; skip it instead.
                             Uri parsed;
                             if (!Uri.TryCreate(strUrl, UriKind.Absolute, out parsed))
                             {
                                 continue;
                             }

                             // Append a trailing slash to extension-less paths so that
                             // "/dir" and "/dir/" are queued as the same entry.
                             if (!Path.HasExtension(parsed.AbsolutePath))
                             {
                                 strUrl = url + "/";
                             }
                         }

                         if (!NewUrls.Contains(strUrl) && !OldUrls.Contains(strUrl))
                         {
                             NewUrls.Add(strUrl);
                         }
                         Console.WriteLine(strUrl);
                     }
                 }
                 finally
                 {
                     // The download is over, so the client can be released now.
                     webClient.Dispose();
                 }
             };
             webClient.DownloadStringAsync(address);
         }
         catch
         {
             // The download never started, so the completion handler will not run.
             webClient.Dispose();
             throw;
         }
     }).Start();
 }