//WORK WITH ROBOTS TXT
/// <summary>
/// Validates a URL through <c>UrlClass.checkUrl</c> and verifies that the page's
/// <c>meta</c> tags declare a UTF-8 charset.
/// </summary>
/// <param name="url">The address entered by the user.</param>
/// <returns>
/// The <c>UrlClass</c> instance that performed the check, or <c>null</c> (after a
/// message box has been shown) when <c>UrlClass.checkUrl</c> returns <c>null</c> or
/// when no UTF-8 charset declaration is found in the meta tags.
/// </returns>
public UrlClass checkUrl(String url)
{
    UrlClass checker = new UrlClass();
    string resolvedUrl = checker.checkUrl(url);
    if (resolvedUrl == null)
    {
        MessageBox.Show("Нет доступа к сайту или не верно введена ссылка");
        return null;
    }

    // Serialize only the <meta> elements and lowercase them once so that both
    // spellings of the charset declaration can be matched case-insensitively.
    string metaMarkup = CQ.CreateFromUrl(resolvedUrl)["meta"].ToString().ToLower();
    bool declaresUtf8 = metaMarkup.Contains("charset=utf-8")
                        || metaMarkup.Contains("charset=\"utf-8\"");
    if (!declaresUtf8)
    {
        MessageBox.Show("На данной веб-странице не используется UTF-8");
        return null;
    }

    return checker;
}
/// <summary>
/// Runs <c>checkUrl</c> on the given address and, when the check succeeds,
/// creates a new <c>UrlClass</c> for that address and calls its <c>start(1)</c>.
/// </summary>
/// <param name="url">The address entered by the user.</param>
public void start(String url)
{
    // checkUrl has already informed the user when it returns null.
    if (checkUrl(url) == null)
    {
        return;
    }

    // A separate UrlClass is built from the URL exactly as it was passed in;
    // the instance returned by checkUrl is not reused.
    new UrlClass(url).start(1);
}
/// <summary>
/// Click handler that re-downloads every page stored in the <c>webCrawler</c> table
/// under the link typed into <c>firstNameTxtBox_Copy2</c> and updates the stored
/// hash and text when the page content has changed.
/// </summary>
/// <remarks>
/// A row whose page can no longer be downloaded is deleted from the table.
/// All SQL statements are parameterized because both the link (user input) and the
/// scraped page text may contain quotes. Failures are written to the console; the
/// handler never throws.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Button_Click2(object sender, RoutedEventArgs e)
{
    string url = firstNameTxtBox_Copy2.Text;

    try
    {
        conn.Open();
        using (var cmd = new NpgsqlCommand())
        {
            cmd.Connection = conn;

            // Snapshot link -> stored hash first, so the reader is closed before
            // the same command object is reused for DELETE/UPDATE below.
            var storedHashes = new Dictionary<string, string>();
            cmd.CommandText = "SELECT link, hash FROM webCrawler WHERE link = @link";
            cmd.Parameters.AddWithValue("link", url);
            using (var reader = cmd.ExecuteReader())
            {
                while (reader.Read())
                {
                    string link = reader["link"] as string;
                    if (link == null)
                    {
                        continue; // NULL link: nothing to download
                    }

                    // Indexer instead of Add: a duplicated link must not abort the refresh.
                    // A NULL hash becomes null and therefore always counts as "changed".
                    storedHashes[link] = reader["hash"] as string;
                }
            }

            foreach (KeyValuePair<string, string> stored in storedHashes)
            {
                try
                {
                    HtmlDocument doc;
                    try
                    {
                        doc = LoadHtmlDocument(stored.Key);
                    }
                    catch (Exception ex)
                    {
                        // Neither download path worked: drop the row and move on
                        // instead of hashing an empty document.
                        Console.WriteLine(ex.Message);
                        cmd.Parameters.Clear();
                        cmd.CommandText = "DELETE FROM webCrawler WHERE link = @link";
                        cmd.Parameters.AddWithValue("link", stored.Key);
                        cmd.ExecuteNonQuery();
                        continue;
                    }

                    UrlClass urlClass = new UrlClass();
                    String text = urlClass.getTxtFromWebsite(doc);
                    string hash = HashMD5.MD5Hash(text);
                    if (stored.Value != hash)
                    {
                        cmd.Parameters.Clear();
                        cmd.CommandText = "UPDATE webCrawler SET hash = @hash, text = @text WHERE link = @link";
                        cmd.Parameters.AddWithValue("hash", hash);
                        // The concatenated SQL used to turn a null text into ''; keep that.
                        cmd.Parameters.AddWithValue("text", text ?? string.Empty);
                        cmd.Parameters.AddWithValue("link", stored.Key);
                        cmd.ExecuteNonQuery();
                    }
                }
                catch (Exception ex)
                {
                    // One bad page must not stop the remaining links.
                    Console.WriteLine(ex.Message);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    finally
    {
        // Release the shared connection on every path, including failures.
        conn.Close();
    }
}

/// <summary>
/// Downloads and parses the page at <paramref name="link"/>. Tries <c>HtmlWeb.Load</c>
/// first; if that throws, downloads the raw bytes with <c>WebClient</c> and parses the
/// result of <c>UrlClass.Unzip</c>. Exceptions from the fallback propagate to the caller.
/// </summary>
private static HtmlDocument LoadHtmlDocument(string link)
{
    try
    {
        return new HtmlWeb().Load(link);
    }
    catch (Exception)
    {
        // Deliberate best-effort fallback: retry via a raw download.
        HtmlDocument doc = new HtmlDocument();
        using (WebClient wc = new WebClient())
        {
            byte[] response = wc.DownloadData(link);
            doc.LoadHtml(UrlClass.Unzip(response));
        }
        return doc;
    }
}