Example #1
0
        //WORK WITH ROBOTS TXT

        /// <summary>
        /// Runs the URL through <c>UrlClass.checkUrl</c> and then verifies that the
        /// markup selected by <c>"meta"</c> on the fetched page mentions a UTF-8 charset.
        /// </summary>
        /// <param name="url">Address entered by the user.</param>
        /// <returns>
        /// The <c>UrlClass</c> instance used for the check, or <c>null</c> (after
        /// showing a message box) when <c>UrlClass.checkUrl</c> returns null or no
        /// UTF-8 charset declaration is found.
        /// </returns>
        public UrlClass checkUrl(String url)
        {
            UrlClass checker = new UrlClass();
            url = checker.checkUrl(url);

            if (url == null)
            {
                MessageBox.Show("Нет доступа к сайту или не верно введена ссылка");
                return null;
            }

            // Lower-case the meta markup once so both charset spellings can be
            // matched regardless of the page's casing.
            string metaMarkup = CQ.CreateFromUrl(url)["meta"].ToString().ToLower();

            // Accept both the unquoted form (charset=utf-8) and the quoted
            // attribute form (charset="utf-8").
            bool declaresUtf8 = metaMarkup.Contains("charset=utf-8")
                                || metaMarkup.Contains("charset=\"utf-8\"");

            if (!declaresUtf8)
            {
                MessageBox.Show("На данной веб-странице не используется UTF-8");
                return null;
            }

            return checker;
        }
Example #2
0
        /// <summary>
        /// Validates the URL with <c>checkUrl</c> and, when that returns a non-null
        /// result, creates a new <c>UrlClass</c> for the URL and calls its
        /// <c>start(1)</c>.
        /// </summary>
        /// <param name="url">Address to validate and process.</param>
        public void start(String url)
        {
            UrlClass validated = checkUrl(url);

            // checkUrl has already reported the problem to the user when it
            // returns null, so there is nothing more to do in that case.
            if (!(validated == null))
            {
                // The run uses a fresh instance built from the URL as passed in,
                // not the instance returned by the validation step.
                UrlClass runner = new UrlClass(url);
                runner.start(1);
            }
        }
Example #3
0
        /// <summary>
        /// Click handler that refreshes the stored content for the link typed into
        /// <c>firstNameTxtBox_Copy2</c>. It reads the matching rows from
        /// <c>webCrawler</c>, downloads each page again, and updates the stored
        /// hash and text when the MD5 hash of the extracted text has changed.
        /// Rows whose page can no longer be downloaded are deleted.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void Button_Click2(object sender, RoutedEventArgs e)
        {
            string url = firstNameTxtBox_Copy2.Text;

            // Tracks whether this handler opened the connection, so that the
            // finally block only closes what it opened.
            bool opened = false;

            try
            {
                conn.Open();
                opened = true;

                // link -> stored hash. All rows are read before any page is
                // processed so the reader is closed before further commands
                // run on the same connection.
                Dictionary<string, string> storedHashes = new Dictionary<string, string>();

                using (var select = new NpgsqlCommand())
                {
                    select.Connection  = conn;
                    select.CommandText = "SELECT newdate, link, hash, average FROM webCrawler where link=@link";
                    // The link comes straight from a text box: bind it as a
                    // parameter instead of concatenating it into the SQL.
                    select.Parameters.AddWithValue("@link", url);

                    using (var reader = select.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            string link = (string)reader["link"];
                            string hash = (string)reader["hash"];
                            // Indexer instead of Add: a duplicate link must not
                            // abort the whole refresh with an ArgumentException.
                            storedHashes[link] = hash;
                        }
                    }
                }

                foreach (KeyValuePair<string, string> kvp in storedHashes)
                {
                    try
                    {
                        HtmlDocument doc = new HtmlDocument();

                        try
                        {
                            try
                            {
                                HtmlWeb hw = new HtmlWeb();

                                doc = hw.Load(kvp.Key); // load the HTML page
                            }
                            catch
                            {
                                // Fallback: fetch the raw bytes and pass them
                                // through UrlClass.Unzip before parsing
                                // (presumably for compressed responses - TODO confirm).
                                using (WebClient wc = new WebClient())
                                {
                                    byte[] response = wc.DownloadData(kvp.Key);
                                    String html     = UrlClass.Unzip(response);
                                    doc.LoadHtml(html);
                                }
                            }
                        }
                        catch (Exception loadError)
                        {
                            Console.WriteLine(loadError.Message);

                            // Both download attempts failed: drop the row.
                            using (var delete = new NpgsqlCommand())
                            {
                                delete.Connection  = conn;
                                delete.CommandText = "DELETE FROM webCrawler where link=@link";
                                delete.Parameters.AddWithValue("@link", kvp.Key);
                                delete.ExecuteNonQuery();
                            }

                            // Nothing left to hash or update for a deleted row.
                            continue;
                        }

                        UrlClass urlClass = new UrlClass();
                        String   text     = urlClass.getTxtFromWebsite(doc);
                        string   newHash  = HashMD5.MD5Hash(text);

                        if (kvp.Value != newHash)
                        {
                            using (var update = new NpgsqlCommand())
                            {
                                update.Connection  = conn;
                                update.CommandText = "UPDATE webCrawler set hash=@hash, text=@text WHERE link=@link";
                                // Page text is arbitrary content; as a parameter,
                                // quotes in it can no longer break the statement.
                                update.Parameters.AddWithValue("@hash", newHash);
                                update.Parameters.AddWithValue("@text", text);
                                update.Parameters.AddWithValue("@link", kvp.Key);
                                update.ExecuteNonQuery();
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // A failure on one link must not stop the remaining ones.
                        Console.WriteLine(ex.Message);
                    }
                }
            }
            catch (Exception msg)
            {
                // Report instead of silently swallowing, matching the per-link handler.
                Console.WriteLine(msg.Message);
            }
            finally
            {
                // Close on every path, including failures after Open().
                if (opened)
                {
                    conn.Close();
                }
            }
        }