コード例 #1
0
        /// <summary>
        /// Extracts the article body from a fetched page and writes it to the
        /// <c>content</c> column of the <c>article</c> row selected by <paramref name="id"/>,
        /// then appends a progress line to <c>textBox1</c>.
        /// </summary>
        /// <param name="contentstr">Raw HTML of the article page.</param>
        /// <param name="link">URL the page came from; only used in the error message when no body is found.</param>
        /// <param name="id">Value placed after <c>where id=</c>. It is interpolated unquoted, so it must be a numeric literal.</param>
        /// <exception cref="InvalidOperationException">The page contains no <c>div</c> whose class is <c>entry</c>.</exception>
        private void UpdateContent(string contentstr, string link, string id)
        {
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(contentstr);

            // The article body is taken from the first <div class="entry"> on the page.
            string             xpathstring = "//div[@class='entry']";
            HtmlNodeCollection list        = doc.DocumentNode.SelectNodes(xpathstring);

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so indexing it blindly would fail with an uninformative NullReferenceException.
            if (list == null || list.Count == 0)
            {
                throw new InvalidOperationException($"未找到正文节点 (XPath: {xpathstring}),链接地址:{link}");
            }

            string content = NoHTML(list[0].InnerHtml);

            // The text is scraped from the web and ends up inside a quoted SQL literal:
            // double backslashes and single quotes so it cannot terminate the literal.
            // NOTE(review): stopgap until MySQLHelper offers parameterized commands; assumes
            // NoHTML does not already SQL-escape its output — confirm.
            string escapedContent = (content ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

            string updateSql = $"update article set content = '{escapedContent}' where id={id}";
            MySQLHelper.GetInstance().ExecuteNonQuery(updateSql);

            // AppendText avoids re-assigning the whole (ever-growing) Text value for every log line.
            this.textBox1.AppendText("\r\n" + $"更新:[{id}]");
            this.textBox1.ScrollToCaret(); // keep the newest line visible
            Application.DoEvents();
        }
コード例 #2
0
        /// <summary>
        /// Extracts the article body from a fetched page, inserts a new row into the
        /// <c>article</c> table and appends a progress line to <c>textBox1</c>.
        /// </summary>
        /// <param name="contentstr">Raw HTML of the article page.</param>
        /// <param name="link">Value written to <c>origin_url</c>.</param>
        /// <param name="title">Value written to <c>name</c>.</param>
        /// <param name="module">Value written to <c>book_type</c>.</param>
        /// <exception cref="InvalidOperationException">The page contains no <c>div</c> whose class is <c>entry</c>.</exception>
        private void GrabData(string contentstr, string link, string title, string module)
        {
            // Doubles backslashes and single quotes so scraped text cannot terminate the
            // quoted SQL literal it is placed in.
            // NOTE(review): stopgap until MySQLHelper offers parameterized commands; assumes
            // NoHTML does not already SQL-escape its output — confirm.
            Func<string, string> escape = value => (value ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(contentstr);

            // The article body is taken from the first <div class="entry"> on the page.
            string             xpathstring = "//div[@class='entry']";
            HtmlNodeCollection list        = doc.DocumentNode.SelectNodes(xpathstring);

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so indexing it blindly would fail with an uninformative NullReferenceException.
            if (list == null || list.Count == 0)
            {
                throw new InvalidOperationException($"未找到正文节点 (XPath: {xpathstring}),链接地址:{link}");
            }

            string content = NoHTML(list[0].InnerHtml);
            string dtime   = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");

            string insertArticle = "insert into article(origin_url,name,book_type,create_time,content) "
                                   + $"values('{escape(link)}','{escape(title)}','{escape(module)}','{dtime}','{escape(content)}')";
            MySQLHelper.GetInstance().ExecuteNonQuery(insertArticle);

            // AppendText avoids re-assigning the whole (ever-growing) Text value for every log line.
            this.textBox1.AppendText("\r\n" + $"写入:[{module}] {title},链接地址:{link},{dtime}");
            this.textBox1.ScrollToCaret(); // keep the newest line visible
            Application.DoEvents();
        }
コード例 #3
0
        /// <summary>
        /// Runs <c>select max(id) from chapter</c> and returns the scalar result.
        /// </summary>
        /// <returns>The value produced by <c>MySQLHelper.ExecuteScalar</c> for that query.</returns>
        private int GetMaxNumber()
        {
            string sql    = "select max(id) from chapter";
            int    number = MySQLHelper.GetInstance().ExecuteScalar(sql);

            // This used to read `return(number++)`. A post-increment evaluates to the value
            // *before* the increment, so the method has always returned the plain maximum;
            // the dead increment is removed and the returned value is unchanged.
            // NOTE(review): if callers actually need the next free id they must add 1 — confirm intent.
            return number;
        }
コード例 #4
0
        /// <summary>
        /// Scans a listing page for <c>a[rel='bookmark']</c> links and inserts one
        /// <c>article</c> row (with empty <c>content</c>) for every link that is not stored yet.
        /// Progress and per-link errors are appended to <c>textBox1</c>.
        /// </summary>
        /// <param name="htmlstr">Raw HTML of the listing page.</param>
        /// <param name="module">Value written to the <c>book_type</c> column.</param>
        private void GrabData(string htmlstr, string module)
        {
            // Appends one line to the log box and keeps the newest line visible.
            // AppendText avoids re-assigning the whole (ever-growing) Text value per line.
            Action<string> log = message =>
            {
                this.textBox1.AppendText("\r\n" + message);
                this.textBox1.ScrollToCaret();
                Application.DoEvents();
            };

            // Doubles backslashes and single quotes so scraped text cannot terminate the
            // quoted SQL literal it is placed in.
            // NOTE(review): stopgap until MySQLHelper offers parameterized commands.
            Func<string, string> escape = value => (value ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(htmlstr);

            string             xpathstring = "//a[@rel='bookmark']";
            HtmlNodeCollection list        = doc.DocumentNode.SelectNodes(xpathstring);

            // SelectNodes yields null (not an empty collection) when nothing matches;
            // iterating it would throw, so treat "no links" as "nothing to do".
            if (list == null)
            {
                log($"未找到链接节点 (XPath: {xpathstring})");
                return;
            }

            foreach (var item in list)
            {
                var link  = item.GetAttributeValue("href", "");
                var title = item.InnerText;

                // Skip links that are already stored. This is a substring match, and any
                // '%' or '_' inside the link still acts as a LIKE wildcard, as before.
                string selSql = $"select count(*) from article where origin_url like '%{escape(link)}%'";

                int res = MySQLHelper.GetInstance().ExecuteScalar(selSql);
                if (res > 0)
                {
                    continue;
                }

                try
                {
                    string dtime         = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
                    string insertArticle = "insert into article(origin_url,name,book_type,create_time,content) "
                                           + $"values('{escape(link)}','{escape(title)}','{escape(module)}','{dtime}','')";

                    MySQLHelper.GetInstance().ExecuteNonQuery(insertArticle);

                    log($"写入:[{module}] {title},链接地址:{link},{dtime}");
                }
                catch (Exception ex)
                {
                    // A failure on one link is reported and the remaining links are still processed.
                    // The trailing "contentStr:" is kept so the message text stays as it was
                    // (the value printed there was always empty).
                    log($"报错:[{ex.Message}] ,link:{link},title:{title},contentStr:");
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Click handler: loads every <c>article</c> row whose <c>content</c> is empty,
        /// downloads the page behind its <c>origin_url</c> and hands the HTML to
        /// <c>UpdateContent</c>. Progress and errors are written to <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            // Writes one line to the log box, moves the caret to the end and scrolls to it,
            // then pumps pending UI messages.
            Action<string> report = line =>
            {
                this.textBox1.Text           = this.textBox1.Text + "\r\n" + line;
                this.textBox1.SelectionStart = this.textBox1.Text.Length;
                this.textBox1.ScrollToCaret();
                Application.DoEvents();
            };

            var pending = MySQLHelper.GetInstance().ExecuteDataTable("select id,origin_url from article where content=''");

            // Nothing to do when the query produced no table or no rows.
            if (pending == null || pending.Rows.Count <= 0)
            {
                return;
            }

            report($"查询到需要更新的条目数:[{pending.Rows.Count}]");

            for (int ordinal = 1; ordinal <= pending.Rows.Count; ordinal++)
            {
                report($"处理第:[{ordinal}]条");

                var    row        = pending.Rows[ordinal - 1];
                string link       = row["origin_url"].ToString();
                string id         = row["id"].ToString();

                try
                {
                    string pageHtml = GetHtmlStr(link);

                    // An empty download is skipped without logging.
                    if (pageHtml != "")
                    {
                        UpdateContent(pageHtml, link, id);
                    }
                }
                catch (Exception ex)
                {
                    // One failing row is reported; the loop carries on with the next row.
                    report($"错误:[{ex.Message}]条");
                }
            }
        }
コード例 #6
0
        /// <summary>
        /// Stores the chapter contained in <paramref name="contentstr"/> and then follows the
        /// page's "next" link, storing each following chapter, until a page has no next link
        /// or the link contains <c>last</c>.
        /// </summary>
        /// <remarks>
        /// Each chapter is inserted into <c>Chapter</c> with the current value of the
        /// <c>count</c> member as <c>sort</c> and <c>page_id</c>, and <c>count</c> is incremented
        /// after every insert. <c>article_id</c> is parsed from <c>txtNumber</c>.
        /// A failure on the first page propagates to the caller; a failure on any later page
        /// (download, parsing or insert) is logged to <c>textBox1</c> and ends the crawl.
        /// </remarks>
        /// <param name="contentstr">Raw HTML of the first chapter page.</param>
        private void GrabData(string contentstr)
        {
            const string siteRoot = "http://175.24.134.140:1111";

            // Doubles backslashes and single quotes so scraped text cannot terminate the
            // quoted SQL literal it is placed in.
            // NOTE(review): stopgap until MySQLHelper offers parameterized commands; assumes
            // NoHTML does not already SQL-escape its output — confirm.
            Func<string, string> escape = value => (value ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

            string html    = contentstr;
            string pageUrl = null; // null while handling the page supplied by the caller

            // Iterative on purpose: the previous version called itself once per chapter,
            // so the call depth grew with the number of chapters in the book.
            while (true)
            {
                string next;
                try
                {
                    if (pageUrl != null)
                    {
                        html = GetHtmlStr(siteRoot + pageUrl);
                    }

                    var    _name      = GrabTitleData(html);
                    string content    = NoHTML(GrabContentData(html));
                    next              = GrabNextData(html).Replace("&amp;", "&");
                    string dtime      = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");
                    int    article_id = int.Parse(this.txtNumber.Text);

                    string insertChapter = "insert into Chapter(name,create_time,content,sort,click,article_id,page_id) "
                                           + $"values('{escape(_name)}','{dtime}','{escape(content)}',{count},0,{article_id},{count})";

                    MySQLHelper.GetInstance().ExecuteNonQuery(insertChapter);

                    count++;
                }
                catch (Exception ex) when (pageUrl != null)
                {
                    // Only follow-up pages are handled here; the first page's errors reach the caller.
                    this.textBox1.AppendText("\r\n" + "错误:" + ex.Message);
                    this.textBox1.AppendText("\r\n" + "当前url:" + pageUrl);
                    this.textBox1.ScrollToCaret(); // keep the newest line visible
                    Application.DoEvents();
                    return;
                }

                // Stop when there is no next link or it points at the "last" marker.
                if (string.IsNullOrEmpty(next) || next.IndexOf("last", StringComparison.Ordinal) >= 0)
                {
                    return;
                }

                pageUrl = next;
            }
        }
コード例 #7
0
        /// <summary>
        /// Click handler: for every <c>article</c> row whose <c>id</c> is at least the number in
        /// <c>txtNumber</c>, downloads the book's catalog page, resolves its first chapter link
        /// and passes that chapter page to <c>GrabData</c>.
        /// </summary>
        /// <remarks>
        /// <c>txtLink</c> must be non-empty but its value is not otherwise used here.
        /// <c>txtNumber</c> is overwritten with the id of the article being processed, and the
        /// <c>count</c> member is reset to 1 for each article.
        /// A <see cref="WebException"/> while fetching the catalog or first chapter stops the
        /// whole run; any exception thrown by <c>GrabData</c> is logged and the run continues.
        /// </remarks>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            const string siteRoot = "http://175.24.134.140:1111";

            // Appends one line to the log box and keeps the newest line visible.
            // AppendText avoids re-assigning the whole (ever-growing) Text value per line.
            Action<string> log = message =>
            {
                this.textBox1.AppendText("\r\n" + message);
                this.textBox1.ScrollToCaret();
                Application.DoEvents();
            };

            try
            {
                // TryParse rejects blank and non-numeric input with the same prompt instead of
                // letting a FormatException escape (only WebException is caught below).
                int startId;
                if (!int.TryParse(this.txtNumber.Text, out startId) || string.IsNullOrWhiteSpace(this.txtLink.Text))
                {
                    MessageBox.Show("请输入数据");
                    return;
                }

                // Articles still to be processed, starting at the requested id.
                string articlesql = $"SELECT id,from_id from article where id>={startId}";
                var    dt         = MySQLHelper.GetInstance().ExecuteDataTable(articlesql);

                // button2_Click guards the same helper call against null; do the same here.
                if (dt == null)
                {
                    return;
                }

                for (int i = 0; i < dt.Rows.Count; i++)
                {
                    count = 1;

                    var caid = dt.Rows[i]["id"].ToString();

                    // Rows without a usable from_id cannot be mapped to a catalog page; skip them
                    // rather than aborting the whole run with a FormatException.
                    int fromId;
                    if (!int.TryParse(dt.Rows[i]["from_id"].ToString(), out fromId))
                    {
                        log($"错误:无效的 from_id,id:{caid}");
                        continue;
                    }

                    this.txtNumber.Text = caid;
                    Application.DoEvents();

                    string catalogHtml = GetHtmlStr(siteRoot + "/book/catalog?bookId=" + fromId);

                    // Link to the first chapter, with HTML-encoded ampersands restored.
                    string firstChapterUrl = GetChapterIndexLink(catalogHtml).Replace("&amp;", "&");

                    string chapterHtml = GetHtmlStr(siteRoot + firstChapterUrl);

                    try
                    {
                        GrabData(chapterHtml);
                    }
                    catch (Exception ex)
                    {
                        log("错误:" + ex.Message);
                    }
                }
            }
            catch (WebException ex)
            {
                // Connection failure
                log("错误:" + ex.Message);
            }
        }
コード例 #8
0
        /// <summary>
        /// Reads every chapter link under <c>ol#alllist</c> on a catalog page, downloads each
        /// chapter from <c>http://m.quanshuwang.com/</c> and inserts it into the <c>Chapter</c> table.
        /// </summary>
        /// <remarks>
        /// Chapters are numbered from 1 in document order; that number is written to both
        /// <c>sort</c> and <c>page_id</c>. Every row is inserted with <c>article_id</c> 3.
        /// Download and database errors are not handled here and propagate to the caller.
        /// </remarks>
        /// <param name="htmlstr">Raw HTML of the catalog page.</param>
        /// <param name="article">Not used by the current implementation.</param>
        private void GrabData(string htmlstr, string article = "剑来")
        {
            // Doubles backslashes and single quotes so scraped text cannot terminate the
            // quoted SQL literal it is placed in.
            // NOTE(review): stopgap until MySQLHelper offers parameterized commands; assumes
            // NoHTML does not already SQL-escape its output — confirm.
            Func<string, string> escape = value => (value ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(htmlstr);

            string             xpathstring = "//ol[@id='alllist']/li/a";
            HtmlNodeCollection list        = doc.DocumentNode.SelectNodes(xpathstring);

            // SelectNodes yields null (not an empty collection) when nothing matches;
            // iterating it would throw, so treat "no links" as "nothing to do".
            if (list == null)
            {
                this.textBox1.AppendText("\r\n" + $"未找到章节链接 (XPath: {xpathstring})");
                this.textBox1.ScrollToCaret();
                Application.DoEvents();
                return;
            }

            int count = 1;

            foreach (var item in list)
            {
                var _name = item.InnerText;
                var _link = item.GetAttributeValue("href", "");

                string contentstr = GetHtmlStr("http://m.quanshuwang.com/" + _link);
                string content    = NoHTML(GrabContentData(contentstr));
                string dtime      = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss");

                // NOTE(review): article_id is hard-coded to 3 — confirm this is still intended.
                string insertChapter = "insert into Chapter(name,create_time,content,sort,click,article_id,page_id) "
                                       + $"values('{escape(_name)}','{dtime}','{escape(content)}',{count},0,3,{count})";

                MySQLHelper.GetInstance().ExecuteNonQuery(insertChapter);

                // AppendText avoids re-assigning the whole (ever-growing) Text value per line.
                this.textBox1.AppendText("\r\n" + "写入" + _name + "," + dtime);
                this.textBox1.ScrollToCaret(); // keep the newest line visible
                Application.DoEvents();

                count++;
            }
        }