/// <summary>
/// Click handler for the chapter crawl: for every row of the <c>article</c> table whose id is
/// greater than or equal to the number in <c>txtNumber</c>, downloads the book's catalog page,
/// resolves the chapter-index link on it, downloads that page and passes it to <c>GrabData</c>.
/// </summary>
/// <remarks>
/// <para>
/// <c>txtNumber</c> is overwritten with the id of the row being processed, so after an
/// interruption the box holds the id to resume from.
/// </para>
/// <para>
/// <c>txtLink</c> must be non-empty but its value is not used by this mode; every URL is built
/// from the fixed catalog host below.
/// </para>
/// <para>
/// A <see cref="WebException"/> raised while fetching a catalog or chapter-index page is logged
/// and ends the run. Any exception raised by <c>GrabData</c> is logged and the loop moves on to
/// the next article. Rows with an unusable <c>from_id</c> or without a chapter-index link are
/// logged and skipped.
/// </para>
/// <para>
/// The one-off import of the <c>article</c> table (paged POST to the listing endpoint followed by
/// inserts) used to sit here as commented-out code; recover it from version control if needed.
/// </para>
/// </remarks>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
{
    const string host = "http://175.24.134.140:1111";

    // Appends one line to the on-screen log and keeps the newest entry visible.
    // AppendText avoids copying the whole log text on every write.
    Action<string> log = message =>
    {
        this.textBox1.AppendText("\r\n" + message);
        this.textBox1.SelectionStart = this.textBox1.Text.Length;
        this.textBox1.ScrollToCaret();
    };

    try
    {
        // TryParse also rejects an empty or whitespace-only box, so a separate blank check
        // for txtNumber is not needed.
        int startId;
        bool hasStartId = int.TryParse(this.txtNumber.Text, out startId);
        if (!hasStartId || string.IsNullOrWhiteSpace(this.txtLink.Text))
        {
            MessageBox.Show("请输入数据");
            return;
        }

        // Articles still to be processed. startId is an int, so interpolating it cannot
        // inject SQL.
        string articleSql = $"SELECT id,from_id from article where id>={startId}";
        var dt = MySQLHelper.GetInstance().ExecuteDataTable(articleSql);

        for (int i = 0; i < dt.Rows.Count; i++)
        {
            // `count` is declared outside this method (presumably a class-level counter);
            // it is reset for every article exactly as before.
            count = 1;

            // Show the current article id so the user can see progress and resume later.
            var caid = dt.Rows[i]["id"].ToString();
            this.txtNumber.Text = caid;
            Application.DoEvents();

            int fromId;
            if (!int.TryParse(dt.Rows[i]["from_id"].ToString(), out fromId))
            {
                log("错误:from_id 无效,id=" + caid);
                continue;
            }

            string catalogHtml = GetHtmlStr(host + "/book/catalog?bookId=" + fromId);

            // Link to the first page of the chapter index, as found on the catalog page.
            string chapterIndexPath = GetChapterIndexLink(catalogHtml);
            if (string.IsNullOrWhiteSpace(chapterIndexPath))
            {
                log("错误:未找到章节目录链接,id=" + caid);
                continue;
            }

            // The previous code replaced "&" with "&", which does nothing. The link is
            // presumably read from an HTML attribute, where "&" arrives as the entity
            // "&amp;", so decode that form before requesting the page.
            chapterIndexPath = chapterIndexPath.Replace("&amp;", "&");

            string chapterIndexHtml = GetHtmlStr(host + chapterIndexPath);

            try
            {
                // NOTE(review): GrabData is called without the article id and stores a fixed
                // article_id for every chapter - confirm this is intended.
                GrabData(chapterIndexHtml);
            }
            catch (Exception ex)
            {
                // One failing book must not stop the remaining ones.
                log("错误:" + ex.Message);
            }
        }
    }
    catch (WebException ex)
    {
        // Connection failure while fetching a catalog or chapter-index page.
        log("错误:" + ex.Message);
        Application.DoEvents();
    }
}
/// <summary>
/// Reads the chapter links from a chapter-index page, downloads each chapter, strips its markup
/// and inserts it into the <c>Chapter</c> table, logging one line per chapter to <c>textBox1</c>.
/// </summary>
/// <remarks>
/// Chapters are numbered from 1 in document order; that number is written to both the
/// <c>sort</c> and <c>page_id</c> columns. If the page is empty or contains no matching links,
/// a message is logged and nothing is inserted.
/// </remarks>
/// <param name="htmlstr">
/// HTML of the chapter-index page. Chapter links are taken from <c>//ol[@id='alllist']/li/a</c>.
/// </param>
/// <param name="article">Not used by this method; kept so existing callers keep compiling.</param>
/// <param name="articleId">
/// Value written to the <c>article_id</c> column. Defaults to 3, the value that was previously
/// hard-coded in the INSERT statement.
/// </param>
private void GrabData(string htmlstr, string article = "剑来", int articleId = 3)
{
    // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when nothing
    // matches, and LoadHtml rejects null input, so both cases are funnelled into one guard.
    HtmlNodeCollection chapterLinks = null;
    if (!string.IsNullOrWhiteSpace(htmlstr))
    {
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(htmlstr);
        chapterLinks = doc.DocumentNode.SelectNodes("//ol[@id='alllist']/li/a");
    }

    if (chapterLinks == null)
    {
        this.textBox1.AppendText("\r\n" + "错误:未找到章节列表");
        this.textBox1.SelectionStart = this.textBox1.Text.Length;
        this.textBox1.ScrollToCaret();
        return;
    }

    // Scraped text goes into a quoted SQL literal; an unescaped quote or backslash would break
    // or alter the statement. Backslash doubling assumes MySQL's default escape handling.
    // NOTE(review): switch to a parameterized command if MySQLHelper offers one.
    Func<string, string> escapeSql = value =>
        (value ?? string.Empty).Replace("\\", "\\\\").Replace("'", "''");

    int chapterNo = 1;
    foreach (var item in chapterLinks)
    {
        var _name = item.InnerText;
        var _link = item.GetAttributeValue("href", "");

        // NOTE(review): the href is appended to a fixed host - confirm the links on the
        // index page are relative to it.
        string contentstr = GetHtmlStr("http://m.quanshuwang.com/" + _link);
        string content = NoHTML(GrabContentData(contentstr));

        // Invariant culture keeps the timestamp in a fixed format whatever the UI culture is.
        string dtime = DateTime.Now.ToString(
            "yyyy-MM-dd HH:mm:ss",
            System.Globalization.CultureInfo.InvariantCulture);

        string insertChapter = $"insert into Chapter(name,create_time,content,sort,click,article_id,page_id) values('{escapeSql(_name)}','{dtime}','{escapeSql(content)}',{chapterNo},0,{articleId},{chapterNo})";
        MySQLHelper.GetInstance().ExecuteNonQuery(insertChapter);

        // Progress line; AppendText avoids copying the whole log text on every chapter.
        this.textBox1.AppendText("\r\n" + "写入" + _name + "," + dtime);
        this.textBox1.SelectionStart = this.textBox1.Text.Length;
        this.textBox1.ScrollToCaret();

        // Let the UI repaint between chapters; the crawl runs on the UI thread.
        Application.DoEvents();

        chapterNo++;
    }
}