/// <summary> /// YQH商品列表获取 /// </summary> /// <param name="sender"></param> /// <param name="e"></param> private void button3_Click_1(object sender, EventArgs e) { //连接mysql MySqlConnection mysqlConn = getMySqlCon(); //获取页数 int YQHNum = int.Parse(this.YQHNum.Text); if (YQHNum == 0) { MessageBox.Show("”获取页数“ 请输入大于0的数字!", "提示"); } else { for (int i = 1; i < YQHNum + 1; i++) { //获取到商品信息写入xml文件 string url = string.Format("http://www.178hui.com/zdm/list/0-0-0-0-{0}.html", i); //string url2 = "http://www.178hui.com/zdm/view-684087.html"; string getContent = LoginHelper.HttpGet(url); string yuanshiHtml = LogHelper.writeFile(getContent, "一起惠返利.html"); string[] good_bl_i_cc = HtmlHelper.GetElementsByTagAndClass(getContent, "div", "bl_i_cc"); string[] good_bl_i_lb = HtmlHelper.GetElementsByTagAndClass(getContent, "div", "bl_i_lb"); //轮询获取bl_i_cc标签信息 foreach (string x in good_bl_i_cc) { //LogHelper.writeFile(x, "所有bl_i_cc标签内容.txt"); string[] tag_id = HtmlHelper.GetElementsByTagName(x, "li"); foreach (string y in tag_id) { //LogHelper.writeFile(y, "所有li标签内容.txt"); string[] bl_cc_title = HtmlHelper.GetElementsByTagAndClass(y, "div", "bl_cc_title"); //获取bl_cc_title列表 string[] bl_cc_zhida = HtmlHelper.GetElementsByTagAndClass(y, "div", "bl_cc_zhida"); //获取bl_cc_zhida列表 string[] bl_cc_price = HtmlHelper.GetElementsByClass(y, "bl_cc_price"); //获取bl_cc_price列表 string[] bl_cc_oprice = HtmlHelper.GetElementsByClass(y, "bl_cc_oprice"); //获取bl_cc_oprice列表 //获取原始网址+标题 foreach (string a in bl_cc_title) { string pattern = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?"; //匹配原始网址 qyh_bl_cc_url = ReHelper.reMatch(a, pattern); qyh_bl_cc_title = HtmlHelper.DelHTML(a); //删除标签,获取文本 //获取分类信息 string url2 = qyh_bl_cc_url; string getContent2 = LoginHelper.HttpGet(url2); string[] bl_v_lable = HtmlHelper.GetElementsByTagAndClass(getContent2, "div", "bl_v_lable"); foreach (string bl_v_x in bl_v_lable) { string[] bl_v_y = HtmlHelper.GetElementsByTagName(bl_v_x, "dd"); string bl_v_z = bl_v_y[1]; qyh_bl_cc_fenlei = HtmlHelper.DelHTML(bl_v_z); } //获取真实商城连接 string trueUrlContent = LoginHelper.HttpGet(qyh_bl_cc_url); string urlpatten = "(?<=<script\\s*type=\"text\\/javascript\">)[^<]*(?=<\\/script>)"; Match stcro = Regex.Match(trueUrlContent, urlpatten); string result1 = stcro.Groups[0].Value.ToString();//获取第一个匹配字符串 //匹配bl_url string urlpatten2 = @"bl_url : '\bhttp\S*\b'"; string content3 = ReHelper.reMatch(result1, urlpatten2); //匹配真实网址 string urlpatten3 = @"\bhttp\S*\b"; qyh_bl_cc_true_url = ReHelper.reMatch(content3, urlpatten3); //qyh_bl_cc_true_url } //获取直达连接 foreach (string b in bl_cc_zhida) { string pattern = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";//匹配直达网址 qyh_bl_cc_zhida = ReHelper.reMatch(b, pattern); } //获取优惠价格 foreach (string z in bl_cc_price) { string pattern = @"\b\d\S*\d\d\b";;//匹配price string cc_price = ReHelper.reMatch(z, pattern); qyh_bl_cc_price = decimal.Parse(cc_price); } //获取原始价格 foreach (string w in bl_cc_oprice) { string pattern = @"\b\d\S*\d\d\b";;//匹配oprice string cc_oprice = ReHelper.reMatch(w, pattern); qyh_bl_cc_oprice = decimal.Parse(cc_oprice); } string mysqlStr = string.Format("insert into yqh_goods_yuanshi (cc_url,cc_price,cc_oprice,cc_zhida,cc_title,cc_fenlei,cc_true_url) values ('{0}','{1}','{2}','{3}','{4}','{5}','{6}')", qyh_bl_cc_url, qyh_bl_cc_price, qyh_bl_cc_oprice, qyh_bl_cc_zhida, qyh_bl_cc_title, qyh_bl_cc_fenlei.Replace("分类:", ""), qyh_bl_cc_true_url); getInsert(mysqlStr, mysqlConn); } } //获取描述信息 foreach (string x_lb in good_bl_i_lb) { //LogHelper.writeFile(x, "所有bl_i_cc标签内容.txt"); string[] tag_id = HtmlHelper.GetElementsByTagName(x_lb, "li"); foreach (string y_lb in tag_id) { //LogHelper.writeFile(y, "所有li标签内容.txt"); string[] bl_lb_title = HtmlHelper.GetElementsByTagAndClass(y_lb, "div", "bl_lb_title"); //获取bl_cc_title列表 string[] bl_lb_zhida = HtmlHelper.GetElementsByTagAndClass(y_lb, "div", "bl_lb_zhida"); //获取bl_cc_zhida列表 string[] bl_lb_info = HtmlHelper.GetElementsByTagAndClass(y_lb, "div", "bl_lb_info"); //获取bl_lb_info列表 string[] bl_lb_img = HtmlHelper.GetElementsByTagAndClass(y_lb, "div", "bl_lb_img"); //获取图片 foreach (string bl_a in bl_lb_title) { string pattern = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";//匹配原始网址 qyh_bl_lb_url = ReHelper.reMatch(bl_a, pattern); } foreach (string bl_b in bl_lb_zhida) { string pattern = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";//匹配直达网址 qyh_bl_lb_zhida = ReHelper.reMatch(bl_b, pattern); } foreach (string bl_z in bl_lb_info) { qyh_bl_lb_info = HtmlHelper.DelHTML(bl_z);//删除标签,获取文本 } //图片另存为 foreach (string lb_x in bl_lb_img) { string pattern = @"\bhttp\S*jpg\b"; string zdm_img = ReHelper.reMatch(lb_x, pattern); string img_path = Guid.NewGuid().ToString();//生成随机的图片名 qyh_bl_cc_img = Path.Combine(basepath, "Picture", "一起惠", img_path + ".jpg"); try { DownloadHelper.DownloadPicture(zdm_img, qyh_bl_cc_img, 2000);//图片另存为 } catch { } } string mysqlStr = string.Format("update yqh_goods_yuanshi set cc_info='{0}',cc_img_path='{1}' where cc_url='{2}' and cc_zhida='{3}'", qyh_bl_lb_info, qyh_bl_cc_img.Replace("\\", "\\\\"), qyh_bl_lb_url, qyh_bl_lb_zhida); getInsert(mysqlStr, mysqlConn); } } } mysqlConn.Close(); MessageBox.Show("一起惠商品获取完成!", "提示"); } }
/// <summary> /// 值得买商品获取 /// </summary> /// <param name="sender"></param> /// <param name="e"></param> private void button4_Click_1(object sender, EventArgs e) { //连接mysql MySqlConnection mysqlConn = getMySqlCon(); int ZMDNum = int.Parse(this.ZDMNum.Text); if (ZMDNum == 0) { MessageBox.Show("”获取页数“ 请输入大于0的数字!", "提示"); } else { //获取到商品信息写入xml文件 for (int i = 1; i < ZMDNum + 1; i++) { string url = string.Format("http://faxian.smzdm.com/p{0}/#filter-block", i); string getContent = LoginHelper.HttpGet(url); try { string yuanshiHtml = LogHelper.writeFile(getContent, "什么值得买.html"); string[] feed_block_ver = HtmlHelper.GetElementsByTagAndClass(getContent, "div", "feed-block-ver ");//获取值得买商品列表 //轮询获取feed_hot_card标签信息 foreach (string x in feed_block_ver) { //LogHelper.writeFile(x, "所有feed_block_ver标签内容.txt"); string[] feed_ver_title = HtmlHelper.GetElementsByTagAndClass(x, "h5", "feed-ver-title"); //获取网址标题列表 string[] feed_ver_price = HtmlHelper.GetElementsByTagAndClass(x, "div", "z-highlight z-ellipsis"); //获取价格列表 string[] zdm_ver_descripe = HtmlHelper.GetElementsByTagAndClass(x, "div", "feed-ver-descripe"); //获取商品描述列表 string[] zdm_ver_zhida = HtmlHelper.GetElementsByTagAndClass(x, "div", "feed-link-btn-inner"); //获取原始网址列表 string[] zdm_ver_image = HtmlHelper.GetElementsByTagAndClass(x, "div", "feed-ver-pic"); //获取图片网址列表 //获取url+标题 foreach (string feed_a in feed_ver_title) { string pattern = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?"; zdm_url = ReHelper.reMatch(feed_a, pattern); zdm_title = HtmlHelper.DelHTML(feed_a); //获取分类 string url3 = zdm_url; string getContent3 = LoginHelper.HttpGet(url3); string[] fenlei = HtmlHelper.GetElementsByTagAndClass(getContent3, "div", "crumbs"); foreach (string zdm_v_x in fenlei) { zdm_fenlei = HtmlHelper.DelHTML(zdm_v_x); } } //获取价格 foreach (string feed_z in feed_ver_price) { zdm_price_str = HtmlHelper.DelHTML(feed_z);//去除html标签 string regex = @"[0-9][0-9,.]*"; Match mstr = Regex.Match(zdm_price_str, regex); //正则匹配数字 string result = mstr.Groups[0].Value.ToString(); //获取第一个匹配字符串 zdm_price = decimal.Parse(result); } //获取描述 foreach (string feed_w in zdm_ver_descripe) { zdm_descripe = HtmlHelper.DelHTML(feed_w);//描述 } //获取直达网址 foreach (string feed_b in zdm_ver_zhida) { string pattern = @"https://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?"; //匹配原始网址 string pattern2 = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?"; //匹配商城连接 zdm_zhida = ReHelper.reMatch(feed_b, pattern); //zdm_zhida_shangcheng = ReHelper.reMatch(feed_b, pattern2);//商城直达网址 } //图片地址 foreach (string feed_c in zdm_ver_image) { string pattern = @"\bhttp\S*jpg\b"; string zdm_img = ReHelper.reMatch(feed_c, pattern); string img_path = Guid.NewGuid().ToString();//生成随机的图片名 img_save_path = Path.Combine(basepath, "Picture", "值得买", img_path + ".jpg"); DownloadHelper.DownloadPicture(zdm_img, img_save_path, 2000);//图片另存为 } string mysqlStr = string.Format("insert into zdm_goods_yuanshi (zdm_url,zdm_title,zdm_price,zdm_descripe,zdm_zhida,zdm_price_str,zdm_image,zdm_fenlei) values ('{0}','{1}','{2}','{3}','{4}','{5}','{6}','{7}')", zdm_url, @zdm_title, zdm_price, zdm_descripe, zdm_zhida, zdm_price_str, img_save_path.Replace("\\", "\\\\"), zdm_fenlei.Replace(">文章详情", "").Replace("当前位置:首页>", "")); //转义"\\" getInsert(mysqlStr, mysqlConn); } } catch (Exception ex) { MessageBox.Show(ex.Message); } } mysqlConn.Close(); MessageBox.Show("值得买商品信息提取完成!", "信息"); } }