/// <summary>
/// Click handler that saves the collected info: the current text of the four
/// input boxes is handed to <c>write2csv.start</c> together with the
/// <c>info_csv</c> path and the <c>title</c> header, and a confirmation naming
/// that path is logged to <c>display_box</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void save_info(object sender, EventArgs e)
{
    // Snapshot the four text boxes in their on-screen order
    // (presumably the xpath settings the user just typed - see the log message).
    string[] row = new string[]
    {
        textBox1.Text,
        textBox2.Text,
        textBox3.Text,
        textBox4.Text
    };

    new write2csv().start(info_csv, row, title);

    // Tell the user where the row was stored.
    string saved_notice = String.Format("刚才输入的xpath信息已保存至\"{0}\"", info_csv);
    log.LogMessage(display_box, saved_notice);
}
/// <summary>
/// Crawls one price record. The external executable at <c>exe_path</c> is tried
/// first (fast); when it does not return status "0", the phantomjs-based crawler
/// is used as a fallback (slower but more general). A price obtained by the
/// fallback is appended to "./商品价格/{goods_name}.csv" unless the last row of
/// that file already holds the same price.
/// </summary>
/// <param name="goods_name">Product name; also used as the CSV file name.</param>
/// <param name="goods_unit">Unit the price is expressed in.</param>
/// <param name="goods_url">URL of the product page.</param>
/// <param name="price_xpath">XPath of the price element.</param>
public void one_crawl(string goods_name, string goods_unit, string goods_url, string price_xpath)
{
    string[] args = { goods_name, goods_unit, goods_url, price_xpath };
    string status_code = StartProcess(exe_path, args); // run the external exe

    // "0" = the external exe obtained the data itself; nothing more is done here.
    if (status_code == "0")
    {
        return;
    }

    // "1" = the exe ran but the value only exists on the rendered page;
    // any other code is reported as a failure to open the URL.
    string status_info = (status_code == "1")
        ? "'采集数据.exe'获取数据失败,该数据位于渲染后的网页上"
        : "网址链接打开失败,返回码:" + status_code;
    log.LogNormal(display_box, status_info);
    log.LogNormal(display_box, "开始使用C#的phantomjs爬虫");

    phantomjs_crawl crawl = new phantomjs_crawl();
    string price = crawl.xpath_crwal(goods_url, price_xpath); // url, xpath
    if (price == "NULL")
    {
        // The phantomjs crawler failed as well: give up on this item.
        log.LogError(display_box, "C#的phantomjs爬虫也未成功得到" + goods_name + "的数据");
        return;
    }

    // Checked before touching the CSV so an unusable price never triggers a file read.
    // A null result is treated like an empty one instead of being written out.
    if (string.IsNullOrEmpty(price))
    {
        log.LogError(display_box, "获取失败");
        return;
    }

    string price_csv = "./商品价格/" + goods_name + ".csv";
    string[] price_title = { "日期", "价格", "单位" };

    // Skip the write when the most recent saved row already carries this price.
    bool is_exist_newest = (price == read_last_saved_price(price_csv));

    log.LogNormal(display_box, "成功获取渲染后的网页数据,价格为:");
    if (is_exist_newest)
    {
        log.LogMessage(display_box, "最新价格已存在,无需重复写入.今日价格为:" + price + goods_unit);
        return;
    }

    // Short date in the current culture, same format as the original String.Format("{0:d}", ...).
    string date_str = DateTime.Now.ToString("d");
    string[] content = { date_str, price, goods_unit }; // date, price, unit
    write2csv writer = new write2csv();
    writer.start(price_csv, content, price_title);
    log.LogMessage(display_box, "今日价格为:" + price + ",已保存到'商品价格'文件夹");
}

/// <summary>
/// Returns the second field of the last line of <paramref name="csv_path"/>,
/// or <c>null</c> when the file is missing, empty, or its last line has fewer
/// than two usable fields.
/// </summary>
/// <param name="csv_path">Path of the price CSV to inspect.</param>
private static string read_last_saved_price(string csv_path)
{
    if (!File.Exists(csv_path))
    {
        return null;
    }

    string[] lines = File.ReadAllLines(csv_path);
    if (lines.Length == 0)
    {
        return null; // file exists but holds no rows yet
    }

    // Split on double quotes and drop the separators left between fields
    // (commas, single spaces, empty strings). Presumably write2csv quotes every
    // field - confirm against its implementation.
    string[] fields = lines[lines.Length - 1]
        .Split('"')
        .Where(p => p != "," && p != " " && p != "")
        .ToArray();

    return fields.Length > 1 ? fields[1] : null;
}