Exemple #1
0
        /// <summary>
        /// Saves the collected information: writes the current text of textBox1..textBox4
        /// as one record to <c>info_csv</c> and logs a confirmation message to <c>display_box</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void save_info(object sender, EventArgs e)
        {
            // Gather the four input fields in textbox order; they form a single CSV record.
            string[] fields = new string[]
            {
                textBox1.Text,
                textBox2.Text,
                textBox3.Text,
                textBox4.Text
            };

            new write2csv().start(info_csv, fields, title);

            // Tell the user where the record was stored.
            log.LogMessage(display_box, String.Format("刚才输入的xpath信息已保存至\"{0}\"", info_csv));
        }
Exemple #2
0
        /// <summary>
        /// Crawls a single price record. The external executable at <c>exe_path</c> is tried first;
        /// when it does not report success (status code "0"), the phantomjs-based crawler is used instead.
        /// The former is fast, the latter is more general.
        /// </summary>
        /// <remarks>
        /// In the fallback path the price is appended to "./商品价格/&lt;goods_name&gt;.csv" unless the
        /// last record already stored there carries the same price.
        /// </remarks>
        /// <param name="goods_name">Name of the goods; also used as the CSV file name.</param>
        /// <param name="goods_unit">Unit of the price.</param>
        /// <param name="goods_url">URL of the page to crawl.</param>
        /// <param name="price_xpath">XPath of the price element.</param>
        public void one_crawl(string goods_name, string goods_unit, string goods_url, string price_xpath)
        {
            // Invoke the external executable.
            string[] args        = { goods_name, goods_unit, goods_url, price_xpath };
            string   status_code = StartProcess(exe_path, args);

            // "0" means the external executable already obtained the data; nothing else to do here.
            if (status_code == "0")
            {
                return;
            }

            // Work out why the external executable failed.
            string status_info;
            if (status_code == "1")
            {
                status_info = "'采集数据.exe'获取数据失败,该数据位于渲染后的网页上";
            }
            else
            {
                status_info = "网址链接打开失败,返回码:" + status_code;
            }

            log.LogNormal(display_box, status_info);
            log.LogNormal(display_box, "开始使用C#的phantomjs爬虫");

            phantomjs_crawl crawl = new phantomjs_crawl();
            string          price = crawl.xpath_crwal(goods_url, price_xpath); // url, xpath

            if (price == "NULL")
            {
                // The phantomjs crawler failed as well, so give up on this item.
                log.LogError(display_box, "C#的phantomjs爬虫也未成功得到" + goods_name + "的数据");
                return;
            }

            // Reject an unusable price before touching the history file, so a failed crawl
            // can never trip over a malformed CSV.
            if (string.IsNullOrEmpty(price))
            {
                log.LogError(display_box, "获取失败");
                return;
            }

            log.LogNormal(display_box, "成功获取渲染后的网页数据,价格为:");

            string price_csv = "./商品价格/" + goods_name + ".csv";

            // NOTE(review): only the price is compared, not the date, so an unchanged price on a
            // later day is not written again — confirm this is intended.
            bool is_exist_newest = (price == read_last_saved_price(price_csv));

            if (is_exist_newest)
            {
                log.LogMessage(display_box, "最新价格已存在,无需重复写入.今日价格为:" + price + goods_unit);
                return;
            }

            string[]  price_title = { "日期", "价格", "单位" };
            string    date_str    = String.Format("{0:d}", DateTime.Now);
            string[]  content     = { date_str, price, goods_unit }; // date, price, unit
            write2csv writer      = new write2csv();
            writer.start(price_csv, content, price_title);
            log.LogMessage(display_box, "今日价格为:" + price + ",已保存到'商品价格'文件夹");
        }

        /// <summary>
        /// Reads the price field (second field) of the last non-blank line of a price CSV.
        /// </summary>
        /// <param name="csv_path">Path of the CSV file.</param>
        /// <returns>
        /// The stored price, or <c>null</c> when the file does not exist, contains no non-blank line,
        /// or its last non-blank line has fewer than two fields.
        /// </returns>
        private static string read_last_saved_price(string csv_path)
        {
            if (!File.Exists(csv_path))
            {
                return null;
            }

            string last_line = File.ReadAllLines(csv_path)
                                   .LastOrDefault(line => !string.IsNullOrWhiteSpace(line));
            if (last_line == null)
            {
                return null;
            }

            // Split on the quote character, then drop the separators (commas, spaces) and empty
            // pieces that are left between the quoted fields.
            string[] fields = last_line.Split('\"')
                                       .Where(p => (p != ",") && (p != " ") && (p != ""))
                                       .ToArray();

            return fields.Length > 1 ? fields[1] : null;
        }