Exemplo n.º 1
0
        /// <summary>
        /// Collects one contact page per index from <c>Start</c> to <c>End</c> (inclusive),
        /// accumulates the results in a single table and exports that table to an
        /// <c>.xls</c> file under <c>floderPath</c>.
        /// </summary>
        public static void Collect2Export()
        {
            const string siteRoot = "http://www.e-cantonfair.com/china-supplier/";

            CurrentIndex = Start;

            // Collect the data: one page per index, all rows go into the same table.
            DataTable collected = Core.GetEmptyDataTable();
            int pageNo = Start;
            while (pageNo <= End)
            {
                string pageUrl = siteRoot + String.Format("contact-us-{0}.html", pageNo);
                Core.Add2Dt(collected, Core.GetDataByUrl(pageUrl));

                // Publish the index that has just been processed.
                CurrentIndex = pageNo;
                pageNo++;
            }

            // The table name doubles as the export file name.
            string exportName = String.Format("{0}-{1}采集结果", Start, End);
            collected.TableName = exportName;

            CollectDt = collected;

            Hashtable exportTable = GetHashtable();

            string targetPath = String.Format("{0}/{1}.xls", floderPath, exportName);
            ExcelHelper exporter = new ExcelHelper(targetPath, "");
            string failure = exporter.DatatableToExcel(collected, exportTable);

            // An empty string means the export reported nothing to log.
            if (failure == "")
            {
                return;
            }

            // Record the export failure in the log.
            Log.Write(failure);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Collects the Chinese and English company tables for <c>this.Category</c>, merges them,
        /// exports the merged table to its own <c>.xls</c> file under <c>FolderPath</c>, and records
        /// the result in the static <c>S_CollectThread</c> members.
        /// </summary>
        /// <remarks>
        /// Categories whose <c>Param</c> contains "areano" use the <c>*_Imp</c> base URLs and get their
        /// title tagged with the "进口:" prefix. The tagging is idempotent, so running this method
        /// again on the same category object does not stack the prefix.
        /// </remarks>
        public void run()
        {
            const string importPrefix = "进口:";

            // Distinguish import from export categories.
            bool isImport = this.Category.Param.IndexOf("areano") != -1;
            string url_cn_base = isImport ? Config.BaseURL_CN_Imp : Config.BaseURL_CN;
            string url_en_base = isImport ? Config.BaseURL_EN_Imp : Config.BaseURL_EN;

            if (isImport)
            {
                // Only tag the title once; a null title is still prefixed, as before.
                string currentTitle = this.Category.Title;
                if (currentTitle == null || !currentTitle.StartsWith(importPrefix, StringComparison.Ordinal))
                {
                    this.Category.Title = importPrefix + currentTitle;
                }
            }

            var url_cn = String.Format("{0}?{1}", url_cn_base, this.Category.Param);
            var url_en = String.Format("{0}?{1}", url_en_base, this.Category.Param);

            DataTable result_cn = Core.GetAllCompanyName_DataTable(url_cn, this.Category.TimePhase, this.Category.Title);
            DataTable result_en = Core.GetAllCompanyName_DataTable(url_en, this.Category.TimePhase, this.Category.Title);
            DataTable dt = S_Core.MergeTableColumn(result_cn, result_en);

            // The table name is also the basis of the export file name.
            string tableName = String.Format("第{0}期_{1}", this.Category.TimePhase, this.Category.Title);
            dt.TableName = tableName;

            // Strip characters that are unwanted in the file name.
            string fileName = tableName.Trim()
                .Replace(importPrefix, "进口_")
                .Replace(" ", "")
                .Replace("&nbsp;", "")
                .Replace("、", "");

            string flag = new ExcelHelper(String.Format("{0}/{1}.xls", FolderPath, fileName), "").DatatableToExcel(dt);

            // Log only when the exporter actually returned a message (null or empty means nothing to log).
            if (!String.IsNullOrEmpty(flag))
            {
                Log.Write(flag);
            }

            // Save the result to the shared static state.
            // NOTE(review): these members are updated without synchronization; if run() is executed
            // on several threads at once this needs a lock owned by S_CollectThread — TODO confirm.
            S_CollectThread.CompleteList.Add(this.Category.Title);
            S_CollectThread.DS.Tables.Add(dt);
            S_CollectThread.Total += dt.Rows.Count;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Timer callback for the multi-threaded collection. While the number of tables in
        /// <c>S_CollectThread.DS</c> differs from <c>S_CollectThread.DataTableCount</c> it appends a
        /// progress line; once they match it exports the summary workbook, reports the row
        /// count and replaces the shared data set with a new one.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void timer1_Tick(object sender, EventArgs e)
        {
            bool finished = S_CollectThread.DS.Tables.Count == S_CollectThread.DataTableCount;

            if (!finished)
            {
                var collectedSoFar = S_CollectThread.DS.Tables.Count;
                var expected = S_CollectThread.DataTableCount;
                string stamp = DateTime.Now.ToString("HH:mm:ss");
                sb_loadingState.AppendLine(String.Format("Time:{2}==>共采集{1}个类别,已经采集 {0} 个类别", collectedSoFar, expected, stamp));

                // Start a fresh buffer once the progress text exceeds 1024 characters.
                if (sb_loadingState.Length > 1024)
                {
                    sb_loadingState = new StringBuilder();
                }

                txt_result.Text = sb_loadingState.ToString();
                return;
            }

            SetButtonEnable(true);

            var collectedTables = S_CollectThread.DS.Tables.Count;
            var expectedTables = S_CollectThread.DataTableCount;
            string finishedAt = DateTime.Now.ToString("HH:mm:ss");
            sb_loadingState.AppendLine(String.Format("Time:{2}==>共采集{1}个类别,已经采集 {0} 个类别,采集结束!", collectedTables, expectedTables, finishedAt));
            txt_result.Text = sb_loadingState.ToString();

            // Build the summary table and export it.
            DataTable summary = DataHandler(S_CollectThread.DS);
            string summaryPath = String.Format("{0}/本次采集数据汇总表.xls", defaultfilePath);
            string failure = new ExcelHelper(summaryPath, "").DatatableToExcel(summary);
            if (failure != "")
            {
                MessageBox.Show("导出失败!" + failure);
            }

            MessageBox.Show(String.Format("共抓取数据: {0} 条!", summary.Rows.Count));

            // Clear the collected tables.
            S_CollectThread.DS = new DataSet();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Timer callback for the single-threaded collection. While the number of tables in
        /// <c>S_SingleThreadConfig.DS</c> differs from <c>S_SingleThreadConfig.DataTableCount</c> it
        /// appends a progress line; once they match it exports the summary workbook, shows either
        /// a success message or the recorded problem categories, and resets the shared state.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void timer2_Tick(object sender, EventArgs e)
        {
            bool finished = S_SingleThreadConfig.DS.Tables.Count == S_SingleThreadConfig.DataTableCount;

            if (!finished)
            {
                var collectedSoFar = S_SingleThreadConfig.DS.Tables.Count;
                var expected = S_SingleThreadConfig.DataTableCount;
                string stamp = DateTime.Now.ToString("HH:mm:ss");
                sb_loadingState.AppendLine(String.Format("Time:{2}==>共采集{1}个类别,已经采集 {0} 个类别", collectedSoFar, expected, stamp));

                // Start a fresh buffer once the progress text exceeds 1024 characters.
                if (sb_loadingState.Length > 1024)
                {
                    sb_loadingState = new StringBuilder();
                }

                txt_result.Text = sb_loadingState.ToString();
                return;
            }

            SetButtonEnable(true, true);

            var collectedTables = S_SingleThreadConfig.DS.Tables.Count;
            var expectedTables = S_SingleThreadConfig.DataTableCount;
            string finishedAt = DateTime.Now.ToString("HH:mm:ss");
            sb_loadingState.AppendLine(String.Format("Time:{2}==>共采集{1}个类别,已经采集 {0} 个类别,采集结束!", collectedTables, expectedTables, finishedAt));
            txt_result.Text = sb_loadingState.ToString();

            // Build the summary table and export it.
            DataTable summary = DataHandler(S_SingleThreadConfig.DS);
            string summaryPath = String.Format("{0}/本次采集数据汇总表.xls", defaultfilePath);
            string failure = new ExcelHelper(summaryPath, "").DatatableToExcel(summary);
            if (failure != "")
            {
                MessageBox.Show("导出失败!" + failure);
            }

            // Success text by default; the recorded problem categories plus a hint otherwise.
            string report;
            if (S_SingleThreadConfig.Sb_ErrorCategory.ToString().Length == 0)
            {
                report = "采集成功:采集回来的数据与网站上每个类别显示的总条数一致!";
            }
            else
            {
                S_SingleThreadConfig.Sb_ErrorCategory = S_SingleThreadConfig.Sb_ErrorCategory.AppendLine("请查看有问题的类别,尾页的条数是否为总数,个位数上的值。(每页显示10条)");
                report = S_SingleThreadConfig.Sb_ErrorCategory.ToString();
            }
            txt_result.Text = report;

            MessageBox.Show(String.Format("网页上数据共有:{0}条,实际抓取数据: {1} 条!", S_SingleThreadConfig.SingleTotal_site, summary.Rows.Count));

            // Clear the collected tables.
            S_SingleThreadConfig.ReSet();
        }
Exemplo n.º 5
0
        /*
        /// <summary>
        /// 采集并导出[点击事件]
        /// </summary>
        /// <param name="sender"></param>
        /// <param name="caie"></param>
        private void btn_collect_export_Click(object sender, EventArgs e)
        {
            FolderBrowserDialog fbd = new FolderBrowserDialog();

            //导出路径,使用上次选中的值
            if (defaultfilePath != "")
            {
                fbd.SelectedPath = defaultfilePath;
            }

            //开始抓取
            if (fbd.ShowDialog() == DialogResult.OK)
            {
                defaultfilePath = fbd.SelectedPath;

                CollectAndExport(fbd.SelectedPath);
            }
        }
         */


        /// <summary>
        /// Collects every category of the currently selected tab, one after another inside a
        /// <c>LoadingHandler</c> call, and exports all resulting tables into a single workbook
        /// named <c>第N期.xls</c>, where N is the selected tab index plus one.
        /// </summary>
        /// <param name="floder">Folder the workbook is written to.</param>
        private void CollectAndExport(string floder)
        {
            DataSet workbook = new DataSet();

            // Categories of the currently selected phase tab.
            List<M_Category> categories = GetCategory(tab_all.SelectedIndex);

            // Collect each category in turn.
            foreach (M_Category category in categories)
            {
                // Params containing "areano" use the *_Imp base URLs, all others the default ones.
                string cnBase;
                string enBase;
                if (category.Param.IndexOf("areano") == -1)
                {
                    cnBase = Config.BaseURL_CN;
                    enBase = Config.BaseURL_EN;
                }
                else
                {
                    cnBase = Config.BaseURL_CN_Imp;
                    enBase = Config.BaseURL_EN_Imp;
                }

                string cnUrl = String.Format("{0}?{1}", cnBase, category.Param);
                string enUrl = String.Format("{0}?{1}", enBase, category.Param);

                LoadingHandler.Show(this, LoadingStyle.None, args =>
                {
                    // Announce the start of this category in the result box.
                    args.Execute(ex =>
                    {
                        sb_loadingState.AppendLine(String.Format("正在抓取 {0} ... ", category.Title));
                        txt_result.Text = sb_loadingState.ToString();
                    });

                    // Fetch the Chinese table first, then the English one, and merge them.
                    DataTable merged = S_Core.MergeTableColumn(
                        Core.GetAllCompanyName_DataTable(cnUrl, category.TimePhase, category.Title),
                        Core.GetAllCompanyName_DataTable(enUrl, category.TimePhase, category.Title));

                    // Add the table to the workbook under the category title.
                    merged.TableName = category.Title;
                    totalRecord += merged.Rows.Count;
                    workbook.Tables.Add(merged);

                    // Announce the end of this category.
                    args.Execute(ex =>
                    {
                        sb_loadingState.AppendLine(String.Format("抓取 {0} 结束! ", category.Title));
                        txt_result.Text = sb_loadingState.ToString();
                    });
                });
            }

            // Export the whole data set.
            string workbookPath = String.Format("{0}/第{1}期.xls", floder, tab_all.SelectedIndex + 1);
            string failure = new ExcelHelper(workbookPath, "").DataSetToExcel(workbook);
            if (failure != "")
            {
                MessageBox.Show("导出失败!" + failure);
            }

            MessageBox.Show(String.Format("共抓取数据: {0} 条!", totalRecord));
        }
Exemplo n.º 6
0
        /// <summary>
        /// Collects and exports every category in <c>list</c>: fetches the Chinese and English
        /// company tables, merges them, writes one <c>.xls</c> file per category under
        /// <c>floderPath</c>, and compares the collected row count with the total reported for
        /// the Chinese URL.
        /// </summary>
        /// <remarks>
        /// Categories whose <c>Param</c> contains "areano" use the <c>*_Imp</c> base URLs and get their
        /// title tagged with the "进口:" prefix. The tagging is idempotent, so calling this method
        /// again on the same category objects does not stack the prefix.
        /// </remarks>
        public static void Collect2Export()
        {
            const string importPrefix = "进口:";

            foreach (M_Category item in list)
            {
                // Decide whether this is an import or an export category.
                bool isImport = item.Param.IndexOf("areano") != -1;
                string url_cn_base = isImport ? Config.BaseURL_CN_Imp : Config.BaseURL_CN;
                string url_en_base = isImport ? Config.BaseURL_EN_Imp : Config.BaseURL_EN;

                if (isImport)
                {
                    // Only tag the title once; a null title is still prefixed, as before.
                    string currentTitle = item.Title;
                    if (currentTitle == null || !currentTitle.StartsWith(importPrefix, StringComparison.Ordinal))
                    {
                        item.Title = importPrefix + currentTitle;
                    }
                }

                // Build the request URLs.
                var url_cn = String.Format("{0}?{1}", url_cn_base, item.Param);
                var url_en = String.Format("{0}?{1}", url_en_base, item.Param);

                DataTable result_cn = Core.GetAllCompanyName_DataTable(url_cn, item.TimePhase, item.Title);
                DataTable result_en = Core.GetAllCompanyName_DataTable(url_en, item.TimePhase, item.Title);
                DataTable dt = S_Core.MergeTableColumn(result_cn, result_en);

                // The table name is also the basis of the export file name.
                string tableName = String.Format("第{0}期_{1}", item.TimePhase, item.Title);
                dt.TableName = tableName;

                // Strip characters that are unwanted in the file name.
                string fileName = tableName.Trim()
                    .Replace(importPrefix, "进口_")
                    .Replace(" ", "")
                    .Replace("&nbsp;", "")
                    .Replace("、", "");

                string flag = new ExcelHelper(String.Format("{0}/{1}.xls", floderPath, fileName), "").DatatableToExcel(dt);

                // Total number of records reported for this category on each URL.
                int categoryDataTotal_cn = Core.GetCategoryDataCount(url_cn, XPath.CATEGORYDATATOTAL);
                int categoryDataTotal_en = Core.GetCategoryDataCount(url_en, XPath.CATEGORYDATATOTAL);

                // Log the counts; the last field tells whether the collected rows match the cn total.
                string log = String.Format("{0}==>cn:{1},en:{2},collect:{3}==>{4}", dt.TableName, categoryDataTotal_cn, categoryDataTotal_en, dt.Rows.Count, categoryDataTotal_cn == dt.Rows.Count);
                Log.Info(log);
                if (categoryDataTotal_cn != dt.Rows.Count)
                {
                    // Remember categories whose collected count differs from the cn total.
                    Sb_ErrorCategory = Sb_ErrorCategory.AppendLine(log);
                }

                SingleTotal_site += categoryDataTotal_cn;
                SingleTotal += dt.Rows.Count;
                DS.Tables.Add(dt);

                // Log only when the exporter actually returned a message (null or empty means nothing to log).
                if (!String.IsNullOrEmpty(flag))
                {
                    Log.Write(flag);
                }
            }
        }