/// <summary>
/// Collects one record per supplier contact page for every index from
/// <c>Start</c> to <c>End</c> (inclusive) and exports the resulting table
/// to an .xls file under <c>floderPath</c>.
/// </summary>
/// <remarks>
/// <c>CurrentIndex</c> is set to <c>Start</c> up front and then to each index
/// once that page has been added. The filled table is also stored in
/// <c>CollectDt</c>. If the exporter returns anything other than an empty
/// string, that value is written to the log.
/// </remarks>
public static void Collect2Export()
{
    const string supplierRoot = "http://www.e-cantonfair.com/china-supplier/";

    CurrentIndex = Start;

    // Collect the data, one contact page per index.
    DataTable collected = Core.GetEmptyDataTable();
    int pageIndex = Start;
    while (pageIndex <= End)
    {
        string pageUrl = supplierRoot + String.Format("contact-us-{0}.html", pageIndex);
        Data record = Core.GetDataByUrl(pageUrl);
        Core.Add2Dt(collected, record);

        CurrentIndex = pageIndex;
        pageIndex++;
    }

    // Name the table after the collected range and publish it.
    string exportName = String.Format("{0}-{1}采集结果", Start, End);
    collected.TableName = exportName;
    CollectDt = collected;

    // Export to "<floderPath>/<exportName>.xls".
    Hashtable exportHashtable = GetHashtable();
    string exportPath = String.Format("{0}/{1}.xls", floderPath, exportName);
    ExcelHelper exporter = new ExcelHelper(exportPath, "");
    string exportMessage = exporter.DatatableToExcel(collected, exportHashtable);

    // An empty result means there is nothing to report.
    if (exportMessage == "")
    {
        return;
    }

    // Record the export failure.
    Log.Write(exportMessage);
}
// Serializes writes to the shared S_CollectThread results (CompleteList, DS, Total).
private static readonly object s_collectResultGate = new object();

/// <summary>
/// Collects the Chinese and English company lists for <c>Category</c>,
/// merges them, exports the merged table to its own .xls file under
/// <c>FolderPath</c>, and records the result in the shared
/// <c>S_CollectThread</c> state.
/// </summary>
/// <remarks>
/// Presumably invoked on a worker thread, one per category — TODO confirm
/// against the caller. Because several invocations may finish at the same
/// time, the writes to <c>S_CollectThread</c> are performed under a lock:
/// neither <c>DataSet</c> writes nor a <c>+=</c> on a shared counter are
/// safe without synchronization.
/// </remarks>
public void run()
{
    const string importPrefix = "进口:";

    // Distinguish import from export: a category whose parameter string
    // contains "areano" uses the import base URLs.
    string url_cn_base;
    string url_en_base;
    if (this.Category.Param.IndexOf("areano") == -1)
    {
        url_cn_base = Config.BaseURL_CN;
        url_en_base = Config.BaseURL_EN;
    }
    else
    {
        url_cn_base = Config.BaseURL_CN_Imp;
        url_en_base = Config.BaseURL_EN_Imp;

        // Tag the title only once; the category object may be reused by a
        // later run, and an unconditional prepend would stack prefixes.
        string currentTitle = this.Category.Title;
        if (currentTitle == null || !currentTitle.StartsWith(importPrefix, StringComparison.Ordinal))
        {
            this.Category.Title = importPrefix + currentTitle;
        }
    }

    var url_cn = String.Format("{0}?{1}", url_cn_base, this.Category.Param);
    var url_en = String.Format("{0}?{1}", url_en_base, this.Category.Param);

    DataTable result_cn = Core.GetAllCompanyName_DataTable(url_cn, this.Category.TimePhase, this.Category.Title);
    DataTable result_en = Core.GetAllCompanyName_DataTable(url_en, this.Category.TimePhase, this.Category.Title);
    DataTable dt = S_Core.MergeTableColumn(result_cn, result_en);

    // The table name and the file name start from the same text.
    string tableName = String.Format("第{0}期_{1}", this.Category.TimePhase, this.Category.Title);
    dt.TableName = tableName;

    // Strip characters that are unwanted in the file name.
    // NOTE(review): the two space replacements look identical here; one may
    // originally have been a full-width or non-breaking space — confirm.
    string fileName = tableName.Trim().Replace("进口:", "进口_").Replace(" ", "").Replace(" ", "").Replace("、", "");

    string flag = new ExcelHelper(String.Format("{0}/{1}.xls", FolderPath, fileName), "").DatatableToExcel(dt);
    if (flag != "")
    {
        Log.Write(flag);
    }

    // Publish the result to the shared state as one atomic step.
    lock (s_collectResultGate)
    {
        S_CollectThread.CompleteList.Add(this.Category.Title);
        S_CollectThread.DS.Tables.Add(dt);
        S_CollectThread.Total += dt.Rows.Count;
    }
}
// True while the completion branch of timer1_Tick is executing. The message
// boxes shown there keep dispatching timer ticks, so without this guard the
// handler would re-enter and export / prompt again.
private bool _timer1CompletionInProgress;

/// <summary>
/// Timer callback for the multi-threaded collection: reports progress and,
/// once every category table has arrived, exports the summary workbook.
/// </summary>
/// <remarks>
/// Collection counts as finished when <c>S_CollectThread.DS.Tables.Count</c>
/// equals <c>S_CollectThread.DataTableCount</c>. At that point the buttons
/// are re-enabled, the merged data is exported to
/// "本次采集数据汇总表.xls" under <c>defaultfilePath</c>, the row count is
/// shown, and the shared <c>DataSet</c> is replaced with an empty one.
/// </remarks>
/// <param name="sender">The timer raising the event.</param>
/// <param name="e">Unused event data.</param>
private void timer1_Tick(object sender, EventArgs e)
{
    // Ignore ticks delivered while the completion dialogs are still open.
    if (_timer1CompletionInProgress)
    {
        return;
    }

    if (S_CollectThread.DS.Tables.Count == S_CollectThread.DataTableCount)
    {
        _timer1CompletionInProgress = true;
        try
        {
            SetButtonEnable(true);

            sb_loadingState.AppendLine(String.Format("Time:{2}==>共采集{1}个类别,已经采集 {0} 个类别,采集结束!", S_CollectThread.DS.Tables.Count, S_CollectThread.DataTableCount, DateTime.Now.ToString("HH:mm:ss")));
            txt_result.Text = sb_loadingState.ToString();

            DataTable dt = DataHandler(S_CollectThread.DS);
            string flag = new ExcelHelper(String.Format("{0}/本次采集数据汇总表.xls", defaultfilePath), "").DatatableToExcel(dt);
            if (flag != "")
            {
                MessageBox.Show("导出失败!" + flag);
            }

            MessageBox.Show(String.Format("共抓取数据: {0} 条!", dt.Rows.Count));

            // Clear the table collection for the next run.
            S_CollectThread.DS = new DataSet();
        }
        finally
        {
            _timer1CompletionInProgress = false;
        }
    }
    else
    {
        // Trim the progress buffer before appending, so the newest line is
        // never thrown away and the text box is never blanked.
        if (sb_loadingState.Length > 1024)
        {
            sb_loadingState = new StringBuilder();
        }

        sb_loadingState.AppendLine(String.Format("Time:{2}==>共采集{1}个类别,已经采集 {0} 个类别", S_CollectThread.DS.Tables.Count, S_CollectThread.DataTableCount, DateTime.Now.ToString("HH:mm:ss")));
        txt_result.Text = sb_loadingState.ToString();
    }
}
// True while the completion branch of timer2_Tick is executing. The message
// boxes shown there keep dispatching timer ticks, so without this guard the
// handler would re-enter before S_SingleThreadConfig.ReSet() has run.
private bool _timer2CompletionInProgress;

/// <summary>
/// Timer callback for the single-threaded collection: reports progress and,
/// once every category table has arrived, exports the summary workbook and
/// shows the verification result.
/// </summary>
/// <remarks>
/// Collection counts as finished when
/// <c>S_SingleThreadConfig.DS.Tables.Count</c> equals
/// <c>S_SingleThreadConfig.DataTableCount</c>. The merged data is exported to
/// "本次采集数据汇总表.xls" under <c>defaultfilePath</c>. If
/// <c>Sb_ErrorCategory</c> is non-empty its content (plus a hint line) is
/// shown instead of the success message. Finally
/// <c>S_SingleThreadConfig.ReSet()</c> is called.
/// </remarks>
/// <param name="sender">The timer raising the event.</param>
/// <param name="e">Unused event data.</param>
private void timer2_Tick(object sender, EventArgs e)
{
    // Ignore ticks delivered while the completion dialogs are still open.
    if (_timer2CompletionInProgress)
    {
        return;
    }

    if (S_SingleThreadConfig.DS.Tables.Count == S_SingleThreadConfig.DataTableCount)
    {
        _timer2CompletionInProgress = true;
        try
        {
            SetButtonEnable(true, true);

            sb_loadingState.AppendLine(String.Format("Time:{2}==>共采集{1}个类别,已经采集 {0} 个类别,采集结束!", S_SingleThreadConfig.DS.Tables.Count, S_SingleThreadConfig.DataTableCount, DateTime.Now.ToString("HH:mm:ss")));
            txt_result.Text = sb_loadingState.ToString();

            DataTable dt = DataHandler(S_SingleThreadConfig.DS);
            string flag = new ExcelHelper(String.Format("{0}/本次采集数据汇总表.xls", defaultfilePath), "").DatatableToExcel(dt);
            if (flag != "")
            {
                MessageBox.Show("导出失败!" + flag);
            }

            // Default to the success text; switch to the mismatch report
            // when any category was recorded as inconsistent.
            string msg = "采集成功:采集回来的数据与网站上每个类别显示的总条数一致!";
            if (S_SingleThreadConfig.Sb_ErrorCategory.ToString().Length != 0)
            {
                S_SingleThreadConfig.Sb_ErrorCategory = S_SingleThreadConfig.Sb_ErrorCategory.AppendLine("请查看有问题的类别,尾页的条数是否为总数,个位数上的值。(每页显示10条)");
                msg = S_SingleThreadConfig.Sb_ErrorCategory.ToString();
            }
            txt_result.Text = msg;

            MessageBox.Show(String.Format("网页上数据共有:{0}条,实际抓取数据: {1} 条!", S_SingleThreadConfig.SingleTotal_site, dt.Rows.Count));

            // Clear the collected state for the next run.
            S_SingleThreadConfig.ReSet();
        }
        finally
        {
            _timer2CompletionInProgress = false;
        }
    }
    else
    {
        // Trim the progress buffer before appending, so the newest line is
        // never thrown away and the text box is never blanked.
        if (sb_loadingState.Length > 1024)
        {
            sb_loadingState = new StringBuilder();
        }

        sb_loadingState.AppendLine(String.Format("Time:{2}==>共采集{1}个类别,已经采集 {0} 个类别", S_SingleThreadConfig.DS.Tables.Count, S_SingleThreadConfig.DataTableCount, DateTime.Now.ToString("HH:mm:ss")));
        txt_result.Text = sb_loadingState.ToString();
    }
}
/*
/// <summary>
/// Collect and export [click handler] — currently disabled.
/// </summary>
/// <param name="sender"></param>
/// <param name="caie"></param>
private void btn_collect_export_Click(object sender, EventArgs e)
{
    FolderBrowserDialog fbd = new FolderBrowserDialog();
    // Export path: start from the previously selected folder
    if (defaultfilePath != "")
    {
        fbd.SelectedPath = defaultfilePath;
    }
    // Start collecting
    if (fbd.ShowDialog() == DialogResult.OK)
    {
        defaultfilePath = fbd.SelectedPath;
        CollectAndExport(fbd.SelectedPath);
    }
}
*/

/// <summary>
/// Collects every category of the currently selected tab (phase) and exports
/// all resulting tables into a single workbook named "第N期.xls", where N is
/// the selected tab index plus one.
/// </summary>
/// <remarks>
/// Each category is fetched inside a <c>LoadingHandler.Show</c> callback that
/// writes a "started" and a "finished" line to the status text box. The row
/// count of every merged table is added to <c>totalRecord</c>, which is
/// reported in a message box at the end.
/// </remarks>
/// <param name="floder">Folder the workbook is written to.</param>
private void CollectAndExport(string floder)
{
    DataSet collectedTables = new DataSet();

    // Categories of the currently selected phase (first, second, third).
    List<M_Category> categories = GetCategory(tab_all.SelectedIndex);

    // Fetch the data for each category in turn.
    foreach (M_Category category in categories)
    {
        // Distinguish import from export: a parameter string containing
        // "areano" selects the import base URLs.
        string cnBase;
        string enBase;
        if (category.Param.IndexOf("areano") == -1)
        {
            cnBase = Config.BaseURL_CN;
            enBase = Config.BaseURL_EN;
        }
        else
        {
            cnBase = Config.BaseURL_CN_Imp;
            enBase = Config.BaseURL_EN_Imp;
        }

        var cnUrl = String.Format("{0}?{1}", cnBase, category.Param);
        var enUrl = String.Format("{0}?{1}", enBase, category.Param);

        LoadingHandler.Show(this, LoadingStyle.None, args =>
        {
            // Announce the start of this category.
            args.Execute(state =>
            {
                string startedLine = String.Format("正在抓取 {0} ... ", category.Title);
                sb_loadingState.AppendLine(startedLine);
                txt_result.Text = sb_loadingState.ToString();
            });

            DataTable cnTable = Core.GetAllCompanyName_DataTable(cnUrl, category.TimePhase, category.Title);
            DataTable enTable = Core.GetAllCompanyName_DataTable(enUrl, category.TimePhase, category.Title);
            DataTable merged = S_Core.MergeTableColumn(cnTable, enTable);

            // Add the merged table to the data set.
            merged.TableName = category.Title;
            totalRecord += merged.Rows.Count;
            collectedTables.Tables.Add(merged);

            // Announce the end of this category.
            args.Execute(state =>
            {
                string finishedLine = String.Format("抓取 {0} 结束! ", category.Title);
                sb_loadingState.AppendLine(finishedLine);
                txt_result.Text = sb_loadingState.ToString();
            });
        });
    }

    // Export the whole data set.
    string workbookPath = String.Format("{0}/第{1}期.xls", floder, tab_all.SelectedIndex + 1);
    ExcelHelper exporter = new ExcelHelper(workbookPath, "");
    string exportMessage = exporter.DataSetToExcel(collectedTables);
    if (exportMessage != "")
    {
        MessageBox.Show("导出失败!" + exportMessage);
    }

    MessageBox.Show(String.Format("共抓取数据: {0} 条!", totalRecord));
}
/// <summary>
/// Collects every category in <c>list</c>, exports each one to its own .xls
/// file under <c>floderPath</c>, and checks the collected row count against
/// the total reported by the site.
/// </summary>
/// <remarks>
/// For each category the Chinese and English company tables are fetched and
/// merged, and the merged table is exported and added to <c>DS</c>. The
/// site-reported totals are then queried. A log line is written for every
/// category, and is additionally appended to <c>Sb_ErrorCategory</c> when
/// the Chinese total differs from the collected row count.
/// <c>SingleTotal_site</c> and <c>SingleTotal</c> accumulate the site total
/// and the collected total respectively.
/// </remarks>
public static void Collect2Export()
{
    const string importPrefix = "进口:";

    foreach (M_Category item in list)
    {
        string url_cn_base;
        string url_en_base;

        // Decide between import and export: a parameter string containing
        // "areano" selects the import base URLs.
        if (item.Param.IndexOf("areano") == -1)
        {
            url_cn_base = Config.BaseURL_CN;
            url_en_base = Config.BaseURL_EN;
        }
        else
        {
            url_cn_base = Config.BaseURL_CN_Imp;
            url_en_base = Config.BaseURL_EN_Imp;

            // Tag the title only once: the category objects in the list are
            // mutated in place, so an unconditional prepend would stack
            // prefixes if this method runs again over the same list.
            string currentTitle = item.Title;
            if (currentTitle == null || !currentTitle.StartsWith(importPrefix, StringComparison.Ordinal))
            {
                item.Title = importPrefix + currentTitle;
            }
        }

        // Build the request URLs.
        var url_cn = String.Format("{0}?{1}", url_cn_base, item.Param);
        var url_en = String.Format("{0}?{1}", url_en_base, item.Param);

        DataTable result_cn = Core.GetAllCompanyName_DataTable(url_cn, item.TimePhase, item.Title);
        DataTable result_en = Core.GetAllCompanyName_DataTable(url_en, item.TimePhase, item.Title);
        DataTable dt = S_Core.MergeTableColumn(result_cn, result_en);

        // The table name and the file name start from the same text.
        string tableName = String.Format("第{0}期_{1}", item.TimePhase, item.Title);
        dt.TableName = tableName;

        // Strip characters that are unwanted in the file name.
        // NOTE(review): the two space replacements look identical here; one
        // may originally have been a full-width or non-breaking space — confirm.
        string fileName = tableName.Trim().Replace("进口:", "进口_").Replace(" ", "").Replace(" ", "").Replace("、", "");
        string flag = new ExcelHelper(String.Format("{0}/{1}.xls", floderPath, fileName), "").DatatableToExcel(dt);

        // Total number of records the site reports for this category.
        int categoryDataTotal_cn = Core.GetCategoryDataCount(url_cn, XPath.CATEGORYDATATOTAL);
        int categoryDataTotal_en = Core.GetCategoryDataCount(url_en, XPath.CATEGORYDATATOTAL);

        // Record the counts for this category; the last field states
        // whether the Chinese total matches what was collected.
        string log = String.Format("{0}==>cn:{1},en:{2},collect:{3}==>{4}", dt.TableName, categoryDataTotal_cn, categoryDataTotal_en, dt.Rows.Count, categoryDataTotal_cn == dt.Rows.Count);
        Log.Info(log);
        if (categoryDataTotal_cn != dt.Rows.Count)
        {
            Sb_ErrorCategory = Sb_ErrorCategory.AppendLine(log);
        }

        SingleTotal_site += categoryDataTotal_cn;
        SingleTotal += dt.Rows.Count;
        DS.Tables.Add(dt);

        // Record the export failure, if any.
        if (flag != "")
        {
            Log.Write(flag);
        }
    }
}