/// <summary>
/// Starts a collection run: loads the site settings row via GetSite, gathers news links from the
/// list page (and from additional pages when the list page yields fewer than
/// <paramref name="num"/> links), then calls CollectPage for at most <paramref name="num"/> links.
/// Progress and outcome percentages are reported through HProgressBar when ShowProGressBar is set.
/// </summary>
/// <param name="folderid">Value handed to GetSite to load the settings row.</param>
/// <param name="num">Maximum number of news pages to collect.</param>
/// <param name="bnorepeat">Forwarded unchanged to CollectPage (presumably "skip duplicates" - confirm against CollectPage).</param>
/// <remarks>
/// The method returns silently (after an optional progress message) when the settings are missing
/// or incomplete, or when no links are found. The Parse calls on the settings columns throw if a
/// column holds a value that is not a valid bool/int.
/// </remarks>
public void Collecting(int folderid, int num, bool bnorepeat)
{
    if (ShowProGressBar)
    {
        HProgressBar.Start("正在读取列表数据");
    }
    DataTable tb = GetSite(folderid);

    #region Validate the settings
    if (tb == null || tb.Rows.Count < 1)
    {
        if (ShowProGressBar)
        {
            HProgressBar.Roll("没有找到该站点的相关记录!", 0);
        }
        return;
    }
    DataRow r = tb.Rows[0];
    if (r.IsNull("LinkSetting") || r.IsNull("PageTitleSetting") || r.IsNull("PagebodySetting"))
    {
        if (ShowProGressBar)
        {
            HProgressBar.Roll("相关的参数没有设置,无法取得新闻列表!", 0);
        }
        return;
    }
    if (bool.Parse(r["SaveRemotePic"].ToString()))
    {
        #region Remote pictures
        string rtpath = NetCMS.Config.UIConfig.dirFile;
        if (rtpath == null || rtpath.Trim().Equals(""))
        {
            if (ShowProGressBar)
            {
                HProgressBar.Roll("没有找到管理员附件目录!", 0);
            }
            return;
        }
        // Remote files are stored in a per-day sub-folder (yyyyMMdd).
        string dtpath = DateTime.Now.ToString("yyyyMMdd");
        PicSavePath = NetCMS.Common.ServerInfo.GetRootPath().TrimEnd('\\') + @"\" + rtpath + @"\RemoteFiles\" + dtpath;
        // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
        Directory.CreateDirectory(PicSavePath);
        PicSaveUrl = NetCMS.Publish.CommonData.getUrl() + "/" + rtpath + "/RemoteFiles/" + dtpath;
        // NOTE(review): the flag is only ever set to true here; it is not reset when SaveRemotePic
        // is false, so a reused instance may keep a stale value - confirm how the field is initialised.
        bSaveRemotePic = true;
        #endregion
    }
    #endregion

    if (ShowProGressBar)
    {
        HProgressBar.Roll("正在获取新闻列表页", 0);
    }
    string sListUrl = r["objURL"].ToString();
    string sEncode = r["Encode"].ToString();
    bool bReverse = bool.Parse(r["IsReverse"].ToString());

    // Default list rule: the whole <body> is treated as the list area.
    string listset = @"<body[^>]*>(?<list>[\s\S]+?)</body>";
    if (!r.IsNull("ListSetting"))
    {
        listset = r["ListSetting"].ToString();
    }

    PageList PL = new PageList(sListUrl, sEncode);
    PL.RuleOfList = listset;
    PL.RuleOfLink = r["LinkSetting"].ToString();
    string[] NewsUrl = GetNewsList(PL);
    if (NewsUrl == null)
    {
        if (ShowProGressBar)
        {
            HProgressBar.Roll("没有找到相关新闻链接地址!", 0);
        }
        return;
    }

    // The list page did not yield enough links: ask the configured pagination mode for the rest.
    int len = NewsUrl.Length;
    if (len < num)
    {
        int pagetype = int.Parse(r["OtherType"].ToString());
        string[] otherurl = null;
        switch (pagetype)
        {
            case 0:
                break;
            case 1: // recursive
                otherurl = PL.Pagination(r["OtherPageSetting"].ToString(), num - len);
                break;
            case 2: // other pages
                otherurl = PL.SinglePagination(r["OtherPageSetting"].ToString(), num - len);
                break;
            case 3: // index pages
                otherurl = PL.IndexPagination(
                    r["OtherPageSetting"].ToString(),
                    int.Parse(r["StartPageNum"].ToString()),
                    int.Parse(r["EndPageNum"].ToString()),
                    num - len);
                break;
            default:
                break;
        }
        if (otherurl != null && otherurl.Length > 0)
        {
            Array.Resize(ref NewsUrl, len + otherurl.Length);
            otherurl.CopyTo(NewsUrl, len);
        }
    }
    if (NewsUrl.Length < 1)
    {
        if (ShowProGressBar)
        {
            HProgressBar.Roll("从列表内容中没有找到任何新闻的相关链接!", 0);
        }
        return;
    }
    if (bReverse)
    {
        Array.Reverse(NewsUrl);
    }
    if (ShowProGressBar)
    {
        HProgressBar.Roll("开始采集新闻", 0);
    }

    // Number of pages that will actually be processed. Percentages are computed against this
    // value (not against num) so the bar reaches 100% even when fewer links than requested exist.
    // When num <= 0 the loop body never runs, so total is never used as a zero divisor.
    int total = Math.Min(num, NewsUrl.Length);
    int nSucceed = 0, nFailed = 0, nRepeat = 0;
    for (int i = 0; i < total; i++)
    {
        try
        {
            // A result of 1 is counted as a failure; every other value counts as a success,
            // and -1 is additionally counted as a repeat.
            int flag = CollectPage(NewsUrl[i], r, bnorepeat);
            if (flag != 1)
            {
                nSucceed++;
                if (flag == -1)
                {
                    nRepeat++;
                }
            }
            else
            {
                nFailed++;
            }
        }
        catch
        {
            // Best effort: a page that throws is counted as failed and the run continues.
            nFailed++;
        }

        string prompt = "正在采集新闻,终止<a href=\"Collect_List.aspx\">返回</a>.成功:" + nSucceed * 100 / total + "% ";
        if (nRepeat > 0)
        {
            prompt += "(其中重复:" + nRepeat * 100 / total + "%) ";
        }
        prompt += "失败:" + nFailed * 100 / total + "%";
        if (ShowProGressBar)
        {
            HProgressBar.Roll(prompt, (i + 1) * 100 / total);
        }
    }
}