Exemplo n.º 1
0
        /// <summary>
        /// Starts a collection run for one site: loads the site's settings via
        /// <c>GetSite</c>, builds the list of news URLs (following additional list pages
        /// when the first page yields fewer than <paramref name="num"/> links) and calls
        /// <c>CollectPage</c> for each URL. Progress is reported through
        /// <c>HProgressBar</c> when <c>ShowProGressBar</c> is set.
        /// </summary>
        /// <remarks>
        /// The method returns silently (after reporting a message) when the site record or
        /// its mandatory settings are missing, or when no news link can be found.
        /// Failures of individual pages are counted and reported, never rethrown.
        /// </remarks>
        /// <param name="folderid">Identifier handed to <c>GetSite</c> to load the site record.</param>
        /// <param name="num">Maximum number of news pages to collect.</param>
        /// <param name="bnorepeat">
        /// Passed through unchanged to <c>CollectPage</c>
        /// (presumably asks it to skip already collected pages; confirm against <c>CollectPage</c>).
        /// </param>
        /// <exception cref="FormatException">
        /// The <c>SaveRemotePic</c>/<c>IsReverse</c> columns do not hold a valid boolean, or
        /// <c>OtherType</c>/<c>StartPageNum</c>/<c>EndPageNum</c> do not hold a valid integer
        /// when they are needed.
        /// </exception>
        public void Collecting(int folderid, int num, bool bnorepeat)
        {
            if (ShowProGressBar)
            {
                HProgressBar.Start("正在读取列表数据");
            }
            DataTable tb = GetSite(folderid);

            #region Validate the site settings
            if (tb == null || tb.Rows.Count < 1)
            {
                ReportCollectProgress("没有找到该站点的相关记录!", 0);
                return;
            }
            DataRow r = tb.Rows[0];
            // The link, title and body rules are mandatory; without them nothing can be extracted.
            if (r.IsNull("LinkSetting") || r.IsNull("PageTitleSetting") || r.IsNull("PagebodySetting"))
            {
                ReportCollectProgress("相关的参数没有设置,无法取得新闻列表!", 0);
                return;
            }
            if (bool.Parse(r["SaveRemotePic"].ToString()))
            {
                #region Remote pictures
                string rtpath = NetCMS.Config.UIConfig.dirFile;
                if (rtpath == null || rtpath.Trim().Equals(""))
                {
                    ReportCollectProgress("没有找到管理员附件目录!", 0);
                    return;
                }
                // Pictures are stored in a per-day sub folder (yyyyMMdd) below "RemoteFiles".
                string dtpath = DateTime.Now.ToString("yyyyMMdd");
                PicSavePath = NetCMS.Common.ServerInfo.GetRootPath().TrimEnd('\\') + @"\" + rtpath + @"\RemoteFiles\" + dtpath;
                if (!Directory.Exists(PicSavePath))
                {
                    Directory.CreateDirectory(PicSavePath);
                }
                PicSaveUrl     = NetCMS.Publish.CommonData.getUrl() + "/" + rtpath + "/RemoteFiles/" + dtpath;
                // NOTE(review): this flag is only ever switched on here; if the instance is reused
                // for a site with SaveRemotePic = false it may keep a stale value - confirm.
                bSaveRemotePic = true;
                #endregion
            }
            #endregion Validate the site settings

            ReportCollectProgress("正在获取新闻列表页", 0);

            string sListUrl = r["objURL"].ToString();
            string sEncode  = r["Encode"].ToString();
            bool   bReverse = bool.Parse(r["IsReverse"].ToString());
            // Default list rule: the whole <body> of the page, unless the site defines its own.
            string listset  = @"<body[^>]*>(?<list>[\s\S]+?)</body>";
            if (!r.IsNull("ListSetting"))
            {
                listset = r["ListSetting"].ToString();
            }
            PageList PL = new PageList(sListUrl, sEncode);
            PL.RuleOfList = listset;
            PL.RuleOfLink = r["LinkSetting"].ToString();
            string[] NewsUrl = GetNewsList(PL);
            if (NewsUrl == null)
            {
                ReportCollectProgress("没有找到相关新闻链接地址!", 0);
                return;
            }

            // The first list page did not yield enough links: ask the configured
            // pagination strategy for the missing ones.
            int len = NewsUrl.Length;
            if (len < num)
            {
                int      pagetype = int.Parse(r["OtherType"].ToString());
                string[] otherurl = null;
                switch (pagetype)
                {
                case 1:    // recursive pagination
                    otherurl = PL.Pagination(r["OtherPageSetting"].ToString(), num - len);
                    break;

                case 2:    // other (single) pages
                    otherurl = PL.SinglePagination(r["OtherPageSetting"].ToString(), num - len);
                    break;

                case 3:    // index pages
                    otherurl = PL.IndexPagination(r["OtherPageSetting"].ToString(), int.Parse(r["StartPageNum"].ToString()), int.Parse(r["EndPageNum"].ToString()), num - len);
                    break;

                default:   // 0 and unknown values: no additional pages
                    break;
                }
                if (otherurl != null && otherurl.Length > 0)
                {
                    Array.Resize(ref NewsUrl, len + otherurl.Length);
                    otherurl.CopyTo(NewsUrl, len);
                }
            }
            if (NewsUrl.Length < 1)
            {
                ReportCollectProgress("从列表内容中没有找到任何新闻的相关链接!", 0);
                return;
            }
            if (bReverse)
            {
                Array.Reverse(NewsUrl);
            }
            ReportCollectProgress("开始采集新闻", 0);

            // Number of pages that will really be processed. Using it as the denominator
            // (instead of num) lets the percentages reach 100% even when fewer links
            // than requested were found.
            int total = Math.Min(num, NewsUrl.Length);
            int nSucceed = 0, nFailed = 0, nRepeat = 0;
            for (int i = 0; i < total; i++)
            {
                try
                {
                    // A result of 1 is counted as a failure; every other value counts as a
                    // success, and -1 is additionally counted as a repeat.
                    int flag = CollectPage(NewsUrl[i], r, bnorepeat);
                    if (flag != 1)
                    {
                        nSucceed++;
                        if (flag == -1)
                        {
                            nRepeat++;
                        }
                    }
                    else
                    {
                        nFailed++;
                    }
                }
                catch
                {
                    // Best effort: one broken page must not abort the whole run.
                    nFailed++;
                }
                if (ShowProGressBar)
                {
                    string prompt = "正在采集新闻,终止<a href=\"Collect_List.aspx\">返回</a>.成功:" + nSucceed * 100 / total + "% ";
                    if (nRepeat > 0)
                    {
                        prompt += "(其中重复:" + nRepeat * 100 / total + "%) ";
                    }
                    prompt += "失败:" + nFailed * 100 / total + "%";
                    HProgressBar.Roll(prompt, (i + 1) * 100 / total);
                }
            }
        }

        /// <summary>
        /// Forwards a message and percentage to <c>HProgressBar.Roll</c>, but only when
        /// <c>ShowProGressBar</c> is set.
        /// </summary>
        /// <param name="message">Text handed to the progress bar.</param>
        /// <param name="percent">Completion value handed to the progress bar.</param>
        private void ReportCollectProgress(string message, int percent)
        {
            if (ShowProGressBar)
            {
                HProgressBar.Roll(message, percent);
            }
        }