Exemplo n.º 1
0
    /// <summary>
    /// Runs one stage of the collection (scraping) workflow configured on this page.
    /// </summary>
    /// <param name="testType">
    /// 0 = show the extracted list fragment; 1 = show the extracted links;
    /// 2 = fetch the first link and show the extracted fields;
    /// 3 = fetch every link and insert one article per link. Any other value
    /// only validates the list and link settings.
    /// </param>
    /// <returns>
    /// <c>false</c> when a configuration or download problem was reported to the
    /// browser (alert followed by a redirect); otherwise <c>true</c>.
    /// </returns>
    private bool GetTest(int testType)
    {
        string picDir = "/upload/images_collects/";
        System.Text.Encoding enType = ResolveCollectEncoding();
        TPortalClass.Collection cn = new TPortalClass.Collection();

        // Download the list page; the helper reports failures through "$..." sentinel strings.
        string listHtml = cn.GetHttpPageCode(ListUrl, enType);
        if (listHtml == "$UrlIsFalse")
        {
            WriteCollectAlert("列表地址设置错误", "list");
            return false;
        }
        if (listHtml == "$GetFalse")
        {
            WriteCollectAlert("无法连接列表页或连接超时", "list");
            return false;
        }

        // Cut the list fragment out of the page using the configured start/end markers.
        listHtml = cn.GetBody(listHtml, ListStart, ListEnd, true, true);
        if (listHtml == "$StartFalse")
        {
            WriteCollectAlert("列表开始标记设置错误,请重新设置", "list");
            return false;
        }
        if (listHtml == "$EndFalse")
        {
            WriteCollectAlert("列表结束标记设置错误,请重新设置", "list");
            return false;
        }

        if (testType == 0)
        {
            this.ltListTest.Text = listHtml;
            return true;
        }

        ArrayList links = cn.GetLinkArray(listHtml, LinkStart, LinkEnd);
        if (links.Count == 0)
        {
            WriteCollectAlert("未取到链接,请检查链接设置", "link");
            return false;
        }

        // Link extraction reports its own failures through the first array element.
        string firstLink = links[0].ToString();
        if (firstLink == "$StartFalse")
        {
            WriteCollectAlert("链接开始标记设置错误,请重新设置111", "link");
            return false;
        }
        if (firstLink == "$EndFalse")
        {
            WriteCollectAlert("链接结束标记设置错误,请重新设置", "link");
            return false;
        }
        if (firstLink == "$NoneLink")
        {
            WriteCollectAlert("未取到链接,请检查链接设置222", "link");
            return false;
        }

        if (IsDesc == 1)
        {
            links.Reverse();
        }
        if (CollecNum > 0 && links.Count > CollecNum)
        {
            // Keep only the first CollecNum links.
            links.RemoveRange(CollecNum, links.Count - CollecNum);
        }

        switch (testType)
        {
            case 1:
                ShowCollectedLinks(cn, links);
                return true;
            case 2:
                return TestFirstCollectedArticle(cn, links, enType, picDir);
            case 3:
                CollectAllArticles(cn, links, enType, picDir);
                return true;
            default:
                return true;
        }
    }

    /// <summary>Maps the WebEncode setting ("1", "2", "3") to a text encoding; any other value uses the system default.</summary>
    private System.Text.Encoding ResolveCollectEncoding()
    {
        switch (WebEncode)
        {
            case "1":
                return System.Text.Encoding.GetEncoding("gb2312");
            case "2":
                return System.Text.Encoding.UTF8;
            case "3":
                return System.Text.Encoding.Unicode;
            default:
                return System.Text.Encoding.Default;
        }
    }

    /// <summary>
    /// Writes a script block that shows <paramref name="message"/> and then redirects to
    /// list_collect.aspx. When <paramref name="action"/> is given, the redirect carries
    /// that action plus the current record id.
    /// </summary>
    private void WriteCollectAlert(string message, string action)
    {
        string target = "list_collect.aspx";
        if (!string.IsNullOrEmpty(action))
        {
            // The id comes from a form field, so URL-encode it before placing it inside the script.
            target += "?action=" + action + "&id=" + System.Web.HttpUtility.UrlEncode(this.txtId.Text);
        }
        Response.Write("<script>alert('" + message + "');window.location='" + target + "'</script>");
    }

    /// <summary>True when a marker-based extraction reported a missing start or end marker.</summary>
    private static bool IsCollectMarkerError(string value)
    {
        return value == "$StartFalse" || value == "$EndFalse";
    }

    /// <summary>True when URL resolution reported an unusable link.</summary>
    private static bool IsCollectUrlInvalid(string url)
    {
        // NOTE(review): both spellings are accepted because the exact failure sentinel
        // returned by DefiniteUrl is not visible here - confirm and drop the unused one.
        return url == "$False" || url == "$False$";
    }

    /// <summary>Appends every usable link, rendered as an anchor, to the link preview control.</summary>
    private void ShowCollectedLinks(TPortalClass.Collection cn, ArrayList links)
    {
        System.Text.StringBuilder html = new System.Text.StringBuilder();
        foreach (object link in links)
        {
            string url = cn.DefiniteUrl(link.ToString(), WebUrl);
            if (IsCollectUrlInvalid(url))
            {
                continue;
            }
            // The URL comes from a remote page, so encode it for both the attribute and the text.
            html.Append("<a href=\"");
            html.Append(System.Web.HttpUtility.HtmlAttributeEncode(url));
            html.Append("\" target=\"_blank\">");
            html.Append(System.Web.HttpUtility.HtmlEncode(url));
            html.Append("</a><br>");
        }
        ltLinkTest.Text += html.ToString();
    }

    /// <summary>
    /// Fetches the first link, extracts title, keyword, date, source and body, and shows
    /// them in the preview controls. Returns <c>false</c> after reporting a problem.
    /// </summary>
    private bool TestFirstCollectedArticle(TPortalClass.Collection cn, ArrayList links, System.Text.Encoding enType, string picDir)
    {
        string url = cn.DefiniteUrl(links[0].ToString(), WebUrl);
        if (IsCollectUrlInvalid(url))
        {
            WriteCollectAlert("获取到的链接地址无效,请检查链接设置", "link");
            return false;
        }

        string pageHtml = cn.GetHttpPageCode(url, enType);
        if (pageHtml == "$UrlIsFalse")
        {
            WriteCollectAlert("获取到的链接地址无效,请检查链接设置", "link");
            return false;
        }
        if (pageHtml == "$GetFalse")
        {
            WriteCollectAlert("无法连接内容页或连接超时", "content");
            return false;
        }

        string title = cn.GetBody(pageHtml, TitleStart, TitleEnd, false, false);
        string keyWord = cn.GetBody(pageHtml, KeyWordStart, KeyWordEnd, false, false);
        string dateText = cn.GetBody(pageHtml, DateStart, DateEnd, false, false);
        string source = cn.GetBody(pageHtml, SourceStart, SourceEnd, false, false);
        string body = cn.GetBody(pageHtml, ContentStart, ContentEnd, false, false);

        if (title == "$StartFalse")
        {
            WriteCollectAlert("标题开始标记设置错误,请重新设置", "content");
            return false;
        }
        if (title == "$EndFalse")
        {
            WriteCollectAlert("标题结束标记设置错误,请重新设置", "content");
            return false;
        }
        if (body == "$StartFalse")
        {
            WriteCollectAlert("正文开始标记设置错误,请重新设置", "content");
            return false;
        }
        if (body == "$EndFalse")
        {
            WriteCollectAlert("正文结束标记设置错误,请重新设置", "content");
            return false;
        }

        if (!string.IsNullOrEmpty(RemoveTitle))
        {
            title = title.Replace(RemoveTitle, "");
        }
        this.ltTestTitle.Text = title;
        this.ltKeyWord.Text = keyWord;
        this.ltDateTime.Text = dateText;
        this.ltSource.Text = source;

        body = AppendFollowingPages(cn, pageHtml, body, enType);

        // In test mode a start/end list of different length is reported instead of being applied.
        if (!string.IsNullOrEmpty(RemoveBodyStart) && !string.IsNullOrEmpty(RemoveBodyEnd))
        {
            int startCount = RemoveBodyStart.Split('$').Length;
            int endCount = RemoveBodyEnd.Split('$').Length;
            if (startCount > 1 && endCount > 1 && startCount != endCount)
            {
                WriteCollectAlert("少了$请注意", null);
                return false;
            }
        }
        body = RemoveConfiguredBodySections(cn, body);

        // "0" is passed as the last argument in test mode; the collect path passes IsSaveImg there.
        ArrayList saved = cn.ReplaceSaveRemoteFile(body, picDir, WebUrl, "0");
        ltTestContent.Text = saved[0].ToString();
        if (saved.Count == 2)
        {
            ltPhotoUrl.Text = saved[1].ToString();
        }
        return true;
    }

    /// <summary>
    /// Follows the "next page" link (NextPageRegex) for at most 20 pages and appends each
    /// page's content to <paramref name="body"/>, separated by the $PageNext$ marker.
    /// </summary>
    private string AppendFollowingPages(TPortalClass.Collection cn, string firstPageHtml, string body, System.Text.Encoding enType)
    {
        const int maxFollowingPages = 20;
        string nextLink = cn.GetRegValue(NextPageRegex, firstPageHtml);
        for (int page = 0; page < maxFollowingPages && !string.IsNullOrEmpty(nextLink); page++)
        {
            string nextUrl = cn.DefiniteUrl(nextLink, WebUrl);
            if (IsCollectUrlInvalid(nextUrl))
            {
                break;
            }
            string pageHtml = cn.GetHttpPageCode(nextUrl, enType);
            if (pageHtml == "$UrlIsFalse" || pageHtml == "$GetFalse")
            {
                break;
            }
            string pageBody = cn.GetBody(pageHtml, ContentStart, ContentEnd, false, false);
            if (IsCollectMarkerError(pageBody))
            {
                // Stop here so that a failure sentinel is never appended to the article text.
                break;
            }
            body = body + "<br>$PageNext$<br>" + pageBody;
            nextLink = cn.GetRegValue(NextPageRegex, pageHtml);
        }
        return body;
    }

    /// <summary>
    /// Removes the sections delimited by RemoveBodyStart / RemoveBodyEnd from
    /// <paramref name="body"/>. When both settings contain '$', they are treated as
    /// parallel lists of start/end markers.
    /// </summary>
    private string RemoveConfiguredBodySections(TPortalClass.Collection cn, string body)
    {
        if (string.IsNullOrEmpty(RemoveBodyStart) || string.IsNullOrEmpty(RemoveBodyEnd))
        {
            return body;
        }

        string[] starts = RemoveBodyStart.Split('$');
        string[] ends = RemoveBodyEnd.Split('$');
        if (starts.Length > 1 && ends.Length > 1)
        {
            // Only complete start/end pairs are applied, so unequal lists cannot index out of range.
            int pairCount = System.Math.Min(starts.Length, ends.Length);
            for (int i = 0; i < pairCount; i++)
            {
                body = RemoveBodySection(cn, body, starts[i], ends[i]);
            }
            return body;
        }
        return RemoveBodySection(cn, body, RemoveBodyStart, RemoveBodyEnd);
    }

    /// <summary>Removes one start/end-delimited section from <paramref name="body"/>, if it is found.</summary>
    private static string RemoveBodySection(TPortalClass.Collection cn, string body, string start, string end)
    {
        string section = cn.GetBody(body, start, end, true, true);
        // string.Replace throws on an empty search string, and a sentinel means nothing was found.
        if (string.IsNullOrEmpty(section) || IsCollectMarkerError(section))
        {
            return body;
        }
        return body.Replace(section, "");
    }

    /// <summary>Collects every link, inserts one article per usable link and reports the totals.</summary>
    private void CollectAllArticles(TPortalClass.Collection cn, ArrayList links, System.Text.Encoding enType, string picDir)
    {
        // Base URL handed to ReplaceSaveRemoteFile: WebUrl plus "/" and characters 20-23 of ListUrl.
        // NOTE(review): the fixed offsets look specific to one source site - confirm.
        // Falls back to WebUrl when ListUrl is too short for that slice.
        string imageBaseUrl = WebUrl;
        if (ListUrl != null && ListUrl.Length >= 24)
        {
            imageBaseUrl = WebUrl + "/" + ListUrl.Substring(20, 4);
        }

        int falseNum = 0;
        int successNum = 0;
        SetProcessBar("从" + WebUrl + "采集信息", true);
        for (int i = 0; i < links.Count; i++)
        {
            ProcessBar(i, links.Count);
            if (CollectOneArticle(cn, links[i].ToString(), enType, picDir, imageBaseUrl, successNum))
            {
                successNum++;
            }
            else
            {
                falseNum++;
            }
        }
        WriteCollectAlert("采集完成,成功 " + successNum + "条 ,失败 " + falseNum, null);
    }

    /// <summary>
    /// Downloads one article, cleans its body and inserts it as a JpArticle.
    /// Returns <c>false</c> when the link, the download or the title/body extraction failed.
    /// </summary>
    private bool CollectOneArticle(TPortalClass.Collection cn, string rawLink, System.Text.Encoding enType, string picDir, string imageBaseUrl, int sortOffset)
    {
        string url = cn.DefiniteUrl(rawLink, WebUrl);
        if (IsCollectUrlInvalid(url))
        {
            return false;
        }
        string pageHtml = cn.GetHttpPageCode(url, enType);
        if (pageHtml == "$UrlIsFalse" || pageHtml == "$GetFalse")
        {
            return false;
        }

        string title = cn.GetBody(pageHtml, TitleStart, TitleEnd, false, false);
        string dateText = cn.GetBody(pageHtml, DateStart, DateEnd, false, false);
        string body = cn.GetBody(pageHtml, ContentStart, ContentEnd, false, false);
        if (IsCollectMarkerError(title) || IsCollectMarkerError(body))
        {
            return false;
        }

        body = AppendFollowingPages(cn, pageHtml, body, enType);

        ArrayList saved = cn.ReplaceSaveRemoteFile(body, picDir, imageBaseUrl, IsSaveImg.ToString());
        body = saved[0].ToString();
        body = body.Replace("'", "");
        body = RemoveConfiguredBodySections(cn, body);

        // Optional tag filters, each switched on by its own setting.
        if (Script_Iframe == 1)
        {
            body = cn.ScriptHtml(body, "Iframe", 2);
        }
        if (Script_Object == 1)
        {
            body = cn.ScriptHtml(body, "Object", 2);
        }
        if (Script_Script == 1)
        {
            body = cn.ScriptHtml(body, "Script", 2);
        }
        if (Script_Div == 1)
        {
            body = cn.ScriptHtml(body, "Div", 3);
        }
        if (Script_Table == 1)
        {
            body = cn.ScriptHtml(body, "Table", 2);
        }
        if (Script_Span == 1)
        {
            body = cn.ScriptHtml(body, "Span", 3);
        }
        if (Script_Img == 1)
        {
            body = cn.ScriptHtml(body, "Img", 3);
        }
        if (Script_Font == 1)
        {
            body = cn.ScriptHtml(body, "Font", 3);
        }
        if (Script_A == 1)
        {
            body = cn.ScriptHtml(body, "A", 3);
        }
        if (Script_Html == 1)
        {
            body = cn.HtmlScript(body);
        }

        // Use the extracted date when there is one; a failed extraction falls back to "now".
        string articleDate = System.DateTime.Now.ToString();
        if (!string.IsNullOrEmpty(dateText) && !IsCollectMarkerError(dateText))
        {
            articleDate = dateText;
        }
        if (!string.IsNullOrEmpty(RemoveTitle))
        {
            title = title.Replace(RemoveTitle, "");
        }

        // Save the collected data as a new JpArticle.
        TPortalClass.JpArticle art = new TPortalClass.JpArticle(-1);
        art.cid = ClassId.ToString();          // column / category
        art.title = title;                     // title
        art.subhead = "";                      // sub-title
        art.ht_content = body;                 // body
        art.summary = "";                      // summary
        art.author = "系统管理员";
        art.status = "发布";
        art.crtime = articleDate;
        art.modtime = articleDate;
        art.pubtime = articleDate;
        art.endtime = System.DateTime.Now.AddYears(20).ToString();
        art.hits = 1;                          // hit counter
        art.ifindexdisplay = "0";              // show on home page
        art.indexdisplaypicpath = "0";         // home page picture path

        art.seotitle = "";
        art.seokeywords = "";
        art.seodescription = "";

        art.asort = 50 + sortOffset;           // sort order
        art.iftop = "0";                       // pinned
        art.topcreatetime = articleDate;       // pin start time
        art.topendtime = articleDate;          // pin end time
        art.ifcomment = "0";                   // comments enabled
        art.source = "本网站";                  // source is fixed; the extracted source is not used
        art.role_userid = "";
        art.role_username = "";
        art.shtml_created = 0;                 // static page generated
        art.GUID = System.Guid.NewGuid().ToString();
        art.map_aid = 0;
        art.input = "admin";
        art.targettype = "";

        art.Insert();
        return true;
    }
Exemplo n.º 2
0
    //采集
    private bool GetTest(int testType)
    {
        string Pic = "/upload/images_collects/";
        System.Text.Encoding enType = System.Text.Encoding.Default;
        switch (WebEncode)
        {
            case "1":
                enType = System.Text.Encoding.GetEncoding("gb2312");
                break;
            case "2":
                enType = System.Text.Encoding.UTF8;
                break;
            case "3":
                enType = System.Text.Encoding.Unicode;
                break;
        }

        TPortalClass.Collection Cn = new TPortalClass.Collection();

        string testList = Cn.GetHttpPageCode(ListUrl, enType);
        if (testList == "$UrlIsFalse")
        {
            Response.Write("<script>alert('列表地址设置错误');window.location='list_collect_box.aspx?action=list&id=" + this.txtId.Text + "'</script");
            return false;
        }
        if (testList == "$GetFalse")
        {
            Response.Write("<script>alert('无法连接列表页或连接超时');window.location='list_collect_box.aspx?action=list&id=" + this.txtId.Text + "'</script");
            return false;
        }
        testList = Cn.GetBody(testList, ListStart, ListEnd, true, true);
        if (testList == "$StartFalse")
        {
            Response.Write("<script>alert('列表开始标记设置错误,请重新设置');window.location='list_collect_box.aspx?action=list&id=" + this.txtId.Text + "'</script");
            return false;
        }
        if (testList == "$EndFalse")
        {
            Response.Write("<script>alert('列表结束记设置错误,请重新设置');window.location='list_collect_box.aspx?action=list&id=" + this.txtId.Text + "'</script");
            return false;
        }
        if (testType == 0)
        {
            this.ltListTest.Text = testList;
        }
        else
        {
            ArrayList linkArray = Cn.GetLinkArray(testList, LinkStart, LinkEnd);
            if (linkArray.Count == 0)
            {
                Response.Write("<script>alert('未取到链接,请检查链接设置');window.location='list_collect_box.aspx?action=link&id=" + this.txtId.Text + "'</script");
                return false;
            }
            else
            {
                if (linkArray[0].ToString() == "$StartFalse")
                {
                    Response.Write("<script>alert('链接开始标记设置错误,请重新设置111');window.location='list_collect_box.aspx?action=link&id=" + this.txtId.Text + "'</script");
                    return false;
                }
                if (linkArray[0].ToString() == "$EndFalse")
                {
                    Response.Write("<script>alert('链接开始标记设置错误,请重新设置');window.location='list_collect_box.aspx?action=link&id=" + this.txtId.Text + "'</script");
                    return false;
                }
                if (linkArray[0].ToString() == "$NoneLink")
                {
                    Response.Write("<script>alert('未取到链接,请检查链接设置222');window.location='list_collect_box.aspx?action=link&id=" + this.txtId.Text + "'</script");
                    return false;
                }
                if (IsDesc == 1)
                {
                    linkArray.Reverse();
                }
                if (CollecNum > 0 && linkArray.Count > CollecNum)
                {
                    linkArray.RemoveRange(CollecNum, linkArray.Count - CollecNum);
                }
                string linkStr = string.Empty;

                if (testType == 1)//链接地址
                {
                    for (int i = 0; i < linkArray.Count; i++)
                    {
                        linkStr = Cn.DefiniteUrl(linkArray[i].ToString(), WebUrl);
                        if (linkStr != "$False")
                        {
                            linkStr = "<a href='" + linkStr + "' target=_blank>" + linkStr + "</a><br>";
                            ltLinkTest.Text += linkStr;
                        }
                    }
                }

                if (testType == 2)//测试
                {
                    linkStr = Cn.DefiniteUrl(linkArray[0].ToString(), WebUrl);
                    if (linkStr == "$False")
                    {
                        Response.Write("<script>alert('获取到的链接地址无效,请检查链接设置');window.location='list_collect_box.aspx?action=link&id=" + this.txtId.Text + "'</script");
                        return false;
                    }
                    string newsCode = Cn.GetHttpPageCode(linkStr, enType);
                    if (newsCode == "$UrlIsFalse")
                    {
                        Response.Write("<script>alert('获取到的链接地址无效,请检查链接设置');window.location='list_collect_box.aspx?action=link&id=" + this.txtId.Text + "'</script");
                        return false;
                    }
                    if (newsCode == "$GetFalse")
                    {
                        Response.Write("<script>alert('无法连接内容页或连接超时');window.location='list_collect_box.aspx?action=content&id=" + this.txtId.Text + "'</script");
                        return false;
                    }
                    string testTitle = Cn.GetBody(newsCode, TitleStart, TitleEnd, false, false);
                    string testKeyWord = Cn.GetBody(newsCode, KeyWordStart, KeyWordEnd, false, false);
                    string testDateTime = Cn.GetBody(newsCode, DateStart, DateEnd, false, false);
                    string testSource = Cn.GetBody(newsCode, SourceStart, SourceEnd, false, false);
                    //string testDateTime = Cn.GetRegValue(DateRegex, newsCode);
                    //string testSource = Cn.GetRegValue(SourceRegex, newsCode);
                    string testBody = Cn.GetBody(newsCode, ContentStart, ContentEnd, false, false);
                    if (testTitle == "$StartFalse")
                    {
                        Response.Write("<script>alert('标题开始标记设置错误,请重新设置');window.location='list_collect_box.aspx?action=content&id=" + this.txtId.Text + "'</script");
                        return false;
                    }
                    if (testTitle == "$EndFalse")
                    {
                        Response.Write("<script>alert('标题结束标记设置错误,请重新设置');window.location='list_collect_box.aspx?action=content&id=" + this.txtId.Text + "'</script");
                        return false;
                    }
                    if (testBody == "$StartFalse")
                    {
                        Response.Write("<script>alert('正文开始标记设置错误,请重新设置');window.location='list_collect_box.aspx?action=content&id=" + this.txtId.Text + "'</script");
                        return false;
                    }
                    if (testBody == "$EndFalse")
                    {
                        Response.Write("<script>alert('正文结束标记设置错误,请重新设置');window.location='list_collect_box.aspx?action=content&id=" + this.txtId.Text + "'</script");
                        return false;
                    }
                    if (RemoveTitle != "")
                    {
                        testTitle = testTitle.Replace(RemoveTitle, "");
                    }
                    this.ltTestTitle.Text = testTitle;
                    this.ltKeyWord.Text = testKeyWord;
                    this.ltDateTime.Text = testDateTime;
                    this.ltSource.Text = testSource;

                    //------------------获取详细页内容的下一页开始-------------------
                    string NewsNextUrl = Cn.GetRegValue(NextPageRegex, newsCode);
                    int PageCount = 0;
                    while (NewsNextUrl.Length > 0 && PageCount < 20)
                    {
                        string NewsPaingNextCode = string.Empty;
                        string ContentTemp = string.Empty;
                        NewsNextUrl = Cn.DefiniteUrl(NewsNextUrl, WebUrl);
                        NewsPaingNextCode = Cn.GetHttpPageCode(NewsNextUrl, enType);
                        ContentTemp = Cn.GetBody(NewsPaingNextCode, ContentStart, ContentEnd, false, false);
                        testBody = testBody + "<br>$PageNext$<br>" + ContentTemp;
                        string NewsNextUrl_1 = Cn.GetRegValue(NextPageRegex, NewsPaingNextCode);
                        if (NewsNextUrl_1.Length > 0)
                            NewsNextUrl = Cn.DefiniteUrl(NewsNextUrl_1, WebUrl);
                        else
                            break;
                        PageCount++;
                    }
                    //-----------------------获取详细页内容的下一页结束-------------------

                    //-------过滤正文开始-----------------------
                    if (RemoveBodyStart != "" && RemoveBodyEnd != "")
                    {
                        string[] removeBodyStartArr = RemoveBodyStart.Split('$');
                        string[] removeBodyEndArr = RemoveBodyEnd.Split('$');
                        if (removeBodyStartArr.Length > 1 && removeBodyEndArr.Length > 1)
                        {
                            if (removeBodyStartArr.Length != removeBodyEndArr.Length)
                            {
                                Response.Write("<script>alert('少了$请注意');window.location='list_collect_box.aspx'</script");
                                return false;
                            }
                            else
                            {
                                for (int i = 0; i < removeBodyStartArr.Length; i++)
                                {
                                    string remove = Cn.GetBody(testBody, removeBodyStartArr[i], removeBodyEndArr[i], true, true);
                                    testBody = testBody.Replace(remove, "");
                                }
                            }
                        }
                        else
                        {
                            string remove = Cn.GetBody(testBody, RemoveBodyStart, RemoveBodyEnd, true, true);
                            testBody = testBody.Replace(remove, "");
                        }
                    }
                    //--------------过滤正文结束---------------------------

                    ArrayList testBodyArray = Cn.ReplaceSaveRemoteFile(testBody, Pic, WebUrl, "0");

                    ltTestContent.Text = testBodyArray[0].ToString();
                    if (testBodyArray.Count == 2)
                    {
                        ltPhotoUrl.Text = testBodyArray[1].ToString();
                    }
                }

                if (testType == 3)//采集
                {
                    int falseNum = 0;
                    int successNum = 0;
                    SetProcessBar("从" + WebUrl + "采集信息", true);
                    for (int i = 0; i < linkArray.Count; i++)
                    {
                        int isImg = 0;
                        string photoUrl = string.Empty;
                        ProcessBar(i, linkArray.Count);
                        linkStr = Cn.DefiniteUrl(linkArray[i].ToString(), WebUrl);
                        if (linkStr == "$False$")
                        {
                            falseNum++;
                            continue;
                        }
                        string newsPageCode = Cn.GetHttpPageCode(linkStr, enType);
                        if (newsPageCode == "$UrlIsFalse" || newsPageCode == "$GetFalse")
                        {
                            falseNum++;
                            continue;
                        }
                        string cTitle = Cn.GetBody(newsPageCode, TitleStart, TitleEnd, false, false);          //标题
                        string cKeyWord = Cn.GetBody(newsPageCode, KeyWordStart, KeyWordEnd, false, false);   //摘要
                        string cDateTime = Cn.GetBody(newsPageCode, DateStart, DateEnd, false, false);        //发布日期
                        string cSource = Cn.GetBody(newsPageCode, SourceStart, SourceEnd, false, false);    //来源
                        //string cDateTime = Cn.GetRegValue(DateRegex, newsPageCode);
                        //string cSource = Cn.GetRegValue(SourceRegex, newsPageCode);
                        string cBody = Cn.GetBody(newsPageCode, ContentStart, ContentEnd, false, false);
                        if (cTitle == "$StartFalse" || cBody == "$StartFalse" || cTitle == "$EndFalse" || cBody == "$EndFalse")
                        {
                            falseNum++;
                            continue;
                        }

                        //--------获取详细页内容的下一页开始---------------
                        string NewsNextUrl = Cn.GetRegValue(NextPageRegex, newsPageCode);
                        int PageCount = 0;
                        while (NewsNextUrl.Length > 0 && PageCount < 20)
                        {
                            string NewsPaingNextCode = string.Empty;
                            string ContentTemp = string.Empty;
                            NewsNextUrl = Cn.DefiniteUrl(NewsNextUrl, WebUrl);
                            NewsPaingNextCode = Cn.GetHttpPageCode(NewsNextUrl, enType);
                            ContentTemp = Cn.GetBody(NewsPaingNextCode, ContentStart, ContentEnd, false, false);
                            cBody = cBody + "<br>$PageNext$<br>" + ContentTemp;
                            string NewsNextUrl_1 = Cn.GetRegValue(NextPageRegex, NewsPaingNextCode);
                            if (NewsNextUrl_1.Length > 0)
                                NewsNextUrl = Cn.DefiniteUrl(NewsNextUrl_1, WebUrl);
                            else
                                break;
                            PageCount++;
                        }
                        //---------获取详细页内容的下一页结束--------------
                        //string url_ss = dt.Rows[0]["ListUrl"].ToString();
                        //string dq_url = "/"+ListUrl.Substring(20,4);
                        //string ls_newurl = WebUrl + dq_url;
                        //Response.Write(dq_url + "||");

                        ArrayList bodyArray = Cn.ReplaceSaveRemoteFile(cBody, Pic, WebUrl, IsSaveImg.ToString());
                        if (bodyArray.Count == 2)
                        {
                            isImg = 1;
                            photoUrl = bodyArray[1].ToString();
                        }
                        cBody = bodyArray[0].ToString();

                        cBody = cBody.Replace("'", "");

                        //-------过滤正文开始-----------------------
                        if (RemoveBodyStart != "" && RemoveBodyEnd != "")
                        {
                            string[] removeBodyStartArr = RemoveBodyStart.Split('$');
                            string[] removeBodyEndArr = RemoveBodyEnd.Split('$');
                            if (removeBodyStartArr.Length > 1 && removeBodyEndArr.Length > 1)
                            {
                                for (int j = 0; j < removeBodyStartArr.Length; j++)
                                {
                                    string remove = Cn.GetBody(cBody, removeBodyStartArr[j], removeBodyEndArr[j], true, true);
                                    cBody = cBody.Replace(remove, "");
                                }
                            }
                            else
                            {
                                string remove = Cn.GetBody(cBody, RemoveBodyStart, RemoveBodyEnd, true, true);
                                cBody = cBody.Replace(remove, "");
                            }
                        }
                        //--------------过滤正文结束---------------------------

                        //---------------过滤开始-----------IFRAME---------
                        if (Script_Iframe == 1)
                            cBody = Cn.ScriptHtml(cBody, "Iframe", 2);
                        if (Script_Object == 1)
                            cBody = Cn.ScriptHtml(cBody, "Object", 2);
                        if (Script_Script == 1)
                            cBody = Cn.ScriptHtml(cBody, "Script", 2);
                        if (Script_Div == 1)
                            cBody = Cn.ScriptHtml(cBody, "Div", 3);
                        if (Script_Table == 1)
                            cBody = Cn.ScriptHtml(cBody, "Table", 2);
                        if (Script_Span == 1)
                            cBody = Cn.ScriptHtml(cBody, "Span", 3);
                        if (Script_Img == 1)
                            cBody = Cn.ScriptHtml(cBody, "Img", 3);
                        if (Script_Font == 1)
                            cBody = Cn.ScriptHtml(cBody, "Font", 3);
                        if (Script_A == 1)
                            cBody = Cn.ScriptHtml(cBody, "A", 3);
                        if (Script_Html == 1)
                            cBody = Cn.HtmlScript(cBody);
                        //-------------------过滤结束-------------------------------
                        //采集的数据保存至JpArticle

                        //关键字
                        string ls_str = cKeyWord.Trim();
                        string ls_date = System.DateTime.Now.ToString();
                        string ls_ly = "本网站";
                        if (cDateTime != "")
                        {
                            if (cDateTime.Length > 19)
                            {
                                ls_date = DateTime.Parse(cDateTime.Substring(0, 19)).ToString("yyyy-MM-dd HH:mm:ss");
                            }
                            //ls_date = cDateTime;
                        }

                        string ls_xm = "", ls_tel = "", ls_sfz = "", ls_addtime = System.DateTime.Now.ToString(), ls_dept = "", ls_mail = "", ls_type = "";
                        if (cSource != "")
                        {
                            string[] lv_sourse = cSource.Split('|');
                            ls_xm = lv_sourse[0];
                            ls_dept = lv_sourse[1];
                            //if (lv_sourse[1] == "01")
                            //{
                            //    ls_dept = "办公室";
                            //}
                            //else if (lv_sourse[1] == "02")
                            //{
                            //    ls_dept = "养老保险处";
                            //}
                            //else if (lv_sourse[1] == "03")
                            //{
                            //    ls_dept = "养老保险综合管理中心";
                            //}
                            //else if (lv_sourse[1] == "04")
                            //{
                            //    ls_dept = "医疗工伤处";
                            //}
                            //else if (lv_sourse[1] == "05")
                            //{
                            //    ls_dept = "医疗工伤保险管理中心";
                            //}
                            //else if (lv_sourse[1] == "06")
                            //{
                            //    ls_dept = "计算机信息中心";
                            //}
                            //else if (lv_sourse[1] == "07")
                            //{
                            //    ls_dept = "社会保险稽核处";
                            //}
                            //else if (lv_sourse[1] == "08")
                            //{
                            //    ls_dept = "计划财务处";
                            //}
                            //else if (lv_sourse[1] == "09")
                            //{
                            //    ls_dept = "离(退)休人员管理处";
                            //}
                            //else if (lv_sourse[1] == "10")
                            //{
                            //    ls_dept = "新城管理中心";
                            //}
                            //else if (lv_sourse[1] == "11")
                            //{
                            //    ls_dept = "定海社保";
                            //}
                            //else if (lv_sourse[1] == "12")
                            //{
                            //    ls_dept = "普陀社保";
                            //}
                            //else if (lv_sourse[1] == "13")
                            //{
                            //    ls_dept = "岱山社保";
                            //}
                            //else if (lv_sourse[1] == "14")
                            //{
                            //    ls_dept = "嵊泗社保";
                            //}
                            //else
                            //{
                            //    ls_dept = "";
                            //}
                            if (lv_sourse[2].Length > 19)
                            {
                                ls_addtime = DateTime.Parse(lv_sourse[2].Substring(0, 19)).ToString("yyyy-MM-dd HH:mm:ss");
                            }
                            ls_tel = lv_sourse[3];
                            ls_mail = lv_sourse[4];
                            ls_sfz = lv_sourse[5];
                            if (lv_sourse[6] == "1")
                            {
                                ls_type = "局长信箱";
                            }
                            if (lv_sourse[6] == "3")
                            {
                                ls_type = "投诉咨询";
                            }
                        }
                        if (RemoveTitle != "")
                        {
                            cTitle = cTitle.Replace(RemoveTitle, "");
                        }
                        //Response.Write(ls_date);
                        TPortalClass.JpBox JpBox = new TPortalClass.JpBox();

                        JpBox.lyusername = ls_xm;
                        JpBox.tel = ls_tel;
                        JpBox.mail = ls_mail;
                        JpBox.content = cBody;  //正文2
                        JpBox.addtime = ls_addtime;
                        JpBox.ip = "127.0.0.1";
                        string ls_cxm = System.Guid.NewGuid().ToString();
                        JpBox.cxm = ls_cxm;
                        JpBox.yb = ls_sfz;   //sfz
                        JpBox.nrzt = cTitle;  //标题1
                        JpBox.jtzz = "公开";   //是否公开

                        JpBox.type = ls_type;

                        JpBox.ifhf = "1";   //是否回复 默认为未回复 0
                        JpBox.ifshow = "1";   //是否前台显示 默认为未前台显示 0
                        JpBox.hfsj = ls_date;  //回复时间
                        JpBox.hfcontent = ls_str;  //回复内容
                        JpBox.by4 = ls_dept;
                        JpBox.InsertLS();

                        successNum++;

                    }
                    Response.Write("<script>alert('采集完成,成功 " + successNum + "条 ,失败 " + falseNum + "');window.location='list_collect_box.aspx'</script");
                }
            }
        }
        return true;
    }