/// <summary>
/// Saves the list-page settings of a collection item: the list URL, the list start/end
/// markers and the next-list-page regex. The row is selected by <c>model.ID</c>.
/// </summary>
/// <param name="model">Item carrying the ID and the four list-related values to store.</param>
public void UpdateListSet(BCW.Model.Collec.CollecItem model)
{
    // The statement text is fixed; all values travel as parameters.
    string sql = "update tb_CollecItem set "
        + "ListUrl=@ListUrl,"
        + "ListStart=@ListStart,"
        + "ListEnd=@ListEnd,"
        + "NextListRegex=@NextListRegex"
        + " where ID=@ID ";

    // Each parameter is declared and filled in one place so name and value stay paired.
    SqlParameter[] parameters = new SqlParameter[5];

    parameters[0] = new SqlParameter("@ID", SqlDbType.Int, 4);
    parameters[0].Value = model.ID;

    parameters[1] = new SqlParameter("@ListUrl", SqlDbType.NVarChar, 300);
    parameters[1].Value = model.ListUrl;

    parameters[2] = new SqlParameter("@ListStart", SqlDbType.NVarChar, 300);
    parameters[2].Value = model.ListStart;

    parameters[3] = new SqlParameter("@ListEnd", SqlDbType.NVarChar, 300);
    parameters[3].Value = model.ListEnd;

    parameters[4] = new SqlParameter("@NextListRegex", SqlDbType.NVarChar, 300);
    parameters[4].Value = model.NextListRegex;

    SqlHelper.ExecuteSql(sql, parameters);
}
/// <summary>
/// Returns one page of collection items (ID, name, remark and state only), newest ID first.
/// </summary>
/// <param name="p_pageIndex">Current page.</param>
/// <param name="p_pageSize">Page size.</param>
/// <param name="strWhere">Filter condition handed to the paging procedure as-is.</param>
/// <param name="p_recordCount">Receives the total record count reported by the paging procedure.</param>
/// <returns>The items of the requested page; an empty list when the record count is not positive.</returns>
public IList<BCW.Model.Collec.CollecItem> GetCollecItems(int p_pageIndex, int p_pageSize, string strWhere, out int p_recordCount)
{
    IList<BCW.Model.Collec.CollecItem> items = new List<BCW.Model.Collec.CollecItem>();

    // Arguments for the generic paging procedure.
    string tableName = "tb_CollecItem";
    string keyColumn = "id";
    string columns = "ID,ItemName,ItemRemark,State";
    string condition = strWhere;
    string orderBy = "ID Desc";
    int knownCount = 0;

    using (SqlDataReader reader = SqlHelper.RunProcedureMe(tableName, keyColumn, columns, p_pageIndex, p_pageSize, condition, orderBy, knownCount, out p_recordCount))
    {
        // Nothing matched: hand back the empty list without touching the reader.
        if (p_recordCount <= 0)
        {
            return items;
        }

        // Kept from the original flow; the computed page count itself is not used here.
        BasePage.CalcPageCount(p_recordCount, p_pageSize, ref p_pageIndex);

        while (reader.Read())
        {
            BCW.Model.Collec.CollecItem item = new BCW.Model.Collec.CollecItem();
            item.ID = reader.GetInt32(0);
            item.ItemName = reader.GetString(1);
            item.ItemRemark = reader.GetString(2);
            item.State = reader.GetInt32(3);
            items.Add(item);
        }
    }

    return items;
}
/// <summary>
/// Saves the article-page settings of a collection item: title, keyword and content markers,
/// the date regex, the body/regex filters and the next-page regex. The row is selected by
/// <c>model.ID</c>.
/// </summary>
/// <param name="model">Item carrying the ID and the content-related values to store.</param>
public void UpdateContentSet(BCW.Model.Collec.CollecItem model)
{
    string sql = "update tb_CollecItem set "
        + "TitleStart=@TitleStart,"
        + "TitleEnd=@TitleEnd,"
        + "KeyWordStart=@KeyWordStart,"
        + "KeyWordEnd=@KeyWordEnd,"
        + "DateRegex=@DateRegex,"
        + "ContentStart=@ContentStart,"
        + "ContentEnd=@ContentEnd,"
        + "RemoveBodyStart=@RemoveBodyStart,"
        + "RemoveBodyEnd=@RemoveBodyEnd,"
        + "RemoveTitle=@RemoveTitle,"
        + "RemoveContent=@RemoveContent,"
        + "NextPageRegex=@NextPageRegex"
        + " where ID=@ID ";

    // Declaration and value of each parameter are kept side by side.
    SqlParameter[] parameters = new SqlParameter[13];

    parameters[0] = new SqlParameter("@ID", SqlDbType.Int, 4);
    parameters[0].Value = model.ID;

    parameters[1] = new SqlParameter("@TitleStart", SqlDbType.NVarChar, 50);
    parameters[1].Value = model.TitleStart;

    parameters[2] = new SqlParameter("@TitleEnd", SqlDbType.NVarChar, 50);
    parameters[2].Value = model.TitleEnd;

    parameters[3] = new SqlParameter("@KeyWordStart", SqlDbType.NVarChar, 300);
    parameters[3].Value = model.KeyWordStart;

    parameters[4] = new SqlParameter("@KeyWordEnd", SqlDbType.NVarChar, 300);
    parameters[4].Value = model.KeyWordEnd;

    parameters[5] = new SqlParameter("@DateRegex", SqlDbType.NVarChar, 300);
    parameters[5].Value = model.DateRegex;

    parameters[6] = new SqlParameter("@ContentStart", SqlDbType.NVarChar, 50);
    parameters[6].Value = model.ContentStart;

    // The only parameter declared without a size (NText).
    parameters[7] = new SqlParameter("@ContentEnd", SqlDbType.NText);
    parameters[7].Value = model.ContentEnd;

    parameters[8] = new SqlParameter("@RemoveBodyStart", SqlDbType.NVarChar, 300);
    parameters[8].Value = model.RemoveBodyStart;

    parameters[9] = new SqlParameter("@RemoveBodyEnd", SqlDbType.NVarChar, 300);
    parameters[9].Value = model.RemoveBodyEnd;

    parameters[10] = new SqlParameter("@RemoveTitle", SqlDbType.NVarChar, 300);
    parameters[10].Value = model.RemoveTitle;

    parameters[11] = new SqlParameter("@RemoveContent", SqlDbType.NVarChar, 300);
    parameters[11].Value = model.RemoveContent;

    parameters[12] = new SqlParameter("@NextPageRegex", SqlDbType.NVarChar, 300);
    parameters[12].Value = model.NextPageRegex;

    SqlHelper.ExecuteSql(sql, parameters);
}
/// <summary>
/// Inserts a new collection item (basic settings only) and returns its identity value.
/// </summary>
/// <param name="model">Item whose name, type, node, encoding, site, remark, filter and option fields are stored.</param>
/// <returns>The identity generated for the inserted row, or 0 when the helper returns no value.</returns>
public int Add(BCW.Model.Collec.CollecItem model)
{
    // SCOPE_IDENTITY() is used instead of @@IDENTITY: @@IDENTITY reports the last identity
    // generated on the connection in ANY scope, so a trigger on tb_CollecItem that inserts
    // into another identity table would make this method return the wrong key.
    string sql = "insert into tb_CollecItem("
        + "ItemName,Types,NodeId,WebEncode,WebName,WebUrl,ItemRemark,Script_Html,CollecNum,IsSaveImg,IsDesc,State)"
        + " values ("
        + "@ItemName,@Types,@NodeId,@WebEncode,@WebName,@WebUrl,@ItemRemark,@Script_Html,@CollecNum,@IsSaveImg,@IsDesc,@State)"
        + ";select SCOPE_IDENTITY()";

    // NOTE(review): the declared NVarChar sizes (50/200) silently truncate longer values;
    // confirm they match the column definitions, in particular WebUrl at 50 characters.
    SqlParameter[] parameters = new SqlParameter[12];

    parameters[0] = new SqlParameter("@ItemName", SqlDbType.NVarChar, 50);
    parameters[0].Value = model.ItemName;

    parameters[1] = new SqlParameter("@Types", SqlDbType.Int, 4);
    parameters[1].Value = model.Types;

    parameters[2] = new SqlParameter("@NodeId", SqlDbType.Int, 4);
    parameters[2].Value = model.NodeId;

    parameters[3] = new SqlParameter("@WebEncode", SqlDbType.Int, 4);
    parameters[3].Value = model.WebEncode;

    parameters[4] = new SqlParameter("@WebName", SqlDbType.NVarChar, 50);
    parameters[4].Value = model.WebName;

    parameters[5] = new SqlParameter("@WebUrl", SqlDbType.NVarChar, 50);
    parameters[5].Value = model.WebUrl;

    parameters[6] = new SqlParameter("@ItemRemark", SqlDbType.NVarChar, 50);
    parameters[6].Value = model.ItemRemark;

    parameters[7] = new SqlParameter("@Script_Html", SqlDbType.NVarChar, 200);
    parameters[7].Value = model.Script_Html;

    parameters[8] = new SqlParameter("@CollecNum", SqlDbType.Int, 4);
    parameters[8].Value = model.CollecNum;

    parameters[9] = new SqlParameter("@IsSaveImg", SqlDbType.Int, 4);
    parameters[9].Value = model.IsSaveImg;

    parameters[10] = new SqlParameter("@IsDesc", SqlDbType.Int, 4);
    parameters[10].Value = model.IsDesc;

    parameters[11] = new SqlParameter("@State", SqlDbType.Int, 4);
    parameters[11].Value = model.State;

    object identity = SqlHelper.GetSingle(sql, parameters);

    // The scalar comes back as a numeric value; Convert handles the narrowing to int.
    return identity == null ? 0 : Convert.ToInt32(identity);
}
/// <summary>
/// Loads the complete collection item with the given ID.
/// </summary>
/// <param name="ID">Primary key of the row in tb_CollecItem.</param>
/// <returns>
/// The populated item, or <c>null</c> when no row matches. Nullable text columns that hold
/// NULL are left at the model's initial value, except the four Remove* columns, which are
/// set to the empty string.
/// </returns>
public BCW.Model.Collec.CollecItem GetCollecItem(int ID)
{
    string sql = "select top 1 ID,ItemName,Types,NodeId,WebEncode,WebName,WebUrl,ItemRemark,ListUrl,ListStart,ListEnd,LinkStart,LinkEnd,TitleStart,TitleEnd,KeyWordStart,KeyWordEnd,DateRegex,NextListRegex,ContentStart,ContentEnd,RemoveBodyStart,RemoveBodyEnd,RemoveTitle,RemoveContent,NextPageRegex,Script_Html,CollecNum,IsSaveImg,IsDesc,State from tb_CollecItem "
        + " where ID=@ID ";

    SqlParameter[] parameters = new SqlParameter[1];
    parameters[0] = new SqlParameter("@ID", SqlDbType.Int, 4);
    parameters[0].Value = ID;

    BCW.Model.Collec.CollecItem item = new BCW.Model.Collec.CollecItem();

    using (SqlDataReader reader = SqlHelper.ExecuteReader(sql, parameters))
    {
        // No row for this ID.
        if (!reader.Read())
        {
            return null;
        }

        // Columns 0-7 are read without a NULL check.
        item.ID = reader.GetInt32(0);
        item.ItemName = reader.GetString(1);
        item.Types = reader.GetInt32(2);
        item.NodeId = reader.GetInt32(3);
        item.WebEncode = reader.GetInt32(4);
        item.WebName = reader.GetString(5);
        item.WebUrl = reader.GetString(6);
        item.ItemRemark = reader.GetString(7);

        // Optional rule columns: assigned only when the database holds a value.
        if (!reader.IsDBNull(8)) { item.ListUrl = reader.GetString(8); }
        if (!reader.IsDBNull(9)) { item.ListStart = reader.GetString(9); }
        if (!reader.IsDBNull(10)) { item.ListEnd = reader.GetString(10); }
        if (!reader.IsDBNull(11)) { item.LinkStart = reader.GetString(11); }
        if (!reader.IsDBNull(12)) { item.LinkEnd = reader.GetString(12); }
        if (!reader.IsDBNull(13)) { item.TitleStart = reader.GetString(13); }
        if (!reader.IsDBNull(14)) { item.TitleEnd = reader.GetString(14); }
        if (!reader.IsDBNull(15)) { item.KeyWordStart = reader.GetString(15); }
        if (!reader.IsDBNull(16)) { item.KeyWordEnd = reader.GetString(16); }
        if (!reader.IsDBNull(17)) { item.DateRegex = reader.GetString(17); }
        if (!reader.IsDBNull(18)) { item.NextListRegex = reader.GetString(18); }
        if (!reader.IsDBNull(19)) { item.ContentStart = reader.GetString(19); }
        if (!reader.IsDBNull(20)) { item.ContentEnd = reader.GetString(20); }

        // Filter columns: NULL becomes the empty string.
        item.RemoveBodyStart = reader.IsDBNull(21) ? "" : reader.GetString(21);
        item.RemoveBodyEnd = reader.IsDBNull(22) ? "" : reader.GetString(22);
        item.RemoveTitle = reader.IsDBNull(23) ? "" : reader.GetString(23);
        item.RemoveContent = reader.IsDBNull(24) ? "" : reader.GetString(24);

        if (!reader.IsDBNull(25)) { item.NextPageRegex = reader.GetString(25); }
        if (!reader.IsDBNull(26)) { item.Script_Html = reader.GetString(26); }

        // Trailing integer options are read without a NULL check.
        item.CollecNum = reader.GetInt32(27);
        item.IsSaveImg = reader.GetInt32(28);
        item.IsDesc = reader.GetInt32(29);
        item.State = reader.GetInt32(30);

        return item;
    }
}
/// <summary>
/// Updates every editable column of a collection item except <c>State</c>. The row is
/// selected by <c>model.ID</c>.
/// </summary>
/// <param name="model">Item carrying the ID and the values to store.</param>
public void Update(BCW.Model.Collec.CollecItem model)
{
    string sql = "update tb_CollecItem set "
        + "ItemName=@ItemName,"
        + "Types=@Types,"
        + "NodeId=@NodeId,"
        + "WebEncode=@WebEncode,"
        + "WebName=@WebName,"
        + "WebUrl=@WebUrl,"
        + "ItemRemark=@ItemRemark,"
        + "ListUrl=@ListUrl,"
        + "ListStart=@ListStart,"
        + "ListEnd=@ListEnd,"
        + "LinkStart=@LinkStart,"
        + "LinkEnd=@LinkEnd,"
        + "TitleStart=@TitleStart,"
        + "TitleEnd=@TitleEnd,"
        + "KeyWordStart=@KeyWordStart,"
        + "KeyWordEnd=@KeyWordEnd,"
        + "DateRegex=@DateRegex,"
        + "NextListRegex=@NextListRegex,"
        + "ContentStart=@ContentStart,"
        + "ContentEnd=@ContentEnd,"
        + "RemoveBodyStart=@RemoveBodyStart,"
        + "RemoveBodyEnd=@RemoveBodyEnd,"
        + "RemoveTitle=@RemoveTitle,"
        + "RemoveContent=@RemoveContent,"
        + "NextPageRegex=@NextPageRegex,"
        + "Script_Html=@Script_Html,"
        + "CollecNum=@CollecNum,"
        + "IsSaveImg=@IsSaveImg,"
        + "IsDesc=@IsDesc"
        + " where ID=@ID ";

    // Declaration and value of each parameter are kept side by side.
    SqlParameter[] parameters = new SqlParameter[30];

    parameters[0] = new SqlParameter("@ID", SqlDbType.Int, 4);
    parameters[0].Value = model.ID;

    // --- basic settings ---
    parameters[1] = new SqlParameter("@ItemName", SqlDbType.NVarChar, 50);
    parameters[1].Value = model.ItemName;

    parameters[2] = new SqlParameter("@Types", SqlDbType.Int, 4);
    parameters[2].Value = model.Types;

    parameters[3] = new SqlParameter("@NodeId", SqlDbType.Int, 4);
    parameters[3].Value = model.NodeId;

    parameters[4] = new SqlParameter("@WebEncode", SqlDbType.Int, 4);
    parameters[4].Value = model.WebEncode;

    parameters[5] = new SqlParameter("@WebName", SqlDbType.NVarChar, 50);
    parameters[5].Value = model.WebName;

    parameters[6] = new SqlParameter("@WebUrl", SqlDbType.NVarChar, 50);
    parameters[6].Value = model.WebUrl;

    parameters[7] = new SqlParameter("@ItemRemark", SqlDbType.NVarChar, 50);
    parameters[7].Value = model.ItemRemark;

    // --- list and link rules ---
    parameters[8] = new SqlParameter("@ListUrl", SqlDbType.NVarChar, 300);
    parameters[8].Value = model.ListUrl;

    parameters[9] = new SqlParameter("@ListStart", SqlDbType.NVarChar, 300);
    parameters[9].Value = model.ListStart;

    parameters[10] = new SqlParameter("@ListEnd", SqlDbType.NVarChar, 300);
    parameters[10].Value = model.ListEnd;

    parameters[11] = new SqlParameter("@LinkStart", SqlDbType.NVarChar, 300);
    parameters[11].Value = model.LinkStart;

    parameters[12] = new SqlParameter("@LinkEnd", SqlDbType.NVarChar, 50);
    parameters[12].Value = model.LinkEnd;

    // --- article rules ---
    parameters[13] = new SqlParameter("@TitleStart", SqlDbType.NVarChar, 50);
    parameters[13].Value = model.TitleStart;

    parameters[14] = new SqlParameter("@TitleEnd", SqlDbType.NVarChar, 50);
    parameters[14].Value = model.TitleEnd;

    parameters[15] = new SqlParameter("@KeyWordStart", SqlDbType.NVarChar, 300);
    parameters[15].Value = model.KeyWordStart;

    parameters[16] = new SqlParameter("@KeyWordEnd", SqlDbType.NVarChar, 300);
    parameters[16].Value = model.KeyWordEnd;

    parameters[17] = new SqlParameter("@DateRegex", SqlDbType.NVarChar, 300);
    parameters[17].Value = model.DateRegex;

    parameters[18] = new SqlParameter("@NextListRegex", SqlDbType.NVarChar, 300);
    parameters[18].Value = model.NextListRegex;

    parameters[19] = new SqlParameter("@ContentStart", SqlDbType.NVarChar, 50);
    parameters[19].Value = model.ContentStart;

    // The only parameter declared without a size (NText).
    parameters[20] = new SqlParameter("@ContentEnd", SqlDbType.NText);
    parameters[20].Value = model.ContentEnd;

    // --- filters ---
    parameters[21] = new SqlParameter("@RemoveBodyStart", SqlDbType.NVarChar, 300);
    parameters[21].Value = model.RemoveBodyStart;

    parameters[22] = new SqlParameter("@RemoveBodyEnd", SqlDbType.NVarChar, 300);
    parameters[22].Value = model.RemoveBodyEnd;

    parameters[23] = new SqlParameter("@RemoveTitle", SqlDbType.NVarChar, 300);
    parameters[23].Value = model.RemoveTitle;

    parameters[24] = new SqlParameter("@RemoveContent", SqlDbType.NVarChar, 300);
    parameters[24].Value = model.RemoveContent;

    parameters[25] = new SqlParameter("@NextPageRegex", SqlDbType.NVarChar, 300);
    parameters[25].Value = model.NextPageRegex;

    parameters[26] = new SqlParameter("@Script_Html", SqlDbType.NVarChar, 200);
    parameters[26].Value = model.Script_Html;

    // --- options ---
    parameters[27] = new SqlParameter("@CollecNum", SqlDbType.Int, 4);
    parameters[27].Value = model.CollecNum;

    parameters[28] = new SqlParameter("@IsSaveImg", SqlDbType.Int, 4);
    parameters[28].Value = model.IsSaveImg;

    parameters[29] = new SqlParameter("@IsDesc", SqlDbType.Int, 4);
    parameters[29].Value = model.IsDesc;

    SqlHelper.ExecuteSql(sql, parameters);
}
/// <summary>
/// Runs a collection rule against its source site, either as a preview or as a real collection.
/// </summary>
/// <remarks>
/// The list page is always fetched and cut down to the configured list region first.
/// What happens next depends on <paramref name="testType"/>:
/// 0 returns the list region; 1 returns the resolved article links as HTML anchors;
/// 2 fetches the first article and returns a sample of title, date, keywords and filtered body;
/// 3 collects every article of every list page into the Detail store and then ends the
/// HTTP response. Any other value runs the list and link checks and returns an empty string.
/// On a rule error the item's state is set to 0 and <c>Utils.Error</c> is called.
/// NOTE(review): the code is written as if <c>Utils.Error</c> ends the request — confirm.
/// </remarks>
/// <param name="model">The collection rule to run.</param>
/// <param name="testType">Mode selector, see remarks.</param>
/// <param name="test">Receives the preview text (empty for mode 3).</param>
public void GetTest(BCW.Model.Collec.CollecItem model, int testType, out string test)
{
    Encoding enType = GetEnType(model.WebEncode);
    test = string.Empty;

    // Directory handed to ReplaceSaveRemoteFile; chosen by item type.
    string Pic = (model.Types == 1) ? "/files/text/" : "/files/pic/act/";

    BCW.Collec.Collec Cn = new BCW.Collec.Collec();

    // Fetch the list page. The raw page is kept so collect mode can read the
    // next-list link from it without downloading the same page a second time.
    string listPageCode = Cn.GetHttpPageCode(model.ListUrl, enType);
    if (listPageCode == "$UrlIsFalse")
    {
        ReportTestFailure(model.ID, "列表地址设置错误");
    }
    if (listPageCode == "$GetFalse")
    {
        ReportTestFailure(model.ID, "无法连接列表页或连接超时");
    }

    // Cut the list region out of the page.
    string testList = Cn.GetBody(listPageCode, Out.WmlDecode(model.ListStart), Out.WmlDecode(model.ListEnd), false, false);
    if (testList == "$StartFalse")
    {
        ReportTestFailure(model.ID, "列表开始标记设置错误,请重新设置");
    }
    if (testList == "$EndFalse")
    {
        ReportTestFailure(model.ID, "列表结束标记设置错误,请重新设置");
    }

    if (testType == 0)
    {
        test = testList;
        return;
    }

    // Extract the article links from the list region.
    ArrayList linkArray = Cn.GetLinkArray(testList, Out.WmlDecode(model.LinkStart), Out.WmlDecode(model.LinkEnd));
    if (linkArray.Count == 0)
    {
        ReportTestFailure(model.ID, "未取到链接,请检查链接设置");
        return;
    }

    // GetLinkArray reports rule errors through its first element.
    string firstEntry = linkArray[0].ToString();
    if (firstEntry == "$StartFalse")
    {
        ReportTestFailure(model.ID, "链接开始标记设置错误,请重新设置");
    }
    if (firstEntry == "$EndFalse")
    {
        ReportTestFailure(model.ID, "链接结束标记设置错误,请重新设置");
    }
    if (firstEntry == "$NoneLink")
    {
        ReportTestFailure(model.ID, "未取到链接,请检查链接设置");
    }

    if (model.IsDesc == 1)
    {
        linkArray.Reverse();
    }

    switch (testType)
    {
        case 1: // link preview
            test += BuildLinkPreview(Cn, model, linkArray);
            break;
        case 2: // sample the first article
            test += BuildSamplePreview(Cn, model, enType, Pic, linkArray[0].ToString());
            break;
        case 3: // collect
            RunCollection(Cn, model, enType, Pic, linkArray, listPageCode);
            break;
    }
}

/// <summary>Marks the item as failed (state 0) and reports the message through Utils.Error.</summary>
private void ReportTestFailure(int itemId, string message)
{
    new BCW.BLL.Collec.CollecItem().UpdateState(itemId, 0);
    Utils.Error(message, "");
}

/// <summary>Renders every resolvable link as an HTML anchor followed by a line break.</summary>
private string BuildLinkPreview(BCW.Collec.Collec Cn, BCW.Model.Collec.CollecItem model, ArrayList linkArray)
{
    StringBuilder html = new StringBuilder();
    for (int i = 0; i < linkArray.Count; i++)
    {
        string linkStr = Cn.DefiniteUrl(linkArray[i].ToString(), model.WebUrl);
        if (linkStr != "$False")
        {
            string shown = Out.UBB(linkStr);
            html.Append("<a href=\"" + shown + "\" target=\"_blank\">" + shown + "</a><br />");
        }
    }
    return html.ToString();
}

/// <summary>
/// Extracts the article body between the content start marker and the first content end
/// marker that works. ContentEnd may list several alternatives separated by '$'.
/// Returns the last GetBody result, which may be "$StartFalse" or "$EndFalse".
/// </summary>
private string ExtractContentBody(BCW.Collec.Collec Cn, string pageCode, BCW.Model.Collec.CollecItem model)
{
    // ContentEnd can be null for items loaded from a row where the column is NULL.
    string endMarkers = model.ContentEnd ?? string.Empty;
    if (!endMarkers.Contains("$"))
    {
        return Cn.GetBody(pageCode, Out.WmlDecode(model.ContentStart), Out.WmlDecode(endMarkers), false, false);
    }

    string body = string.Empty;
    string[] candidates = endMarkers.Split('$');
    for (int k = 0; k < candidates.Length; k++)
    {
        body = Cn.GetBody(pageCode, Out.WmlDecode(model.ContentStart), Out.WmlDecode(candidates[k]), false, false);
        if (body != "$StartFalse" && body != "$EndFalse")
        {
            break;
        }
    }
    return body;
}

/// <summary>
/// Follows the article's "next page" links and appends each page body to
/// <paramref name="body"/>, separated by "$PageNext$".
/// </summary>
/// <param name="maxPages">Upper bound on followed pages; 0 or less means no fixed bound.</param>
private string AppendFollowingPages(BCW.Collec.Collec Cn, BCW.Model.Collec.CollecItem model, Encoding enType, string firstPageCode, string body, int maxPages)
{
    string nextUrl = Cn.GetRegValue(Out.WmlDecode(model.NextPageRegex), firstPageCode);

    // Remembers fetched URLs so a "next" link that points back to an earlier
    // page cannot keep the loop running forever.
    Hashtable visited = new Hashtable();
    int pageCount = 0;

    while (nextUrl.Length > 0 && (maxPages <= 0 || pageCount < maxPages))
    {
        nextUrl = Cn.DefiniteUrl(nextUrl, model.WebUrl);
        if (nextUrl == null || visited.ContainsKey(nextUrl))
        {
            break;
        }
        visited[nextUrl] = true;

        string pageCode = Cn.GetHttpPageCode(nextUrl, enType);

        // NOTE(review): a failed extraction ("$StartFalse"/"$EndFalse") is appended as-is,
        // exactly as before; consider skipping such pages.
        body = body + "$PageNext$" + ExtractContentBody(Cn, pageCode, model);

        string following = Cn.GetRegValue(Out.WmlDecode(model.NextPageRegex), pageCode);
        if (following.Length == 0)
        {
            break;
        }
        nextUrl = Cn.DefiniteUrl(following, model.WebUrl);
        pageCount++;
    }
    return body;
}

/// <summary>Removes every occurrence of the text GetBody finds between the two markers.</summary>
private string RemoveRange(BCW.Collec.Collec Cn, string body, string startMarker, string endMarker)
{
    string remove = Cn.GetBody(body, startMarker, endMarker, true, true);

    // string.Replace throws on an empty search value, so nothing found means nothing to do.
    if (string.IsNullOrEmpty(remove))
    {
        return body;
    }
    return body.Replace(remove, "");
}

/// <summary>
/// Applies the RemoveBodyStart/RemoveBodyEnd filters. Both settings may hold several
/// '$'-separated markers that are paired by position.
/// </summary>
/// <param name="reportMismatch">
/// True in test mode: unequal marker counts are reported as a rule error and the body is
/// left unfiltered. False in collect mode: only the pairs present in both lists are applied.
/// </param>
private string RemoveBodyRanges(BCW.Collec.Collec Cn, BCW.Model.Collec.CollecItem model, string body, bool reportMismatch)
{
    if (string.IsNullOrEmpty(model.RemoveBodyStart) || string.IsNullOrEmpty(model.RemoveBodyEnd))
    {
        return body;
    }

    // Markers are decoded in both modes so the test preview matches what collecting does.
    string startSetting = Out.WmlDecode(model.RemoveBodyStart);
    string endSetting = Out.WmlDecode(model.RemoveBodyEnd);
    string[] starts = startSetting.Split('$');
    string[] ends = endSetting.Split('$');

    if (starts.Length > 1 && ends.Length > 1)
    {
        if (reportMismatch && starts.Length != ends.Length)
        {
            ReportTestFailure(model.ID, "正文过滤中的开始和结束必须对应");
            return body;
        }

        // Bounded by the shorter list so a mismatch cannot index past the end.
        int pairs = Math.Min(starts.Length, ends.Length);
        for (int i = 0; i < pairs; i++)
        {
            body = RemoveRange(Cn, body, starts[i], ends[i]);
        }
        return body;
    }

    return RemoveRange(Cn, body, startSetting, endSetting);
}

/// <summary>
/// Applies the regex replacements (RemoveTitle patterns with RemoveContent replacements),
/// strips entity-like tokens and removes the HTML elements selected in Script_Html.
/// </summary>
private string ApplyContentFilters(BCW.Collec.Collec Cn, BCW.Model.Collec.CollecItem model, string body)
{
    if (!string.IsNullOrEmpty(model.RemoveTitle))
    {
        string[] patterns = model.RemoveTitle.Split('$');
        string[] replacements = (model.RemoveContent ?? string.Empty).Split('$');
        for (int k = 0; k < patterns.Length; k++)
        {
            // A pattern without a matching replacement is replaced by nothing.
            string replacement = (k < replacements.Length) ? replacements[k] : "";
            body = Regex.Replace(body, Out.WmlDecode(patterns[k]), Out.WmlDecode(replacement));
        }
    }

    // Strip the "&**;" style entities.
    // NOTE(review): the character class looks like a mangled "&amp;"; as written the
    // pattern also removes any word that is directly followed by ';' — confirm intent.
    body = Regex.Replace(body, @"[&|&]*[\w\d]+;", "");

    // Script_Html can be null for items loaded from a row where the column is NULL.
    string tags = model.Script_Html ?? string.Empty;
    if (tags.Contains("Iframe")) { body = Cn.ScriptHtml(body, "Iframe", 1); }
    if (tags.Contains("Object")) { body = Cn.ScriptHtml(body, "Object", 2); }
    if (tags.Contains("Script")) { body = Cn.ScriptHtml(body, "Script", 2); }
    if (tags.Contains("Div")) { body = Cn.ScriptHtml(body, "Div", 3); }
    if (tags.Contains("Table")) { body = Cn.ScriptHtml(body, "Table", 2); }
    if (tags.Contains("Span")) { body = Cn.ScriptHtml(body, "Span", 3); }
    if (tags.Contains("Img")) { body = Cn.ScriptHtml(body, "Img", 3); }
    if (tags.Contains("Font")) { body = Cn.ScriptHtml(body, "Font", 3); }
    if (tags.Contains("A")) { body = Cn.ScriptHtml(body, "A", 3); }
    if (tags.Contains("Html")) { body = Cn.HtmlScript(body); }

    return body;
}

/// <summary>
/// Test mode 2: fetches one article, validates the title/content markers and returns an
/// HTML sample of title, date, keywords, filtered body and picture addresses.
/// </summary>
private string BuildSamplePreview(BCW.Collec.Collec Cn, BCW.Model.Collec.CollecItem model, Encoding enType, string picDir, string rawLink)
{
    string linkStr = Cn.DefiniteUrl(rawLink, model.WebUrl);
    if (linkStr == "$False")
    {
        ReportTestFailure(model.ID, "获取到的链接地址无效,请检查链接设置");
    }

    string newsCode = Cn.GetHttpPageCode(linkStr, enType);
    if (newsCode == "$UrlIsFalse")
    {
        ReportTestFailure(model.ID, "获取到的链接地址无效,请检查链接设置");
    }
    if (newsCode == "$GetFalse")
    {
        ReportTestFailure(model.ID, "无法连接内容页或连接超时");
    }

    string testTitle = Cn.GetBody(newsCode, Out.WmlDecode(model.TitleStart), Out.WmlDecode(model.TitleEnd), false, false);
    string testKeyWord = Cn.GetBody(newsCode, Out.WmlDecode(model.KeyWordStart), Out.WmlDecode(model.KeyWordEnd), false, false);
    string testDateTime = Cn.GetRegValue(model.DateRegex, newsCode);
    string testBody = ExtractContentBody(Cn, newsCode, model);

    if (testTitle == "$StartFalse")
    {
        ReportTestFailure(model.ID, "标题开始标记设置错误,请重新设置");
    }
    if (testBody == "$StartFalse")
    {
        ReportTestFailure(model.ID, "正文开始标记设置错误,请重新设置");
    }
    if (testTitle == "$EndFalse")
    {
        ReportTestFailure(model.ID, "标题结束标记设置错误,请重新设置");
    }
    if (testBody == "$EndFalse")
    {
        ReportTestFailure(model.ID, "正文结束标记设置错误,请重新设置");
    }

    // The preview follows at most five continuation pages.
    testBody = AppendFollowingPages(Cn, model, enType, newsCode, testBody, 5);

    // In test mode the range filter runs before the remote-file pass.
    testBody = RemoveBodyRanges(Cn, model, testBody, true);

    ArrayList testBodyArray = Cn.ReplaceSaveRemoteFile(model.Types, testBody, picDir, model.WebUrl, "0");
    string cBody = testBodyArray[0].ToString();
    string txtPic = string.Empty;
    if (testBodyArray.Count == 2)
    {
        txtPic = testBodyArray[1].ToString();
    }

    cBody = ApplyContentFilters(Cn, model, cBody);

    // Assemble the sample shown to the user; line breaks become <br/>.
    cBody = cBody.Trim();
    cBody = cBody.Replace("\n", "<br/>");
    cBody = cBody.Replace("\r", "<br/>");

    StringBuilder preview = new StringBuilder();
    preview.Append("标题:" + testTitle + "<br />");
    preview.Append("时间:" + testDateTime + "<br />");
    preview.Append("关键字:" + testKeyWord + "<br />");
    preview.Append("内容:" + cBody);
    if (!string.IsNullOrEmpty(txtPic))
    {
        preview.Append("<br />图片地址采样" + txtPic);
    }
    return preview.ToString();
}

/// <summary>
/// Collect mode: downloads one article, filters it and stores it as a Detail record
/// unless a record with the same title already exists.
/// </summary>
/// <returns>True when a new record was inserted; false when the article was skipped.</returns>
private bool CollectArticle(BCW.Collec.Collec Cn, BCW.Model.Collec.CollecItem model, Encoding enType, string picDir, string rawLink)
{
    string linkStr = Cn.DefiniteUrl(rawLink, model.WebUrl);

    // The other modes test DefiniteUrl's result against "$False"; this path used to test
    // "$False$" only, so unresolvable links were never skipped. Both spellings are accepted.
    if (linkStr == "$False" || linkStr == "$False$")
    {
        return false;
    }

    string newsPageCode = Cn.GetHttpPageCode(linkStr, enType);
    if (newsPageCode.Contains("全页显示全文</a>"))
    {
        // Site-specific rule: switch to the "show full text on one page" variant of the URL.
        linkStr = linkStr.Replace("_1_x", "_-1_x");
        newsPageCode = Cn.GetHttpPageCode(linkStr, enType);
    }
    if (newsPageCode == "$UrlIsFalse" || newsPageCode == "$GetFalse")
    {
        return false;
    }

    string cTitle = Cn.GetBody(newsPageCode, Out.WmlDecode(model.TitleStart), Out.WmlDecode(model.TitleEnd), false, false);
    string cDateTime = Cn.GetRegValue(model.DateRegex, newsPageCode);
    string cBody = ExtractContentBody(Cn, newsPageCode, model);

    if (cTitle == "$StartFalse" || cBody == "$StartFalse" || cTitle == "$EndFalse" || cBody == "$EndFalse")
    {
        return false;
    }

    // No fixed page bound when collecting; the helper's cycle guard still applies.
    cBody = AppendFollowingPages(Cn, model, enType, newsPageCode, cBody, 0);

    // In collect mode the remote-file pass runs before the filters.
    string photoUrl = string.Empty;
    ArrayList bodyArray = Cn.ReplaceSaveRemoteFile(model.Types, cBody, picDir, model.WebUrl, model.IsSaveImg.ToString());
    if (bodyArray.Count == 2)
    {
        photoUrl = bodyArray[1].ToString();
    }
    cBody = bodyArray[0].ToString();
    cBody = cBody.Replace("'", "");

    cBody = RemoveBodyRanges(Cn, model, cBody, false);
    cBody = ApplyContentFilters(Cn, model, cBody);
    cBody = cBody.Replace("\r", ""); // drop carriage returns

    BCW.BLL.Detail detailBll = new BCW.BLL.Detail();

    // Duplicate check by title.
    if (detailBll.Exists(Out.UBB(cTitle)))
    {
        return false;
    }

    BCW.Model.Detail dmodel = new BCW.Model.Detail();
    dmodel.Title = Out.UBB(cTitle);
    dmodel.KeyWord = Out.CreateKeyWord(cTitle, 2);
    dmodel.Model = "";
    dmodel.IsAd = true;
    dmodel.Types = model.Types + 10;

    // The category id is taken from the first number in the article URL when it matches.
    int NodeId = 0;
    string strpattern = @"http://m.news.cn/entityitem/(\d+)/(\d+)/[\s\S]+?shtml";
    Match mtitle = Regex.Match(linkStr, strpattern, RegexOptions.IgnoreCase);
    if (mtitle.Success)
    {
        NodeId = Convert.ToInt32(mtitle.Groups[1].Value);
    }
    dmodel.NodeId = NodeId;

    dmodel.Content = Out.UBB(cBody.Trim()).Replace("$PageNext$", "");
    dmodel.TarText = "";
    dmodel.LanText = "";
    dmodel.SafeText = "";
    dmodel.LyText = "";
    dmodel.UpText = "";
    dmodel.IsVisa = 0;

    // A missing or unparseable date falls back to the current time.
    DateTime addTime;
    if (!DateTime.TryParse(cDateTime, out addTime))
    {
        addTime = DateTime.Now;
    }
    dmodel.AddTime = addTime;

    dmodel.Readcount = 0;
    dmodel.Recount = 0;
    dmodel.Cent = 0;
    dmodel.BzType = 0;
    dmodel.Hidden = 0;
    dmodel.UsID = 0;

    int newId = detailBll.Add(dmodel);

    // Attachments, plus a cover derived from the last '#'-separated picture.
    detailBll.UpdatePics(newId, photoUrl);
    if (photoUrl != "" && photoUrl.Contains("#"))
    {
        string[] pictures = photoUrl.Split('#');
        string cover = pictures[pictures.Length - 1];
        cover = cover.Replace("act/", "act/cover/");
        cover = cover.Replace("text/", "text/cover/");
        detailBll.UpdateCover(newId, cover);
    }

    return true;
}

/// <summary>
/// Collect mode: stores the articles of the first list page, then walks the following list
/// pages through NextListRegex, and finally ends the HTTP response.
/// </summary>
private void RunCollection(BCW.Collec.Collec Cn, BCW.Model.Collec.CollecItem model, Encoding enType, string picDir, ArrayList linkArray, string firstListPageCode)
{
    // Counts inserted records; currently informational only.
    int successNum = 0;
    SetProcessBar("从" + model.WebUrl + "采集信息", true);

    // Remembers fetched list URLs so a cyclic "next list" link ends the run.
    Hashtable visitedLists = new Hashtable();
    if (model.ListUrl != null)
    {
        visitedLists[model.ListUrl] = true;
    }

    string lUrl = "";
    bool isLastPage = false;

    while (true)
    {
        for (int i = 0; i < linkArray.Count; i++)
        {
            ProcessBar(i, linkArray.Count);
            if (CollectArticle(Cn, model, enType, picDir, linkArray[i].ToString()))
            {
                successNum++;
            }
        }

        // The final list page used to be parsed and then dropped by a break before its
        // links were processed; it is now collected above before the loop ends here.
        if (isLastPage)
        {
            break;
        }

        // Locate the next list page.
        string ListNextUrl;
        if (lUrl == "")
        {
            ListNextUrl = Cn.GetRegValue(Out.WmlDecode(model.NextListRegex), firstListPageCode);
        }
        else
        {
            ListNextUrl = lUrl;
        }
        ListNextUrl = Cn.DefiniteUrl(ListNextUrl, model.WebUrl);
        if (ListNextUrl == null || visitedLists.ContainsKey(ListNextUrl))
        {
            break;
        }
        visitedLists[ListNextUrl] = true;

        string nextListCode = Cn.GetHttpPageCode(ListNextUrl, enType);
        if (nextListCode == "")
        {
            break;
        }

        // NOTE(review): unlike the first page, following list pages are not cut down to
        // the ListStart/ListEnd region before link extraction — confirm this is intended.
        linkArray = Cn.GetLinkArray(nextListCode, Out.WmlDecode(model.LinkStart), Out.WmlDecode(model.LinkEnd));
        if (linkArray.Count == 0 || linkArray[0].ToString().Contains("$"))
        {
            break;
        }

        string followingList = Cn.GetRegValue(Out.WmlDecode(model.NextListRegex), nextListCode);
        if (followingList.Length > 0)
        {
            lUrl = followingList;
        }
        else
        {
            isLastPage = true;
        }
    }

    HttpContext.Current.Response.End();
}