/// <summary>
/// Parses a detail page: collects the cleaned content HTML and, when present, the link to the next section.
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table that holds <c>CollectionFieldName.ExContent</c> when a <c>div#content</c> element exists and
/// <c>CollectionFieldName.NextUrl</c> when a usable "next section" anchor exists.
/// </returns>
private Hashtable parseDetailPage2(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@id='content']");
    if (body != null)
    {
        // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
        // the named elements with their content and RemoveHtmlTag strips only the named tags - confirm.
        string html = HTMLUtil.RemoveHtmlContent(body.InnerHtml, "div", "style", "script");
        html = HTMLUtil.RemoveHtmlTag(html, "p", "img", "br");

        // Fragments are removed strictly in this order. The repeated entries are intentional carry-overs:
        // a second pass can remove a fragment that only forms after the first pass joined its neighbours.
        string[] noise =
        {
            "\r\n",
            "\t",
            "\r\n",
            "\t",
            "手机请访问::feisuz",
            "feisuz",
            "feisuz",
            "作者的话:",
            "新书,求收藏求推荐",
            "本书红薯网首发,请勿转载!",
        };
        foreach (string piece in noise)
        {
            html = html.Replace(piece, "");
        }

        fields.Add(CollectionFieldName.ExContent, html);
    }

    // The next-section link is looked up even when the content container is missing.
    var anchors = documentNode.SelectNodes("//*[@id='content']/div[@class='text']/a");
    HtmlNode nextAnchor = anchors?.FirstOrDefault(a => a.InnerText == "下一节");
    string href = nextAnchor?.GetAttributeValue("href", "");

    // "#" is treated as "no next page".
    if (!string.IsNullOrEmpty(href) && href != "#")
    {
        fields.Add(CollectionFieldName.NextUrl, href);
    }

    return fields;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div#content</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@id='content']");
    if (body == null)
    {
        return fields;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(body.InnerHtml, "div", "style", "script", "center", "span");

    // Line breaks, tabs and site watermarks, removed in this order (the longer watermark goes first).
    string[] noise =
    {
        "\r\n",
        "\t",
        "全本小说网欢迎您!WWW.YZNN.COM T1706231537",
        "F606121",
        "全本小说网欢迎您!WWW.YZNN.COM",
    };
    foreach (string piece in noise)
    {
        html = html.Replace(piece, "");
    }
    fields.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags and spaces is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "");
    if (!string.IsNullOrEmpty(plain))
    {
        fields.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        fields.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price < 5)
        {
            price = 5;
        }
        else if (price > 15)
        {
            price = 15;
        }
        fields.Add(CollectionFieldName.Chap_Pirce, price);
    }

    fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return fields;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div#ccontent</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    Hashtable returndata = new Hashtable();

    HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id=\"ccontent\"]");
    if (contentNode == null)
    {
        return returndata;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "a");
    html = html.Replace("\r\n", "")
               .Replace("\t", "")
               .Replace("領域文學首發地址httP://www.lingyu.org<br><br>", "")
               .Replace("領域文學首發地址www.lingyu.org<br><br> <br><br>", "")
               .Replace("请记住本书首发域名:http://www.lingyu.org 领域文学手机版阅读网址: m.lingyu.org", "");

    // Strip embedded chapter URLs. The dots are escaped so they match only a literal '.';
    // unescaped, each '.' matched any character and could remove unrelated text.
    const string chapterUrlPattern = @"http://www\.lingyu\.org/\w+/\d+/\d+/\d+\.html";
    html = Regex.Replace(html, chapterUrlPattern, "");
    returndata.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags and spaces is what the length, intro and price are based on.
    // The watermark removals are repeated on the plain text exactly as before.
    string plain = HTMLUtil.RemoveHtmlTag(html)
                           .Replace(" ", "")
                           .Replace("領域文學首發地址httP://www.lingyu.org<br><br>", "")
                           .Replace("領域文學首發地址www.lingyu.org<br><br> <br><br>", "")
                           .Replace("请记住本书首发域名:http://www.lingyu.org 领域文学手机版阅读网址: m.lingyu.org", "");

    if (!string.IsNullOrEmpty(plain))
    {
        returndata.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        returndata.Add(CollectionFieldName.Chap_Intro, intro);

        // 3 per full 500 characters of plain text, capped at 15 (short chapters price at 0 here).
        int price = (plain.Length / 500) * 3;
        if (price > 15)
        {
            price = 15;
        }
        returndata.Add(CollectionFieldName.Chap_Pirce, price);
    }

    returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return returndata;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields and, when present, the link to the next section.
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table with the chapter fields (only when a <c>div#content</c> element exists) and
/// <c>CollectionFieldName.NextUrl</c> (only when a usable "next section" anchor exists).
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@id='content']");
    if (body != null)
    {
        // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
        // the named elements with their content and RemoveHtmlTag strips only the named tags - confirm.
        string html = HTMLUtil.RemoveHtmlContent(body.InnerHtml, "div", "style", "script");
        html = HTMLUtil.RemoveHtmlTag(html, "p", "img", "br");
        html = html.Replace("\r\n", "");
        html = html.Replace("\t", "");
        fields.Add(CollectionFieldName.Chap_Content, html);

        // Text without tags and spaces is what the length and intro are based on.
        string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "");
        if (!string.IsNullOrEmpty(plain))
        {
            fields.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

            string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
            fields.Add(CollectionFieldName.Chap_Intro, intro);

            // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
            int price = (html.Length / 1000) * 5;
            if (price < 5)
            {
                price = 5;
            }
            else if (price > 15)
            {
                price = 15;
            }
            fields.Add(CollectionFieldName.Chap_Pirce, price);
        }

        fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
        fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    }

    // The next-section link is looked up even when the content container is missing.
    var anchors = documentNode.SelectNodes("//*[@id='content']/div[@class='text']/a");
    HtmlNode nextAnchor = anchors?.FirstOrDefault(a => a.InnerText == "下一节");
    string href = nextAnchor?.GetAttributeValue("href", "");

    // "#" is treated as "no next page".
    if (!string.IsNullOrEmpty(href) && href != "#")
    {
        fields.Add(CollectionFieldName.NextUrl, href);
    }

    return fields;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no
/// <c>div.panel-body.content-body.content-ext</c> element (exact class-attribute match).
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@class=\"panel-body content-body content-ext\"]");
    if (body == null)
    {
        return fields;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(body.InnerHtml, "div", "style", "script");

    // Line breaks, tabs and promotional fragments, removed in this order.
    string[] htmlNoise =
    {
        "\r\n",
        "\t",
        "手机请访问::feisuz",
        "feisuz",
        "作者的话:",
        "新书,求收藏求推荐",
        "本书红薯网首发,请勿转载!",
    };
    foreach (string piece in htmlNoise)
    {
        html = html.Replace(piece, "");
    }
    fields.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags, spaces and promotional fragments is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html);
    string[] textNoise =
    {
        " ",
        "feisuz",
        "作者的话:",
        "新书,求收藏求推荐",
        "本书红薯网首发,请勿转载!",
    };
    foreach (string piece in textNoise)
    {
        plain = plain.Replace(piece, "");
    }

    if (!string.IsNullOrEmpty(plain))
    {
        fields.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        fields.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price < 5)
        {
            price = 5;
        }
        else if (price > 15)
        {
            price = 15;
        }
        fields.Add(CollectionFieldName.Chap_Pirce, price);
    }

    fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return fields;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, status, type).
/// This variant records no price and keeps a 150-character intro.
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div.readout &gt; div.shuneirong</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@class='readout']/div[@class='shuneirong']");
    if (body == null)
    {
        return fields;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(body.InnerHtml, "div", "style", "script", "center", "span");

    // Line breaks, tabs and site watermarks, removed in this order (longer forms before their substrings).
    string[] noise =
    {
        "\r\n",
        "\t",
        "免费小说",
        "biquge5200.com",
        "biquge5200",
        "笔趣阁",
        "http://",
        "本书红薯网首发,请勿转载!",
    };
    foreach (string piece in noise)
    {
        html = html.Replace(piece, "");
    }
    fields.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags and spaces is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "");
    if (!string.IsNullOrEmpty(plain))
    {
        fields.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 150 ? plain.Substring(0, 150) + "..." : plain;
        fields.Add(CollectionFieldName.Chap_Intro, intro);
    }

    fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return fields;
}
/// <summary>
/// Parses a paid chapter detail page into the chapter fields (content, length, price, status, type).
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div.content</c> element.
/// </returns>
private Hashtable parseDetailPage2(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@class='content']");
    if (body == null)
    {
        return fields;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlTag strips only
    // the named tags and RemoveHtmlContent drops the named elements with their content - confirm.
    string html = HTMLUtil.RemoveHtmlTag(body.InnerHtml, "p", "img", "br");
    html = HTMLUtil.RemoveHtmlContent(html, "style", "script");
    fields.Add(CollectionFieldName.Chap_Content, html);

    // Length and price are recorded only when the node has visible text,
    // but both are measured on the cleaned HTML rather than on that text.
    if (!string.IsNullOrEmpty(body.InnerText))
    {
        // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price < 5)
        {
            price = 5;
        }
        else if (price > 15)
        {
            price = 15;
        }

        fields.Add(CollectionFieldName.Chap_ContentLen, html.Length);
        fields.Add(CollectionFieldName.Chap_Pirce, price);
    }

    fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Pay);
    return fields;
}
/// <summary>
/// Parses a free chapter detail page into the chapter fields (content, length, intro, status, type).
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div.cDetail</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@class='cDetail']");
    if (body == null)
    {
        return fields;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlTag strips only
    // the named tags and RemoveHtmlContent drops the named elements with their content - confirm.
    string html = HTMLUtil.RemoveHtmlTag(body.InnerHtml, "p", "img", "br");
    html = HTMLUtil.RemoveHtmlContent(html, "style", "script");
    fields.Add(CollectionFieldName.Chap_Content, html);

    // Length and intro come from the node's raw inner text, not from the cleaned HTML.
    string plain = body.InnerText;
    if (!string.IsNullOrEmpty(plain))
    {
        fields.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        fields.Add(CollectionFieldName.Chap_Intro, intro);
    }

    fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return fields;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// The content HTML is lower-cased before site watermarks are stripped.
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div.articleCon</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    Hashtable returndata = new Hashtable();

    HtmlNode contentNode = documentNode.SelectSingleNode("//div[@class='articleCon']");
    if (contentNode == null)
    {
        return returndata;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "dt", "a");

    // Invariant lower-casing: the literals below are ASCII lower-case, and culture-sensitive casing
    // (e.g. Turkish 'I') would stop them from matching.
    html = html.ToLowerInvariant();

    // Watermarks are removed longest-first. Because the text is already lower-case, the mixed-case
    // variants ("紫You阁 WwW.ZiyouGE.com", "WWw.ZiyoUgE.com") could never match; they are replaced by
    // their lower-case forms, placed before "ziyouge.com" so no "www." residue is left behind.
    html = html.Replace("\r\n", "").Replace("\t", "")
               .Replace("本站访问地址http://www.ziyouge.com 任意搜索引擎内输入:紫幽阁 即可访问!", "")
               .Replace("http://www.ziyouge.com", "")
               .Replace("紫you阁 www.ziyouge.com", "")
               .Replace("www.ziyouge.com", "")
               .Replace("紫幽阁", "")
               .Replace("wanben.me", "")
               .Replace("ziyouge.com", "")
               .Replace("ziyouge", "")
               .Replace("http://", "")
               .Replace("http", "")
               .Replace("品书网", "")
               .Replace("www.vodtw.com", "")
               .Replace("本书来自", "")
               .Replace("/html/book/19/19092/", "");

    // Strip remaining domain-like tokens.
    // NOTE(review): the (?=.{3,255}$) lookahead is anchored to the end of the input, so a match can only
    // start within the last 255 characters; domains earlier in a long chapter survive. Left unchanged
    // because dropping it would also strip tokens such as "1.5" everywhere - confirm the intent.
    string pattern = @"(?=.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+";
    html = Regex.Replace(html, pattern, "");
    returndata.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags, spaces and promotional fragments is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "").Replace("feisuz", "")
                           .Replace("作者的话:", "").Replace("新书,求收藏求推荐", "").Replace("本书红薯网首发,请勿转载!", "");

    if (!string.IsNullOrEmpty(plain))
    {
        returndata.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        returndata.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price == 0)
        {
            price = 5;
        }
        if (price > 15)
        {
            price = 15;
        }
        returndata.Add(CollectionFieldName.Chap_Pirce, price);
    }

    returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return returndata;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// The content HTML is lower-cased, stripped of site watermarks and app notices, and then of all tags.
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div#content</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    Hashtable returndata = new Hashtable();

    HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='content']");
    if (contentNode == null)
    {
        return returndata;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "dt", "a");

    // Invariant lower-casing: the literals below are ASCII lower-case, and culture-sensitive casing
    // (e.g. Turkish 'I') would stop them from matching.
    html = html.ToLowerInvariant();

    // Watermarks are removed longest-first. Two ordering defects are fixed here:
    //  * the mixed-case variants ("紫You阁 WwW.ZiyouGE.com", "WWw.ZiyoUgE.com") could never match
    //    lower-cased text; their lower-case forms now run before "ziyouge.com";
    //  * "appxsyd" now runs before "app", which previously consumed its prefix and left "xsyd".
    html = html.Replace("\r\n", "").Replace("\t", "")
               .Replace("本站访问地址http://www.ziyouge.com 任意搜索引擎内输入:紫幽阁 即可访问!", "")
               .Replace("http://www.ziyouge.com", "")
               .Replace("紫you阁 www.ziyouge.com", "")
               .Replace("www.ziyouge.com", "")
               .Replace("紫幽阁", "")
               .Replace("wanben.me", "")
               .Replace("ziyouge.com", "")
               .Replace("ziyouge", "")
               .Replace("http://", "")
               .Replace("http", "")
               .Replace("品书网", "")
               .Replace("www.vodtw.com", "")
               .Replace("本书来自", "")
               .Replace("/html/book/19/19092/", "")
               .Replace("大家想继续看我的书,可以加我微信gdy3208新书出了,我会第一时间发动态通知大家!", "")
               .Replace("本站重要通知:请使用本站的免费小说app,无广告、破防盗版、更新快,会员同步书架,请关注微信公众号 appxsyd (按住三秒复制) 下载免费阅读器!!", "")
               .Replace("本站重要通知: 请使用本站的免费小说app,无广告、破防盗版、更新快,会员同步书架,请关注微信公众号 gegegengxin (按住三秒复制)下载免费阅读器!!", "")
               .Replace("本站重要通知:", "")
               .Replace("请使用本站的免费小说", "")
               .Replace("appxsyd", "")
               .Replace("app", "")
               .Replace("无广告、破防盗版、更新快,会员同步书架", "")
               .Replace("请关注微信公众号", "")
               .Replace("gegegengxin", "")
               .Replace("(按住三秒复制)", "")
               .Replace("(按住三秒复制)", "")
               .Replace("下载免费阅读器", "");

    // Strip remaining domain-like tokens.
    // NOTE(review): the (?=.{3,255}$) lookahead is anchored to the end of the input, so a match can only
    // start within the last 255 characters; domains earlier in a long chapter survive. Left unchanged
    // because dropping it would also strip tokens such as "1.5" everywhere - confirm the intent.
    string domainPattern = @"(?=.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+";
    html = Regex.Replace(html, domainPattern, "");

    // Strip the remaining tags one at a time. The previous greedy "<.+>" matched from the first '<'
    // to the last '>' on a line and deleted all chapter text in between.
    string tagPattern = @"<[^>]+>";
    html = Regex.Replace(html, tagPattern, "");
    returndata.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags, spaces and promotional fragments is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "").Replace("feisuz", "")
                           .Replace("作者的话:", "").Replace("新书,求收藏求推荐", "").Replace("本书红薯网首发,请勿转载!", "");

    if (!string.IsNullOrEmpty(plain))
    {
        returndata.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        returndata.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of cleaned content, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price == 0)
        {
            price = 5;
        }
        if (price > 15)
        {
            price = 15;
        }
        returndata.Add(CollectionFieldName.Chap_Pirce, price);
    }

    returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return returndata;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// The content HTML is lower-cased, stripped of site watermarks, and then of all tags.
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div#booktext</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    Hashtable returndata = new Hashtable();

    HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='booktext']");
    if (contentNode == null)
    {
        return returndata;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "dt", "a");

    // Invariant lower-casing so that matching the ASCII literals below does not depend on the
    // current culture (e.g. Turkish 'I').
    html = html.ToLowerInvariant();

    // Site notices are removed whole first (spaced and unspaced variants), then their key words.
    html = html.Replace("\r\n", "").Replace("\t", "");
    html = html.Replace("【快速查找本站请百度搜索: 书包520】", "")
               .Replace("【本站域名更改为“ www.shubao520.net ” ,或者在百度搜索: 书包520】", "");
    html = html.Replace("【本站域名更改为“www.shubao520.net”,或者在百度搜索:书包520】", "")
               .Replace("【快速查找本站请百度搜索:书包520】", "")
               .Replace("书包520", "")
               .Replace("www.shubao520.net", "")
               .Replace("百度", "")
               .Replace("搜索", "")
               .Replace("域名", "");

    // Strip remaining domain-like tokens.
    // NOTE(review): the (?=.{3,255}$) lookahead is anchored to the end of the input, so a match can only
    // start within the last 255 characters; domains earlier in a long chapter survive. Left unchanged
    // because dropping it would also strip tokens such as "1.5" everywhere - confirm the intent.
    string domainPattern = @"(?=.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+";
    html = Regex.Replace(html, domainPattern, "");

    // Strip the remaining tags one at a time. The previous greedy "<.+>" matched from the first '<'
    // to the last '>' on a line and deleted all chapter text in between.
    string tagPattern = @"<[^>]+>";
    html = Regex.Replace(html, tagPattern, "");
    returndata.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags, spaces and promotional fragments is what the length, intro and price are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "").Replace("feisuz", "")
                           .Replace("作者的话:", "").Replace("新书,求收藏求推荐", "").Replace("本书红薯网首发,请勿转载!", "");

    if (!string.IsNullOrEmpty(plain))
    {
        returndata.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        returndata.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of plain text, kept within 5..15.
        int price = (plain.Length / 1000) * 5;
        if (price == 0)
        {
            price = 5;
        }
        if (price > 15)
        {
            price = 15;
        }
        returndata.Add(CollectionFieldName.Chap_Pirce, price);
    }

    returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return returndata;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// Embedded "www.*" URLs are stripped from the content with regular expressions.
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div.messagecontent</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    Hashtable returndata = new Hashtable();

    HtmlNode contentNode = documentNode.SelectSingleNode("//div[@class='messagecontent']");
    if (contentNode == null)
    {
        return returndata;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script");
    html = html.Replace("\r\n", "").Replace("\t", "");

    // URL patterns, applied most specific first: page URL ending in .html, URL with a path, bare host.
    // The path part was "(\w+|/)+", a nested quantifier that backtracks exponentially when the
    // trailing ".html" is missing (\w also matches CJK text, so long runs are common). "[\w/]+"
    // accepts exactly the same strings without the blow-up.
    string[] urlPatterns =
    {
        @"www\.[a-zA-Z0-9]+\.(?:com|cn|net|org)/[\w/]+\.html",
        @"www\.[a-zA-Z0-9]+\.(?:com|cn|net|org)/[\w/]+",
        @"www\.[a-zA-Z0-9]+\.(?:com|cn|net|org)/?",
    };
    foreach (string urlPattern in urlPatterns)
    {
        html = Regex.Replace(html, urlPattern, "");
    }
    returndata.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags and spaces is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "");
    if (!string.IsNullOrEmpty(plain))
    {
        returndata.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        returndata.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price == 0)
        {
            price = 5;
        }
        if (price > 15)
        {
            price = 15;
        }
        returndata.Add(CollectionFieldName.Chap_Pirce, price);
    }

    returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return returndata;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div#content</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@id=\"content\"]");
    if (body == null)
    {
        return fields;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(body.InnerHtml, "div", "style", "script", "a");

    // First group of fragments, removed in this order.
    string[] leadingNoise =
    {
        "\r\n",
        "\t",
        "手机请访问::feisuz",
        "feisuz",
        "作者的话:",
        "新书,求收藏求推荐",
        "本书红薯网首发,请勿转载!",
        "老铁!还在找\"美艳冥妻\"免费小说?",
        " (www.yikanxiaoshuo.com = ",
    };
    foreach (string piece in leadingNoise)
    {
        html = html.Replace(piece, "");
    }

    // Between the two groups: trim surrounding whitespace, then any trailing ')'.
    html = html.Trim().TrimEnd(')');

    // Second group of fragments, removed in this order (longer forms before their substrings).
    string[] trailingNoise =
    {
        "<br> <br> 百度直接搜索: \"易看小说\" 看免费小说,没毛病!<br>",
        "老铁!还在找\"绝望游戏\"免费小说?",
        "百度直接搜索: \"易看小说\" 看免费小说,没毛病!",
        "\"易看小说\"",
        "易看小说",
        "免费小说",
        "(更快免费阅读加微信:jxxs9966)",
        "jxxs9966",
    };
    foreach (string piece in trailingNoise)
    {
        html = html.Replace(piece, "");
    }
    fields.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags and spaces is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "");
    if (!string.IsNullOrEmpty(plain))
    {
        fields.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        fields.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price < 5)
        {
            price = 5;
        }
        else if (price > 15)
        {
            price = 15;
        }
        fields.Add(CollectionFieldName.Chap_Pirce, price);
    }

    fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return fields;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div#content</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@id='content']");
    if (body == null)
    {
        return fields;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(body.InnerHtml, "div", "style", "script", "dt");

    // Line breaks, tabs and site watermarks, removed in this order (case-sensitive, longer forms first).
    string[] htmlNoise =
    {
        "\r\n",
        "\t",
        "本站访问地址http://www.ziyouge.com 任意搜索引擎内输入:紫幽阁 即可访问!",
        "http://www.ziyouge.com",
        "紫幽阁",
        "www.ziyouge.com",
        "ziyouge.com",
        "ziyouge",
        "http://",
        "http",
        "紫You阁 WwW.ZiyouGE.com",
        "WWw.ZiyoUgE.com",
        "一秒记住【笔♂趣→阁 WWW.52BQG.COM】,精彩小说无弹窗免费阅读!",
        "WWW.52BQG.COM",
        "一秒记住",
        "精彩小说无弹窗免费阅读!",
        "给大家推荐一个公众号,搜索“春话秋阅”或者“yigewuquderen”(一个无趣的人的拼音),是我和基友做的,里面有基友写的段子、短篇小文章、乐评之类的,以及我写的关于本书的一些事情、番外和一些有意思的故事,还可能有提前发新章节,当然不会很多。可能会在公众号开本新书,不长,完全免费,可能会开。大家可以关注一下,到时候加更。",
    };
    foreach (string piece in htmlNoise)
    {
        html = html.Replace(piece, "");
    }

    // Strip remaining domain-like tokens.
    // NOTE(review): the (?=.{3,255}$) lookahead is anchored to the end of the input, so a match can only
    // start within the last 255 characters of the content - confirm this is intended.
    html = Regex.Replace(html, @"(?=.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+", "");
    fields.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags, spaces and promotional fragments is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html);
    string[] textNoise =
    {
        " ",
        "feisuz",
        "作者的话:",
        "新书,求收藏求推荐",
        "本书红薯网首发,请勿转载!",
    };
    foreach (string piece in textNoise)
    {
        plain = plain.Replace(piece, "");
    }

    if (!string.IsNullOrEmpty(plain))
    {
        fields.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        fields.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price < 5)
        {
            price = 5;
        }
        else if (price > 15)
        {
            price = 15;
        }
        fields.Add(CollectionFieldName.Chap_Pirce, price);
    }

    fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return fields;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div#content</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@id='content']");
    if (body == null)
    {
        return fields;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(body.InnerHtml, "div", "style", "script", "center", "span");

    // Line breaks, tabs and site/author notices, removed in this order.
    string[] noise =
    {
        "\r\n",
        "\t",
        "一秒记住【爱看小说网www.akxs6.com】,为您提供精彩小说阅读。",
        "PS:作者群:561371881,新书发布,求收藏,求打赏!您的支持,是我写作的动力!",
        "另:上架十万字更新!",
        "手机用户请浏览m.akxs6.com阅读,更优质的阅读体验来自爱看小说网。",
    };
    foreach (string piece in noise)
    {
        html = html.Replace(piece, "");
    }
    fields.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags and spaces is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "");
    if (!string.IsNullOrEmpty(plain))
    {
        fields.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        fields.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price < 5)
        {
            price = 5;
        }
        else if (price > 15)
        {
            price = 15;
        }
        fields.Add(CollectionFieldName.Chap_Pirce, price);
    }

    fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return fields;
}
/// <summary>
/// Parses a chapter detail page into the chapter fields (content, length, intro, price, status, type).
/// </summary>
/// <param name="documentNode">Root node of the parsed detail page.</param>
/// <returns>
/// A table of chapter fields; empty when the page has no <c>div#content</c> element.
/// </returns>
private Hashtable parseDetailPage1(HtmlNode documentNode)
{
    var fields = new Hashtable();

    HtmlNode body = documentNode.SelectSingleNode("//div[@id='content']");
    if (body == null)
    {
        return fields;
    }

    // NOTE(review): HTMLUtil is a project helper not visible here; presumably RemoveHtmlContent drops
    // the named elements together with their content - confirm.
    string html = HTMLUtil.RemoveHtmlContent(body.InnerHtml, "div", "style", "script", "center", "span");

    // Line breaks, tabs and site notices, removed in this order.
    string[] noise =
    {
        "\r\n",
        "\t",
        "最快更新无错小说阅读,请访问www.feizw.com",
        "手机请访问:http://m.feizw.com",
    };
    foreach (string piece in noise)
    {
        html = html.Replace(piece, "");
    }

    // Strip remaining domain-like tokens.
    // NOTE(review): the (?=.{3,255}$) lookahead is anchored to the end of the input, so a match can only
    // start within the last 255 characters of the content - confirm this is intended.
    html = Regex.Replace(html, @"(?=.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+", "");
    fields.Add(CollectionFieldName.Chap_Content, html);

    // Text without tags and spaces is what the length and intro are based on.
    string plain = HTMLUtil.RemoveHtmlTag(html).Replace(" ", "");
    if (!string.IsNullOrEmpty(plain))
    {
        fields.Add(CollectionFieldName.Chap_ContentLen, plain.Length);

        string intro = plain.Length > 40 ? plain.Substring(0, 40) + "..." : plain;
        fields.Add(CollectionFieldName.Chap_Intro, intro);

        // 5 per full 1000 characters of cleaned HTML, kept within 5..15.
        int price = (html.Length / 1000) * 5;
        if (price < 5)
        {
            price = 5;
        }
        else if (price > 15)
        {
            price = 15;
        }
        fields.Add(CollectionFieldName.Chap_Pirce, price);
    }

    fields.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
    fields.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
    return fields;
}