/// <summary>
/// Rewrites the <c>src</c> attribute of every <c>img</c> element selected by
/// <c>//img</c> to the value produced by <c>HTMLUtil.GetFullURL</c>.
/// </summary>
/// <param name="documentNode">Node on which the <c>//img</c> query is run; matching elements are modified in place.</param>
/// <param name="InternalRealUrl">URL handed to <c>HTMLUtil.GetFullURL</c> as its first argument.</param>
protected static void ReplaceIncorrectImageSrc(HtmlNode documentNode, string InternalRealUrl)
{
    var images = documentNode.SelectNodes("//img");
    if (images != null)
    {
        foreach (HtmlNode image in images)
        {
            // A missing src attribute is read as "" and is still passed through GetFullURL.
            string originalSrc = image.GetAttributeValue("src", "");
            image.SetAttributeValue("src", HTMLUtil.GetFullURL(InternalRealUrl, originalSrc));
        }
    }
}
/// <summary>
/// Parses the start page of a book and collects its basic fields
/// (name, cover image, tag, URL and unique id).
/// </summary>
/// <param name="documentNode">Node on which the XPath queries for the page are run.</param>
/// <returns>
/// A <see cref="Hashtable"/> with one entry keyed by <c>CollectionFieldName.BookInfo</c>
/// whose value is the <see cref="Hashtable"/> of extracted fields.
/// </returns>
private Hashtable parseStartupPage(HtmlNode documentNode)
{
    Hashtable bookInfo = new Hashtable();

    HtmlNode titleNode = documentNode.SelectSingleNode("//meta[@property='og:title']");
    if (titleNode != null)
    {
        bookInfo.Add(CollectionFieldName.Novel_Name, titleNode.GetAttributeValue("content", ""));
    }

    HtmlNode coverNode = documentNode.SelectSingleNode("//div[@class='pic text-center']/img");
    if (coverNode != null)
    {
        string coverUrl = HTMLUtil.GetFullURL(InternalRealUrl, coverNode.GetAttributeValue("src", ""));
        bookInfo.Add(CollectionFieldName.Novel_CoverImgs, coverUrl);
    }

    HtmlNode categoryNode = documentNode.SelectSingleNode("//meta[@property='og:novel:category']");
    if (categoryNode != null)
    {
        bookInfo.Add(CollectionFieldName.Novel_Tag, categoryNode.GetAttributeValue("content", ""));
    }

    // The page's own URL is recorded as the book URL.
    bookInfo.Add(CollectionFieldName.Url, InternalRealUrl);

    // The dot is escaped so that only a literal ".html" suffix matches; an unescaped
    // dot accepts any character and can capture a truncated id.
    Match idMatch = Regex.Match(InternalRealUrl, @"/(\d+)\.html");
    if (idMatch.Success)
    {
        bookInfo.Add(CollectionFieldName.Novel_UniqueFlag, idMatch.Groups[1].Value);
    }

    Hashtable result = new Hashtable();
    result.Add(CollectionFieldName.BookInfo, bookInfo);
    return result;
}
/// <summary>
/// Moves an image URL held in a placeholder attribute into <c>src</c>: for every
/// <c>img</c> element whose placeholder attribute is non-empty, <c>src</c> is set to
/// the value produced by <c>HTMLUtil.GetFullURL</c> and the placeholder attribute is removed.
/// </summary>
/// <param name="documentNode">Node on which the <c>//img</c> query is run; matching elements are modified in place.</param>
/// <param name="InternalRealUrl">URL handed to <c>HTMLUtil.GetFullURL</c> as its first argument.</param>
/// <param name="PlaceholderAttribute">Name of the attribute that holds the image URL.</param>
protected static void ReplacePlaceholderImageSrc(HtmlNode documentNode, string InternalRealUrl, string PlaceholderAttribute)
{
    var images = documentNode.SelectNodes("//img");
    if (images == null)
    {
        return;
    }

    foreach (HtmlNode image in images)
    {
        string placeholderValue = image.GetAttributeValue(PlaceholderAttribute, "");
        if (string.IsNullOrEmpty(placeholderValue))
        {
            // Images without the placeholder attribute are left untouched.
            continue;
        }

        image.SetAttributeValue("src", HTMLUtil.GetFullURL(InternalRealUrl, placeholderValue));
        image.Attributes.Remove(PlaceholderAttribute);
    }
}
/// <summary>
/// Parses the start page of a book and collects its basic fields (name, cover image,
/// tag, status, content length, URL, introduction and unique id).
/// </summary>
/// <param name="documentNode">Node on which the XPath queries for the page are run.</param>
/// <returns>
/// A <see cref="Hashtable"/> with one entry keyed by <c>CollectionFieldName.BookInfo</c>
/// whose value is the <see cref="Hashtable"/> of extracted fields. Fields whose source
/// element is not found are omitted, except the cover image, which is stored as
/// <c>null</c> when the info block exists but contains no image.
/// </returns>
private Hashtable parseStartupPage(HtmlNode documentNode)
{
    Hashtable bookInfo = new Hashtable();

    HtmlNode nameNode = documentNode.SelectSingleNode("//span[@class='book_name']");
    if (nameNode != null)
    {
        bookInfo.Add(CollectionFieldName.Novel_Name, nameNode.InnerText);
    }

    HtmlNode infoNode = documentNode.SelectSingleNode("//div[@class='books_bar clear']");
    if (infoNode != null)
    {
        // Stored even when the image is missing (the value is then null).
        var coverUrl = infoNode.SelectSingleNode("div[@class='lr_list']/img")?.GetAttributeValue("src", "");
        bookInfo.Add(CollectionFieldName.Novel_CoverImgs, coverUrl);

        // NOTE(review): the queries below start with "//", which in XPath selects from
        // the document root rather than relative to infoNode - confirm this is intended.
        var tag = infoNode.SelectSingleNode("//ul[@class='book_list']/li[4]")?.InnerText;
        if (tag != null)
        {
            // Guarded: calling Replace on a missing node's text would throw NullReferenceException.
            bookInfo.Add(CollectionFieldName.Novel_Tag, tag.Replace("类别:", ""));
        }

        var statusText = infoNode.SelectSingleNode("//ul[@class='book_list']/li[3]")?.InnerText;
        if (!string.IsNullOrEmpty(statusText) && statusText.Contains("完结"))
        {
            bookInfo.Add(CollectionFieldName.Novel_Status, BookStatus.BookStatus_Finish);
        }
        else
        {
            bookInfo.Add(CollectionFieldName.Novel_Status, BookStatus.BookStatus_Update);
        }

        var lengthText = infoNode.SelectSingleNode("//ul[@class='book_list']/li[2]")?.InnerText;
        if (!string.IsNullOrEmpty(lengthText))
        {
            int contentLen = 0;

            // The dot is escaped so that only a real decimal point joins the two digit runs.
            Match lengthMatch = Regex.Match(lengthText, @"(\d+\.\d+)|(\d+)");
            if (lengthMatch.Success)
            {
                // Invariant culture: the matched text always uses '.' as the decimal separator.
                double number = double.Parse(
                    lengthMatch.Value,
                    System.Globalization.NumberStyles.Float,
                    System.Globalization.CultureInfo.InvariantCulture);

                double multiplier = 1;
                if (lengthText.Contains("万"))
                {
                    multiplier = 10000;
                }
                else if (lengthText.Contains("千"))
                {
                    multiplier = 1000;
                }

                // Rounded rather than truncated so a product that lands just below a
                // whole number because of floating-point error is not off by one.
                contentLen = (int)Math.Round(number * multiplier);
            }

            // Recorded as 0 when the text contains no number.
            bookInfo.Add(CollectionFieldName.Novel_ContentLen, contentLen);
        }
    }

    HtmlNode moreLinkNode = documentNode.SelectSingleNode("//a[@class='more_link']");
    if (moreLinkNode != null)
    {
        var url = HTMLUtil.GetFullURL(InternalRealUrl, moreLinkNode.GetAttributeValue("href", ""));
        bookInfo.Add(CollectionFieldName.Url, url);
    }

    HtmlNode introNode = documentNode.SelectSingleNode("//div[@id='divDescription']");
    if (introNode != null)
    {
        bookInfo.Add(CollectionFieldName.Novel_Intr, introNode.InnerHtml);
    }

    // The dot is escaped so that only a literal ".html" suffix matches.
    Match idMatch = Regex.Match(InternalRealUrl, @"/book/(\d+)\.html");
    if (idMatch.Success)
    {
        bookInfo.Add(CollectionFieldName.Novel_UniqueFlag, idMatch.Groups[1].Value);
    }

    Hashtable result = new Hashtable();
    result.Add(CollectionFieldName.BookInfo, bookInfo);
    return result;
}
/// <summary>
/// Parses the start page of a book and collects its basic fields
/// (name, cover image, tag, URL and unique id).
/// </summary>
/// <param name="documentNode">Node on which the XPath queries for the page are run.</param>
/// <returns>
/// A <see cref="Hashtable"/> with one entry keyed by <c>CollectionFieldName.BookInfo</c>
/// whose value is the <see cref="Hashtable"/> of extracted fields.
/// </returns>
private Hashtable parseStartupPage(HtmlNode documentNode)
{
    Hashtable bookInfo = new Hashtable();

    HtmlNode titleNode = documentNode.SelectSingleNode("//meta[@property='og:title']");
    if (titleNode != null)
    {
        bookInfo.Add(CollectionFieldName.Novel_Name, titleNode.GetAttributeValue("content", ""));
    }

    HtmlNode coverNode = documentNode.SelectSingleNode("//div[@class='pic text-center']/img");
    if (coverNode != null)
    {
        string coverUrl = HTMLUtil.GetFullURL(InternalRealUrl, coverNode.GetAttributeValue("src", ""));
        bookInfo.Add(CollectionFieldName.Novel_CoverImgs, coverUrl);
    }

    HtmlNode categoryNode = documentNode.SelectSingleNode("//meta[@property='og:novel:category']");
    if (categoryNode != null)
    {
        bookInfo.Add(CollectionFieldName.Novel_Tag, categoryNode.GetAttributeValue("content", ""));
    }

    // Status, content length and introduction are not collected by this parser;
    // the page's own URL is recorded as the book URL.
    bookInfo.Add(CollectionFieldName.Url, InternalRealUrl);

    // The dot is escaped so that only a literal ".html" suffix matches; an unescaped
    // dot accepts any character and can capture a truncated id.
    Match idMatch = Regex.Match(InternalRealUrl, @"/(\d+)\.html");
    if (idMatch.Success)
    {
        bookInfo.Add(CollectionFieldName.Novel_UniqueFlag, idMatch.Groups[1].Value);
    }

    Hashtable result = new Hashtable();
    result.Add(CollectionFieldName.BookInfo, bookInfo);
    return result;
}
/// <summary>
/// Builds a full URL by delegating to <c>HTMLUtil.GetFullURL</c> with <c>baseUrl</c>
/// as the first argument.
/// </summary>
/// <param name="relativeURL">URL to resolve; passed through unchanged as the second argument.</param>
/// <returns>The value returned by <c>HTMLUtil.GetFullURL(baseUrl, relativeURL)</c>.</returns>
protected string GetFullURL(string relativeURL) => HTMLUtil.GetFullURL(baseUrl, relativeURL);