예제 #1
0
        /// <summary>
        /// Rewrites the <c>src</c> attribute of every <c>img</c> element so that a relative
        /// image path becomes the full path produced by <c>HTMLUtil.GetFullURL</c>.
        /// </summary>
        /// <param name="documentNode">Node whose document is searched for <c>img</c> elements; modified in place.</param>
        /// <param name="InternalRealUrl">Base URL handed to <c>HTMLUtil.GetFullURL</c> together with each <c>src</c> value.</param>
        protected static void ReplaceIncorrectImageSrc(HtmlNode documentNode, string InternalRealUrl)
        {
            HtmlNodeCollection imgNodes = documentNode.SelectNodes("//img");

            // No img elements were found, so there is nothing to rewrite.
            if (imgNodes == null)
            {
                return;
            }
            foreach (var img in imgNodes)
            {
                var src = img.GetAttributeValue("src", "");

                // An image without a src has nothing to resolve. Writing a resolved
                // empty value back would give the element a src it never had.
                if (string.IsNullOrEmpty(src))
                {
                    continue;
                }
                img.SetAttributeValue("src", HTMLUtil.GetFullURL(InternalRealUrl, src));
            }
        }
예제 #2
0
        /// <summary>
        /// Parses the start page of a book and collects the title, cover image, tag,
        /// page URL and unique id that can be read from it.
        /// </summary>
        /// <param name="documentNode">Node of the parsed start page that the XPath queries run against.</param>
        /// <returns>
        /// A table with a single <c>CollectionFieldName.BookInfo</c> entry whose value is the
        /// table of extracted fields. Fields whose source node is missing are simply absent.
        /// </returns>
        private Hashtable parseStartupPage(HtmlNode documentNode)
        {
            Hashtable bookInfo = new Hashtable();

            HtmlNode titleNode = documentNode.SelectSingleNode("//meta[@property='og:title']");
            if (titleNode != null)
            {
                bookInfo.Add(CollectionFieldName.Novel_Name, titleNode.GetAttributeValue("content", ""));
            }

            HtmlNode coverNode = documentNode.SelectSingleNode("//div[@class='pic text-center']/img");
            if (coverNode != null)
            {
                var imgUrl = coverNode.GetAttributeValue("src", "");
                bookInfo.Add(CollectionFieldName.Novel_CoverImgs, HTMLUtil.GetFullURL(InternalRealUrl, imgUrl));
            }

            HtmlNode categoryNode = documentNode.SelectSingleNode("//meta[@property='og:novel:category']");
            if (categoryNode != null)
            {
                bookInfo.Add(CollectionFieldName.Novel_Tag, categoryNode.GetAttributeValue("content", ""));
            }

            // The page's own address is stored as the book URL.
            bookInfo.Add(CollectionFieldName.Url, InternalRealUrl);

            // The numeric id is the digits right before ".html". The dot is escaped so that
            // only a literal '.' is accepted instead of any character.
            Match idMatch = Regex.Match(InternalRealUrl, @"/(\d+)\.html");
            if (idMatch.Success)
            {
                bookInfo.Add(CollectionFieldName.Novel_UniqueFlag, idMatch.Groups[1].Value);
            }

            Hashtable returndata = new Hashtable();
            returndata.Add(CollectionFieldName.BookInfo, bookInfo);
            return returndata;
        }
예제 #3
0
        /// <summary>
        /// Replaces placeholder image paths: for every <c>img</c> element that carries a
        /// non-empty placeholder attribute, the value is resolved with
        /// <c>HTMLUtil.GetFullURL</c>, written to <c>src</c>, and the placeholder attribute is removed.
        /// </summary>
        /// <param name="documentNode">Node whose document is searched for <c>img</c> elements; modified in place.</param>
        /// <param name="InternalRealUrl">Base URL handed to <c>HTMLUtil.GetFullURL</c>.</param>
        /// <param name="PlaceholderAttribute">Name of the attribute that holds the image URL.</param>
        protected static void ReplacePlaceholderImageSrc(HtmlNode documentNode, string InternalRealUrl, string PlaceholderAttribute)
        {
            HtmlNodeCollection images = documentNode.SelectNodes("//img");

            if (images != null)
            {
                foreach (var image in images)
                {
                    var placeholderUrl = image.GetAttributeValue(PlaceholderAttribute, "");

                    // Images without the placeholder attribute (or with an empty one) are left untouched.
                    if (string.IsNullOrEmpty(placeholderUrl))
                    {
                        continue;
                    }

                    image.SetAttributeValue("src", HTMLUtil.GetFullURL(InternalRealUrl, placeholderUrl));
                    image.Attributes.Remove(PlaceholderAttribute);
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Parses the start page of a book and collects the title, cover image, tag, status,
        /// word count, chapter-list URL, introduction and unique id that can be read from it.
        /// </summary>
        /// <param name="documentNode">Node of the parsed start page that the XPath queries run against.</param>
        /// <returns>
        /// A table with a single <c>CollectionFieldName.BookInfo</c> entry whose value is the
        /// table of extracted fields. Fields whose source node is missing are absent, except the
        /// cover image, which is stored as null when the info bar exists but holds no image.
        /// </returns>
        private Hashtable parseStartupPage(HtmlNode documentNode)
        {
            Hashtable bookInfo = new Hashtable();

            HtmlNode titleNode = documentNode.SelectSingleNode("//span[@class='book_name']");
            if (titleNode != null)
            {
                bookInfo.Add(CollectionFieldName.Novel_Name, titleNode.InnerText);
            }

            HtmlNode infoBar = documentNode.SelectSingleNode("//div[@class='books_bar clear']");
            if (infoBar != null)
            {
                var imgUrl = infoBar.SelectSingleNode("div[@class='lr_list']/img")?.GetAttributeValue("src", "");
                bookInfo.Add(CollectionFieldName.Novel_CoverImgs, imgUrl);

                // NOTE: the queries below start with "//", so XPath evaluates them against the
                // whole document rather than only the descendants of infoBar.
                var tag = infoBar.SelectSingleNode("//ul[@class='book_list']/li[4]")?.InnerText;
                // The list item may be missing; only strip the label and store the tag when it exists.
                if (tag != null)
                {
                    bookInfo.Add(CollectionFieldName.Novel_Tag, tag.Replace("类别:", ""));
                }

                var statusName = infoBar.SelectSingleNode("//ul[@class='book_list']/li[3]")?.InnerText;
                if (!string.IsNullOrEmpty(statusName) && statusName.Contains("完结"))
                {
                    bookInfo.Add(CollectionFieldName.Novel_Status, BookStatus.BookStatus_Finish);
                }
                else
                {
                    bookInfo.Add(CollectionFieldName.Novel_Status, BookStatus.BookStatus_Update);
                }

                var contentLenText = infoBar.SelectSingleNode("//ul[@class='book_list']/li[2]")?.InnerText;
                if (!string.IsNullOrEmpty(contentLenText))
                {
                    // Stays 0 when the text contains no number.
                    int contentLen = 0;

                    // Decimal number first, plain integer second. The dot is escaped so it
                    // only matches a literal decimal point.
                    Match numberMatch = Regex.Match(contentLenText, @"(\d+\.\d+)|(\d+)");
                    double number;
                    // Invariant culture: the page always uses '.' as the decimal separator,
                    // whatever the culture of the machine running the collector.
                    if (numberMatch.Success &&
                        double.TryParse(numberMatch.Value,
                                        System.Globalization.NumberStyles.Float,
                                        System.Globalization.CultureInfo.InvariantCulture,
                                        out number))
                    {
                        double multiplier = 1;
                        if (contentLenText.Contains("万"))
                        {
                            multiplier = 10000;
                        }
                        else if (contentLenText.Contains("千"))
                        {
                            multiplier = 1000;
                        }
                        // Round instead of truncating so binary floating-point error in the
                        // product cannot drop one unit from the result.
                        contentLen = (int)Math.Round(number * multiplier);
                    }
                    bookInfo.Add(CollectionFieldName.Novel_ContentLen, contentLen);
                }
            }

            HtmlNode moreLink = documentNode.SelectSingleNode("//a[@class='more_link']");
            if (moreLink != null)
            {
                var url = moreLink.GetAttributeValue("href", "");
                bookInfo.Add(CollectionFieldName.Url, HTMLUtil.GetFullURL(InternalRealUrl, url));
            }

            HtmlNode descriptionNode = documentNode.SelectSingleNode("//div[@id='divDescription']");
            if (descriptionNode != null)
            {
                bookInfo.Add(CollectionFieldName.Novel_Intr, descriptionNode.InnerHtml);
            }

            // The numeric id is the digits between "/book/" and ".html" (dot escaped to match literally).
            Match idMatch = Regex.Match(InternalRealUrl, @"/book/(\d+)\.html");
            if (idMatch.Success)
            {
                bookInfo.Add(CollectionFieldName.Novel_UniqueFlag, idMatch.Groups[1].Value);
            }

            Hashtable returndata = new Hashtable();
            returndata.Add(CollectionFieldName.BookInfo, bookInfo);
            return returndata;
        }
예제 #5
0
        /// <summary>
        /// Parses the start page of a book and collects the title, cover image, tag,
        /// page URL and unique id that can be read from it.
        /// </summary>
        /// <param name="documentNode">Node of the parsed start page that the XPath queries run against.</param>
        /// <returns>
        /// A table with a single <c>CollectionFieldName.BookInfo</c> entry whose value is the
        /// table of extracted fields. Fields whose source node is missing are simply absent.
        /// </returns>
        private Hashtable parseStartupPage(HtmlNode documentNode)
        {
            Hashtable bookInfo = new Hashtable();

            HtmlNode titleNode = documentNode.SelectSingleNode("//meta[@property='og:title']");
            if (titleNode != null)
            {
                bookInfo.Add(CollectionFieldName.Novel_Name, titleNode.GetAttributeValue("content", ""));
            }

            HtmlNode coverNode = documentNode.SelectSingleNode("//div[@class='pic text-center']/img");
            if (coverNode != null)
            {
                var imgUrl = coverNode.GetAttributeValue("src", "");
                bookInfo.Add(CollectionFieldName.Novel_CoverImgs, HTMLUtil.GetFullURL(InternalRealUrl, imgUrl));
            }

            HtmlNode categoryNode = documentNode.SelectSingleNode("//meta[@property='og:novel:category']");
            if (categoryNode != null)
            {
                bookInfo.Add(CollectionFieldName.Novel_Tag, categoryNode.GetAttributeValue("content", ""));
            }

            // Status, word count and introduction are not collected by this parser.
            // The page's own address is stored as the book URL.
            bookInfo.Add(CollectionFieldName.Url, InternalRealUrl);

            // The numeric id is the digits right before ".html". The dot is escaped so that
            // only a literal '.' is accepted instead of any character.
            Match idMatch = Regex.Match(InternalRealUrl, @"/(\d+)\.html");
            if (idMatch.Success)
            {
                bookInfo.Add(CollectionFieldName.Novel_UniqueFlag, idMatch.Groups[1].Value);
            }

            Hashtable returndata = new Hashtable();
            returndata.Add(CollectionFieldName.BookInfo, bookInfo);
            return returndata;
        }
예제 #6
0
 /// <summary>
 /// Gets the complete URL for a relative one by delegating to
 /// <c>HTMLUtil.GetFullURL</c> with the class's <c>baseUrl</c> member as the base.
 /// </summary>
 /// <param name="relativeURL">URL to resolve against <c>baseUrl</c>.</param>
 /// <returns>The value returned by <c>HTMLUtil.GetFullURL(baseUrl, relativeURL)</c>.</returns>
 protected string GetFullURL(string relativeURL)
 {
     string fullUrl = HTMLUtil.GetFullURL(baseUrl, relativeURL);

     return fullUrl;
 }