HTMLUtil.RemoveHtmlContent C# (CSharp) Code-Beispiele

Beispiel #1

0

Datei anzeigen

        /// <summary>
        /// Parses a detail page: extracts the cleaned body of the <c>div#content</c> node and,
        /// when present, the URL of the "下一节" (next section) link.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table that contains <c>CollectionFieldName.ExContent</c> when the content node exists and
        /// <c>CollectionFieldName.NextUrl</c> when a next-section link with a usable href is found.
        /// The table is empty when neither is present.
        /// </returns>
        private Hashtable parseDetailPage2(HtmlNode documentNode)
        {
            Hashtable result = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='content']");
            if (contentNode != null)
            {
                // NOTE(review): exact semantics of the HTMLUtil helpers are defined elsewhere; the
                // tag-name arguments are passed through unchanged.
                string html = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script");
                html = HTMLUtil.RemoveHtmlTag(html, "p", "img", "br");

                // Fragments are stripped strictly in this order. Several entries repeat: a later pass
                // can remove a sequence that only appears after an earlier removal (for example a
                // CR/LF pair that was separated by a tab), so the repetition is kept as-is.
                string[] fragments =
                {
                    "\r\n",
                    "\t",
                    "\r\n",
                    "\t",
                    "手机请访问：:feisuz",
                    "feisuz",
                    "feisuz",
                    "作者的话:",
                    "新书，求收藏求推荐",
                    "本书红薯网首发,请勿转载!"
                };
                foreach (string fragment in fragments)
                {
                    html = html.Replace(fragment, "");
                }

                result.Add(CollectionFieldName.ExContent, html);
            }

            // SelectNodes returns null when nothing matches, hence the explicit null check.
            var anchors = documentNode.SelectNodes("//*[@id='content']/div[@class='text']/a");
            HtmlNode nextLink = anchors == null
                ? null
                : anchors.FirstOrDefault(x => x.InnerText == "下一节");
            if (nextLink == null)
            {
                return result;
            }

            string href = nextLink.GetAttributeValue("href", "");
            bool isPlaceholder = string.IsNullOrEmpty(href) || href == "#";
            if (!isPlaceholder)
            {
                result.Add(CollectionFieldName.NextUrl, href);
            }

            return result;
        }

Beispiel #2

0

Datei anzeigen

        /// <summary>
        /// Parses a chapter detail page: extracts the cleaned content of the <c>div#content</c> node
        /// and derives length, intro, price, status and chapter type from it.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. It is empty when the content node is
        /// missing. Length, intro and price are only added when the tag-stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='content']");
            if (contentNode == null)
            {
                // Nothing to parse; callers receive an empty table.
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "center", "span");

            // Strip line breaks, tabs and the site's watermark strings (longest variant first).
            content = content.Replace("\r\n", "").Replace("\t", "")
                             .Replace("全本小说网欢迎您！WWW.YZNN.COM T1706231537", "")
                             .Replace("F606121", "")
                             .Replace("全本小说网欢迎您！WWW.YZNN.COM", "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Tag-free text without &nbsp; entities, used for the length and the intro.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // Intro is the first 40 characters followed by an ellipsis, or the whole text if shorter.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters, clamped to the range [5, 15].
                // NOTE(review): the price is based on the HTML content length, not on plainText — confirm intended.
                int price = (content.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #3

0

Datei anzeigen

Datei: web_qqkanshu.cs Projekt: change008/boruinoveltools

        /// <summary>
        /// Parses a chapter detail page: extracts the cleaned content of the <c>div#ccontent</c> node
        /// and derives length, intro, price, status and chapter type from it.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. It is empty when the content node is
        /// missing. Length, intro and price are only added when the tag-stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id=\"ccontent\"]");
            if (contentNode == null)
            {
                return returndata;
            }

            // Site banners that are stripped from both the HTML content and the plain text.
            string[] banners =
            {
                "領域文學首發地址httP://ｗｗｗ.ｌｉｎｇｙｕ.ｏｒｇ<br><br>",
                "領域文學首發地址ｗｗｗ.ｌｉｎｇｙｕ.ｏｒｇ<br><br>　　<br><br>",
                "请记住本书首发域名：http://www.lingyu.org&nbsp;&nbsp;领域文学手机版阅读网址： m.lingyu.org"
            };

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "a");
            content = content.Replace("\r\n", "").Replace("\t", "");
            foreach (string banner in banners)
            {
                content = content.Replace(banner, "");
            }

            // Remove chapter URLs of the source site. The dots are escaped so that only a literal
            // "www.lingyu.org/.../N/N/N.html" address matches (an unescaped '.' matches any character).
            string pattern = @"http://www\.lingyu\.org/\w+/\d+/\d+/\d+\.html";
            content = Regex.Replace(content, pattern, "");
            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Tag-free text without &nbsp; entities, used for the length, the intro and the price.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "");
            foreach (string banner in banners)
            {
                plainText = plainText.Replace(banner, "");
            }

            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // Intro is the first 40 characters followed by an ellipsis, or the whole text if shorter.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 3 per full 500 characters of plain text, capped at 15 (may be 0 for short chapters).
                int price = (plainText.Length / 500) * 3;
                if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #4

0

Datei anzeigen

        /// <summary>
        /// Parses a chapter detail page: extracts the cleaned content of the <c>div#content</c> node,
        /// derives length, intro, price, status and chapter type from it, and records the URL of the
        /// "下一节" (next section) link when one exists.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. Chapter fields are only present when the
        /// content node exists; <c>NextUrl</c> is only present when a usable next-section href is found.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='content']");
            if (contentNode != null)
            {
                string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script");
                content = HTMLUtil.RemoveHtmlTag(content, "p", "img", "br");
                content = content.Replace("\r\n", "").Replace("\t", "");
                returndata.Add(CollectionFieldName.Chap_Content, content);

                // Tag-free text without &nbsp; entities, used for the length and the intro.
                string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "");
                if (!string.IsNullOrEmpty(plainText))
                {
                    returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                    // Intro is the first 40 characters followed by an ellipsis, or the whole text if shorter.
                    string intro = plainText.Length > 40
                        ? plainText.Substring(0, 40) + "..."
                        : plainText;
                    returndata.Add(CollectionFieldName.Chap_Intro, intro);

                    // 5 per full 1000 characters, clamped to the range [5, 15].
                    // NOTE(review): the price is based on the HTML content length, not on plainText — confirm intended.
                    int price = (content.Length / 1000) * 5;
                    if (price == 0)
                    {
                        price = 5;
                    }
                    else if (price > 15)
                    {
                        price = 15;
                    }
                    returndata.Add(CollectionFieldName.Chap_Pirce, price);
                }

                returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
                returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            }

            // The next-section link is looked up independently of whether content was found.
            HtmlNode nextLink = documentNode.SelectNodes("//*[@id='content']/div[@class='text']/a")
                                            ?.FirstOrDefault(x => x.InnerText == "下一节");
            if (nextLink != null)
            {
                string url = nextLink.GetAttributeValue("href", "");

                // Skip empty hrefs and the "#" placeholder.
                if (!string.IsNullOrEmpty(url) && url != "#")
                {
                    returndata.Add(CollectionFieldName.NextUrl, url);
                }
            }

            return returndata;
        }

Beispiel #5

0

Datei anzeigen

Datei: web_shumanwu.cs Projekt: change008/boruinoveltools

        /// <summary>
        /// Parses a chapter detail page: extracts the cleaned content of the
        /// <c>div.panel-body.content-body.content-ext</c> node and derives length, intro, price,
        /// status and chapter type from it.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. It is empty when the content node is
        /// missing. Length, intro and price are only added when the tag-stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@class=\"panel-body content-body content-ext\"]");
            if (contentNode == null)
            {
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script");

            // Strip line breaks, tabs and known promotional phrases.
            content = content.Replace("\r\n", "").Replace("\t", "").Replace("手机请访问：:feisuz", "")
                             .Replace("feisuz", "").Replace("作者的话:", "").Replace("新书，求收藏求推荐", "").Replace("本书红薯网首发,请勿转载!", "");
            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Tag-free text without &nbsp; entities and promotional phrases, used for length and intro.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "").Replace("feisuz", "")
                                       .Replace("作者的话:", "").Replace("新书，求收藏求推荐", "").Replace("本书红薯网首发,请勿转载!", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // Intro is the first 40 characters followed by an ellipsis, or the whole text if shorter.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters, clamped to the range [5, 15].
                // NOTE(review): the price is based on the HTML content length, not on plainText — confirm intended.
                int price = (content.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #6

0

Datei anzeigen

        /// <summary>
        /// Parses a chapter detail page: extracts the cleaned content of the
        /// <c>div.readout &gt; div.shuneirong</c> node and derives length, intro, status and chapter
        /// type from it. No price is computed by this parser.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. It is empty when the content node is
        /// missing. Length and intro are only added when the tag-stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@class='readout']/div[@class='shuneirong']");
            if (contentNode == null)
            {
                // Nothing to parse; callers receive an empty table.
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "center", "span");

            // Strip line breaks, tabs and the site's promotional strings (longer variants first).
            content = content.Replace("\r\n", "").Replace("\t", "")
                             .Replace("免费小说", "")
                             .Replace("biquge5200.com", "")
                             .Replace("biquge5200", "")
                             .Replace("笔趣阁", "")
                             .Replace("http://", "")
                             .Replace("本书红薯网首发,请勿转载!", "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Tag-free text without &nbsp; entities, used for the length and the intro.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // Intro is the first 150 characters followed by an ellipsis, or the whole text if shorter.
                string intro = plainText.Length > 150
                    ? plainText.Substring(0, 150) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #7

0

Datei anzeigen

        /// <summary>
        /// Parses a paid-chapter detail page: extracts the cleaned content of the first
        /// <c>div</c> whose class is <c>content</c> and derives length, price, status and type from it.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. It is empty when the content node is
        /// missing. Length and price are only added when the node's inner text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage2(HtmlNode documentNode)
        {
            Hashtable result = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@class='content']");
            if (contentNode == null)
            {
                return result;
            }

            string html = HTMLUtil.RemoveHtmlTag(contentNode.InnerHtml, "p", "img", "br");
            html = HTMLUtil.RemoveHtmlContent(html, "style", "script");
            result.Add(CollectionFieldName.Chap_Content, html);

            // The node's raw inner text only gates the length/price fields; both values themselves
            // are computed from the cleaned HTML string.
            string rawText = contentNode.InnerText;
            if (!string.IsNullOrEmpty(rawText))
            {
                // 5 per full 1000 characters, clamped to the range [5, 15].
                int price = (html.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }

                result.Add(CollectionFieldName.Chap_ContentLen, html.Length);
                result.Add(CollectionFieldName.Chap_Pirce, price);
            }

            result.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            result.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Pay);
            return result;
        }

Beispiel #8

0

Datei anzeigen

        /// <summary>
        /// Parses a free-chapter detail page: extracts the cleaned content of the first
        /// <c>div</c> whose class is <c>cDetail</c> and derives length, intro, status and type from it.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. It is empty when the content node is
        /// missing. Length and intro are only added when the node's inner text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable result = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@class='cDetail']");
            if (contentNode == null)
            {
                return result;
            }

            string html = HTMLUtil.RemoveHtmlTag(contentNode.InnerHtml, "p", "img", "br");
            html = HTMLUtil.RemoveHtmlContent(html, "style", "script");
            result.Add(CollectionFieldName.Chap_Content, html);

            // Length and intro come from the node's raw inner text, not from the cleaned HTML.
            string rawText = contentNode.InnerText;
            if (!string.IsNullOrEmpty(rawText))
            {
                result.Add(CollectionFieldName.Chap_ContentLen, rawText.Length);

                // Intro is the first 40 characters followed by an ellipsis, or the whole text if shorter.
                string intro = rawText;
                if (rawText.Length > 40)
                {
                    intro = rawText.Substring(0, 40) + "...";
                }
                result.Add(CollectionFieldName.Chap_Intro, intro);
            }

            result.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            result.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return result;
        }

Beispiel #9

0

Datei anzeigen

Datei: web_wanben.cs Projekt: change008/noveltools

        /// <summary>
        /// Parses a chapter detail page: extracts the content of the <c>div.articleCon</c> node,
        /// lower-cases it, strips known advertising fragments and domain-like tokens, and derives
        /// length, intro, price, status and chapter type from the result.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. It is empty when the content node is
        /// missing. Length, intro and price are only added when the tag-stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@class='articleCon']");
            if (contentNode == null)
            {
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "dt", "a");

            // Invariant lower-casing keeps the ASCII fragments below matchable under every thread culture.
            content = content.ToLowerInvariant();
            content = content.Replace("\r\n", "").Replace("\t", "");

            // Advertising fragments, removed in order. Longer variants must precede the shorter
            // fragments they contain; "WWw.ZiyoUgE.com" therefore sits ahead of "ziyouge.com".
            string[] adFragments =
            {
                "本站访问地址http://www.ziyouge.com 任意搜索引擎内输入:紫幽阁 即可访问!",
                "http://www.ziyouge.com",
                "WWw.ZiyoUgE.com",
                "紫幽阁",
                "wanben.me",
                "ziyouge.com",
                "ziyouge",
                "http://",
                "http",
                "紫Ｙou阁 ＷwＷ.ZiyouＧＥ.com",
                "品书网",
                "www.vodtw.com",
                "本书来自",
                "/html/book/19/19092/"
            };
            foreach (string fragment in adFragments)
            {
                // The content is already lower-case, so each fragment is lower-cased the same way;
                // otherwise the mixed-case entries could never match.
                content = content.Replace(fragment.ToLowerInvariant(), "");
            }

            // Regex removal of domain-like tokens.
            // NOTE(review): the leading (?=.{3,255}$) lookahead only succeeds within 255 characters of
            // the end of the string, so domains earlier in long content look like they are left in
            // place. The pattern is kept unchanged — confirm the intent before relaxing it.
            string pattern = @"(?=.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+";
            content = Regex.Replace(content, pattern, "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Tag-free text without &nbsp; entities and promotional phrases, used for length and intro.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "").Replace("feisuz", "")
                                       .Replace("作者的话:", "").Replace("新书，求收藏求推荐", "").Replace("本书红薯网首发,请勿转载!", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // Intro is the first 40 characters followed by an ellipsis, or the whole text if shorter.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters, clamped to the range [5, 15].
                // NOTE(review): the price is based on the HTML content length, not on plainText — confirm intended.
                int price = (content.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #10

0

Datei anzeigen

Datei: web_23us.cs Projekt: change008/noveltools

        /// <summary>
        /// Parses a chapter detail page: extracts the content of the <c>div#content</c> node,
        /// lower-cases it, strips known advertising fragments, domain-like tokens and escaped markup,
        /// and derives length, intro, price, status and chapter type from the result.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. It is empty when the content node is
        /// missing. Length, intro and price are only added when the tag-stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='content']");
            if (contentNode == null)
            {
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "dt", "a");

            // Invariant lower-casing keeps the ASCII fragments below matchable under every thread culture.
            content = content.ToLowerInvariant();
            content = content.Replace("\r\n", "").Replace("\t", "");

            // Advertising fragments, removed in order. Longer variants must precede the shorter
            // fragments they contain: "WWw.ZiyoUgE.com" sits ahead of "ziyouge.com" and "appxsyd"
            // ahead of "app", otherwise the longer entries could never match.
            string[] adFragments =
            {
                "本站访问地址http://www.ziyouge.com 任意搜索引擎内输入:紫幽阁 即可访问!",
                "http://www.ziyouge.com",
                "WWw.ZiyoUgE.com",
                "紫幽阁",
                "wanben.me",
                "ziyouge.com",
                "ziyouge",
                "http://",
                "http",
                "紫Ｙou阁 ＷwＷ.ZiyouＧＥ.com",
                "品书网",
                "www.vodtw.com",
                "本书来自",
                "/html/book/19/19092/",
                "大家想继续看我的书，可以加我微信gdy3208新书出了，我会第一时间发动态通知大家！",
                "本站重要通知:请使用本站的免费小说app,无广告、破防盗版、更新快,会员同步书架,请关注微信公众号 appxsyd (按住三秒复制) 下载免费阅读器!!",
                "本站重要通知: 请使用本站的免费小说app,无广告、破防盗版、更新快,会员同步书架,请关注微信公众号 gegegengxin (按住三秒复制)下载免费阅读器!!",
                "本站重要通知:",
                "请使用本站的免费小说",
                "appxsyd",
                "app",
                "无广告、破防盗版、更新快,会员同步书架",
                "请关注微信公众号",
                "gegegengxin",
                "(按住三秒复制)",
                "(按住三秒复制)",
                "下载免费阅读器"
            };
            foreach (string fragment in adFragments)
            {
                // The content is already lower-case, so each fragment is lower-cased the same way;
                // otherwise the mixed-case entries could never match.
                content = content.Replace(fragment.ToLowerInvariant(), "");
            }

            // Regex removal of domain-like tokens.
            // NOTE(review): the leading (?=.{3,255}$) lookahead only succeeds within 255 characters of
            // the end of the string, so domains earlier in long content look like they are left in
            // place. The pattern is kept unchanged — confirm the intent before relaxing it.
            string pattern = @"(?=.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+";
            content = Regex.Replace(content, pattern, "");

            // Remove HTML-escaped markup.
            // NOTE(review): ".+" is greedy, so everything between the first "&lt;" and the last
            // "&gt;" on a line is dropped — confirm that this is intended.
            string pattern1 = @"&lt;.+&gt;";
            content = Regex.Replace(content, pattern1, "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Tag-free text without &nbsp; entities and promotional phrases, used for length and intro.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "").Replace("feisuz", "")
                                       .Replace("作者的话:", "").Replace("新书，求收藏求推荐", "").Replace("本书红薯网首发,请勿转载!", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // Intro is the first 40 characters followed by an ellipsis, or the whole text if shorter.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters, clamped to the range [5, 15].
                // NOTE(review): the price is based on the HTML content length, not on plainText — confirm intended.
                int price = (content.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #11

0

Datei anzeigen

Datei: web_shubao520.cs Projekt: change008/noveltools

        /// <summary>
        /// Parses a chapter detail page: extracts the content of the <c>div#booktext</c> node,
        /// lower-cases it, strips the site's banners, domain-like tokens and escaped markup, and
        /// derives length, intro, price, status and chapter type from the result.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> members. It is empty when the content node is
        /// missing. Length, intro and price are only added when the tag-stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='booktext']");
            if (contentNode == null)
            {
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "dt", "a");

            // Invariant lower-casing keeps the ASCII fragments below matchable under every thread culture.
            content = content.ToLowerInvariant();
            content = content.Replace("\r\n", "").Replace("\t", "");

            // Banner variants that contain &nbsp; entities.
            content = content.Replace("【快速查找本站请百度搜索:&nbsp;书包520】", "")
                             .Replace("【本站域名更改为“&nbsp;www.shubao520.net&nbsp;”&nbsp;,或者在百度搜索:&nbsp;书包520】", "");

            // Banner variants without entities, followed by the individual keywords they are built from.
            content = content.Replace("【本站域名更改为“www.shubao520.net”,或者在百度搜索:书包520】", "")
                             .Replace("【快速查找本站请百度搜索:书包520】", "")
                             .Replace("书包520", "")
                             .Replace("www.shubao520.net", "")
                             .Replace("百度", "")
                             .Replace("搜索", "")
                             .Replace("域名", "");

            // Regex removal of domain-like tokens.
            // NOTE(review): the leading (?=.{3,255}$) lookahead only succeeds within 255 characters of
            // the end of the string, so domains earlier in long content look like they are left in
            // place. The pattern is kept unchanged — confirm the intent before relaxing it.
            string pattern = @"(?=.{3,255}$)[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+";
            content = Regex.Replace(content, pattern, "");

            // Remove HTML-escaped markup.
            // NOTE(review): ".+" is greedy, so everything between the first "&lt;" and the last
            // "&gt;" on a line is dropped — confirm that this is intended.
            string pattern1 = @"&lt;.+&gt;";
            content = Regex.Replace(content, pattern1, "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Tag-free text without &nbsp; entities and promotional phrases, used for length, intro and price.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "").Replace("feisuz", "")
                                       .Replace("作者的话:", "").Replace("新书，求收藏求推荐", "").Replace("本书红薯网首发,请勿转载!", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // Intro is the first 40 characters followed by an ellipsis, or the whole text if shorter.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters of plain text, clamped to the range [5, 15].
                int price = (plainText.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #12

0

Datei anzeigen

        /// <summary>
        /// Parses a chapter detail page: extracts the chapter HTML from the
        /// <c>div</c> with class <c>messagecontent</c>, strips "www." site references
        /// from it and derives the chapter metadata fields.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> constants. It is empty when the
        /// content node is not found. The length, intro and price entries are only
        /// present when the stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@class='messagecontent']");
            if (contentNode == null)
            {
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script");
            content = content.Replace("\r\n", "").Replace("\t", "");

            // Strip "www.<host>.<tld>" references, most specific form first:
            // with a path ending in ".html", with any path, then the bare host.
            // The path is matched with the character class [\w/]+ instead of the former
            // "(\w+|/)+": that nested quantifier matches the same text but can backtrack
            // exponentially on a long path that is not followed by ".html".
            // NOTE(review): \w is Unicode-aware in .NET, so letters (including CJK text)
            // that directly follow a URL are consumed as part of the path.
            content = Regex.Replace(content, @"www\.[a-zA-Z0-9]+\.(?:com|cn|net|org)/[\w/]+\.html", "");
            content = Regex.Replace(content, @"www\.[a-zA-Z0-9]+\.(?:com|cn|net|org)/[\w/]+", "");
            content = Regex.Replace(content, @"www\.[a-zA-Z0-9]+\.(?:com|cn|net|org)/?", "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Text used for the length and the intro: passed through HTMLUtil.RemoveHtmlTag
            // (presumably strips the remaining tags), then "&nbsp;" entities are dropped.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // The intro is the first 40 characters, with an ellipsis when truncated.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters, clamped to the range 5..15.
                // NOTE(review): based on the stored HTML length, not the stripped text
                // length used for Chap_ContentLen - confirm this is intended.
                int price = (content.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #13

0

Datei anzeigen

        /// <summary>
        /// Parses a chapter detail page: extracts the chapter HTML from the
        /// <c>div</c> with id <c>content</c>, removes a fixed list of site
        /// advertisement strings and derives the chapter metadata fields.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> constants. It is empty when the
        /// content node is not found. The length, intro and price entries are only
        /// present when the stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id=\"content\"]");
            if (contentNode == null)
            {
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "a");

            // The replacements run strictly in this order; longer phrases come before
            // the shorter fragments they contain.
            content = content.Replace("\r\n", "")
                             .Replace("\t", "")
                             .Replace("手机请访问：:feisuz", "")
                             .Replace("feisuz", "")
                             .Replace("作者的话:", "")
                             .Replace("新书，求收藏求推荐", "")
                             .Replace("本书红薯网首发,请勿转载!", "")
                             .Replace("老铁!还在找\"美艳冥妻\"免费小说?", "")
                             .Replace("&nbsp;&nbsp;&nbsp;&nbsp;(www.yikanxiaoshuo.com = ", "")
                             // Applied mid-chain: trims surrounding whitespace and any ')'
                             // characters at the end of the text at this point.
                             .Trim().TrimEnd(')')
                             .Replace("<br>&nbsp;&nbsp;&nbsp;&nbsp;<br>&nbsp;&nbsp;&nbsp;&nbsp;百度直接搜索: \"易看小说\" 看免费小说,没毛病!<br>", "")
                             .Replace("老铁!还在找\"绝望游戏\"免费小说?", "")
                             .Replace("百度直接搜索: \"易看小说\" 看免费小说,没毛病!", "")
                             .Replace("\"易看小说\"", "")
                             .Replace("易看小说", "")
                             .Replace("免费小说", "")
                             .Replace("(更快免费阅读加微信：jxxs9966)", "")
                             .Replace("jxxs9966", "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Text used for the length and the intro: passed through HTMLUtil.RemoveHtmlTag
            // (presumably strips the remaining tags), then "&nbsp;" entities are dropped.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // The intro is the first 40 characters, with an ellipsis when truncated.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters, clamped to the range 5..15.
                // NOTE(review): based on the stored HTML length, not the stripped text
                // length used for Chap_ContentLen - confirm this is intended.
                int price = (content.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #14

0

Datei anzeigen

Datei: web_52bqg.cs Projekt: change008/boruinoveltools

        /// <summary>
        /// Parses a chapter detail page: extracts the chapter HTML from the
        /// <c>div</c> with id <c>content</c>, removes known site watermark strings and
        /// domain-like tokens, and derives the chapter metadata fields.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> constants. It is empty when the
        /// content node is not found. The length, intro and price entries are only
        /// present when the stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='content']");
            if (contentNode == null)
            {
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "dt");

            // The replacements run strictly in this order; longer phrases come before
            // the shorter fragments they contain.
            content = content.Replace("\r\n", "").Replace("\t", "")
                             .Replace("本站访问地址http://www.ziyouge.com 任意搜索引擎内输入:紫幽阁 即可访问!", "")
                             .Replace("http://www.ziyouge.com", "")
                             .Replace("紫幽阁", "")
                             .Replace("www.ziyouge.com", "")
                             .Replace("ziyouge.com", "")
                             .Replace("ziyouge", "")
                             .Replace("http://", "")
                             .Replace("http", "")
                             .Replace("紫Ｙou阁 ＷwＷ.ZiyouＧＥ.com", "")
                             .Replace("WWw.ZiyoUgE.com", "")
                             .Replace("一秒记住【笔♂趣→阁 WWW.52BQG.COM】，精彩小说无弹窗免费阅读！", "")
                             .Replace("WWW.52BQG.COM", "")
                             .Replace("一秒记住", "")
                             .Replace("精彩小说无弹窗免费阅读！", "")
                             .Replace("给大家推荐一个公众号，搜索“春话秋阅”或者“yigewuquderen”(一个无趣的人的拼音），是我和基友做的，里面有基友写的段子、短篇小文章、乐评之类的，以及我写的关于本书的一些事情、番外和一些有意思的故事，还可能有提前发新章节，当然不会很多。可能会在公众号开本新书，不长，完全免费，可能会开。大家可以关注一下，到时候加更。", "");

            // Remove remaining domain-like tokens (dot-separated alphanumeric labels).
            // The former leading "(?=.{3,255}$)" lookahead came from a whole-string
            // validator: inside Replace it only allowed matches that start within the
            // last 255 characters of the content, so domains earlier in the chapter
            // were left in place. It is dropped so the whole content is scanned.
            string domainPattern = @"[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+";
            content = Regex.Replace(content, domainPattern, "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Text used for the length and the intro: passed through HTMLUtil.RemoveHtmlTag
            // (presumably strips the remaining tags), then "&nbsp;" and a few fixed
            // phrases are dropped.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "").Replace("feisuz", "")
                               .Replace("作者的话:", "").Replace("新书，求收藏求推荐", "").Replace("本书红薯网首发,请勿转载!", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // The intro is the first 40 characters, with an ellipsis when truncated.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters, clamped to the range 5..15.
                // NOTE(review): based on the stored HTML length, not the stripped text
                // length used for Chap_ContentLen - confirm this is intended.
                int price = (content.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #15

0

Datei anzeigen

        /// <summary>
        /// Parses a chapter detail page: extracts the chapter HTML from the
        /// <c>div</c> with id <c>content</c>, removes a fixed list of site
        /// advertisement strings and derives the chapter metadata fields.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> constants. It is empty when the
        /// content node is not found. The length, intro and price entries are only
        /// present when the stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='content']");
            if (contentNode == null)
            {
                // Nothing to collect. The former else-branch here only declared two
                // unused integers (a debugging leftover) and has been removed.
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "center", "span");

            content = content.Replace("\r\n", "").Replace("\t", "")
                             .Replace("一秒记住【爱看小说网www.akxs6.com】，为您提供精彩小说阅读。", "")
                             .Replace("PS：作者群：561371881，新书发布，求收藏，求打赏！您的支持，是我写作的动力！", "")
                             .Replace("另：上架十万字更新！", "")
                             .Replace("手机用户请浏览m.akxs6.com阅读，更优质的阅读体验来自爱看小说网。", "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Text used for the length and the intro: passed through HTMLUtil.RemoveHtmlTag
            // (presumably strips the remaining tags), then "&nbsp;" entities are dropped.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // The intro is the first 40 characters, with an ellipsis when truncated.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters, clamped to the range 5..15.
                // NOTE(review): based on the stored HTML length, not the stripped text
                // length used for Chap_ContentLen - confirm this is intended.
                int price = (content.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

Beispiel #16

0

Datei anzeigen

        /// <summary>
        /// Parses a chapter detail page: extracts the chapter HTML from the
        /// <c>div</c> with id <c>content</c>, removes known site promotion strings and
        /// domain-like tokens, and derives the chapter metadata fields.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed detail page.</param>
        /// <returns>
        /// A table keyed by <c>CollectionFieldName</c> constants. It is empty when the
        /// content node is not found. The length, intro and price entries are only
        /// present when the stripped text is non-empty.
        /// </returns>
        private Hashtable parseDetailPage1(HtmlNode documentNode)
        {
            Hashtable returndata = new Hashtable();

            HtmlNode contentNode = documentNode.SelectSingleNode("//div[@id='content']");
            if (contentNode == null)
            {
                // Nothing to collect. The former else-branch here only declared two
                // unused integers (a debugging leftover) and has been removed.
                return returndata;
            }

            string content = HTMLUtil.RemoveHtmlContent(contentNode.InnerHtml, "div", "style", "script", "center", "span");

            content = content.Replace("\r\n", "").Replace("\t", "")
                             .Replace("最快更新无错小说阅读，请访问www.feizw.com", "")
                             .Replace("手机请访问：http://m.feizw.com", "");

            // Remove remaining domain-like tokens (dot-separated alphanumeric labels).
            // The former leading "(?=.{3,255}$)" lookahead came from a whole-string
            // validator: inside Replace it only allowed matches that start within the
            // last 255 characters of the content, so domains earlier in the chapter
            // were left in place. It is dropped so the whole content is scanned.
            string domainPattern = @"[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+";
            content = Regex.Replace(content, domainPattern, "");

            returndata.Add(CollectionFieldName.Chap_Content, content);

            // Text used for the length and the intro: passed through HTMLUtil.RemoveHtmlTag
            // (presumably strips the remaining tags), then "&nbsp;" entities are dropped.
            string plainText = HTMLUtil.RemoveHtmlTag(content).Replace("&nbsp;", "");
            if (!string.IsNullOrEmpty(plainText))
            {
                returndata.Add(CollectionFieldName.Chap_ContentLen, plainText.Length);

                // The intro is the first 40 characters, with an ellipsis when truncated.
                string intro = plainText.Length > 40
                    ? plainText.Substring(0, 40) + "..."
                    : plainText;
                returndata.Add(CollectionFieldName.Chap_Intro, intro);

                // 5 per full 1000 characters, clamped to the range 5..15.
                // NOTE(review): based on the stored HTML length, not the stripped text
                // length used for Chap_ContentLen - confirm this is intended.
                int price = (content.Length / 1000) * 5;
                if (price == 0)
                {
                    price = 5;
                }
                else if (price > 15)
                {
                    price = 15;
                }
                returndata.Add(CollectionFieldName.Chap_Pirce, price);
            }

            returndata.Add(CollectionFieldName.Chap_Status, ChapterStatus.ChapterStatus_OnLine);
            returndata.Add(CollectionFieldName.Chap_ChapterType, ChapterType.ChapterType_Free);
            return returndata;
        }

C# (CSharp) HTMLUtil.RemoveHtmlContent Beispiele