示例#1
0
        /// <summary>
        /// Rewrites the <c>src</c> values of the <c>img</c> elements in an HTML fragment so that
        /// addresses treated as relative become complete URLs by prepending <paramref name="prexUrl"/>.
        /// </summary>
        /// <param name="contents">HTML fragment whose image addresses should be completed.</param>
        /// <param name="prexUrl">Prefix prepended to every image address that is treated as relative.</param>
        /// <returns>
        /// The fragment with the rewritten addresses. When <paramref name="contents"/> is null or empty,
        /// or the image addresses cannot be extracted, the input is returned unchanged.
        /// </returns>
        public static string DealImgUrlPrex(string contents, string prexUrl)
        {
            if (string.IsNullOrEmpty(contents))
            {
                return contents;
            }

            var result = contents;

            try
            {
                var imgList = XpathHelper.GetAttrValueListByXPath(result, "//img", "src");
                if (imgList == null || imgList.Count == 0)
                {
                    return result;
                }

                // Each distinct address is rewritten only once. string.Replace already touches every
                // occurrence, so processing a repeated address again would prepend the prefix a second
                // time to the URLs produced by the first pass.
                var handled = new System.Collections.Generic.HashSet<string>();

                foreach (var img in imgList)
                {
                    // Skip missing values (string.Replace rejects an empty search string) and repeats.
                    if (string.IsNullOrEmpty(img) || !handled.Add(img))
                    {
                        continue;
                    }

                    // An address is considered absolute as soon as it contains "http".
                    // NOTE(review): this also skips relative paths that merely contain "http"
                    // (e.g. "/img/http-logo.png") - confirm whether a stricter check is wanted.
                    if (img.Contains("http"))
                    {
                        continue;
                    }

                    var newUrl = prexUrl + img.Replace("./", "/");

                    // Plain text replacement: every occurrence of the address in the fragment is
                    // rewritten, not only the one inside the src attribute.
                    result = result.Replace(img, newUrl);
                }
            }
            catch (Exception)
            {
                // Deliberately best-effort: if the addresses cannot be extracted, the content is
                // returned as it stands instead of failing the caller.
            }

            return result;
        }
示例#2
0
        /// <summary>
        /// Collects the <c>src</c> value of every <c>img</c> element found in the given content and
        /// wraps each one in a <see cref="DtoNewsMedia"/> entry.
        /// </summary>
        /// <param name="contents">Content to scan for image elements.</param>
        /// <returns>
        /// One entry per extracted address, in extraction order, with <c>Orders</c> set to the
        /// zero-based position of the address. Exceptions are swallowed, so the list holds whatever
        /// was collected up to that point and may be empty.
        /// </returns>
        public static List<DtoNewsMedia> GetImgList(string contents)
        {
            var mediaItems = new List<DtoNewsMedia>();

            try
            {
                // Pull the image addresses out of the content.
                var sources = XpathHelper.GetAttrValueListByXPath(contents, "//img", "src");
                if (sources == null || sources.Count <= 0)
                {
                    return mediaItems;
                }

                var position = 0;
                foreach (var source in sources)
                {
                    try
                    {
                        // The same address is used for the original, display and thumbnail URLs.
                        mediaItems.Add(new DtoNewsMedia
                        {
                            Description    = "",
                            IsShow         = 1,
                            NewsId         = 0,
                            Orders         = position,
                            PicOriginalUrl = source,
                            PicUrl         = source,
                            ThumbnailUrl   = source
                        });
                    }
                    catch (Exception)
                    {
                        // A failing entry is skipped; its position number is still consumed below.
                    }

                    position++;
                }
            }
            catch (Exception)
            {
                // Best-effort: fall through and return what has been collected so far.
            }

            return mediaItems;
        }
示例#3
0
        /// <summary>
        /// Fetches the page at <paramref name="newsUrl"/> through <c>HttpHelper.GetContent</c>
        /// (using the gb2312 encoding) and builds a <see cref="DtoNews"/> from it: article body,
        /// publication time, author and the first image of the body as logo.
        /// </summary>
        /// <param name="newsUrl">Address of the article page; also stored as <c>FromUrl</c>.</param>
        /// <returns>
        /// The populated news item, or <c>null</c> when any step throws (the error is logged).
        /// </returns>
        public DtoNews NewsGathering(string newsUrl)
        {
            try
            {
                var pageHtml = HttpHelper.GetContent(newsUrl, Encoding.GetEncoding("gb2312"));

                // Article body. Stripping of the trailing advertisement block and the leading
                // share bar was tried here previously and is currently not performed.
                var articleHtml = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//div[@id='Cnt-Main-Article-QQ']", "");
                articleHtml = articleHtml.Trim();

                // Publication time, still as text at this point.
                var publishedText = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//span[@class='article-time']", "");
                publishedText = StrHelper.FormatHtml(publishedText).Trim();

                // The source site name is fixed rather than read from the page.
                string sourceSiteName = "腾讯佛学";

                // The first image inside the article body doubles as the logo.
                var imageSources = XpathHelper.GetAttrValueListByXPath(articleHtml, "//img", "src");
                var logoAddress = (imageSources != null && imageSources.Count > 0) ? imageSources[0] : "";

                var authorText = XpathHelper.GetInnerHtmlByXPath(pageHtml, "//div[@id='C-Main-Article-QQ']/div[1]/div/div[1]/span[5]", "");
                authorText = StrHelper.FormatHtml(authorText).Trim();

                // Post-process the body only after the logo address has been taken from the raw markup.
                articleHtml = DealContent(articleHtml);

                // NOTE(review): the title is never extracted from the page, so it is always empty.
                return new DtoNews
                {
                    Contents        = articleHtml,
                    Title           = "",
                    PubTime         = StrHelper.ToDateTime(publishedText),
                    FromUrl         = newsUrl,
                    FromSiteName    = sourceSiteName,
                    Author          = authorText,
                    CreateTime      = DateTime.Now,
                    IsShow          = 1,
                    LogoOriginalUrl = logoAddress,
                    LogoUrl         = logoAddress
                };
            }
            catch (Exception ex)
            {
                Log.Error(string.Concat(newsUrl, " 错误:", ex.Message, ex.StackTrace));
                return null;
            }
        }