/// <summary>
/// Downloads the listing page at <paramref name="Url"/>, reads the <c>href</c> of every
/// anchor matched by <c>//p[@class='treeTitle']/a</c>, and hands each link to
/// <c>GetNeedData</c> together with <paramref name="vdList"/>.
/// </summary>
/// <param name="vdList">List passed through to <c>GetNeedData</c> for every link found.</param>
/// <param name="Url">Address of the listing page to download.</param>
/// <remarks>
/// Exceptions are written to the console and not rethrown. The try block wraps the whole
/// loop, so links that follow a failing one are not processed.
/// </remarks>
public static void ParseUrl(List<VideoData> vdList, string Url)
{
    try
    {
        string listingHtml = Http.Downloader.Download(Url);
        HtmlDocument listingDoc = new HtmlDocument();
        listingDoc.LoadHtml(listingHtml);

        List<string> hrefs = XpathUtil.GetAttributes(listingDoc.DocumentNode, "//p[@class='treeTitle']/a", "href");
        foreach (string href in hrefs)
        {
            // Links that do not already carry the site prefix get the host prepended.
            string absolute = href.Contains("http://bbs1.people.com")
                ? href
                : "http://bbs1.people.com.cn" + href;

            GetNeedData(new Uri(absolute), vdList);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Downloads the page at <paramref name="uri"/>, extracts title, author, time and content
/// with XPath queries, and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
/// </summary>
/// <param name="uri">Address of the post page; its absolute form is also stored as the record's Url.</param>
/// <param name="vdList">List that receives the new record.</param>
/// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
private static List<VideoData> GetNeedData(Uri uri, List<VideoData> vdList)
{
    VideoData post = new VideoData();

    string pageHtml = Http.Downloader.Download(uri.AbsoluteUri);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageHtml);
    var root = doc.DocumentNode;

    post.Url = uri.AbsoluteUri;
    post.Title = XpathUtil.GetText(root, "//div[@class='posts-title']");
    post.Author = XpathUtil.GetText(root, "//div[@class='posts-posted']/span[1]/a");

    // The text nodes around the timestamp carry two fixed labels; remove both.
    string postedText = XpathUtil.GetText(root, "//div[@class='posts-posted']/text()");
    post.Time = postedText
        .Replace(" 于 ", "")
        .Replace(" 发布在 ", "");

    // Drop line breaks, tabs and the space character given in the last literal.
    string body = XpathUtil.GetText(root, "//div[@class='posts-cont']");
    post.Content = body
        .Replace("\r", "")
        .Replace("\n", "")
        .Replace("\t", "")
        .Replace(" ", "");

    post.Source = "凯迪社区";

    vdList.Add(post);
    return vdList;
}
/// <summary>
/// Downloads the page at <paramref name="uri"/>, extracts title, author, time and content
/// with XPath queries, and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
/// </summary>
/// <param name="uri">Address of the post page; its absolute form is also stored as the record's Url.</param>
/// <param name="vdList">List that receives the new record.</param>
/// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
private static List<VideoData> GetNeedData(Uri uri, List<VideoData> vdList)
{
    VideoData post = new VideoData();

    string pageHtml = Http.Downloader.Download(uri.AbsoluteUri);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageHtml);
    var root = doc.DocumentNode;

    post.Url = uri.AbsoluteUri;

    // The title has the space character in the literal removed.
    string rawTitle = XpathUtil.GetText(root, "//h1[@class='c333 subTitle']");
    post.Title = rawTitle.Replace(" ", "");

    post.Author = XpathUtil.GetText(root, "//div[@class='r-landlordMsg p20 clearfix']/div[2]/a");
    post.Time = XpathUtil.GetText(root, "//span[@class='c999 mr15']");

    // Only line breaks are removed from the body text.
    string body = XpathUtil.GetText(root, "//div[@class='article-cont p20 c666']");
    post.Content = body
        .Replace("\r", "")
        .Replace("\n", "");

    post.Source = "猫扑社区";

    vdList.Add(post);
    return vdList;
}
/// <summary>
/// Downloads the page at <paramref name="uri"/>, extracts title, author, time and content
/// with XPath queries, and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
/// </summary>
/// <param name="uri">Address of the post page; its absolute form is also stored as the record's Url.</param>
/// <param name="vdList">List that receives the new record.</param>
/// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
private static List<VideoData> GetNeedData(Uri uri, List<VideoData> vdList)
{
    VideoData post = new VideoData();

    string pageHtml = Http.Downloader.Download(uri.AbsoluteUri);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageHtml);
    var root = doc.DocumentNode;

    post.Url = uri.AbsoluteUri;
    post.Title = XpathUtil.GetText(root, "//h1[@class='atl-title']/span/span");
    post.Author = XpathUtil.GetText(root, "//div[@class='atl-info']/span/a");

    // The second info span carries a fixed label in front of the timestamp; remove it.
    string timeText = XpathUtil.GetText(root, "//div[@class='atl-info']/span[2]");
    post.Time = timeText.Replace("时间:", "");

    // Drop line breaks and tabs from the body text.
    string body = XpathUtil.GetText(root, "//div[@class='bbs-content clearfix']");
    post.Content = body
        .Replace("\r", "")
        .Replace("\n", "")
        .Replace("\t", "");

    post.Source = "天涯社区";

    vdList.Add(post);
    return vdList;
}
/// <summary>
/// Downloads the page at <paramref name="uri"/>, extracts title, author, time and content
/// with XPath queries, and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
/// </summary>
/// <param name="uri">Address of the post page; its absolute form is also stored as the record's Url.</param>
/// <param name="vdList">List that receives the new record.</param>
/// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
private static List<VideoData> GetNeedData(Uri uri, List<VideoData> vdList)
{
    VideoData post = new VideoData();

    string pageHtml = Http.Downloader.Download(uri.AbsoluteUri);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageHtml);
    var root = doc.DocumentNode;

    post.Url = uri.AbsoluteUri;
    post.Title = XpathUtil.GetText(root, "//td[@class='biaoti']");
    post.Author = XpathUtil.GetText(root, "//span[@class='zuozhe01']/a");

    // The author cell holds more than the timestamp, so a yyyy-MM-dd HH:mm
    // shaped pattern is applied to its text.
    string authorCellText = XpathUtil.GetText(root, "//td[@class='zuozhe']");
    post.Time = RegexUtil.MatchText(authorCellText, "\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

    // Drop the space character in the first literal and all line breaks.
    string body = XpathUtil.GetText(root, "//td[@width='941']/p");
    post.Content = body
        .Replace(" ", "")
        .Replace("\r", "")
        .Replace("\n", "");

    post.Source = "发展论坛";

    vdList.Add(post);
    return vdList;
}
/// <summary>
/// Downloads the page at <paramref name="uri"/>, extracts content, title, author and time,
/// and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
/// </summary>
/// <param name="uri">Address of the post page; its absolute form is also stored as the record's Url.</param>
/// <param name="vdList">List that receives the new record.</param>
/// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
/// <remarks>
/// When no content is found the title is used as content. When the first timestamp
/// pattern finds nothing in the raw HTML, a second pattern that captures the named group
/// <c>time</c> from a <c>"date":"..."</c> fragment is tried.
/// </remarks>
private static List<VideoData> GetNeedData(Uri uri, List<VideoData> vdList)
{
    VideoData vd = new VideoData();

    string html = Downloader.Download(uri.AbsoluteUri);
    HtmlDocument hn = new HtmlDocument();
    hn.LoadHtml(html);
    var root = hn.DocumentNode;

    vd.Url = uri.AbsoluteUri;

    // Two container class spellings are queried; either may hold the post body.
    vd.Content = XpathUtil.GetText(root, "//div[@class='l_post j_l_post l_post_bright noborder ']//div[contains(@id,'post_content_')]|//div[@class='l_post l_post_bright j_l_post clearfix ']//div[contains(@id,'post_content_')]");
    vd.Title = XpathUtil.GetText(root, "//h1|//div[@id='j_core_title_wrap']//h3");
    vd.Author = RegexUtil.RemoveNoise(XpathUtil.GetText(root, "//li[@class='d_name']"), "\\s");
    vd.Time = RegexUtil.MatchText(html, "\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");
    vd.Source = "百度贴吧";

    if (string.IsNullOrEmpty(vd.Content))
    {
        vd.Content = vd.Title;
    }
    // NOTE(review): the original else-branch called vd.Content.Replace("", ""). An empty
    // oldValue makes String.Replace throw ArgumentException, so the call was removed.
    // If that literal once held a character lost in transit (e.g. an entity), restore
    // the intended replacement here.

    if (string.IsNullOrEmpty(vd.Time))
    {
        // NOTE(review): the second alternative originally contained bare double quotes,
        // which terminated the C# literal early and did not compile. They look like
        // decoded &quot; entities, so both a literal quote and &quot; are accepted —
        // confirm against real page markup.
        const string quote = "(?:\"|&quot;)";
        string datePattern =
            "\"date\":\"(?<time>\\d+-\\d+-\\d+ \\d+:\\d+)[\\s\\S]+?floor\":\\d+,"
            + "|" + quote + "date" + quote + ":" + quote
            + "(?<time>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2})" + quote + ",";

        vd.Time = RegexUtil.MatchText(html, datePattern, "time");
    }

    vdList.Add(vd);
    return vdList;
}
/// <summary>
/// Downloads the page at <paramref name="uri"/>, fetches the post body from the address
/// found in the page's <c>content_path</c> attribute, extracts title, author and time,
/// and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
/// </summary>
/// <param name="uri">Address of the post page; its absolute form is also stored as the record's Url.</param>
/// <param name="vdList">List that receives the new record.</param>
/// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
private static List<VideoData> GetNeedData(Uri uri, List<VideoData> vdList)
{
    VideoData post = new VideoData();

    string pageHtml = Http.Downloader.Download(uri.AbsoluteUri);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(pageHtml);
    var root = doc.DocumentNode;

    post.Url = uri.AbsoluteUri;

    // The body is not read from this page: the article div names a second
    // address in its content_path attribute, which is downloaded as UTF-8.
    string bodyAddress = XpathUtil.GetAttribute(root, "//div[@class='article scrollFlag']", "content_path");
    post.Content = Http.Downloader.Download(bodyAddress, Encoding.GetEncoding("UTF-8"));

    post.Title = XpathUtil.GetText(root, "//div[@class='navBar']/h2");
    post.Author = XpathUtil.GetText(root, "//div[@class='clearfix']/a|//div[@class='clearfix']/span[@class='float_l']");

    // Pull a yyyy-MM-dd HH:mm shaped timestamp out of the surrounding span text.
    string timeSpanText = XpathUtil.GetText(root, "//span[@class='float_l mT10']");
    post.Time = RegexUtil.MatchText(timeSpanText, "\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

    post.Source = "强国论坛";

    vdList.Add(post);
    return vdList;
}
/// <summary>
/// Downloads the listing page at <paramref name="Url"/>, reads the <c>href</c> of every
/// anchor matched by <c>//h3/a</c>, and hands each link to <c>GetNeedData</c> together
/// with <paramref name="vdList"/>.
/// </summary>
/// <param name="vdList">List passed through to <c>GetNeedData</c> for every link found.</param>
/// <param name="Url">Address of the listing page to download.</param>
/// <remarks>
/// Exceptions are written to the console and not rethrown. The try block wraps the whole
/// loop, so links that follow a failing one are not processed.
/// </remarks>
public static void ParseUrl(List<VideoData> vdList, string Url)
{
    try
    {
        string listingHtml = Http.Downloader.Download(Url);
        HtmlDocument listingDoc = new HtmlDocument();
        listingDoc.LoadHtml(listingHtml);

        List<string> hrefs = XpathUtil.GetAttributes(listingDoc.DocumentNode, "//h3/a", "href");
        foreach (string href in hrefs)
        {
            // Links are used exactly as found; no host is prepended here.
            GetNeedData(new Uri(href), vdList);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}