コード例 #1
0
        /// <summary>
        /// Downloads the listing page at <paramref name="Url"/>, collects the <c>href</c> of every
        /// <c>//p[@class='treeTitle']/a</c> anchor, and passes each resolved link to
        /// <c>GetNeedData</c> together with <paramref name="vdList"/>.
        /// </summary>
        /// <param name="vdList">List handed to <c>GetNeedData</c> for every link found.</param>
        /// <param name="Url">Address of the listing page to download.</param>
        /// <remarks>
        /// Exceptions are written to the console and never propagated. A failure while handling one
        /// link is reported and the remaining links are still processed; a failure while fetching
        /// or parsing the listing page itself ends the call.
        /// </remarks>
        public static void ParseUrl(List <VideoData> vdList, string Url)
        {
            try
            {
                // Host used to resolve hrefs that are not already absolute.
                Uri          baseUri = new Uri("http://bbs1.people.com.cn");
                string       html    = Http.Downloader.Download(Url);
                HtmlDocument hn      = new HtmlDocument();
                hn.LoadHtml(html);
                List <string> liststring = XpathUtil.GetAttributes(hn.DocumentNode, "//p[@class='treeTitle']/a", "href");
                foreach (var item in liststring)
                {
                    try
                    {
                        // Uri(baseUri, relative) keeps absolute hrefs untouched and resolves
                        // relative ones against the host, instead of blindly concatenating strings.
                        Uri uri = new Uri(baseUri, item);

                        GetNeedData(uri, vdList);
                    }
                    catch (Exception itemError)
                    {
                        // Best effort: one broken link must not stop the rest of the listing.
                        Console.WriteLine(itemError.ToString());
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
        }
コード例 #2
0
        /// <summary>
        /// Downloads the page at <paramref name="uri"/>, reads its title, author, time and content
        /// through XPath queries, and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
        /// </summary>
        /// <param name="uri">Page to download; its <c>AbsoluteUri</c> is also stored as the record's URL.</param>
        /// <param name="vdList">List that receives the new record.</param>
        /// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
        private static List <VideoData> GetNeedData(Uri uri, List <VideoData> vdList)
        {
            VideoData    record = new VideoData();
            string       markup = Http.Downloader.Download(uri.AbsoluteUri);
            HtmlDocument page   = new HtmlDocument();
            page.LoadHtml(markup);
            var root = page.DocumentNode;

            record.Url    = uri.AbsoluteUri;
            record.Title  = XpathUtil.GetText(root, "//div[@class='posts-title']");
            record.Author = XpathUtil.GetText(root, "//div[@class='posts-posted']/span[1]/a");

            // The text nodes around the timestamp carry fixed label fragments; strip them in order.
            string postedAt = XpathUtil.GetText(root, "//div[@class='posts-posted']/text()");
            postedAt    = postedAt.Replace(" 于  ", "");
            postedAt    = postedAt.Replace(" 发布在 ", "");
            record.Time = postedAt;

            // Remove line breaks, tabs and non-breaking-space entities from the body text.
            string body = XpathUtil.GetText(root, "//div[@class='posts-cont']");
            foreach (string noise in new[] { "\r", "\n", "\t", "&nbsp;" })
            {
                body = body.Replace(noise, "");
            }
            record.Content = body;

            record.Source = "凯迪社区";
            vdList.Add(record);
            return vdList;
        }
コード例 #3
0
ファイル: Mop.cs プロジェクト: AnonymousDotNet/MetaSearch
        /// <summary>
        /// Downloads the page at <paramref name="uri"/>, reads its title, author, time and content
        /// through XPath queries, and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
        /// </summary>
        /// <param name="uri">Page to download; its <c>AbsoluteUri</c> is also stored as the record's URL.</param>
        /// <param name="vdList">List that receives the new record.</param>
        /// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
        private static List <VideoData> GetNeedData(Uri uri, List <VideoData> vdList)
        {
            VideoData    record = new VideoData();
            string       markup = Http.Downloader.Download(uri.AbsoluteUri);
            HtmlDocument page   = new HtmlDocument();
            page.LoadHtml(markup);
            var root = page.DocumentNode;

            record.Url = uri.AbsoluteUri;

            // The heading may contain non-breaking-space entities; drop them.
            string heading = XpathUtil.GetText(root, "//h1[@class='c333 subTitle']");
            record.Title = heading.Replace("&nbsp;", "");

            record.Author = XpathUtil.GetText(root, "//div[@class='r-landlordMsg p20 clearfix']/div[2]/a");
            record.Time   = XpathUtil.GetText(root, "//span[@class='c999 mr15']");

            // Flatten the article body onto a single line.
            string body = XpathUtil.GetText(root, "//div[@class='article-cont p20 c666']");
            foreach (string lineBreak in new[] { "\r", "\n" })
            {
                body = body.Replace(lineBreak, "");
            }
            record.Content = body;

            record.Source = "猫扑社区";
            vdList.Add(record);
            return vdList;
        }
コード例 #4
0
ファイル: TianYa.cs プロジェクト: AnonymousDotNet/MetaSearch
        /// <summary>
        /// Downloads the page at <paramref name="uri"/>, reads its title, author, time and content
        /// through XPath queries, and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
        /// </summary>
        /// <param name="uri">Page to download; its <c>AbsoluteUri</c> is also stored as the record's URL.</param>
        /// <param name="vdList">List that receives the new record.</param>
        /// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
        private static List <VideoData> GetNeedData(Uri uri, List <VideoData> vdList)
        {
            VideoData    record = new VideoData();
            string       markup = Http.Downloader.Download(uri.AbsoluteUri);
            HtmlDocument page   = new HtmlDocument();
            page.LoadHtml(markup);
            var root = page.DocumentNode;

            record.Url    = uri.AbsoluteUri;
            record.Title  = XpathUtil.GetText(root, "//h1[@class='atl-title']/span/span");
            record.Author = XpathUtil.GetText(root, "//div[@class='atl-info']/span/a");

            // The second info span prefixes the timestamp with a fixed label; remove the label.
            string postedAt = XpathUtil.GetText(root, "//div[@class='atl-info']/span[2]");
            record.Time = postedAt.Replace("时间:", "");

            // Strip line breaks and tabs from the body text.
            string body = XpathUtil.GetText(root, "//div[@class='bbs-content clearfix']");
            foreach (string whitespace in new[] { "\r", "\n", "\t" })
            {
                body = body.Replace(whitespace, "");
            }
            record.Content = body;

            record.Source = "天涯社区";
            vdList.Add(record);
            return vdList;
        }
コード例 #5
0
        /// <summary>
        /// Downloads the page at <paramref name="uri"/>, reads its title, author, time and content
        /// through XPath queries, and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
        /// </summary>
        /// <param name="uri">Page to download; its <c>AbsoluteUri</c> is also stored as the record's URL.</param>
        /// <param name="vdList">List that receives the new record.</param>
        /// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
        private static List <VideoData> GetNeedData(Uri uri, List <VideoData> vdList)
        {
            VideoData    record = new VideoData();
            string       markup = Http.Downloader.Download(uri.AbsoluteUri);
            HtmlDocument page   = new HtmlDocument();
            page.LoadHtml(markup);
            var root = page.DocumentNode;

            record.Url    = uri.AbsoluteUri;
            record.Title  = XpathUtil.GetText(root, "//td[@class='biaoti']");
            record.Author = XpathUtil.GetText(root, "//span[@class='zuozhe01']/a");

            // The author cell holds more than the timestamp, so the date-time pattern
            // is matched against the cell text via RegexUtil.MatchText.
            string authorCell = XpathUtil.GetText(root, "//td[@class='zuozhe']");
            record.Time = RegexUtil.MatchText(authorCell, "\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

            // Drop non-breaking-space entities and line breaks from the body text.
            string body = XpathUtil.GetText(root, "//td[@width='941']/p");
            foreach (string noise in new[] { "&nbsp;", "\r", "\n" })
            {
                body = body.Replace(noise, "");
            }
            record.Content = body;

            record.Source = "发展论坛";
            vdList.Add(record);
            return vdList;
        }
コード例 #6
0
        /// <summary>
        /// Downloads the page at <paramref name="uri"/>, reads its content, title, author and time,
        /// and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
        /// </summary>
        /// <param name="uri">Page to download; its <c>AbsoluteUri</c> is also stored as the record's URL.</param>
        /// <param name="vdList">List that receives the new record.</param>
        /// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
        /// <remarks>
        /// When no content is found the title is used as the content. When the first date-time
        /// pattern finds nothing in the raw HTML, a second pattern with a named <c>time</c> group
        /// is tried.
        /// </remarks>
        private static List <VideoData> GetNeedData(Uri uri, List <VideoData> vdList)
        {
            VideoData    post   = new VideoData();
            string       markup = Downloader.Download(uri.AbsoluteUri);
            HtmlDocument page   = new HtmlDocument();
            page.LoadHtml(markup);
            var root = page.DocumentNode;

            post.Url = uri.AbsoluteUri;

            // Two alternative post containers are accepted; either one supplies the content node.
            post.Content = XpathUtil.GetText(root, "//div[@class='l_post j_l_post l_post_bright noborder ']//div[contains(@id,'post_content_')]|//div[@class='l_post l_post_bright j_l_post clearfix  ']//div[contains(@id,'post_content_')]");

            post.Title = XpathUtil.GetText(root, "//h1|//div[@id='j_core_title_wrap']//h3");

            // The author item is passed through RemoveNoise with a whitespace pattern.
            string rawAuthor = XpathUtil.GetText(root, "//li[@class='d_name']");
            post.Author = RegexUtil.RemoveNoise(rawAuthor, "\\s");

            // First attempt: a date-time pattern matched against the raw HTML.
            post.Time = RegexUtil.MatchText(markup, "\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

            post.Source = "百度贴吧";

            // Fall back to the title when there is no content; otherwise remove the "&#xFFFF;" entity.
            post.Content = string.IsNullOrEmpty(post.Content)
                ? post.Title
                : post.Content.Replace("&#xFFFF;", "");

            if (string.IsNullOrEmpty(post.Time))
            {
                // Second attempt: read the "time" group from a "date" field, plain or &quot;-escaped.
                post.Time = RegexUtil.MatchText(markup, "\"date\":\"(?<time>\\d+-\\d+-\\d+ \\d+:\\d+)[\\s\\S]+?floor\":\\d+,|&quot;date&quot;:&quot;(?<time>\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2})&quot;,", "time");
            }

            vdList.Add(post);
            return vdList;
        }
コード例 #7
0
        /// <summary>
        /// Downloads the page at <paramref name="uri"/>, fetches the article body from the address
        /// given by the page's <c>content_path</c> attribute, reads title, author and time through
        /// XPath queries, and appends the resulting <c>VideoData</c> to <paramref name="vdList"/>.
        /// </summary>
        /// <param name="uri">Page to download; its <c>AbsoluteUri</c> is also stored as the record's URL.</param>
        /// <param name="vdList">List that receives the new record.</param>
        /// <returns>The same <paramref name="vdList"/> instance, after the record has been added.</returns>
        private static List <VideoData> GetNeedData(Uri uri, List <VideoData> vdList)
        {
            VideoData    record = new VideoData();
            string       markup = Http.Downloader.Download(uri.AbsoluteUri);
            HtmlDocument page   = new HtmlDocument();
            page.LoadHtml(markup);
            var root = page.DocumentNode;

            record.Url = uri.AbsoluteUri;

            // The body is not read from this page: the article div names a second address
            // in its content_path attribute, which is downloaded as UTF-8.
            string bodyAddress = XpathUtil.GetAttribute(root, "//div[@class='article scrollFlag']", "content_path");
            record.Content = Http.Downloader.Download(bodyAddress, Encoding.GetEncoding("UTF-8"));

            record.Title  = XpathUtil.GetText(root, "//div[@class='navBar']/h2");
            record.Author = XpathUtil.GetText(root, "//div[@class='clearfix']/a|//div[@class='clearfix']/span[@class='float_l']");

            // Pull the date-time pattern out of the surrounding span text.
            string timeSpan = XpathUtil.GetText(root, "//span[@class='float_l mT10']");
            record.Time = RegexUtil.MatchText(timeSpan, "\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}");

            record.Source = "强国论坛";
            vdList.Add(record);
            return vdList;
        }
コード例 #8
0
ファイル: TianYa.cs プロジェクト: AnonymousDotNet/MetaSearch
        /// <summary>
        /// Downloads the listing page at <paramref name="Url"/>, collects the <c>href</c> of every
        /// <c>//h3/a</c> anchor, and passes each link to <c>GetNeedData</c> together with
        /// <paramref name="vdList"/>.
        /// </summary>
        /// <param name="vdList">List handed to <c>GetNeedData</c> for every link found.</param>
        /// <param name="Url">Address of the listing page to download.</param>
        /// <remarks>
        /// Exceptions are written to the console and never propagated. A failure while handling one
        /// link (for example an href that is not a valid absolute URI) is reported and the remaining
        /// links are still processed; a failure while fetching or parsing the listing page itself
        /// ends the call.
        /// </remarks>
        public static void ParseUrl(List <VideoData> vdList, string Url)
        {
            try
            {
                string       html = Http.Downloader.Download(Url);
                HtmlDocument hn   = new HtmlDocument();
                hn.LoadHtml(html);
                List <string> liststring = XpathUtil.GetAttributes(hn.DocumentNode, "//h3/a", "href");
                foreach (var item in liststring)
                {
                    try
                    {
                        Uri uri = new Uri(item);

                        GetNeedData(uri, vdList);
                    }
                    catch (Exception itemError)
                    {
                        // Best effort: one broken link must not stop the rest of the listing.
                        Console.WriteLine(itemError.ToString());
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
        }