コード例 #1
0
        /// <summary>
        /// Downloads the page at <paramref name="onlineURL"/> and builds a <see cref="MovieInfo"/> from it.
        /// </summary>
        /// <param name="onlineURL">Address of the movie page to download and parse.</param>
        /// <param name="isContainIntro">
        /// When true, the inner HTML of the <c>div.Drama_c</c> element is copied into
        /// <c>MovieIntro</c>; otherwise the introduction is left empty.
        /// </param>
        /// <returns>The parsed movie information, or null when no HTML could be retrieved.</returns>
        public static MovieInfo GetMovieInfoByOnlineURL(string onlineURL, bool isContainIntro = false)
        {
            var html = HTTPHelper.GetHTMLByURL(onlineURL);

            if (string.IsNullOrEmpty(html))
            {
                return(null);
            }
            var htmlDom   = htmlParser.Parse(html);
            var nameDom   = htmlDom.QuerySelector("h1.font14");
            var introDom  = htmlDom.QuerySelector("div.Drama_c");
            var infoTable = htmlDom.QuerySelectorAll("tr.CommonListCell");

            // Fallback used whenever the publish date cannot be read from the page.
            var pubDate   = DateTime.Now;

            if (infoTable != null && infoTable.Length > 2)
            {
                // The publish date is taken from the second matched row.
                var dateCell = infoTable[1];
                if (dateCell != null && !string.IsNullOrEmpty(dateCell.TextContent))
                {
                    // Parse into a temporary: DateTime.TryParse writes DateTime.MinValue to its
                    // out argument on failure, which would silently discard the fallback above.
                    DateTime parsedDate;
                    if (DateTime.TryParse(dateCell.TextContent.Replace("发布时间", "").Replace("\n", ""), out parsedDate))
                    {
                        pubDate = parsedDate;
                    }
                }
            }
            return(new MovieInfo()
            {
                MovieName = nameDom != null ? nameDom.InnerHtml : "获取名称失败...",
                Dy2018OnlineUrl = onlineURL,
                MovieIntro = introDom != null && isContainIntro ? introDom.InnerHtml : "",
                PubDate = pubDate,
            });
        }
コード例 #2
0
        /// <summary>
        /// Queues a task (via <c>Task.Factory.StartNew</c>) that walks the "hotclick" list pages
        /// and adds every movie not already known to <c>hotMoviceHelper</c>.
        /// The method itself does not wait for the task to complete.
        /// </summary>
        /// <param name="endIndex">Last page index to crawl (inclusive); paging starts at 1.</param>
        public static void CrawlHotClickMovieInfo(int endIndex = 10)
        {
            Task.Factory.StartNew(() =>
            {
                try
                {
                    LogHelper.Info("CrawlHotClickMovieInfo Start...");
                    for (int index = 1; index <= endIndex; index++)
                    {
                        var indexURL = $"http://www.btdytt520.com/hotclick/p_{index}.html";
                        var html     = HTTPHelper.GetHTMLByURL(indexURL);
                        if (string.IsNullOrEmpty(html))
                        {
                            // Stop paging as soon as a page cannot be fetched.
                            return;
                        }
                        var htmlDom = htmlParser.Parse(html);
                        foreach (var li in htmlDom.QuerySelectorAll("li.newsli"))
                        {
                            // The movie link is the first anchor carrying a non-empty "target" attribute.
                            var aDom = li.QuerySelectorAll("a").FirstOrDefault(a => !string.IsNullOrEmpty(a.GetAttribute("target")));
                            if (aDom == null)
                            {
                                continue;
                            }
                            var onlineURL = "http://www.btdytt520.com/" + aDom.GetAttribute("href");
                            if (hotMoviceHelper.IsContainsMoive(onlineURL))
                            {
                                continue;
                            }
                            var dateDom = li.QuerySelector("li.phlidate");
                            if (dateDom == null)
                            {
                                continue;
                            }

                            // Parse into a temporary: DateTime.TryParse writes DateTime.MinValue to its
                            // out argument on failure, which would discard the DateTime.Now fallback.
                            var pubDate = DateTime.Now;
                            DateTime parsedDate;
                            if (DateTime.TryParse(dateDom.InnerHtml, out parsedDate))
                            {
                                pubDate = parsedDate;
                            }
                            hotMoviceHelper.AddToMovieDic(new MovieInfo()
                            {
                                Dy2018OnlineUrl = onlineURL,
                                MovieName       = aDom.InnerHtml,
                                PubDate         = pubDate,
                            });
                        }
                    }
                }
                catch (Exception ex)
                {
                    LogHelper.Error("CrawlHotClickMovieInfo Exception", ex);
                }
                finally
                {
                    // Written on every exit path, including the early return on an empty page.
                    LogHelper.Info("CrawlHotClickMovieInfo Finish.");
                }
            });
        }
コード例 #3
0
        /// <summary>
        /// Downloads the movie index page and projects every link found inside
        /// <c>div.index_Sidebar_cc</c> into a <see cref="MovieInfo"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the resulting list is neither stored nor returned by this method,
        /// so it currently has no observable effect beyond the HTTP request. Confirm intent.
        /// </remarks>
        public static void CrawlLatestMovieInfo()
        {
            var indexURL = "http://www.btdytt520.com/movie/";
            var html     = HTTPHelper.GetHTMLByURL(indexURL);

            if (string.IsNullOrEmpty(html))
            {
                return;
            }
            var htmlDom  = htmlParser.Parse(html);
            var divMovie = htmlDom.QuerySelector("div.index_Sidebar_cc");

            // QuerySelector yields null when nothing matches; bail out instead of dereferencing it.
            if (divMovie == null)
            {
                return;
            }
            var lstMovie = divMovie.QuerySelectorAll("a").Select(a => new MovieInfo()
            {
                Dy2018OnlineUrl = "http://www.btdytt520.com/" + a.GetAttribute("href"),
                MovieName       = a.InnerHtml
            }).ToList();
        }
コード例 #4
0
        /// <summary>
        /// Queues a task (via <c>Task.Factory.StartNew</c>) that crawls the dy2018 home page and
        /// adds movies not already present in <c>hotMovieList</c>. Only movies for which at least
        /// one download URL was extracted are added. The method does not wait for the task.
        /// </summary>
        public static void CrawlHotMovie()
        {
            Task.Factory.StartNew(() =>
            {
                try
                {
                    LogHelper.Info("CrawlHotMovie Start...");
                    var htmlDoc = HTTPHelper.GetHTMLByURL("http://www.dy2018.com/");

                    // Skip parsing when the download produced nothing, as the sibling crawlers do.
                    if (!string.IsNullOrEmpty(htmlDoc))
                    {
                        var dom        = htmlParser.Parse(htmlDoc);
                        var lstDivInfo = dom.QuerySelectorAll("div.co_content222");
                        if (lstDivInfo != null)
                        {
                            // The first three DIVs hold the new movies.
                            foreach (var divInfo in lstDivInfo.Take(3))
                            {
                                foreach (var a in divInfo.QuerySelectorAll("a"))
                                {
                                    // Anchors without an href yield null; skip them rather than
                                    // letting a NullReferenceException abort the whole crawl.
                                    var href = a.GetAttribute("href");
                                    if (href == null || !href.Contains("/i/"))
                                    {
                                        continue;
                                    }
                                    var onlineURL = "http://www.dy2018.com" + href;
                                    if (hotMovieList.IsContainsMoive(onlineURL))
                                    {
                                        continue;
                                    }
                                    MovieInfo movieInfo = Dy2018MoviceInfoHelper.GetMovieInfoFromOnlineURL(onlineURL);
                                    if (movieInfo != null && movieInfo.XunLeiDownLoadURLList != null && movieInfo.XunLeiDownLoadURLList.Count != 0)
                                    {
                                        hotMovieList.AddToMovieDic(movieInfo);
                                    }
                                }
                            }
                        }
                    }

                    LogHelper.Info("CrawlHotMovie Finish...");
                }
                catch (Exception ex)
                {
                    LogHelper.Error("CrawlHotMovie Exception", ex);
                }
            });
        }
コード例 #5
0
        /// <summary>
        /// Downloads the movie index page and, for every link inside <c>div.index_Sidebar_cc</c>
        /// that <c>hotMoviceHelper</c> does not already contain, fetches the movie details and
        /// adds them to <c>hotMoviceHelper</c>.
        /// </summary>
        public static void CrawlHostMovieInfo()
        {
            var indexURL = "http://www.btdytt520.com/movie/";
            var html     = HTTPHelper.GetHTMLByURL(indexURL);

            if (string.IsNullOrEmpty(html))
            {
                return;
            }
            var htmlDom  = htmlParser.Parse(html);
            var divMovie = htmlDom.QuerySelector("div.index_Sidebar_cc");

            // QuerySelector yields null when nothing matches; nothing to crawl in that case.
            if (divMovie == null)
            {
                return;
            }

            foreach (var a in divMovie.QuerySelectorAll("a"))
            {
                var aURL = "http://www.btdytt520.com" + a.GetAttribute("href");
                if (hotMoviceHelper.IsContainsMoive(aURL))
                {
                    continue;
                }

                // GetMovieInfoByOnlineURL returns null when the page could not be downloaded;
                // do not hand that null to the collection.
                var movieInfo = Btdytt520Helper.GetMovieInfoByOnlineURL(aURL);
                if (movieInfo != null)
                {
                    hotMoviceHelper.AddToMovieDic(movieInfo);
                }
            }
        }
コード例 #6
0
        /// <summary>
        /// Extracts movie data from an online page.
        /// </summary>
        /// <param name="onlineURL">Address of the movie page to download and parse.</param>
        /// <param name="isContainIntro">
        /// When true, the HTML-encoded inner HTML of the element with id <c>Zoom</c> is used as
        /// the introduction; otherwise a placeholder text is used.
        /// </param>
        /// <returns>
        /// The parsed movie information, or null when no HTML could be retrieved or an
        /// exception occurred (the exception is logged).
        /// </returns>
        public static MovieInfo GetMovieInfoFromOnlineURL(string onlineURL, bool isContainIntro = false)
        {
            try
            {
                var movieHTML = HTTPHelper.GetHTMLByURL(onlineURL);
                if (string.IsNullOrEmpty(movieHTML))
                {
                    return(null);
                }
                var movieDoc = htmlParser.Parse(movieHTML);
                var zoom = movieDoc.GetElementById("Zoom");
                var lstDownLoadURL = movieDoc.QuerySelectorAll("[bgcolor='#fdfddf']");
                var updatetime = movieDoc.QuerySelector("span.updatetime");

                // Fallback used whenever the publish date cannot be read from the page.
                var pubDate = DateTime.Now;
                if (updatetime != null && !string.IsNullOrEmpty(updatetime.InnerHtml))
                {
                    // Parse into a temporary: DateTime.TryParse writes DateTime.MinValue to its
                    // out argument on failure, which would silently discard the fallback above.
                    DateTime parsedDate;
                    if (DateTime.TryParse(updatetime.InnerHtml.Replace("发布时间:", ""), out parsedDate))
                    {
                        pubDate = parsedDate;
                    }
                }

                // Each matched element contributes the inner HTML of its first anchor, if it has one.
                var lstOnlineURL = lstDownLoadURL.Select(a => a.QuerySelector("a")).Where(item => item != null).Select(item => item.InnerHtml).ToList();

                var titleDom   = movieDoc.QuerySelector("div.title_all");
                var headingDom = titleDom != null ? titleDom.QuerySelector("h1") : null;

                var movieInfo = new MovieInfo()
                {
                    MovieName             = headingDom != null ? headingDom.InnerHtml : "找不到影片信息...",
                    Dy2018OnlineUrl       = onlineURL,
                    MovieIntro            = zoom != null && isContainIntro?WebUtility.HtmlEncode(zoom.InnerHtml) : "暂无介绍...",
                    XunLeiDownLoadURLList = lstOnlineURL,
                    PubDate = pubDate,
                };
                return(movieInfo);
            }
            catch (Exception ex)
            {
                LogHelper.Error("GetMovieInfoFromOnlineURL Exception", ex, new { OnloneURL = onlineURL });
                return(null);
            }
        }
コード例 #7
0
        /// <summary>
        /// Extracts data from an online page: downloads <paramref name="indexURL"/>, scans the
        /// first <c>div.co_content8</c> element for links whose href contains "/i/", and adds
        /// each movie not already present in <c>latestMovieList</c>. Only movies for which at
        /// least one download URL was extracted are added.
        /// </summary>
        /// <param name="indexURL">Address of the list page to download and scan.</param>
        private static void FillMovieFromOnline(string indexURL)
        {
            var htmlDoc = HTTPHelper.GetHTMLByURL(indexURL);

            // Nothing to parse when the download produced no content.
            if (string.IsNullOrEmpty(htmlDoc))
            {
                return;
            }
            var dom        = htmlParser.Parse(htmlDoc);
            var lstDivInfo = dom.QuerySelectorAll("div.co_content8");

            // FirstOrDefault yields null for an empty match list, so check the element itself
            // rather than only the collection.
            var firstDiv = lstDivInfo != null ? lstDivInfo.FirstOrDefault() : null;
            if (firstDiv == null)
            {
                return;
            }

            foreach (var a in firstDiv.QuerySelectorAll("a"))
            {
                // Anchors without an href yield null; skip them instead of throwing.
                var href = a.GetAttribute("href");
                if (href == null || !href.Contains("/i/"))
                {
                    continue;
                }
                var onlineURL = "http://www.dy2018.com" + href;
                if (latestMovieList.IsContainsMoive(onlineURL))
                {
                    continue;
                }
                MovieInfo movieInfo = Dy2018MoviceInfoHelper.GetMovieInfoFromOnlineURL(onlineURL);
                if (movieInfo != null && movieInfo.XunLeiDownLoadURLList != null && movieInfo.XunLeiDownLoadURLList.Count != 0)
                {
                    latestMovieList.AddToMovieDic(movieInfo);
                }
            }
        }