Exemplo n.º 1
0
        /// <summary>
        /// Downloads a dy2018.com index page, follows every detail link (href starting with "/i/")
        /// found inside <c>div.co_content8</c>, and stores each movie whose URL is not yet present
        /// in <c>MovieDataContent.Movies</c>.
        /// </summary>
        /// <param name="indexURL">URL of the index page to scan for movie links.</param>
        /// <param name="movieType">Category identifier; in this method it is only written to the completion log entry.</param>
        private static void CrawlerMovieInfoFromOnline(string indexURL, int movieType)
        {
            var newMovieCount = 0;
            var htmlDoc       = HTTPHelper.GetHTMLByURL(indexURL);
            var dom           = htmlParser.Parse(htmlDoc);

            dom.QuerySelector("div.co_content8")
            ?.QuerySelectorAll("a")
            // Anchors without an href yield null; skip them instead of throwing.
            .Where(a => a.GetAttribute("href")?.StartsWith("/i/", StringComparison.Ordinal) == true)
            .ForEach(a =>
            {
                var onlineURL = "http://www.dy2018.com" + a.GetAttribute("href");
                if (MovieDataContent.Movies.Any(mo => mo.OnlineUrl == onlineURL))
                {
                    // Already stored: nothing to download.
                    return;
                }

                var movieInfo = GetMovieInfoFromURL(onlineURL);
                if (movieInfo == null)
                {
                    return;
                }

                // NOTE(review): every new record is tagged MovieType.Latest regardless of
                // the movieType argument - confirm this is intended.
                movieInfo.MovieType = MovieType.Latest;
                MovieDataContent.Movies.Add(movieInfo);
                newMovieCount++;
            });
            MovieDataContent.SaveChanges();
            LogHelper.Info($"Finish Dy2018 Crawl {movieType.ToString()}MovieInfo,New Data Count:{newMovieCount},IndexURL:{indexURL}");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates an HTTP request for <paramref name="indexURL"/>, hands it to <c>AddCookies</c>,
        /// and returns the HTML obtained through <c>HTTPHelper.GetHTML</c>.
        /// </summary>
        /// <param name="indexURL">Absolute URL to request.</param>
        /// <returns>The value returned by <c>HTTPHelper.GetHTML</c> for the prepared request.</returns>
        private static string GetHTMLByHTTPWebRequest(string indexURL)
        {
            var request = WebRequest.CreateHttp(indexURL);
            AddCookies(request);
            return HTTPHelper.GetHTML(request);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Follows a JavaScript redirect page. When <paramref name="htmlDoc"/> contains
 /// "window.location", the target path is extracted from the first script element,
 /// prefixed with the dy2018.com host, and the HTML of that page is downloaded.
 /// </summary>
 /// <param name="htmlDoc">HTML that may be a redirect stub; may be null or empty.</param>
 /// <returns>
 /// The HTML fetched from the redirect target, or <paramref name="htmlDoc"/> unchanged when
 /// it is null/empty, has no "window.location" text, or contains no script element.
 /// </returns>
 private static string GetHTMLOnJumpWebPage(string htmlDoc)
 {
     // Nothing to follow when the download failed or the page has no redirect.
     if (string.IsNullOrEmpty(htmlDoc) || !htmlDoc.Contains("window.location"))
     {
         return htmlDoc;
     }

     var scriptDom = htmlParser.Parse(htmlDoc).QuerySelector("script");
     if (scriptDom == null)
     {
         // "window.location" appeared outside of a script element; there is no target to extract.
         return htmlDoc;
     }

     // Strip the assignment, string concatenation, quotes, blanks and terminators so only the path remains.
     var tempURL = "http://www.dy2018.com" + scriptDom.InnerHtml.Replace("window.location=", "")
                   .Replace("+", "").Replace("\"", "").Replace(" ", "").Replace(";", "");
     htmlDoc = HTTPHelper.GetHTMLByURL(tempURL);
     // An empty result means the fetch failed.
     LogHelper.Info($"GetHTML From JumpURL {(string.IsNullOrEmpty(htmlDoc) ? "Fail" : "Success")}!,the URL:{tempURL}");
     return htmlDoc;
 }
Exemplo n.º 4
0
 /// <summary>
 /// Starts a background task that downloads the btdytt520.com movie index, visits every link
 /// inside <c>div.index_Sidebar_cc</c>, and stores each movie whose URL is not yet present in
 /// <c>MovieDataContent.Movies</c>. Any exception raised inside the task is logged, not rethrown.
 /// </summary>
 public static void CrawlHostMovieInfo()
 {
     Task.Factory.StartNew(() =>
     {
         try
         {
             int newMovieCount = 0;
             var indexURL      = "http://www.btdytt520.com/movie/";
             var html          = HTTPHelper.GetHTMLByURL(indexURL, true);
             if (string.IsNullOrEmpty(html))
             {
                 return;
             }
             var htmlDom = htmlParser.Parse(html);
             // The sidebar container may be missing (layout change, error page); skip the scan in that case.
             htmlDom.QuerySelector("div.index_Sidebar_cc")
             ?.QuerySelectorAll("a")
             .ForEach(a =>
             {
                 var href = a.GetAttribute("href");
                 if (string.IsNullOrEmpty(href))
                 {
                     // An anchor without a target would resolve to the bare domain, not a movie page.
                     return;
                 }

                 var onlineURL = "http://www.btdytt520.com" + href;
                 if (!MovieDataContent.Movies.Any(mo => mo.OnlineUrl == onlineURL))
                 {
                     var movieInfo = GetMovieInfoURL(onlineURL);
                     if (movieInfo != null)
                     {
                         movieInfo.MovieType = MovieType.Latest;
                         MovieDataContent.Movies.Add(movieInfo);
                         newMovieCount++;
                     }
                 }
             });
             MovieDataContent.SaveChanges();
             LogHelper.Info($"Finish Btdytt520 CrawlHostMovieInfo,New Data Count:{newMovieCount}");
         }
         catch (Exception ex)
         {
             LogHelper.Error("Btdytt520 CrawlHostMovieInfo Exception", ex);
         }
     });
 }
Exemplo n.º 5
0
 /// <summary>
 /// Downloads a movie detail page and extracts its title, introduction, download links and
 /// publish date into a new <c>MovieInfo</c>.
 /// </summary>
 /// <param name="onlineURL">URL of the movie detail page.</param>
 /// <returns>
 /// The populated <c>MovieInfo</c>, or <c>null</c> when the page could not be downloaded
 /// or an exception occurred while extracting the data (the exception is logged).
 /// </returns>
 private static MovieInfo GetMovieInfoFromURL(string onlineURL)
 {
     try
     {
         var movieHTML = HTTPHelper.GetHTMLByURL(onlineURL);
         if (string.IsNullOrEmpty(movieHTML))
         {
             return null;
         }
         var movieDoc       = htmlParser.Parse(movieHTML);
         var zoom           = movieDoc.GetElementById("Zoom");
         var lstDownLoadURL = movieDoc.QuerySelectorAll("[bgcolor='#fdfddf']");
         var updatetime     = movieDoc.QuerySelector("span.updatetime");

         // Default to "now"; only replace it when the page carries a parsable date.
         // TryParse resets its out argument to DateTime.MinValue on failure, so parse into a temporary.
         var pubDate = DateTime.Now;
         DateTime parsedDate;
         if (!string.IsNullOrEmpty(updatetime?.TextContent)
             && DateTime.TryParse(updatetime.TextContent.Replace("发布时间:", ""), out parsedDate))
         {
             pubDate = parsedDate;
         }

         var lstURL    = lstDownLoadURL.Select(a => a.QuerySelector("a")?.TextContent ?? "");
         var movieName = movieDoc.QuerySelector("div.title_all")?.QuerySelector("h1");
         var movieInfo = new MovieInfo()
         {
             // movieName is null when the title container or its h1 is missing; use the placeholder then.
             MovieName       = movieName?.TextContent ?? "找不到影片信息...",
             OnlineUrl       = onlineURL,
             MovieIntro      = zoom?.TextContent ?? "暂无介绍...",
             DownLoadURLList = string.Join(";", lstURL),
             PubDate         = pubDate.Date,
             DataCreateTime  = DateTime.Now,
             SoureceDomain   = SoureceDomainConsts.Dy2018Domain,
         };
         return movieInfo;
     }
     catch (Exception ex)
     {
         LogHelper.Error("Dy2018 GetMovieInfoFromURL Exception", ex, new { OnloneURL = onlineURL });
         return null;
     }
 }
Exemplo n.º 6
0
 /// <summary>
 /// Starts a background task that crawls the dy2018.com home page: the first three
 /// <c>div.co_content222</c> sections are scanned for detail links (href starting with "/i/")
 /// and each movie whose URL is not yet present in <c>MovieDataContent.Movies</c> is stored.
 /// Any exception raised inside the task is logged, not rethrown.
 /// </summary>
 public static void CrawlHotMovie()
 {
     Task.Factory.StartNew(() =>
     {
         try
         {
             var newMovieCount = 0;
             LogHelper.Info("Dy2018 CrawlHotMovie Start...");
             var htmlDoc = HTTPHelper.GetHTMLByURL("http://www.dy2018.com/");
             htmlDoc     = GetHTMLOnJumpWebPage(htmlDoc);
             var dom     = htmlParser.Parse(htmlDoc);
             dom.QuerySelectorAll("div.co_content222")
             ?.Take(3)
             // SelectMany flattens the per-section link lists and, unlike a seedless Aggregate,
             // yields an empty sequence instead of throwing when no section matched.
             .SelectMany(divInfo => divInfo.QuerySelectorAll("a"))
             // Anchors without an href yield null; skip them instead of throwing.
             .Where(a => a.GetAttribute("href")?.StartsWith("/i/", StringComparison.Ordinal) == true)
             .ForEach(a =>
             {
                 var onlineURL = "http://www.dy2018.com" + a.GetAttribute("href");
                 if (!MovieDataContent.Movies.Any(mo => mo.OnlineUrl == onlineURL))
                 {
                     var movieInfo = GetMovieInfoFromURL(onlineURL);
                     if (movieInfo != null)
                     {
                         movieInfo.MovieType = MovieType.Latest;
                         MovieDataContent.Movies.Add(movieInfo);
                         newMovieCount++;
                     }
                 }
             });
             MovieDataContent.SaveChanges();
             LogHelper.Info($"Finish Dy2018 CrawlHotMovie,New Data Count:{newMovieCount}");
         }
         catch (Exception ex)
         {
             LogHelper.Error("Dy2018 CrawlHotMovie Exception", ex);
         }
     });
 }