/// <summary>
/// Downloads the Forum Cinemas Vilnius movie listing and extracts, for every listed movie,
/// the cinemas showing it and the start times at each cinema.
/// </summary>
/// <returns>
/// One <see cref="ScrapedMovie"/> per non-empty result cell that carries a title link.
/// The list is empty when the page contains no result cells at all.
/// </returns>
/// <remarks>
/// The XPath helpers used here (HtmlAgilityPack) return <c>null</c> rather than an empty
/// collection or node when nothing matches, so every lookup is null-checked: a missing
/// collection is treated as empty and an entry without a name is skipped instead of
/// aborting the whole scrape with a <see cref="System.NullReferenceException"/>.
/// </remarks>
public async Task<List<ScrapedMovie>> ScrapeMovieShowings()
{
    var forumCinemasUrl = "https://www.forumcinemas.lt/Movies/Vilnius";

    var response = await _httpClient.GetAsync(forumCinemasUrl);
    var responseContent = await response.Content.ReadAsStringAsync();

    var page = new HtmlDocument();
    page.LoadHtml(responseContent);

    var scrapedMovies = new List<ScrapedMovie>();

    var movieNodes = page.DocumentNode.SelectNodes("//td[contains(@class, 'result')]");
    if (movieNodes == null)
    {
        // No result cells on the page (layout change, error page, empty schedule).
        return scrapedMovies;
    }

    foreach (var movieNode in movieNodes)
    {
        if (string.IsNullOrWhiteSpace(movieNode.InnerHtml))
        {
            continue;
        }

        var movieNameNode = movieNode.SelectSingleNode(".//a[contains(@class, 'result_h4')]");
        if (movieNameNode == null)
        {
            // A result cell without a title link cannot be identified; skip it.
            continue;
        }

        var movieName = movieNameNode.InnerText.Trim();

        var scrapedMovie = new ScrapedMovie()
        {
            //remove brackets about dubbing
            NormalizedMovieName = RemoveBracketsFromNameEnd(movieName).LowercaseAndRemoveWhitespace()
        };

        var cinemaBlocks = movieNode.SelectNodes(".//div[contains(@id, 'showTimes')]/*");
        if (cinemaBlocks != null)
        {
            foreach (var cinemaBlock in cinemaBlocks)
            {
                var cinemaNameNode = cinemaBlock.SelectSingleNode(".//div[contains(@style, 'clear: left')]");
                if (cinemaNameNode == null)
                {
                    // Child element that is not a cinema block (no name div); skip it.
                    continue;
                }

                var cinemaName = cinemaNameNode.InnerText.Trim();

                var scrapedMovieShowing = new ScrapedMovieShowing()
                {
                    //remove city name
                    NormalizedCinemaNameWithoutCity = RemoveBracketsFromNameEnd(cinemaName).LowercaseAndRemoveWhitespace()
                };

                // A cinema block without any <li> entries is kept with an empty list of start times.
                var cinemaTimes = cinemaBlock.SelectNodes(".//li");
                if (cinemaTimes != null)
                {
                    foreach (var cinemaTime in cinemaTimes)
                    {
                        scrapedMovieShowing.StartTimes.Add(cinemaTime.InnerText.Trim());
                    }
                }

                scrapedMovie.MovieShowings.Add(scrapedMovieShowing);
            }
        }

        scrapedMovies.Add(scrapedMovie);
    }

    return scrapedMovies;
}
/// <summary>
/// Stores a scraped movie and its download links in the database, then appends the movie
/// to the in-memory <c>movies</c> list and refreshes the "Total" label.
/// </summary>
/// <param name="movie">The scraped movie to persist. Ignored when it has no links.</param>
/// <remarks>
/// All work runs inside the delegate passed to <c>InvokeEx</c> (presumably marshalled to
/// the UI thread - confirm against that helper). Any exception raised while saving is shown
/// in a message box instead of propagating to the caller.
/// </remarks>
private void UpdateUI(ScrapedMovie movie)
{
    this.InvokeEx(() =>
    {
        // A movie without any links is not stored or counted.
        if (movie.Links.Count == 0)
        {
            return;
        }

        try
        {
            using (var context = new MovieFinderEntities())
            {
                var hasChanges = false;

                // Look for an existing row with the same unique id.
                var storedMovie = context.Movies.FirstOrDefault(m => m.UniqueID == movie.UniqueId);

                if (storedMovie != null)
                {
                    // Scraper that supplied the currently stored image; First throws if it is not registered.
                    var previousScraper = MovieDetailsScraperBase.Scrappers.First(s => s.ID == storedMovie.ImageScrapperID);

                    // Replace a blank description, or a short one (< 50 chars) when the new one is longer than 50 chars.
                    var descriptionIsBlank = String.IsNullOrWhiteSpace(storedMovie.Description);
                    if (descriptionIsBlank
                        || (storedMovie.Description.Length < 50
                            && !String.IsNullOrWhiteSpace(movie.Description)
                            && movie.Description.Length > 50))
                    {
                        storedMovie.Description = movie.Description;
                        hasChanges = true;
                    }

                    // Take the new image when none is stored yet, or when the new scraper's
                    // ImagePriority value is lower than the previous scraper's.
                    if (!String.IsNullOrWhiteSpace(movie.ImageUrl))
                    {
                        if (String.IsNullOrWhiteSpace(storedMovie.ImageUrl)
                            || movie.Scraper.ImagePriority < previousScraper.ImagePriority)
                        {
                            storedMovie.ImageUrl = movie.ImageUrl;
                            storedMovie.ImageScrapperID = movie.Scraper.ID;
                            // Clearing the local copy makes the image-copy step below run again.
                            storedMovie.ImageLocalUrl = null;
                            hasChanges = true;
                        }
                    }
                }
                else
                {
                    // First time this movie is seen: create the row.
                    storedMovie = new Movie
                    {
                        CreateDate = DateTime.Now,
                        Description = movie.Description,
                        ImageUrl = movie.ImageUrl,
                        LanguageCode = movie.LangCode,
                        Name = movie.Name,
                        ReleaseDate = movie.ReleasedDate,
                        Version = NewDBVersion,
                        VersionChange = 0,
                        UniqueID = movie.UniqueId,
                        ImageScrapperID = movie.Scraper.ID
                    };
                    context.Movies.Add(storedMovie);
                }

                // Saved before links are added; the links below read storedMovie.ID
                // (presumably assigned by the database on insert - confirm).
                context.SaveChanges();

                // Copy the image locally when no local copy is recorded; save only if the copy reports success.
                if (String.IsNullOrWhiteSpace(storedMovie.ImageLocalUrl)
                    && new ImageScrapperService().CopyImageToLocal(storedMovie, ConfigurationManager.AppSettings["ImagePath"]))
                {
                    context.SaveChanges();
                }

                foreach (var link in movie.Links)
                {
                    // Skip links whose download URL is already stored (for any movie).
                    var alreadyStored = context.MovieLinks.Any(existing => existing.DowloadUrl == link.DownloadUrl);
                    if (alreadyStored)
                    {
                        continue;
                    }

                    context.MovieLinks.Add(new MovieLink
                    {
                        MovieID = storedMovie.ID,
                        LinkTitle = link.Title,
                        SiteTitle = movie.Scraper.Title,
                        PageSiteID = movie.Scraper.ID,
                        PageUrl = movie.PageUrl,
                        DowloadUrl = link.DownloadUrl,
                        DownloadSiteID = link.DownloadSiteID,
                        Version = NewDBVersion,
                        // Links coming from the scraper titled "EIH" are flagged as having subtitles.
                        HasSubtitle = movie.Scraper.Title == "EIH",
                        IsWebSupported = true,
                        IsDesktopSupported = true
                    });

                    // Saved per link, so a repeated URL later in the same list is caught by the check above.
                    context.SaveChanges();
                    hasChanges = true;
                }

                // Stamp the row with the current DB version only when something actually changed.
                if (hasChanges && storedMovie.Version != NewDBVersion)
                {
                    storedMovie.Version = NewDBVersion;
                    storedMovie.ModifiedDate = DateTime.Now;
                    context.SaveChanges();
                }
            }

            movies.Add(movie);
            this.labelCount.Text = "Total: " + movies.Count;
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
    });
}