/// <summary>
/// Runs the local-type scrapers one after another and reports whether any of the
/// resulting sessions lists the requested movie's IMDb code as locally available.
/// </summary>
/// <param name="id">Primary key of the <c>Movie</c> entity to look up.</param>
/// <param name="cancellationToken">Token flowed to the key lookup, session creation and each scrape.</param>
/// <returns>
/// <c>true</c> as soon as a session's <c>LocalImdbCodes</c> contains the movie's IMDb code;
/// <c>false</c> when every local scraper has run without that happening.
/// </returns>
/// <remarks>
/// Throws the exception produced by <c>EntityNotFoundException.Of&lt;Movie&gt;(id)</c> when no
/// movie with the given key exists.
/// </remarks>
public async Task<bool> ScrapeForLocalMovieAsync(int id, CancellationToken cancellationToken = default)
{
    // Use the object[] overload so the caller's token also covers the key lookup;
    // the params-only overload cannot accept a cancellation token.
    var movie = await _context.Set<Movie>().FindAsync(new object[] { id }, cancellationToken);
    if (movie is null)
    {
        throw EntityNotFoundException.Of<Movie>(id);
    }

    // Each session is handed the previous one so GetScrapeSessionAsync can reuse it.
    IScrapeSession session = null;
    foreach (var scraper in _scrapers.Where(x => x.Type == ScraperType.Local))
    {
        session = await _movieService.GetScrapeSessionAsync(scraper.Source, scraper.Type, session, cancellationToken);
        await scraper.ScrapeAsync(session, cancellationToken);

        // Stop early: remaining scrapers are not run once the movie has been found.
        if (session.LocalImdbCodes.Contains(movie.ImdbCode))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Fetches the movies of every movie library exposed by the client and hands the ones
/// created after the session's <c>ScrapeFrom</c> date (or all of them when no date is set)
/// to the session as local movies.
/// </summary>
/// <param name="session">Scrape session that supplies <c>ScrapeFrom</c> and receives the create requests.</param>
/// <param name="cancellationToken">Token passed to the client calls.</param>
/// <returns>A <c>ScrapeResult</c> whose <c>MovieCount</c> is the number of movies submitted.</returns>
/// <exception cref="Exception">Thrown when the client returns no movie libraries.</exception>
public async Task<ScrapeResult> ScrapeAsync(IScrapeSession session, CancellationToken cancellationToken)
{
    var movieLibraries = await _client.GetMovieLibrariesAsync(cancellationToken);
    if (!movieLibraries.Any())
    {
        throw new Exception("no plex movie libraries found");
    }

    _logger.LogInformation("found {libraryCount} libraries", movieLibraries.Count);

    // One request per library, all awaited together.
    var perLibrary = await Task.WhenAll(
        movieLibraries.Select(library => _client.GetMoviesAsync(library.Key, cancellationToken)));
    var allMovies = perLibrary.SelectMany(batch => batch).ToList();

    _logger.LogInformation("found {moviesCount} movies", allMovies.Count);

    // Without a ScrapeFrom date everything is treated as new.
    var newMovies = allMovies;
    if (session.ScrapeFrom.HasValue)
    {
        newMovies = allMovies
            .Where(movie => movie.DateCreated > session.ScrapeFrom.Value)
            .ToList();
    }

    var createRequests = _mapper.Map<ICollection<CreateLocalMovieRequest>>(newMovies);
    await session.CreateLocalMoviesAsync(createRequests);

    return new ScrapeResult
    {
        MovieCount = newMovies.Count
    };
}
/// <summary>
/// Tries to scrape a single image for a movie from the given image source and, on success,
/// stores it through the session.
/// </summary>
/// <param name="session">Session used to persist the scraped image.</param>
/// <param name="imdbCode">IMDb code of the movie the image belongs to.</param>
/// <param name="source">Image source to scrape; may be <c>null</c>, in which case nothing is done.</param>
/// <param name="cancellationToken">Token passed to the scraper and to the session.</param>
/// <returns>
/// <c>true</c> when an image was scraped and handed to the session; <c>false</c> when the source is
/// <c>null</c>, no scraper matches the source, the scraper threw, or it returned no image.
/// </returns>
private async Task<bool> ScrapeImageAsync(IScrapeSession session, string imdbCode, MovieImageSourceDto source, CancellationToken cancellationToken)
{
    if (source is null)
    {
        return false;
    }

    var scraper = _scrapers.FirstOrDefault(x => x.Source == source.Source);
    if (scraper is null)
    {
        return false;
    }

    _logger.LogInformation("scraping {source} -> {value}", source.Source, source.Value);

    CreateMovieImageRequest image = null;
    try
    {
        image = await scraper.ScrapeImageAsync(imdbCode, source, cancellationToken);
    }
    catch (Exception e)
    {
        // Best effort: a failing scraper is logged and reported as "no image".
        // The template is a constant with arguments (not an interpolated string) so the
        // values are captured as structured properties and braces inside them cannot be
        // mistaken for placeholders.
        _logger.LogError(e, "failed to scrape image {source} -> {value}", source.Source, source.Value);
    }

    if (image is null)
    {
        return false;
    }

    await session.CreateMovieImageAsync(image, cancellationToken);
    return true;
}
/// <summary>
/// Builds a new <c>ScrapeSession</c> for the given source and scraper type.
/// </summary>
/// <param name="source">Name of the scrape source the session is for.</param>
/// <param name="type">Scraper type; decides whether the latest local movie or the latest movie of the source provides the start date.</param>
/// <param name="lastSession">Optional previous session whose IMDb code collections are reused instead of being queried again.</param>
/// <param name="cancellationToken">Token passed to the database queries and to the new session.</param>
/// <returns>The newly created session.</returns>
public async Task<IScrapeSession> GetScrapeSessionAsync(
    string source,
    ScraperType type,
    IScrapeSession lastSession = null,
    CancellationToken cancellationToken = default)
{
    // Start date is the creation date of the newest record for this source, or null when there is none.
    var startFrom = type == ScraperType.Local
        ? (await _context.GetLatestLocalMovieBySourceAsync(source, cancellationToken))?.DateCreated
        : (await _context.GetLatestMovieBySourceAsync(source, cancellationToken))?.DateCreated;

    // The code lists are only loaded from the database when the previous session cannot supply them.
    var knownMovieCodes = lastSession?.MovieImdbCodes
        ?? await _context.Set<Movie>()
            .Select(movie => movie.ImdbCode)
            .ToListAsync(cancellationToken);

    var knownLocalCodes = lastSession?.LocalImdbCodes
        ?? await _context.Set<LocalMovie>()
            .Select(localMovie => localMovie.ImdbCode)
            .ToListAsync(cancellationToken);

    return new ScrapeSession(
        source,
        type,
        startFrom,
        knownMovieCodes,
        knownLocalCodes,
        _context,
        _clock,
        cancellationToken,
        _options.Value);
}
/// <summary>
/// Executes the scrape identified by <paramref name="id"/>: runs every registered scraper,
/// records a <c>ScrapeSource</c> entry per scraper, scrapes images, and saves the updated
/// scrape record.
/// </summary>
/// <param name="id">Identifier of the scrape record to run.</param>
/// <param name="cancellationToken">Token passed to the database, the scrapers and the image scrape.</param>
/// <remarks>
/// A failure in one scraper is logged and stored on its <c>ScrapeSource</c>; it does not stop the
/// remaining scrapers. If the scrape record cannot be loaded the method logs the error and returns
/// without doing anything else.
/// </remarks>
public async Task ScrapeAsync(int id, CancellationToken cancellationToken = default)
{
    Scrape scrape;
    try
    {
        scrape = await _context.Scrapes().FirstOrDefaultAsync(x => x.Id == id, cancellationToken);
        if (scrape is null)
        {
            throw new Exception($"cannot find scrape record {id}");
        }
    }
    catch (Exception e)
    {
        _logger.LogError(e, "failed when getting scrape record {id}", id);
        // TODO requeue on db fail
        return;
    }

    scrape.ScrapeSources = new List<ScrapeSource>();

    // The most recently created session is passed on to the next scraper and, finally, to the image scrape.
    IScrapeSession session = null;
    foreach (var scraper in _scrapers)
    {
        var source = new ScrapeSource
        {
            Source = scraper.Source,
            Type = scraper.Type,
            StartDate = _clock.UtcNow
        };
        scrape.ScrapeSources.Add(source);

        try
        {
            session = await _movieService.GetScrapeSessionAsync(scraper.Source, scraper.Type, session, cancellationToken);
            var result = await scraper.ScrapeAsync(session, cancellationToken);

            source.Success = true;
            source.EndDate = _clock.UtcNow;
            source.MovieCount = result.MovieCount;
            source.TorrentCount = result.TorrentCount;

            switch (scraper.Type)
            {
                case ScraperType.Local:
                    scrape.LocalMovieCount += result.MovieCount;
                    break;
                case ScraperType.Torrent:
                    scrape.MovieCount += result.MovieCount;
                    scrape.TorrentCount += result.TorrentCount;
                    break;
                default:
                    // Caught below, which flips this source back to failed.
                    throw new ArgumentOutOfRangeException();
            }
        }
        catch (Exception e)
        {
            _logger.LogError(e, "failed to scrape {source}", scraper.Source);
            source.Success = false;
            source.EndDate = _clock.UtcNow;
            source.Error = e.ToString();
        }
    }

    if (session is null)
    {
        // No scraper ran, or none got as far as creating a session. Calling the image scrape
        // with a null session would only surface as a NullReferenceException in the log.
        _logger.LogWarning("skipping image scrape because no scrape session was created");
    }
    else
    {
        try
        {
            scrape.ImageCount = await ScrapeImagesAsync(session, cancellationToken);
        }
        catch (Exception e)
        {
            // Image failures are logged only; they do not affect scrape.Success.
            _logger.LogError(e, "failed to scrape images");
        }
    }

    scrape.Success = scrape.ScrapeSources.All(x => x.Success);
    scrape.EndDate = _clock.UtcNow;
    await _context.SaveChangesAsync(cancellationToken);
}
/// <summary>
/// Walks through the movies returned by <c>GetMoviesWithMissingImagesAsync</c> in pages of 50 and
/// tries to obtain an image for each one: first via the session's own check, then from the
/// movie's local source, then from its remote sources in order.
/// </summary>
/// <param name="session">Session used to create the image directory, check for and store images.</param>
/// <param name="cancellationToken">Token passed to every asynchronous call, including the delay between remote attempts.</param>
/// <returns>The number of images scraped from a local or remote source.</returns>
private async Task<int> ScrapeImagesAsync(IScrapeSession session, CancellationToken cancellationToken = default)
{
    _logger.LogInformation("scraping images");
    session.CreateMovieImageDirectory();

    const int pageSize = 50;
    var batch = await _movieService.GetMoviesWithMissingImagesAsync(pageSize, 0, cancellationToken);
    _logger.LogInformation("starting image scrape {count}", batch.Count);
    if (batch.Count == 0)
    {
        return 0;
    }

    var settings = _options.Value;
    var scrapedCount = 0;

    // Movies for which no image could be found are skipped on later queries.
    var skipCount = 0;

    while (batch.Any())
    {
        _logger.LogInformation("scraping images for movie ids {from} to {to}", batch.First().Id, batch.Last().Id);

        foreach (var movie in batch)
        {
            if (await session.AssertMovieImageAsync(movie.ImdbCode, cancellationToken))
            {
                _logger.LogInformation("successfully updated image from local filesystem");
            }
            else if (await ScrapeImageAsync(session, movie.ImdbCode, movie.LocalSource, cancellationToken))
            {
                // The local source is tried before any remote one.
                _logger.LogInformation("successfully updated image from local movie source {source}", movie.LocalSource);
                scrapedCount++;
            }
            else
            {
                var remoteSucceeded = false;
                foreach (var remote in movie.RemoteSources)
                {
                    remoteSucceeded = await ScrapeImageAsync(session, movie.ImdbCode, remote, cancellationToken);
                    if (!remoteSucceeded)
                    {
                        _logger.LogWarning("failed to scrape {image} from {source}", remote.Value, remote.Source);
                    }
                    else
                    {
                        _logger.LogInformation("successfully updated image from remote movie source {source}", remote.Source);
                        scrapedCount++;
                    }

                    // The delay applies after every remote attempt, successful or not.
                    if (settings.RemoteScrapeDelay > TimeSpan.Zero)
                    {
                        await Task.Delay(settings.RemoteScrapeDelay, cancellationToken);
                    }

                    if (remoteSucceeded)
                    {
                        break;
                    }
                }

                if (!remoteSucceeded)
                {
                    // skip this image next time.
                    skipCount++;
                }
            }
        }

        batch = await _movieService.GetMoviesWithMissingImagesAsync(pageSize, skipCount, cancellationToken);
    }

    _logger.LogInformation("done scraping images");
    return scrapedCount;
}
/// <summary>
/// Pages through the client's movie listing (50 per page, ordered by <c>date_added</c> descending)
/// and submits the movies uploaded after the session's <c>ScrapeFrom</c> date to the session.
/// </summary>
/// <param name="session">Session that supplies <c>ScrapeFrom</c> and receives the create requests.</param>
/// <param name="cancellationToken">Token passed to the client and to the delay between pages.</param>
/// <returns>A <c>ScrapeResult</c> with the accumulated movie and torrent counts.</returns>
public async Task<ScrapeResult> ScrapeAsync(IScrapeSession session, CancellationToken cancellationToken = default)
{
    var totals = new ScrapeResult();
    var settings = _options.Value;

    _logger.LogInformation("Scraping yts movies from {fromDate}", session.ScrapeFrom);

    var page = 1;
    while (page < int.MaxValue)
    {
        _logger.LogInformation("scraping page {page}", page);

        var request = new YtsListMoviesRequest
        {
            Page = page,
            Limit = 50,
            OrderBy = "desc",
            SortBy = "date_added"
        };
        var response = await _client.ListMoviesAsync(request, cancellationToken);
        if (response.Movies is null)
        {
            break;
        }

        // With no ScrapeFrom date the whole page is taken as-is.
        var pageMovies = session.ScrapeFrom.HasValue
            ? response.Movies
                .Where(movie => movie.DateUploaded > session.ScrapeFrom.Value)
                .ToList()
            : response.Movies;
        if (!pageMovies.Any())
        {
            break;
        }

        _logger.LogInformation("retrieved {movieCount} movies", pageMovies.Count);

        var createRequests = _mapper.Map<ICollection<CreateMovieRequest>>(pageMovies);
        await session.CreateMoviesAsync(createRequests);
        _logger.LogInformation("added {movieCount} movies", createRequests.Count);

        totals.MovieCount += createRequests.Count;
        totals.TorrentCount += createRequests.Sum(item => item.Torrents?.Count ?? 0);

        // A short page (after filtering) ends the scrape.
        if (pageMovies.Count < request.Limit)
        {
            break;
        }

        if (settings.RemoteScrapeDelay > TimeSpan.Zero)
        {
            await Task.Delay(settings.RemoteScrapeDelay, cancellationToken);
        }

        page++;
    }

    _logger.LogInformation("done");
    return totals;
}