Example #1
0
        /// <summary>
        /// Runs the local scrapers one after another until one of them reports the
        /// requested movie among the session's local IMDb codes.
        /// </summary>
        /// <param name="id">Key of the movie to look up.</param>
        /// <param name="cancellationToken">Token forwarded to the lookup, the session creation and each scrape.</param>
        /// <returns>
        /// <c>true</c> as soon as a session's <c>LocalImdbCodes</c> contains the movie's IMDb code;
        /// <c>false</c> when every local scraper has run without a match.
        /// </returns>
        /// <remarks>
        /// Throws the exception created by <c>EntityNotFoundException.Of&lt;Movie&gt;(id)</c> when the movie does not exist.
        /// </remarks>
        public async Task<bool> ScrapeForLocalMovieAsync(int id, CancellationToken cancellationToken = default)
        {
            // The key-array overload is the one that accepts a cancellation token,
            // so the lookup can be cancelled together with the rest of the operation.
            var movie = await _context.Set<Movie>().FindAsync(new object[] { id }, cancellationToken);

            if (movie is null)
            {
                throw EntityNotFoundException.Of<Movie>(id);
            }

            IScrapeSession session = null;

            foreach (var scraper in _scrapers.Where(x => x.Type == ScraperType.Local))
            {
                // The previous session (null on the first pass) is handed over as "lastSession".
                session = await _movieService.GetScrapeSessionAsync(scraper.Source, scraper.Type, session, cancellationToken);

                await scraper.ScrapeAsync(session, cancellationToken);

                // Stop early: remaining scrapers are not run once the movie has been found.
                if (session.LocalImdbCodes.Contains(movie.ImdbCode))
                {
                    return true;
                }
            }

            return false;
        }
Example #2
0
        /// <summary>
        /// Loads the movies of every Plex movie library and hands the ones created after
        /// the session's scrape-from date to the session as local movies.
        /// </summary>
        /// <param name="session">Session that supplies the scrape-from date and receives the create requests.</param>
        /// <param name="cancellationToken">Token forwarded to the Plex client calls.</param>
        /// <returns>A result whose <c>MovieCount</c> is the number of movies submitted to the session.</returns>
        /// <exception cref="Exception">Thrown when the client returns no movie libraries.</exception>
        public async Task<ScrapeResult> ScrapeAsync(IScrapeSession session, CancellationToken cancellationToken)
        {
            var libraries = await _client.GetMovieLibrariesAsync(cancellationToken);

            if (!libraries.Any())
            {
                throw new Exception("no plex movie libraries found");
            }

            _logger.LogInformation("found {libraryCount} libraries", libraries.Count);

            // One request per library, awaited together, then flattened into a single list.
            var moviesPerLibrary = await Task.WhenAll(
                libraries.Select(library => _client.GetMoviesAsync(library.Key, cancellationToken)));
            var movies = moviesPerLibrary.SelectMany(batch => batch).ToList();

            _logger.LogInformation("found {moviesCount} movies", movies.Count);

            // Without a scrape-from date every movie is treated as new.
            var newMovies = movies;

            if (session.ScrapeFrom.HasValue)
            {
                newMovies = movies.Where(movie => movie.DateCreated > session.ScrapeFrom.Value).ToList();
            }

            await session.CreateLocalMoviesAsync(_mapper.Map<ICollection<CreateLocalMovieRequest>>(newMovies));

            var scrapeResult = new ScrapeResult();
            scrapeResult.MovieCount = newMovies.Count;

            return scrapeResult;
        }
Example #3
0
        /// <summary>
        /// Scrapes a single movie image from the given source and stores it through the session.
        /// </summary>
        /// <param name="session">Session used to persist the scraped image.</param>
        /// <param name="imdbCode">IMDb code passed to the scraper.</param>
        /// <param name="source">Image source to scrape; may be <c>null</c>.</param>
        /// <param name="cancellationToken">Token forwarded to the scraper and the session.</param>
        /// <returns>
        /// <c>true</c> when an image was scraped and handed to the session; <c>false</c> when
        /// <paramref name="source"/> is <c>null</c>, no scraper matches its <c>Source</c>,
        /// the scraper failed, or the scraper returned no image.
        /// </returns>
        /// <exception cref="OperationCanceledException">
        /// Thrown when the scrape is interrupted because <paramref name="cancellationToken"/> was cancelled.
        /// </exception>
        private async Task<bool> ScrapeImageAsync(IScrapeSession session, string imdbCode, MovieImageSourceDto source, CancellationToken cancellationToken)
        {
            if (source is null)
            {
                return false;
            }

            var scraper = _scrapers.FirstOrDefault(x => x.Source == source.Source);

            if (scraper is null)
            {
                return false;
            }

            _logger.LogInformation("scraping {source} -> {value}", source.Source, source.Value);

            CreateMovieImageRequest image = null;

            try
            {
                image = await scraper.ScrapeImageAsync(imdbCode, source, cancellationToken);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // A cancellation requested by the caller is not a scrape failure; let it propagate.
                // Cancellations that did not come from this token (e.g. timeouts) still fall through below.
                throw;
            }
            catch (Exception e)
            {
                // Best effort: a failing scraper is logged and reported as "not scraped".
                // A message template (not an interpolated string) keeps the values as structured log properties.
                _logger.LogError(e, "failed to scrape image {source} -> {value}", source.Source, source.Value);
            }

            if (image is null)
            {
                return false;
            }

            await session.CreateMovieImageAsync(image, cancellationToken);

            return true;
        }
Example #4
0
        /// <summary>
        /// Creates a scrape session for one source and scraper type.
        /// </summary>
        /// <param name="source">Source identifier used to look up the latest stored movie.</param>
        /// <param name="type">Scraper type; selects the local or non-local "latest movie" lookup.</param>
        /// <param name="lastSession">
        /// Optional previous session. When given and its code collections are not <c>null</c>,
        /// they are reused instead of being loaded from the database.
        /// </param>
        /// <param name="cancellationToken">Token forwarded to the database queries and to the new session.</param>
        /// <returns>A new <c>ScrapeSession</c>.</returns>
        public async Task<IScrapeSession> GetScrapeSessionAsync(string source, ScraperType type, IScrapeSession lastSession = null, CancellationToken cancellationToken = default)
        {
            // DateCreated of the latest stored movie for this source; null when the lookup returns nothing.
            var scrapeFrom = type != ScraperType.Local
                ? (await _context.GetLatestMovieBySourceAsync(source, cancellationToken))?.DateCreated
                : (await _context.GetLatestLocalMovieBySourceAsync(source, cancellationToken))?.DateCreated;

            // Prefer the collections carried by the previous session; query only when they are unavailable.
            var knownMovieCodes = lastSession?.MovieImdbCodes
                ?? await _context.Set<Movie>().Select(movie => movie.ImdbCode).ToListAsync(cancellationToken);

            var knownLocalCodes = lastSession?.LocalImdbCodes
                ?? await _context.Set<LocalMovie>().Select(localMovie => localMovie.ImdbCode).ToListAsync(cancellationToken);

            return new ScrapeSession(
                source,
                type,
                scrapeFrom,
                knownMovieCodes,
                knownLocalCodes,
                _context,
                _clock,
                cancellationToken,
                _options.Value);
        }
Example #5
0
        /// <summary>
        /// Executes the scrape identified by <paramref name="id"/>: runs every registered scraper,
        /// records the outcome of each as a <c>ScrapeSource</c>, scrapes missing images and saves the record.
        /// </summary>
        /// <param name="id">Id of the scrape record to process.</param>
        /// <param name="cancellationToken">Token forwarded to database calls, scrapers and the image scrape.</param>
        /// <remarks>
        /// A missing record or a failure while loading it is logged and the method returns without doing any work.
        /// A failing scraper is logged and recorded on its <c>ScrapeSource</c>; the remaining scrapers still run.
        /// A failing image scrape is logged and does not influence <c>scrape.Success</c>.
        /// </remarks>
        public async Task ScrapeAsync(int id, CancellationToken cancellationToken = default)
        {
            Scrape scrape;

            try
            {
                scrape = await _context.Scrapes().FirstOrDefaultAsync(x => x.Id == id, cancellationToken);

                if (scrape is null)
                {
                    // Thrown inside the try so a missing record is logged by the same handler as a database failure.
                    throw new Exception($"cannot find scrape record {id}");
                }
            }
            catch (Exception e)
            {
                _logger.LogError(e, "failed when getting scrape record {id}", id);
                // TODO requeue on db fail
                return;
            }

            scrape.ScrapeSources = new List<ScrapeSource>();
            IScrapeSession session = null;

            foreach (var scraper in _scrapers)
            {
                // Registered before the scrape starts so that a failure is still recorded against this source.
                var source = new ScrapeSource
                {
                    Source    = scraper.Source,
                    Type      = scraper.Type,
                    StartDate = _clock.UtcNow
                };
                scrape.ScrapeSources.Add(source);

                try
                {
                    // The previous session (null on the first pass) is handed over as "lastSession".
                    session = await _movieService.GetScrapeSessionAsync(scraper.Source, scraper.Type, session, cancellationToken);

                    var result = await scraper.ScrapeAsync(session, cancellationToken);

                    source.Success      = true;
                    source.EndDate      = _clock.UtcNow;
                    source.MovieCount   = result.MovieCount;
                    source.TorrentCount = result.TorrentCount;

                    switch (scraper.Type)
                    {
                        case ScraperType.Local:
                            scrape.LocalMovieCount += result.MovieCount;
                            break;

                        case ScraperType.Torrent:
                            scrape.MovieCount   += result.MovieCount;
                            scrape.TorrentCount += result.TorrentCount;
                            break;

                        default:
                            // Caught by the handler below, which marks this source as failed.
                            throw new ArgumentOutOfRangeException();
                    }
                }
                catch (Exception e)
                {
                    _logger.LogError(e, "failed to scrape {source}", scraper.Source);
                    source.Success = false;
                    source.EndDate = _clock.UtcNow;
                    source.Error   = e.ToString();
                }
            }

            if (session is null)
            {
                // No scraper is registered, or the first session could never be created.
                // The image scrape dereferences the session, so skip it explicitly instead of
                // letting it fail with a NullReferenceException that is logged as an image error.
                _logger.LogWarning("skipping image scrape because no scrape session is available");
            }
            else
            {
                try
                {
                    scrape.ImageCount = await ScrapeImagesAsync(session, cancellationToken);
                }
                catch (Exception e)
                {
                    _logger.LogError(e, "failed to scrape images");
                }
            }

            // Overall success reflects the scrapers only.
            scrape.Success = scrape.ScrapeSources.All(x => x.Success);
            scrape.EndDate = _clock.UtcNow;
            await _context.SaveChangesAsync(cancellationToken);
        }
Example #6
0
        /// <summary>
        /// Works through the movies reported as missing an image, in pages of 50, and tries to obtain
        /// an image for each one: first via the session's own check, then from the movie's local
        /// source, then from its remote sources in order.
        /// </summary>
        /// <param name="session">Session used to check for and store images.</param>
        /// <param name="cancellationToken">Token forwarded to every query, scrape and delay.</param>
        /// <returns>
        /// The number of images obtained from a local or remote source. Movies satisfied by
        /// <c>AssertMovieImageAsync</c> are not counted.
        /// </returns>
        private async Task<int> ScrapeImagesAsync(IScrapeSession session, CancellationToken cancellationToken = default)
        {
            _logger.LogInformation("scraping images");

            session.CreateMovieImageDirectory();

            const int limit = 50;
            var result = await _movieService.GetMoviesWithMissingImagesAsync(limit, 0, cancellationToken);

            _logger.LogInformation("starting image scrape {count}", result.Count);

            if (result.Count == 0)
            {
                return 0;
            }

            var options = _options.Value;
            var scraped = 0;

            // Number of movies for which every remote attempt failed. It is passed as the second
            // argument of the next page query so those movies are not fetched again
            // (presumably an offset — confirm against GetMoviesWithMissingImagesAsync).
            var skip = 0;

            while (result.Any())
            {
                _logger.LogInformation("scraping images for movie ids {from} to {to}", result.First().Id, result.Last().Id);

                foreach (var sources in result)
                {
                    if (await session.AssertMovieImageAsync(sources.ImdbCode, cancellationToken))
                    {
                        _logger.LogInformation("successfully updated image from local filesystem");
                        continue;
                    }

                    // prefer local image
                    if (await ScrapeImageAsync(session, sources.ImdbCode, sources.LocalSource, cancellationToken))
                    {
                        // ScrapeImageAsync only returns true for a non-null source, so LocalSource is set here.
                        // Log its Source value, as the remote branch does, rather than the DTO itself.
                        _logger.LogInformation("successfully updated image from local movie source {source}", sources.LocalSource.Source);
                        scraped++;
                        continue;
                    }

                    var success = false;
                    foreach (var source in sources.RemoteSources)
                    {
                        success = await ScrapeImageAsync(session, sources.ImdbCode, source, cancellationToken);

                        if (success)
                        {
                            _logger.LogInformation("successfully updated image from remote movie source {source}", source.Source);
                            scraped++;
                        }
                        else
                        {
                            _logger.LogWarning("failed to scrape {image} from {source}", source.Value, source.Source);
                        }

                        // The delay follows every remote attempt, successful or not.
                        if (options.RemoteScrapeDelay > TimeSpan.Zero)
                        {
                            await Task.Delay(options.RemoteScrapeDelay, cancellationToken);
                        }

                        if (success)
                        {
                            break;
                        }
                    }

                    if (!success)
                    {
                        // skip this image next time.
                        skip++;
                    }
                }

                result = await _movieService.GetMoviesWithMissingImagesAsync(limit, skip, cancellationToken);
            }

            _logger.LogInformation("done scraping images");

            return scraped;
        }
Example #7
0
        /// <summary>
        /// Pages through the YTS movie list and hands every movie uploaded after the session's
        /// scrape-from date to the session.
        /// </summary>
        /// <param name="session">Session that supplies the scrape-from date and receives the create requests.</param>
        /// <param name="cancellationToken">Token forwarded to the client calls and the delay between pages.</param>
        /// <returns>A result carrying the total number of movies and torrents submitted.</returns>
        public async Task<ScrapeResult> ScrapeAsync(IScrapeSession session, CancellationToken cancellationToken = default)
        {
            var result = new ScrapeResult();
            var options = _options.Value;

            _logger.LogInformation("Scraping yts movies from {fromDate}", session.ScrapeFrom);

            var page = 1;

            while (page < int.MaxValue)
            {
                _logger.LogInformation("scraping page {page}", page);

                // Pages are requested ordered by "date_added" descending.
                var listRequest = new YtsListMoviesRequest
                {
                    Page    = page,
                    Limit   = 50,
                    OrderBy = "desc",
                    SortBy  = "date_added"
                };
                var listResponse = await _client.ListMoviesAsync(listRequest, cancellationToken);

                if (listResponse.Movies is null)
                {
                    break;
                }

                // Without a scrape-from date the whole page is taken as is.
                var movies = !session.ScrapeFrom.HasValue
                    ? listResponse.Movies
                    : listResponse.Movies
                                  .Where(movie => movie.DateUploaded > session.ScrapeFrom.Value)
                                  .ToList();

                if (!movies.Any())
                {
                    break;
                }

                _logger.LogInformation("retrieved {movieCount} movies", movies.Count);

                var createRequests = _mapper.Map<ICollection<CreateMovieRequest>>(movies);

                await session.CreateMoviesAsync(createRequests);

                _logger.LogInformation("added {movieCount} movies", createRequests.Count);

                result.MovieCount   += createRequests.Count;
                result.TorrentCount += createRequests.Sum(x => x.Torrents?.Count ?? 0);

                // A page with fewer kept movies than the limit ends the scrape.
                var isLastPage = movies.Count < listRequest.Limit;

                if (isLastPage)
                {
                    break;
                }

                if (options.RemoteScrapeDelay > TimeSpan.Zero)
                {
                    await Task.Delay(options.RemoteScrapeDelay, cancellationToken);
                }

                page++;
            }

            _logger.LogInformation("done");

            return result;
        }