Esempio n. 1
0
        internal static async Task <(string ImdbUrl, string ImdbHtml, string ParentUrl, string ParentHtml, string ReleaseUrl, string ReleaseHtml, ImdbMetadata ImdbMetadata)> DownloadAsync(
            string imdbId, bool useCache, string imdbFile, string parentFile, string releaseFile, IWebDriver?webDriver = null)
        {
            using WebClient? webClient = webDriver is null ? new() { Encoding = Encoding.UTF8 } : null;
            webClient?.AddChromeHeaders();

            string imdbUrl  = $"https://www.imdb.com/title/{imdbId}/";
            string imdbHtml = useCache && File.Exists(imdbFile)
                ? await File.ReadAllTextAsync(imdbFile)
                : await Retry.FixedIntervalAsync(async() => webDriver is not null?await webDriver.DownloadStringAsync(imdbUrl) : await webClient !.DownloadCompressedStringAsync(imdbUrl), retryCount : 10);

            CQ           imdbCQ       = imdbHtml;
            string       json         = imdbCQ.Find(@"script[type=""application/ld+json""]").Text();
            ImdbMetadata imdbMetadata = JsonSerializer.Deserialize <ImdbMetadata>(
                json,
                new JsonSerializerOptions()
            {
                PropertyNameCaseInsensitive = true
            }) ?? throw new InvalidOperationException(json);

            string parentUrl  = string.Empty;
            string parentHtml = string.Empty;
            string parentHref = imdbCQ.Find(@"div.titleParent a").FirstOrDefault()?.GetAttribute("href")
                                ?? imdbCQ.Find(@"div").FirstOrDefault(div => div.Classes.Any(@class => @class.StartsWith("TitleBlock__SeriesParentLinkWrapper", StringComparison.Ordinal)))?.Cq().Find("a").Attr("href")
                                ?? string.Empty;

            if (!string.IsNullOrWhiteSpace(parentHref))
            {
                string parentImdbId = Regex.Match(parentHref, "tt[0-9]+").Value;
                (parentUrl, parentHtml, _, _, _, _, imdbMetadata.Parent) = await DownloadAsync(parentImdbId, useCache, parentFile, string.Empty, releaseFile);
            }

            string htmlTitle     = imdbCQ.Find(@"title").Text();
            string htmlTitleYear = htmlTitle.Contains("(", StringComparison.Ordinal)
                ? htmlTitle[(htmlTitle.LastIndexOf("(", StringComparison.Ordinal) + 1)..htmlTitle.LastIndexOf(")", StringComparison.Ordinal)]
Esempio n. 2
0
        /// <summary>
        /// For every directory yielded by <c>EnumerateDirectories(directory, level)</c>, reads the IMDB id from the
        /// directory's XML metadata file, downloads the IMDB JSON for that id, and writes it into the same directory.
        /// </summary>
        /// <param name="directory">Root directory handed to <c>EnumerateDirectories</c>.</param>
        /// <param name="level">Forwarded to <c>EnumerateDirectories</c>; presumably the directory depth to enumerate (confirm against that helper).</param>
        /// <param name="overwrite">When false, a directory that already has a file matching <c>JsonMetadataSearchPattern</c> is skipped.</param>
        /// <param name="isTV">Selects the XML element that holds the IMDB id: <c>imdb_id</c> when true, <c>imdbid</c> otherwise.</param>
        /// <param name="log">Receives progress and warning messages; <c>TraceLog</c> is used when null.</param>
        /// <returns>A task that completes when all directories have been processed.</returns>
        internal static async Task DownloadImdbMetadataAsync(string directory, int level = 2, bool overwrite = false, bool isTV = false, Action<string>? log = null)
        {
            log ??= TraceLog;

            // The element name depends only on isTV, so it is chosen once rather than per directory.
            string idElementName = isTV ? "imdb_id" : "imdbid";

            await EnumerateDirectories(directory, level)
                .ParallelForEachAsync(
                    async movieDirectory =>
                    {
                        // Existing JSON metadata is only replaced when the caller asked for it.
                        bool alreadyDownloaded = !overwrite
                            && Directory.EnumerateFiles(movieDirectory, JsonMetadataSearchPattern, SearchOption.TopDirectoryOnly).Any();
                        if (alreadyDownloaded)
                        {
                            log($"Skip {movieDirectory}.");
                            return;
                        }

                        string? nfo = Directory
                            .EnumerateFiles(movieDirectory, XmlMetadataSearchPattern, SearchOption.TopDirectoryOnly)
                            .FirstOrDefault();
                        if (string.IsNullOrWhiteSpace(nfo))
                        {
                            log($"!Missing metadata {movieDirectory}.");
                            return;
                        }

                        XElement? root = XDocument.Load(nfo).Root;
                        string? imdbId = root?.Element(idElementName)?.Value;
                        if (string.IsNullOrWhiteSpace(imdbId))
                        {
                            // No IMDB id in the XML metadata: leave an empty JSON placeholder named with NotExistingFlag.
                            string placeholder = Path.Combine(movieDirectory, $"{NotExistingFlag}{JsonMetadataExtension}");
                            await File.WriteAllTextAsync(placeholder, "{}");
                            return;
                        }

                        var (imdbJson, year, regions) = await Retry.FixedIntervalAsync(
                            async () => await Imdb.DownloadJsonAsync($"https://www.imdb.com/title/{imdbId}"),
                            retryCount: 10);
                        Debug.Assert(!string.IsNullOrWhiteSpace(imdbJson));

                        if (string.IsNullOrWhiteSpace(year))
                        {
                            // The download returned no year: take it from the deserialized metadata instead.
                            JsonSerializerOptions options = new() { PropertyNameCaseInsensitive = true, IgnoreReadOnlyProperties = true };
                            ImdbMetadata imdbMetadata = JsonSerializer.Deserialize<ImdbMetadata>(imdbJson, options)
                                ?? throw new InvalidOperationException(imdbJson);
                            year = imdbMetadata.YearOfCurrentRegion;

                            if (string.IsNullOrWhiteSpace(year))
                            {
                                log($"!Year is missing for {imdbId}: {movieDirectory}");
                            }
                        }

                        if (regions.Length == 0)
                        {
                            log($"!Location is missing for {imdbId}: {movieDirectory}");
                        }

                        // The file name is "<id>.<year>.<up to five regions, comma separated><extension>".
                        string regionList = string.Join(",", regions.Take(5));
                        string json = Path.Combine(movieDirectory, $"{imdbId}.{year}.{regionList}{JsonMetadataExtension}");
                        log($"Downloaded https://www.imdb.com/title/{imdbId} to {json}.");
                        await File.WriteAllTextAsync(json, imdbJson);
                        log($"Saved to {json}.");
                    },
                    IOMaxDegreeOfParallelism);
        }
Esempio n. 3
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and extracts its <c>application/ld+json</c> script text,
        /// a year, and the regions listed in the page's "Country:" detail block.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>
        /// The JSON text, the year, and the regions. The year comes from the <c>#titleYear</c> element, then from
        /// the "See more release dates" link text, then from the deserialized metadata. The regions array is empty
        /// when no "Country:" block is found.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The metadata fallback was needed and the JSON deserialized to null.
        /// </exception>
        internal static async Task<(string Json, string Year, string[] Regions)> DownloadJsonAsync(string url)
        {
            using WebClient webClient = new();
            string html = await webClient.DownloadStringTaskAsync(url);
            CQ page = new(html);

            string json = page.Find(@"script[type=""application/ld+json""]").Text();

            // First choice: the "#titleYear" element, stripped of surrounding parentheses and whitespace.
            string year = page.Find(@"#titleYear").Text().Trim().TrimStart('(').TrimEnd(')').Trim();

            if (string.IsNullOrWhiteSpace(year))
            {
                // Second choice: the first four-digit run in the release dates link text.
                string releaseDateText = page.Find(@"a[title=""See more release dates""]").Text();
                Match yearMatch = Regex.Match(releaseDateText, "[0-9]{4}");
                if (yearMatch.Success)
                {
                    year = yearMatch.Value;
                    Debug.Assert(year.Length == 4);
                }
                else
                {
                    // Last choice: the year carried by the deserialized metadata.
                    JsonSerializerOptions options = new() { PropertyNameCaseInsensitive = true, IgnoreReadOnlyProperties = true };
                    ImdbMetadata imdbMetadata = JsonSerializer.Deserialize<ImdbMetadata>(json, options)
                        ?? throw new InvalidOperationException(url);
                    year = imdbMetadata.YearOfCurrentRegion;
                }
            }

            // The regions are the '|' separated entries of the first detail block that starts with "Country:".
            string? countryBlock = page
                .Find(@"#titleDetails .txt-block")
                .Elements
                .Select(element => new CQ(element).Text().Trim())
                .FirstOrDefault(text => text.StartsWith("Country:", StringComparison.InvariantCultureIgnoreCase));

            string[] regions = countryBlock is null
                ? Array.Empty<string>()
                : countryBlock
                    .Replace("Country:", string.Empty, StringComparison.InvariantCultureIgnoreCase)
                    .Split('|')
                    .Select(region => region.Trim())
                    .ToArray();

            return (json, year, regions);
        }