/// <summary>
/// Exercises <c>Retry.FixedIntervalAsync</c> with an asynchronous operation that produces no result,
/// then checks how many times the operation and the retry handler ran and how far apart the
/// recorded attempts were.
/// </summary>
public async Task FixedIntervalAsyncWithoutResultTest()
{
    const int retryCount = 5;
    TimeSpan expectedDelay = TimeSpan.FromSeconds(1);
    int attempts = 0;
    int notifications = 0;

    // NOTE(review): presumably Increase() throws InvalidOperationException until it has been
    // called retryCount times and records a timestamp per call in Time - confirm against Counter<T>.
    Counter<InvalidOperationException> counter = new Counter<InvalidOperationException>(retryCount);

    await Retry.FixedIntervalAsync(
        async () =>
        {
            attempts++;
            counter.Increase();
            await Task.Yield();
        },
        retryCount,
        exception => exception is InvalidOperationException,
        (sender, e) =>
        {
            // Each notification must carry the failure, the configured delay and the attempt number so far.
            Assert.IsInstanceOfType(e.LastException, typeof(InvalidOperationException));
            Assert.AreEqual(expectedDelay, e.Delay);
            Assert.AreEqual(counter.Time.Count, e.CurrentRetryCount);
            notifications++;
        },
        expectedDelay,
        false);

    Assert.AreEqual(retryCount, attempts);
    Assert.AreEqual(retryCount - 1, notifications);
    Assert.AreEqual(retryCount, counter.Time.Count);

    // Pair every recorded time with its successor to get the gap between consecutive attempts.
    var earlier = counter.Time.Take(counter.Time.Count - 1);
    var later = counter.Time.Skip(1);
    TimeSpan[] gaps = earlier.Zip(later, (previous, next) => next - previous).ToArray();

    Assert.AreEqual(retryCount - 1, gaps.Length);
    Assert.IsTrue(gaps.All(gap => gap >= expectedDelay));
}
/// <summary>
/// Downloads the detail page of every summary in <paramref name="summaryJsonPath"/> whose link is not
/// already present in <paramref name="metadataJsonPath"/>, extracts the metadata from the page, and
/// stores it grouped by IMDb id.
/// </summary>
/// <param name="summaryJsonPath">JSON file holding a dictionary of <c>YtsSummary</c> values.</param>
/// <param name="metadataJsonPath">
/// JSON file holding a dictionary from IMDb id to <c>YtsMetadata[]</c>. It is read when it exists and
/// is passed to <c>SaveDetail</c> together with the collected metadata.
/// </param>
/// <param name="degreeOfParallelism">Value forwarded to <c>ParallelForEachAsync</c>.</param>
/// <exception cref="InvalidOperationException">Either JSON file deserializes to null.</exception>
internal static async Task DownloadMetadataAsync(string summaryJsonPath, string metadataJsonPath, int degreeOfParallelism = 4)
{
    string summaryJsonString = await File.ReadAllTextAsync(summaryJsonPath);
    Dictionary<string, YtsSummary> summaries = JsonSerializer.Deserialize<Dictionary<string, YtsSummary>>(summaryJsonString)
        ?? throw new InvalidOperationException(summaryJsonPath);

    ConcurrentDictionary<string, YtsMetadata[]> details = File.Exists(metadataJsonPath)
        ? new(JsonSerializer.Deserialize<Dictionary<string, YtsMetadata[]>>(await File.ReadAllTextAsync(metadataJsonPath))
            ?? throw new InvalidOperationException(metadataJsonPath))
        : new();

    // Only membership is needed. A set (unlike ToDictionary) does not throw when the same link
    // is stored under more than one IMDb id.
    HashSet<string> existingLinks = new(
        details.Values.SelectMany(group => group).Select(detail => detail.Link));

    int count = 1;
    await summaries
        .Values
        .Where(summary => !existingLinks.Contains(summary.Link))
        .ParallelForEachAsync(
            async (summary, index) =>
            {
                Log($"Start {index}:{summary.Link}");
                using WebClient webClient = new();
                try
                {
                    string html = await Retry.FixedIntervalAsync(async () => await webClient.DownloadStringTaskAsync(summary.Link), RetryCount);
                    CQ cq = new(html);
                    CQ info = cq.Find("#movie-info");
                    YtsMetadata detail = new(
                        Link: summary.Link,
                        Title: summary.Title,
                        ImdbId: info.Find("a.icon[title='IMDb Rating']").Attr("href").Replace("https://www.imdb.com/title/", string.Empty).Trim('/'),
                        ImdbRating: summary.ImdbRating,
                        Genres: summary.Genres,
                        Year: summary.Year,
                        Image: summary.Image,
                        Language: info.Find("h2 a span").Text().Trim().TrimStart('[').TrimEnd(']'),
                        Availabilities: info.Find("p.hidden-sm a[rel='nofollow']").ToDictionary(link => link.TextContent.Trim(), link => link.GetAttribute("href")));

                    // The read-modify-write of the per-IMDb-id array must be atomic across workers.
                    lock (AddItemLock)
                    {
                        details[detail.ImdbId] = details.TryGetValue(detail.ImdbId, out YtsMetadata[]? existing)
                            // Replace any previous entry for the same link instead of duplicating it.
                            ? existing.Where(item => !string.Equals(item.Link, detail.Link, StringComparison.OrdinalIgnoreCase)).Append(detail).ToArray()
                            : new[] { detail };
                    }
                }
                catch (Exception exception)
                {
                    // Best effort: a failed page is logged and skipped so the remaining pages still run.
                    Log($"{summary.Link} {exception}");
                }

                // Periodic checkpoint, counted across all workers.
                if (Interlocked.Increment(ref count) % SaveFrequency == 0)
                {
                    SaveDetail(metadataJsonPath, details);
                }

                Log($"End {index}:{summary.Link}");
            },
            degreeOfParallelism);

    SaveDetail(metadataJsonPath, details);
}
/// <summary>
/// For each directory yielded by <c>EnumerateDirectories(directory, level)</c>, reads the IMDb id from
/// the directory's XML metadata file, downloads the IMDb JSON for that id, and writes it into the
/// directory as <c>{imdbId}.{year}.{regions}</c> plus <c>JsonMetadataExtension</c>.
/// </summary>
/// <param name="directory">Root passed to <c>EnumerateDirectories</c>.</param>
/// <param name="level">Level passed to <c>EnumerateDirectories</c>.</param>
/// <param name="overwrite">When false, a directory that already has a file matching <c>JsonMetadataSearchPattern</c> is skipped.</param>
/// <param name="isTV">Selects the XML element that holds the id: <c>imdb_id</c> when true, <c>imdbid</c> otherwise.</param>
/// <param name="log">Message sink; <c>TraceLog</c> is used when null.</param>
internal static async Task DownloadImdbMetadataAsync(string directory, int level = 2, bool overwrite = false, bool isTV = false, Action<string>? log = null)
{
    log ??= TraceLog;

    await EnumerateDirectories(directory, level).ParallelForEachAsync(
        async movieDirectory =>
        {
            if (!overwrite && Directory.EnumerateFiles(movieDirectory, JsonMetadataSearchPattern, SearchOption.TopDirectoryOnly).Any())
            {
                log($"Skip {movieDirectory}.");
                return;
            }

            string? xmlMetadataFile = Directory
                .EnumerateFiles(movieDirectory, XmlMetadataSearchPattern, SearchOption.TopDirectoryOnly)
                .FirstOrDefault();
            if (string.IsNullOrWhiteSpace(xmlMetadataFile))
            {
                log($"!Missing metadata {movieDirectory}.");
                return;
            }

            string idElementName = isTV ? "imdb_id" : "imdbid";
            string? imdbId = XDocument.Load(xmlMetadataFile).Root?.Element(idElementName)?.Value;
            if (string.IsNullOrWhiteSpace(imdbId))
            {
                // No id to look up: leave an empty JSON placeholder named after NotExistingFlag.
                string placeholder = Path.Combine(movieDirectory, $"{NotExistingFlag}{JsonMetadataExtension}");
                await File.WriteAllTextAsync(placeholder, "{}");
                return;
            }

            (string imdbJson, string year, string[] regions) = await Retry.FixedIntervalAsync(
                async () => await Imdb.DownloadJsonAsync($"https://www.imdb.com/title/{imdbId}"),
                retryCount: 10);
            Debug.Assert(!string.IsNullOrWhiteSpace(imdbJson));

            if (string.IsNullOrWhiteSpace(year))
            {
                // Fall back to the year carried inside the downloaded JSON itself.
                JsonSerializerOptions options = new JsonSerializerOptions
                {
                    PropertyNameCaseInsensitive = true,
                    IgnoreReadOnlyProperties = true
                };
                ImdbMetadata parsed = JsonSerializer.Deserialize<ImdbMetadata>(imdbJson, options)
                    ?? throw new InvalidOperationException(imdbJson);
                year = parsed.YearOfCurrentRegion;
            }

            if (string.IsNullOrWhiteSpace(year))
            {
                log($"!Year is missing for {imdbId}: {movieDirectory}");
            }

            if (!regions.Any())
            {
                log($"!Location is missing for {imdbId}: {movieDirectory}");
            }

            // At most five regions go into the file name.
            string regionList = string.Join(",", regions.Take(5));
            string jsonFile = Path.Combine(movieDirectory, $"{imdbId}.{year}.{regionList}{JsonMetadataExtension}");
            log($"Downloaded https://www.imdb.com/title/{imdbId} to {jsonFile}.");
            await File.WriteAllTextAsync(jsonFile, imdbJson);
            log($"Saved to {jsonFile}.");
        },
        IOMaxDegreeOfParallelism);
}
/// <summary>
/// Walks the <c>browse-movies</c> pages starting at <paramref name="index"/>, extracts a
/// <c>YtsSummary</c> for every <c>.browse-movie-wrap</c> element, and writes the accumulated
/// summaries (keyed by link) to <paramref name="jsonPath"/> as indented JSON.
/// </summary>
/// <param name="jsonPath">JSON file that is loaded when it exists and (re)written with the merged summaries.</param>
/// <param name="continue">Predicate on the page index that decides whether to fetch that page; defaults to always true.</param>
/// <param name="index">First page index to download.</param>
/// <exception cref="InvalidOperationException">The existing file deserializes to null.</exception>
internal static async Task DownloadSummariesAsync(string jsonPath, Func<int, bool>? @continue = null, int index = 1)
{
    @continue ??= _ => true;

    Dictionary<string, YtsSummary> allSummaries;
    if (File.Exists(jsonPath))
    {
        allSummaries = JsonSerializer.Deserialize<Dictionary<string, YtsSummary>>(await File.ReadAllTextAsync(jsonPath))
            ?? throw new InvalidOperationException(jsonPath);
    }
    else
    {
        allSummaries = new();
    }

    using WebClient webClient = new();
    while (@continue(index))
    {
        string pageUrl = $"{BaseUrl}/browse-movies?page={index}";
        Log($"Start {pageUrl}");
        string html = await Retry.FixedIntervalAsync(async () => await webClient.DownloadStringTaskAsync(pageUrl), RetryCount);
        CQ page = new(html);

        // A page without any movie entries ends the walk.
        if (page[".browse-movie-wrap"].IsEmpty())
        {
            Log($"! {pageUrl} is empty");
            break;
        }

        YtsSummary[] pageSummaries = page
            .Find(".browse-movie-wrap")
            .Select(dom =>
            {
                CQ movie = new(dom);
                CQ titleAnchor = movie.Find(".browse-movie-title");
                return new YtsSummary(
                    Link: titleAnchor.Attr("href"),
                    Title: titleAnchor.Text(),
                    ImdbRating: movie.Find(".rating").Text().Replace(" / 10", string.Empty),
                    Genres: movie.Find(@"h4[class!=""rating""]").Select(genre => genre.TextContent).ToArray(),
                    // -1 marks a year that could not be parsed.
                    Year: int.TryParse(movie.Find(".browse-movie-year").Text(), out int parsedYear) ? parsedYear : -1,
                    Image: movie.Find(".img-responsive").Data<string>("cfsrc"));
            })
            .ToArray();

        // Later pages overwrite earlier entries that share a link.
        foreach (YtsSummary summary in pageSummaries)
        {
            allSummaries[summary.Link] = summary;
        }

        // Checkpoint on every page index that is a multiple of SaveFrequency.
        if (index % SaveFrequency == 0)
        {
            await SaveAsync();
        }

        Log($"End {pageUrl}");
        index++;
    }

    await SaveAsync();

    // Serializes everything collected so far and overwrites the target file.
    async Task SaveAsync()
    {
        string serialized = JsonSerializer.Serialize(allSummaries, new JsonSerializerOptions { WriteIndented = true });
        await File.WriteAllTextAsync(jsonPath, serialized);
    }
}
/// <summary>
/// Runs <c>SaveCharacterWithRetry</c> for every item of the context's bronze, Liushutong, oracle and
/// seal image sets, in that order, with the whole pass wrapped in <c>Retry.FixedIntervalAsync</c>
/// (retry count 10, interval 10 seconds).
/// </summary>
public async Task SaveWithRetryAsync()
{
    TimeSpan pause = TimeSpan.FromSeconds(10);

    await Retry.FixedIntervalAsync(
        async () =>
        {
            await this.context.BronzeImages().ForEachAsync(image => SaveCharacterWithRetry(this.cacheRoot, image));
            await this.context.LiushutongImages().ForEachAsync(image => SaveCharacterWithRetry(this.cacheRoot, image));
            await this.context.OracleImages().ForEachAsync(image => SaveCharacterWithRetry(this.cacheRoot, image));
            await this.context.SealImages().ForEachAsync(image => SaveCharacterWithRetry(this.cacheRoot, image));
        },
        retryCount: 10,
        retryInterval: pause);
}
/// <summary>
/// Reads the movie list from <c>ListFile</c> and, for every movie whose HTML file is not yet present in
/// <c>ItemsDirectory</c>, downloads the movie page and saves it there.
/// </summary>
/// <exception cref="InvalidOperationException"><c>ListFile</c> deserializes to null.</exception>
internal static async Task DownloadItemsAsync()
{
    string json = await File.ReadAllTextAsync(ListFile);

    // Deserialize returns null for a JSON "null" document; fail with a clear message instead of
    // dereferencing null below.
    YtsMovieSummary[] movies = JsonSerializer.Deserialize<YtsMovieSummary[]>(json)
        ?? throw new InvalidOperationException(ListFile);

    await movies.ParallelForEachAsync(async movie =>
    {
        // The local file is named after the last segment of the movie's URL path.
        string file = Path.Combine(ItemsDirectory, $"{Path.GetFileName(new Uri(movie.Link).LocalPath)}{HtmlExtension}");
        if (File.Exists(file))
        {
            return;
        }

        using WebClient webClient = new WebClient();
        string html = await Retry.FixedIntervalAsync(async () => await webClient.DownloadStringTaskAsync(movie.Link), RetryCount);
        Log($"Downloaded {movie.Link}");
        await File.WriteAllTextAsync(file, html);
        Log($"Saved {file}");
    });
}
/// <summary>
/// Downloads consecutive <c>browse-movies</c> pages starting at <paramref name="index"/> and saves each
/// one to <c>ListDirectory</c>, stopping at the first page that has no <c>.browse-movie-wrap</c> element.
/// </summary>
/// <param name="index">First page index to download.</param>
internal static async Task DownloadMovieListAsync(int index = 1)
{
    // One client serves every page; the requests are sequential, so there is no need to create
    // and dispose a client per iteration.
    using WebClient webClient = new WebClient();
    for (; ; index++)
    {
        string url = $"{BaseUrl}/browse-movies?page={index}";
        string html = await Retry.FixedIntervalAsync(async () => await webClient.DownloadStringTaskAsync(url), RetryCount);
        Log($"Downloaded {url}");

        CQ cqHtml = new CQ(html);
        if (cqHtml[".browse-movie-wrap"].IsEmpty())
        {
            // An empty page ends the walk and is not saved.
            break;
        }

        string file = Path.Combine(ListDirectory, $"browse-movies-{index}{HtmlExtension}");
        await File.WriteAllTextAsync(file, html);
        Log($"Saved {file}");
    }
}
/// <summary>
/// Crawls <paramref name="count"/> list pages of <paramref name="baseUrl"/> starting at
/// <paramref name="startIndex"/>, downloads every entry linked from them, saves title and content of
/// each entry to <paramref name="entryJsonPath"/>, and then logs, for every IMDb id mentioned in an
/// entry, the matching items from the library, x265, H.264 or YTS metadata (first match wins, in that order).
/// </summary>
/// <param name="baseUrl">Site root; list pages are requested as <c>{baseUrl}/page/{index}/</c>.</param>
/// <param name="startIndex">Index of the first list page.</param>
/// <param name="count">Number of list pages to request.</param>
/// <param name="entryJsonPath">Output file for the downloaded entries (indented JSON).</param>
/// <param name="x265JsonPath">Existing JSON file: IMDb id to <c>RarbgMetadata[]</c>.</param>
/// <param name="h264JsonPath">Existing JSON file: IMDb id to <c>RarbgMetadata[]</c>.</param>
/// <param name="ytsJsonPath">Existing JSON file: IMDb id to <c>YtsMetadata[]</c>.</param>
/// <param name="libraryJsonPath">Existing JSON file: IMDb id to a dictionary of <c>VideoMetadata</c>.</param>
/// <param name="log">Message sink; <c>Trace.WriteLine</c> is used when null.</param>
internal static async Task DownloadMetadataAsync(
    string baseUrl,
    int startIndex,
    int count,
    string entryJsonPath,
    string x265JsonPath,
    string h264JsonPath,
    string ytsJsonPath,
    string libraryJsonPath,
    Action<string>? log = null)
{
    log ??= message => Trace.WriteLine(message);

    // Phase 1: collect the entry links from the list pages.
    // NOTE(review): entryLinks and webClient are shared by every callback, which assumes
    // ForEachAsync runs the callbacks one at a time - confirm.
    List<string> entryLinks = new();
    using WebClient webClient = new();
    await Enumerable
        .Range(startIndex, count)
        .Select(index => $"{baseUrl}/page/{index}/")
        .ForEachAsync(async url =>
        {
            try
            {
                string html = await Retry.FixedIntervalAsync(async () => await webClient.DownloadStringTaskAsync(url));
                CQ listCQ = html;
                log($"Done {url}");
                listCQ
                    .Find("h1.entry-title a")
                    .Select(entryLink => entryLink.GetAttribute("href"))
                    .ForEach(entryLinks.Add);
            }
            catch (Exception exception) when (exception.IsNotCritical())
            {
                // Best effort: a failed list page is logged and skipped.
                log(exception.ToString());
            }
        });

    // Phase 2: download every entry, each callback with its own client.
    ConcurrentDictionary<string, EntryMetadata> entryMetadata = new();
    await entryLinks.ParallelForEachAsync(
        async entryLink =>
        {
            using WebClient webClient = new();
            try
            {
                string html = await Retry.FixedIntervalAsync(async () => await webClient.DownloadStringTaskAsync(entryLink));
                CQ entryCQ = html;
                string title = entryCQ.Find("h1.entry-title").Text().Trim();
                log($"Done {title} {entryLink}");
                entryMetadata[entryLink] = new EntryMetadata(
                    title,
                    entryCQ.Find("div.entry-content").Html());
            }
            catch (Exception exception) when (exception.IsNotCritical())
            {
                log(exception.ToString());
            }
        },
        4);

    string jsonString = JsonSerializer.Serialize(entryMetadata, new() { WriteIndented = true });
    await File.WriteAllTextAsync(entryJsonPath, jsonString);

    // Phase 3: cross-reference the IMDb ids found in the entries with the existing metadata.
    Dictionary<string, RarbgMetadata[]> x265Metadata = JsonSerializer.Deserialize<Dictionary<string, RarbgMetadata[]>>(await File.ReadAllTextAsync(x265JsonPath))!;
    Dictionary<string, RarbgMetadata[]> h264Metadata = JsonSerializer.Deserialize<Dictionary<string, RarbgMetadata[]>>(await File.ReadAllTextAsync(h264JsonPath))!;
    Dictionary<string, YtsMetadata[]> ytsMetadata = JsonSerializer.Deserialize<Dictionary<string, YtsMetadata[]>>(await File.ReadAllTextAsync(ytsJsonPath))!;
    Dictionary<string, Dictionary<string, VideoMetadata>> libraryMetadata = JsonSerializer.Deserialize<Dictionary<string, Dictionary<string, VideoMetadata>>>(await File.ReadAllTextAsync(libraryJsonPath))!;

    entryMetadata
        .SelectMany(entry => Regex
            // The dot is escaped so that only a literal "imdb.com" matches.
            .Matches(entry.Value.Content, @"imdb\.com/title/(tt[0-9]+)")
            .Where(match => match.Success)
            .Select(match => (Link: entry.Key, match.Groups[1].Value)))
        .Distinct(imdbId => imdbId.Value)
        .ForEach(imdbId =>
        {
            // Already in the library: list the library items only.
            if (libraryMetadata.TryGetValue(imdbId.Value, out Dictionary<string, VideoMetadata>? libraryVideos) && libraryVideos.Any())
            {
                libraryVideos.ForEach(video => log(video.Key));
                log(string.Empty);
                return;
            }

            if (x265Metadata.TryGetValue(imdbId.Value, out RarbgMetadata[]? x265Videos))
            {
                log(imdbId.Link);
                x265Videos.ForEach(metadata => log($"{metadata.Link} {metadata.Title}"));
                log(string.Empty);
                return;
            }

            if (h264Metadata.TryGetValue(imdbId.Value, out RarbgMetadata[]? h264Videos))
            {
                log(imdbId.Link);
                h264Videos.ForEach(metadata => log($"{metadata.Link} {metadata.Title}"));
                log(string.Empty);
                return;
            }

            if (ytsMetadata.TryGetValue(imdbId.Value, out YtsMetadata[]? ytsVideos))
            {
                log(imdbId.Link);
                ytsVideos.ForEach(metadata => log($"{metadata.Link} {metadata.Title}"));
                log(string.Empty);
            }
        });
}
/// <summary>
/// Downloads the index page at <paramref name="indexUrl"/>, follows every <c>#inner-slider li a</c>
/// link on it, saves title and content of each linked page to <paramref name="rareJsonPath"/>, and
/// then logs, for every IMDb id mentioned in a page, the matching items from the library, x265,
/// H.264 or YTS metadata (first match wins, in that order).
/// </summary>
/// <param name="indexUrl">URL of the index page that lists the links to download.</param>
/// <param name="rareJsonPath">Output file for the downloaded pages (indented JSON).</param>
/// <param name="x265JsonPath">Existing JSON file: IMDb id to <c>RarbgMetadata[]</c>.</param>
/// <param name="h264JsonPath">Existing JSON file: IMDb id to <c>RarbgMetadata[]</c>.</param>
/// <param name="ytsJsonPath">Existing JSON file: IMDb id to <c>YtsMetadata[]</c>.</param>
/// <param name="libraryJsonPath">Existing JSON file: IMDb id to a dictionary of <c>VideoMetadata</c>.</param>
/// <param name="log">Message sink; <c>Trace.WriteLine</c> is used when null.</param>
internal static async Task DownloadMetadataAsync(
    string indexUrl,
    string rareJsonPath,
    string x265JsonPath,
    string h264JsonPath,
    string ytsJsonPath,
    string libraryJsonPath,
    Action<string>? log = null)
{
    log ??= message => Trace.WriteLine(message);

    // Phase 1: read the links from the index page.
    using WebClient webClient = new();
    string html = await Retry.FixedIntervalAsync(async () => await webClient.DownloadStringTaskAsync(indexUrl));
    CQ indexCQ = html;
    string[] links = indexCQ
        .Find("#inner-slider li a")
        .Select(link => link.GetAttribute("href"))
        .ToArray();

    // Phase 2: download every linked page, each callback with its own client.
    ConcurrentDictionary<string, RareMetadata> rareMetadata = new();
    await links.ParallelForEachAsync(
        async link =>
        {
            using WebClient webClient = new();
            try
            {
                string html = await Retry.FixedIntervalAsync(async () => await webClient.DownloadStringTaskAsync(link));
                CQ rareCQ = html;
                string title = rareCQ.Find("#content article h1").Text().Trim();
                log($"Done {title} {link}");
                rareMetadata[link] = new RareMetadata(
                    title,
                    rareCQ.Find("#content article div.entry-content").Html());
            }
            catch (Exception exception) when (exception.IsNotCritical())
            {
                // Best effort: a failed page is logged and skipped.
                log(exception.ToString());
            }
        },
        4);

    string jsonString = JsonSerializer.Serialize(rareMetadata, new() { WriteIndented = true });
    await File.WriteAllTextAsync(rareJsonPath, jsonString);

    // Phase 3: cross-reference the IMDb ids found in the pages with the existing metadata.
    Dictionary<string, RarbgMetadata[]> x265Metadata = JsonSerializer.Deserialize<Dictionary<string, RarbgMetadata[]>>(await File.ReadAllTextAsync(x265JsonPath))!;
    Dictionary<string, RarbgMetadata[]> h264Metadata = JsonSerializer.Deserialize<Dictionary<string, RarbgMetadata[]>>(await File.ReadAllTextAsync(h264JsonPath))!;
    Dictionary<string, YtsMetadata[]> ytsMetadata = JsonSerializer.Deserialize<Dictionary<string, YtsMetadata[]>>(await File.ReadAllTextAsync(ytsJsonPath))!;
    Dictionary<string, Dictionary<string, VideoMetadata>> libraryMetadata = JsonSerializer.Deserialize<Dictionary<string, Dictionary<string, VideoMetadata>>>(await File.ReadAllTextAsync(libraryJsonPath))!;

    rareMetadata
        .SelectMany(rare => Regex
            // The dot is escaped so that only a literal "imdb.com" matches.
            .Matches(rare.Value.Content, @"imdb\.com/title/(tt[0-9]+)")
            .Where(match => match.Success)
            .Select(match => (Link: rare.Key, match.Groups[1].Value)))
        .Distinct(imdbId => imdbId.Value)
        .ForEach(imdbId =>
        {
            // Already in the library: list the library items only.
            if (libraryMetadata.TryGetValue(imdbId.Value, out Dictionary<string, VideoMetadata>? libraryVideos) && libraryVideos.Any())
            {
                libraryVideos.ForEach(video => log(video.Key));
                log(string.Empty);
                return;
            }

            if (x265Metadata.TryGetValue(imdbId.Value, out RarbgMetadata[]? x265Videos))
            {
                log(imdbId.Link);
                x265Videos.ForEach(metadata => log($"{metadata.Link} {metadata.Title}"));
                log(string.Empty);
                return;
            }

            if (h264Metadata.TryGetValue(imdbId.Value, out RarbgMetadata[]? h264Videos))
            {
                log(imdbId.Link);
                h264Videos.ForEach(metadata => log($"{metadata.Link} {metadata.Title}"));
                log(string.Empty);
                return;
            }

            if (ytsMetadata.TryGetValue(imdbId.Value, out YtsMetadata[]? ytsVideos))
            {
                log(imdbId.Link);
                ytsVideos.ForEach(metadata => log($"{metadata.Link} {metadata.Title}"));
                log(string.Empty);
            }
        });
}