/// <summary>
/// Optionally parses the book at <paramref name="url"/>, fetches the chapters whose entries are vnthuquan.net URLs,
/// downloads the collected media files, normalizes the table of contents and assigns the identity.
/// </summary>
/// <param name="url">URL of the book; when null/blank, parsing and the first-chapter handling are skipped and every chapter entry is considered for fetching.</param>
/// <param name="onStart">Invoked with this parser before anything else happens.</param>
/// <param name="onParsed">Forwarded to <c>ParseAsync</c>.</param>
/// <param name="onCompleted">Invoked with this parser and the elapsed milliseconds once everything is done.</param>
/// <param name="onStartFetchChapter">Forwarded to <c>FetchChapterAsync</c>.</param>
/// <param name="onFetchChapterCompleted">Forwarded to <c>FetchChapterAsync</c>.</param>
/// <param name="onFetchChapterError">Forwarded to <c>FetchChapterAsync</c>.</param>
/// <param name="directoryOfImages">Directory that receives downloaded media files; falls back to "temp" when null.</param>
/// <param name="onStartDownload">Invoked with this parser and the target directory before downloads start.</param>
/// <param name="onDownloadCompleted">Forwarded to <c>UtilityService.DownloadFileAsync</c>.</param>
/// <param name="onDownloadError">Forwarded to <c>UtilityService.DownloadFileAsync</c>.</param>
/// <param name="parallelExecutions">True to start all chapter fetches at once; false to fetch them one after another.</param>
/// <param name="cancellationToken">Token passed to every asynchronous call.</param>
/// <returns>This parser instance.</returns>
public async Task<IBookParser> FetchAsync(
    string url = null,
    Action<IBookParser> onStart = null,
    Action<IBookParser, long> onParsed = null,
    Action<IBookParser, long> onCompleted = null,
    Action<int> onStartFetchChapter = null,
    Action<int, List<string>, long> onFetchChapterCompleted = null,
    Action<int, Exception> onFetchChapterError = null,
    string directoryOfImages = null,
    Action<IBookParser, string> onStartDownload = null,
    Action<string, string, long> onDownloadCompleted = null,
    Action<string, Exception> onDownloadError = null,
    bool parallelExecutions = true,
    CancellationToken cancellationToken = default)
{
    // prepare
    var stopwatch = new Stopwatch();
    stopwatch.Start();

    // when a URL is given, chapter 0 is fetched up-front, so the bulk fetch starts at 1
    var hasUrl = !string.IsNullOrWhiteSpace(url);
    var firstPending = hasUrl ? 1 : 0;

    // true when the chapter entry at the given position is a vnthuquan.net URL
    bool isRemoteChapter(int position)
    {
        var entry = this.Chapters[position];
        return entry.IsStartsWith("https://vnthuquan.net") || entry.IsStartsWith("http://vnthuquan.net");
    }

    // parse the book
    onStart?.Invoke(this);
    if (hasUrl)
    {
        await this.ParseAsync(url, onParsed, null, cancellationToken).ConfigureAwait(false);

        // fetch the first chapter
        var first = await this.FetchChapterAsync(0, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken).ConfigureAwait(false);

        // fill in information that is still missing (original, translator, credits)
        if (string.IsNullOrWhiteSpace(this.Original))
        {
            this.Original = this.GetOriginal(first);
        }

        if (string.IsNullOrWhiteSpace(this.Translator))
        {
            this.Translator = this.GetTranslator(first);
        }

        if (string.IsNullOrWhiteSpace(this.Credits))
        {
            this.Credits = this.GetCredits(first);
        }

        // cover image: only handled when the second element of the first chapter carries the "anhbia" marker
        var hasCoverMarkup = first != null && first.Count > 1 && first[1].PositionOf("=\"anhbia\"") > 0;
        if (hasCoverMarkup)
        {
            var cover = this.GetCoverImage(first);

            var coverIsCurrent = !string.IsNullOrWhiteSpace(this.Cover) && this.Cover.IsEquals(Definitions.MediaURI + cover.GetFilename());
            if (!coverIsCurrent)
            {
                this.MediaFileUrls.Add(cover);
                this.Cover = Definitions.MediaURI + cover.GetFilename();
            }

            // strip the first <img> tag (and the paragraphs it leaves empty) from the first chapter
            var opening = this.Chapters[0];
            if (opening.PositionOf(cover) > 0)
            {
                var tagStart = opening.PositionOf("<img");
                var tagEnd = opening.PositionOf(">", tagStart);
                var withoutImage = opening.Remove(tagStart, tagEnd - tagStart + 1);
                this.Chapters[0] = withoutImage
                    .Replace("<p></p>", "")
                    .Replace(StringComparison.OrdinalIgnoreCase, "<p align=\"center\"></p>", "");
            }
        }
    }

    // fetch chapters
    if (this.Chapters.Count > firstPending)
    {
        if (parallelExecutions)
        {
            // start every fetch immediately; non-URL entries get an already-completed placeholder
            var pending = new List<Task<List<string>>>();
            for (var position = firstPending; position < this.Chapters.Count; position++)
            {
                if (isRemoteChapter(position))
                {
                    pending.Add(this.FetchChapterAsync(position, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken));
                }
                else
                {
                    pending.Add(Task.FromResult<List<string>>(null));
                }
            }

            await Task.WhenAll(pending).ConfigureAwait(false);
        }
        else
        {
            for (var position = firstPending; position < this.Chapters.Count; position++)
            {
                if (!isRemoteChapter(position))
                {
                    continue;
                }

                await this.FetchChapterAsync(position, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken).ConfigureAwait(false);
            }
        }
    }

    // download image files
    if (this.MediaFileUrls.Count > 0)
    {
        if (directoryOfImages == null)
        {
            directoryOfImages = "temp";
        }

        onStartDownload?.Invoke(this, directoryOfImages);

        var downloads = this.MediaFileUrls.Select(uri => UtilityService.DownloadFileAsync(
            uri,
            Path.Combine(directoryOfImages, this.PermanentID + "-" + uri.GetFilename()),
            this.SourceUrl,
            onDownloadCompleted,
            onDownloadError,
            cancellationToken));
        await Task.WhenAll(downloads).ConfigureAwait(false);
    }

    // normalize TOC
    this.NormalizeTOC();

    // assign identity
    var hasValidIdentity = !string.IsNullOrWhiteSpace(this.ID) && this.ID.IsValidUUID();
    if (!hasValidIdentity)
    {
        this.ID = this.PermanentID;
    }

    // done
    stopwatch.Stop();
    onCompleted?.Invoke(this, stopwatch.ElapsedMilliseconds);
    return this;
}
/// <summary>
/// Optionally parses the book at <paramref name="url"/>, registers the cover image for download, fetches the chapters
/// whose entries are isach.info URLs (in delayed batches or sequentially), downloads the collected media files,
/// normalizes the table of contents and assigns the identity.
/// </summary>
/// <param name="url">URL of the book; when null/blank, parsing is skipped.</param>
/// <param name="onStart">Invoked with this parser before anything else happens.</param>
/// <param name="onParsed">Forwarded to <c>ParseAsync</c>.</param>
/// <param name="onCompleted">Invoked with this parser and the elapsed milliseconds once everything is done.</param>
/// <param name="onStartFetchChapter">Forwarded to <c>FetchChapterAsync</c>.</param>
/// <param name="onFetchChapterCompleted">Forwarded to <c>FetchChapterAsync</c>.</param>
/// <param name="onFetchChapterError">Forwarded to <c>FetchChapterAsync</c>.</param>
/// <param name="folderOfImages">Folder that receives downloaded media files; falls back to "temp" when null.</param>
/// <param name="onStartDownload">Invoked with this parser and the target folder before downloads start.</param>
/// <param name="onDownloadCompleted">Forwarded to <c>UtilityService.DownloadFileAsync</c>.</param>
/// <param name="onDownloadError">Forwarded to <c>UtilityService.DownloadFileAsync</c>.</param>
/// <param name="parallelExecutions">True to fetch chapters in concurrent batches; false to fetch them one at a time.</param>
/// <param name="cancellationToken">Token passed to every asynchronous call and delay.</param>
/// <returns>This parser instance.</returns>
public async Task<IBookParser> FetchAsync(
    string url = null,
    Action<IBookParser> onStart = null,
    Action<IBookParser, long> onParsed = null,
    Action<IBookParser, long> onCompleted = null,
    Action<int> onStartFetchChapter = null,
    Action<int, List<string>, long> onFetchChapterCompleted = null,
    Action<int, Exception> onFetchChapterError = null,
    string folderOfImages = null,
    Action<IBookParser, string> onStartDownload = null,
    Action<string, string, long> onDownloadCompleted = null,
    Action<string, Exception> onDownloadError = null,
    bool parallelExecutions = false,
    CancellationToken cancellationToken = default)
{
    // prepare
    var stopwatch = new Stopwatch();
    stopwatch.Start();

    // parse the book
    onStart?.Invoke(this);
    if (!string.IsNullOrWhiteSpace(url))
    {
        await this.ParseAsync(url, onParsed, null, cancellationToken).ConfigureAwait(false);
    }

    // cover image: queue the original for download and point Cover at the media URI
    if (!string.IsNullOrWhiteSpace(this.Cover) && !this.Cover.IsStartsWith(Definitions.MediaURI))
    {
        this.MediaFileUrls.Add(this.Cover);
        this.Cover = Definitions.MediaURI + this.Cover.GetFilename();
    }

    // true when a chapter entry is still an isach.info URL
    bool isChapterUrl(string chapter)
        => chapter.IsStartsWith("https://isach.info") || chapter.IsStartsWith("http://isach.info");

    // fetches chapters in batches of `step`; each task waits a random delay before fetching,
    // and a longer random delay separates full batches
    // (NOTE(review): the delays presumably exist to go easy on the remote site - confirm)
    async Task parallelFetch()
    {
        var chaptersOfBigBook = 39;
        var normalDelayMin = 456;
        var normalDelayMax = 1234;
        var mediumDelayMin = 2345;
        var mediumDelayMax = 4321;
        var longDelayMin = 3456;
        var longDelayMax = 5678;
        var step = 7;

        var start = 0;
        var end = start + step;
        var isCompleted = false;
        while (!isCompleted)
        {
            var tasks = new List<Task>();
            for (var index = start; index < end; index++)
            {
                if (index >= this.Chapters.Count)
                {
                    isCompleted = true;
                    break;
                }

                if (isChapterUrl(this.Chapters[index]))
                {
                    // A `for` loop variable is shared by all iterations, and the lambda reads it only
                    // after the delay (when the loop has already finished), so capture a per-iteration
                    // copy; otherwise every task of the batch would fetch the same, wrong index.
                    var chapterIndex = index;
                    tasks.Add(Task.Run(async () =>
                    {
                        var delay = this.Chapters.Count > chaptersOfBigBook
                            ? UtilityService.GetRandomNumber(mediumDelayMin, mediumDelayMax)
                            : UtilityService.GetRandomNumber(normalDelayMin, normalDelayMax);
                        await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
                        await this.FetchChapterAsync(chapterIndex, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken).ConfigureAwait(false);
                    }, cancellationToken));
                }
            }
            await Task.WhenAll(tasks).ConfigureAwait(false);

            // go next
            if (!isCompleted)
            {
                start += step;
                end += step;

                // pause only when another full batch follows
                if (end <= this.Chapters.Count)
                {
                    await Task.Delay(UtilityService.GetRandomNumber(longDelayMin, longDelayMax), cancellationToken).ConfigureAwait(false);
                }
            }
        }
    }

    // fetches chapters one at a time, waiting before each fetch; the wait grows at fixed
    // positions of the chapter list, depending on how many chapters have to be fetched
    async Task sequenceFetch()
    {
        var chaptersOfLargeBook = 69;
        var mediumPausePointOfLargeBook = 6;
        var longPausePointOfLargeBook = 29;
        var chaptersOfBigBook = 29;
        var mediumPausePointOfBigBook = 3;
        var longPausePointOfBigBook = 14;
        var normalDelayMin = 456;
        var normalDelayMax = 890;
        var mediumDelay = 4321;
        var longDelayOfBigBook = 7890;
        var longDelayOfLargeBook = 15431;

        // number of entries that still have to be fetched
        var totalChapters = 0;
        for (var index = 0; index < this.Chapters.Count; index++)
        {
            if (isChapterUrl(this.Chapters[index]))
            {
                totalChapters++;
            }
        }

        // pause points are keyed on the position in the chapter list (not on the number of fetched chapters)
        for (var chapterIndex = 0; chapterIndex < this.Chapters.Count; chapterIndex++)
        {
            if (!isChapterUrl(this.Chapters[chapterIndex]))
            {
                continue;
            }

            // medium pause on every multiple of the pause point (except position 0), short random delay otherwise
            var number = totalChapters > chaptersOfBigBook ? mediumPausePointOfLargeBook : mediumPausePointOfBigBook;
            var delay = chapterIndex > (number - 1) && chapterIndex % number == 0
                ? mediumDelay
                : UtilityService.GetRandomNumber(normalDelayMin, normalDelayMax);

            // long pause overrides the above at the long pause points
            if (totalChapters > chaptersOfLargeBook)
            {
                if (chapterIndex > longPausePointOfLargeBook && chapterIndex % (longPausePointOfLargeBook + 1) == 0)
                {
                    delay = longDelayOfLargeBook;
                }
            }
            else if (totalChapters > chaptersOfBigBook)
            {
                if (chapterIndex > longPausePointOfBigBook && chapterIndex % (longPausePointOfBigBook + 1) == 0)
                {
                    delay = longDelayOfBigBook;
                }
            }

            await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
            await this.FetchChapterAsync(chapterIndex, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken).ConfigureAwait(false);
        }
    }

    // fetch chapters
    if (this.Chapters.Count > (string.IsNullOrWhiteSpace(url) ? 0 : 1))
    {
        if (parallelExecutions)
        {
            await parallelFetch().ConfigureAwait(false);
        }
        else
        {
            await sequenceFetch().ConfigureAwait(false);
        }
    }

    // download image files
    if (this.MediaFileUrls.Count > 0)
    {
        folderOfImages = folderOfImages ?? "temp";
        onStartDownload?.Invoke(this, folderOfImages);
        await Task.WhenAll(this.MediaFileUrls.Select(uri => UtilityService.DownloadFileAsync(
            uri,
            Path.Combine(folderOfImages, this.PermanentID + "-" + uri.GetFilename()),
            this.SourceUrl,
            onDownloadCompleted,
            onDownloadError,
            cancellationToken))).ConfigureAwait(false);
    }

    // normalize TOC
    this.NormalizeTOC();

    // assign identity
    if (string.IsNullOrWhiteSpace(this.ID) || !this.ID.IsValidUUID())
    {
        this.ID = this.PermanentID;
    }

    // done
    stopwatch.Stop();
    onCompleted?.Invoke(this, stopwatch.ElapsedMilliseconds);
    return this;
}