Beispiel #1
0
        /// <summary>
        /// Parses the book (when a URL is given), fetches its chapters, downloads the collected media files,
        /// normalizes the table of contents and makes sure the book has an identity.
        /// </summary>
        /// <param name="url">URL of the book; when null or blank, parsing and the first-chapter handling are skipped and every chapter (from index 0) is considered for fetching</param>
        /// <param name="onStart">Invoked with this parser before anything else is done</param>
        /// <param name="onParsed">Passed through to <c>ParseAsync</c></param>
        /// <param name="onCompleted">Invoked with this parser and the total elapsed milliseconds when everything is done</param>
        /// <param name="onStartFetchChapter">Passed through to <c>FetchChapterAsync</c></param>
        /// <param name="onFetchChapterCompleted">Passed through to <c>FetchChapterAsync</c></param>
        /// <param name="onFetchChapterError">Passed through to <c>FetchChapterAsync</c></param>
        /// <param name="directoryOfImages">Directory that receives the downloaded media files; "temp" is used when null</param>
        /// <param name="onStartDownload">Invoked with this parser and the target directory right before the downloads start</param>
        /// <param name="onDownloadCompleted">Passed through to <c>UtilityService.DownloadFileAsync</c></param>
        /// <param name="onDownloadError">Passed through to <c>UtilityService.DownloadFileAsync</c></param>
        /// <param name="parallelExecutions">true to start all chapter fetches at once and await them together; false to fetch the chapters one by one</param>
        /// <param name="cancellationToken">Token that is flowed to every asynchronous operation</param>
        /// <returns>This parser instance</returns>
        public async Task<IBookParser> FetchAsync(string url = null,
                                                  Action<IBookParser> onStart = null, Action<IBookParser, long> onParsed = null, Action<IBookParser, long> onCompleted = null,
                                                  Action<int> onStartFetchChapter = null, Action<int, List<string>, long> onFetchChapterCompleted = null, Action<int, Exception> onFetchChapterError = null,
                                                  string directoryOfImages = null, Action<IBookParser, string> onStartDownload = null, Action<string, string, long> onDownloadCompleted = null, Action<string, Exception> onDownloadError = null,
                                                  bool parallelExecutions = true, CancellationToken cancellationToken = default)
        {
            // prepare
            var stopwatch = Stopwatch.StartNew();
            var hasUrl = !string.IsNullOrWhiteSpace(url);

            // parse the book
            onStart?.Invoke(this);
            if (hasUrl)
            {
                await this.ParseAsync(url, onParsed, null, cancellationToken).ConfigureAwait(false);

                // fetch the first chapter, its data is also the source of the additional information below
                var first = await this.FetchChapterAsync(0, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken).ConfigureAwait(false);

                // compute other information (original, translator, credits), only when not yet available
                if (string.IsNullOrWhiteSpace(this.Original))
                {
                    this.Original = this.GetOriginal(first);
                }

                if (string.IsNullOrWhiteSpace(this.Translator))
                {
                    this.Translator = this.GetTranslator(first);
                }

                if (string.IsNullOrWhiteSpace(this.Credits))
                {
                    this.Credits = this.GetCredits(first);
                }

                // cover image: only when the second part of the first chapter's data carries the "anhbia" marker
                if (first != null && first.Count > 1 && first[1].PositionOf("=\"anhbia\"") > 0)
                {
                    var cover = this.GetCoverImage(first);
                    var coverUri = Definitions.MediaURI + cover.GetFilename();
                    if (string.IsNullOrWhiteSpace(this.Cover) || !this.Cover.IsEquals(coverUri))
                    {
                        this.MediaFileUrls.Add(cover);
                        this.Cover = coverUri;
                    }

                    // remove the first image tag from the first chapter when that chapter references the cover
                    var firstChapter = this.Chapters[0];
                    if (firstChapter.PositionOf(cover) > 0)
                    {
                        // guard both positions: an unmatched "<img" or ">" would make Remove throw and abort the whole fetch
                        // (presumably PositionOf returns a negative index when nothing is found - TODO confirm)
                        var start = firstChapter.PositionOf("<img");
                        var end = start < 0 ? -1 : firstChapter.PositionOf(">", start);
                        if (start >= 0 && end > start)
                        {
                            this.Chapters[0] = firstChapter
                                .Remove(start, end - start + 1)
                                .Replace("<p></p>", "")
                                .Replace(StringComparison.OrdinalIgnoreCase, "<p align=\"center\"></p>", "");
                        }
                    }
                }
            }

            // fetch chapters (the first chapter was already fetched above when a URL was given)
            var firstIndex = hasUrl ? 1 : 0;
            if (this.Chapters.Count > firstIndex)
            {
                // only chapters that still hold a URL of the source site are fetched
                bool isChapterUrl(string chapter) => chapter.IsStartsWith("https://vnthuquan.net") || chapter.IsStartsWith("http://vnthuquan.net");

                if (parallelExecutions)
                {
                    var tasks = new List<Task<List<string>>>();
                    for (var index = firstIndex; index < this.Chapters.Count; index++)
                    {
                        if (isChapterUrl(this.Chapters[index]))
                        {
                            tasks.Add(this.FetchChapterAsync(index, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken));
                        }
                    }
                    await Task.WhenAll(tasks).ConfigureAwait(false);
                }
                else
                {
                    for (var index = firstIndex; index < this.Chapters.Count; index++)
                    {
                        if (isChapterUrl(this.Chapters[index]))
                        {
                            await this.FetchChapterAsync(index, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken).ConfigureAwait(false);
                        }
                    }
                }
            }

            // download image files, all at once
            if (this.MediaFileUrls.Count > 0)
            {
                directoryOfImages = directoryOfImages ?? "temp";
                onStartDownload?.Invoke(this, directoryOfImages);
                var downloads = this.MediaFileUrls.Select(uri => UtilityService.DownloadFileAsync(uri, Path.Combine(directoryOfImages, this.PermanentID + "-" + uri.GetFilename()), this.SourceUrl, onDownloadCompleted, onDownloadError, cancellationToken));
                await Task.WhenAll(downloads).ConfigureAwait(false);
            }

            // normalize TOC
            this.NormalizeTOC();

            // assign identity: fall back to the permanent identity when the current one is missing or not a valid UUID
            if (string.IsNullOrWhiteSpace(this.ID) || !this.ID.IsValidUUID())
            {
                this.ID = this.PermanentID;
            }

            // done
            stopwatch.Stop();
            onCompleted?.Invoke(this, stopwatch.ElapsedMilliseconds);
            return this;
        }
Beispiel #2
0
        /// <summary>
        /// Parses the book (when a URL is given), registers the cover image for downloading, fetches the chapters
        /// with pauses between requests, downloads the collected media files, normalizes the table of contents
        /// and makes sure the book has an identity.
        /// </summary>
        /// <param name="url">URL of the book; when null or blank, parsing is skipped</param>
        /// <param name="onStart">Invoked with this parser before anything else is done</param>
        /// <param name="onParsed">Passed through to <c>ParseAsync</c></param>
        /// <param name="onCompleted">Invoked with this parser and the total elapsed milliseconds when everything is done</param>
        /// <param name="onStartFetchChapter">Passed through to <c>FetchChapterAsync</c></param>
        /// <param name="onFetchChapterCompleted">Passed through to <c>FetchChapterAsync</c></param>
        /// <param name="onFetchChapterError">Passed through to <c>FetchChapterAsync</c></param>
        /// <param name="folderOfImages">Folder that receives the downloaded media files; "temp" is used when null</param>
        /// <param name="onStartDownload">Invoked with this parser and the target folder right before the downloads start</param>
        /// <param name="onDownloadCompleted">Passed through to <c>UtilityService.DownloadFileAsync</c></param>
        /// <param name="onDownloadError">Passed through to <c>UtilityService.DownloadFileAsync</c></param>
        /// <param name="parallelExecutions">true to fetch the chapters in batches of concurrent requests; false to fetch them one by one</param>
        /// <param name="cancellationToken">Token that is flowed to every asynchronous operation, including the pauses</param>
        /// <returns>This parser instance</returns>
        public async Task<IBookParser> FetchAsync(string url = null,
                                                  Action<IBookParser> onStart = null, Action<IBookParser, long> onParsed = null, Action<IBookParser, long> onCompleted = null,
                                                  Action<int> onStartFetchChapter = null, Action<int, List<string>, long> onFetchChapterCompleted = null, Action<int, Exception> onFetchChapterError = null,
                                                  string folderOfImages = null, Action<IBookParser, string> onStartDownload = null, Action<string, string, long> onDownloadCompleted = null, Action<string, Exception> onDownloadError = null,
                                                  bool parallelExecutions = false, CancellationToken cancellationToken = default)
        {
            // prepare
            var stopwatch = Stopwatch.StartNew();

            // parse the book
            onStart?.Invoke(this);
            if (!string.IsNullOrWhiteSpace(url))
            {
                await this.ParseAsync(url, onParsed, null, cancellationToken).ConfigureAwait(false);
            }

            // cover image: queue the original file for downloading and point the cover to the media URI
            if (!string.IsNullOrWhiteSpace(this.Cover) && !this.Cover.IsStartsWith(Definitions.MediaURI))
            {
                this.MediaFileUrls.Add(this.Cover);
                this.Cover = Definitions.MediaURI + this.Cover.GetFilename();
            }

            // only chapters that still hold a URL of the source site are fetched
            bool isChapterUrl(string chapter) => chapter.IsStartsWith("https://isach.info") || chapter.IsStartsWith("http://isach.info");

            // fetches the chapters in batches of 'step' concurrent requests, pausing between full batches
            async Task parallelFetch()
            {
                const int chaptersOfBigBook = 39;
                const int normalDelayMin    = 456;
                const int normalDelayMax    = 1234;
                const int mediumDelayMin    = 2345;
                const int mediumDelayMax    = 4321;
                const int longDelayMin      = 3456;
                const int longDelayMax      = 5678;
                const int step              = 7;

                var start = 0;
                var end   = start + step;
                var isCompleted = false;

                while (!isCompleted)
                {
                    var tasks = new List<Task>();
                    for (var index = start; index < end; index++)
                    {
                        if (index >= this.Chapters.Count)
                        {
                            isCompleted = true;
                            break;
                        }

                        if (!isChapterUrl(this.Chapters[index]))
                        {
                            continue;
                        }

                        // a 'for' loop variable is shared by all iterations, and the lambda below reads it only after
                        // the delay (when the loop is already over), so it needs its own copy of the current index
                        var chapterIndex = index;
                        tasks.Add(Task.Run(async () =>
                        {
                            // books with more chapters wait longer (milliseconds) before each request
                            var delay = this.Chapters.Count > chaptersOfBigBook
                                ? UtilityService.GetRandomNumber(mediumDelayMin, mediumDelayMax)
                                : UtilityService.GetRandomNumber(normalDelayMin, normalDelayMax);
                            await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
                            await this.FetchChapterAsync(chapterIndex, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken).ConfigureAwait(false);
                        }, cancellationToken));
                    }
                    await Task.WhenAll(tasks).ConfigureAwait(false);

                    // go next: the long pause is only taken when the next batch is a full one
                    if (!isCompleted)
                    {
                        start += step;
                        end   += step;
                        if (end <= this.Chapters.Count)
                        {
                            await Task.Delay(UtilityService.GetRandomNumber(longDelayMin, longDelayMax), cancellationToken).ConfigureAwait(false);
                        }
                    }
                }
            }

            // fetches the chapters one by one, with a pause (milliseconds) before each request
            async Task sequenceFetch()
            {
                const int chaptersOfLargeBook         = 69;
                const int mediumPausePointOfLargeBook = 6;
                const int longPausePointOfLargeBook   = 29;
                const int chaptersOfBigBook           = 29;
                const int mediumPausePointOfBigBook   = 3;
                const int longPausePointOfBigBook     = 14;
                const int normalDelayMin       = 456;
                const int normalDelayMax       = 890;
                const int mediumDelay          = 4321;
                const int longDelayOfBigBook   = 7890;
                const int longDelayOfLargeBook = 15431;

                // the size of the book is measured by the number of chapters that need to be fetched
                var totalChapters = 0;
                for (var index = 0; index < this.Chapters.Count; index++)
                {
                    if (isChapterUrl(this.Chapters[index]))
                    {
                        totalChapters++;
                    }
                }

                // the pause points are checked against the position of the chapter in the list (skipped chapters are counted too)
                for (var chapterIndex = 0; chapterIndex < this.Chapters.Count; chapterIndex++)
                {
                    if (!isChapterUrl(this.Chapters[chapterIndex]))
                    {
                        continue;
                    }

                    // medium pause at every 'mediumPausePoint'-th position, otherwise a normal pause
                    var mediumPausePoint = totalChapters > chaptersOfBigBook
                        ? mediumPausePointOfLargeBook
                        : mediumPausePointOfBigBook;
                    var delay = chapterIndex >= mediumPausePoint && chapterIndex % mediumPausePoint == 0
                        ? mediumDelay
                        : UtilityService.GetRandomNumber(normalDelayMin, normalDelayMax);

                    // long pause (overrides the above) at every (longPausePoint + 1)-th position of big/large books
                    if (totalChapters > chaptersOfLargeBook)
                    {
                        if (chapterIndex > longPausePointOfLargeBook && chapterIndex % (longPausePointOfLargeBook + 1) == 0)
                        {
                            delay = longDelayOfLargeBook;
                        }
                    }
                    else if (totalChapters > chaptersOfBigBook)
                    {
                        if (chapterIndex > longPausePointOfBigBook && chapterIndex % (longPausePointOfBigBook + 1) == 0)
                        {
                            delay = longDelayOfBigBook;
                        }
                    }

                    await Task.Delay(delay, cancellationToken).ConfigureAwait(false);
                    await this.FetchChapterAsync(chapterIndex, onStartFetchChapter, onFetchChapterCompleted, onFetchChapterError, cancellationToken).ConfigureAwait(false);
                }
            }

            // fetch chapters
            // NOTE(review): with a URL, a book of exactly one chapter is not fetched here even though no chapter
            // was fetched above - confirm that ParseAsync already provides the content of single-chapter books
            if (this.Chapters.Count > (string.IsNullOrWhiteSpace(url) ? 0 : 1))
            {
                if (parallelExecutions)
                {
                    await parallelFetch().ConfigureAwait(false);
                }
                else
                {
                    await sequenceFetch().ConfigureAwait(false);
                }
            }

            // download image files, all at once
            if (this.MediaFileUrls.Count > 0)
            {
                folderOfImages = folderOfImages ?? "temp";
                onStartDownload?.Invoke(this, folderOfImages);
                var downloads = this.MediaFileUrls.Select(uri => UtilityService.DownloadFileAsync(uri, Path.Combine(folderOfImages, this.PermanentID + "-" + uri.GetFilename()), this.SourceUrl, onDownloadCompleted, onDownloadError, cancellationToken));
                await Task.WhenAll(downloads).ConfigureAwait(false);
            }

            // normalize TOC
            this.NormalizeTOC();

            // assign identity: fall back to the permanent identity when the current one is missing or not a valid UUID
            if (string.IsNullOrWhiteSpace(this.ID) || !this.ID.IsValidUUID())
            {
                this.ID = this.PermanentID;
            }

            // done
            stopwatch.Stop();
            onCompleted?.Invoke(this, stopwatch.ElapsedMilliseconds);
            return this;
        }