/// <summary>
/// Crawls the chapters of a book from isach.info, downloads the media files (the cover and the
/// files referenced by the chapters) and normalizes the table of contents of the book.
/// </summary>
/// <param name="book">The book to fill. Its chapters, table of contents, cover and media files are updated in place.</param>
/// <param name="folder">The folder that contains the media folder. When blank, only <c>Utils.MediaFolder</c> is used as the path.</param>
/// <param name="cancellationToken">The token used to cancel delays, chapter requests and file downloads.</param>
/// <param name="onProcess">Optional. Receives a progress message before each long pause of the slow crawl.</param>
/// <param name="onChapterCompleted">
/// Optional. Receives the chapter URL and the chapter contents (title, body) with two entries appended:
/// the position of the chapter and the total number of chapters.
/// NOTE(review): the fast crawl reports a zero-based position while the slow crawl reports a one-based
/// position - kept as is because callers may depend on it; confirm which one is intended.
/// </param>
/// <param name="onChapterError">Optional. Receives the chapter URL and the exception when a chapter cannot be fetched or stored.</param>
/// <param name="onDownloadFileCompleted">Passed through to <c>Utils.DownloadFileAsync</c>.</param>
/// <param name="onDownloadFileError">Passed through to <c>Utils.DownloadFileAsync</c>.</param>
/// <param name="crawlMethod">
/// <c>(int)CrawMethods.Fast</c> crawls batches of chapters in parallel, <c>(int)CrawMethods.Slow</c> crawls
/// chapter by chapter; any other value selects the fast method when <c>Utility.GetRandomNumber() % 7 == 0</c>
/// and the slow method otherwise.
/// </param>
/// <returns>The same <paramref name="book"/> instance.</returns>
/// <exception cref="OperationCanceledException">The <paramref name="cancellationToken"/> was cancelled.</exception>
public static async Task<Book> FetchChapters(Book book, string folder, CancellationToken cancellationToken, Action<string> onProcess, Action<string, List<string>> onChapterCompleted, Action<string, Exception> onChapterError, Action<string, string> onDownloadFileCompleted, Action<string, Exception> onDownloadFileError, int crawlMethod) {
    const string siteUri = "http://isach.info";
    const string waitingMessage = "\r\n" + "..... Wait for few seconds before continue with more chapters......." + "\r\n";

    // Only non-empty URLs that point to the site are crawled.
    Func<string, bool> isCrawlable = url => !string.IsNullOrEmpty(url) && url.StartsWith(siteUri, StringComparison.Ordinal);

    // The referrer of a chapter is the URL of the previous chapter, falling back to the source URI of the book.
    Func<int, string> getReferUri = index => {
        string referUri = index > 0 && index < book.ChapterUrls.Count ? book.ChapterUrls[index - 1] : book.SourceUri;
        return string.IsNullOrEmpty(referUri) ? book.SourceUri : referUri;
    };

    // Stores the fetched contents (title, body) as the chapter at the given index.
    // Nothing is stored when both the title and the body are empty.
    Action<int, List<string>, bool> storeChapter = (chapterIndex, contents, syncTOC) => {
        if (contents[0].Equals("") && contents[1].Equals("")) {
            return;
        }

        bool hasTOCItem = book.TOCs != null && book.TOCs.Count > chapterIndex;
        string title = contents[0];
        if (string.IsNullOrWhiteSpace(title)) {
            // no title in the fetched contents: take the one from the table of contents
            if (hasTOCItem) {
                contents[0] = book.GetTOCItem(chapterIndex);
            }
        }
        else if (syncTOC && hasTOCItem && book.TOCs[chapterIndex].IndexOf(title, StringComparison.OrdinalIgnoreCase) < 0) {
            // the table of contents does not contain the fetched title: replace the item
            book.TOCs[chapterIndex] = title;
        }

        book.Chapters[chapterIndex] = (!string.IsNullOrWhiteSpace(contents[0]) ? "<h1>" + contents[0] + "</h1>" : "")
            + (contents[1].Equals("") ? "--(empty)--" : contents[1]);
    };

    // Fetches one chapter, stores it and reports the outcome through the callbacks.
    Func<int, string, int, bool, Task> crawlChapter = async (chapterIndex, chapterUrl, reportedPosition, syncTOC) => {
        try {
            List<string> contents = await ISach.GetChapter(chapterUrl, getReferUri(chapterIndex), cancellationToken);
            cancellationToken.ThrowIfCancellationRequested();
            if (contents == null) {
                throw new InvalidOperationException("No content was returned for the chapter");
            }

            storeChapter(chapterIndex, contents, syncTOC);
            if (onChapterCompleted != null) {
                contents.Add(reportedPosition.ToString());
                contents.Add(book.Chapters.Count.ToString());
                onChapterCompleted(chapterUrl, contents);
            }
        }
        catch (Exception ex) {
            // A cancellation requested by the caller must stop the crawl instead of being reported as a
            // chapter error. Other cancellations (e.g. a timeout of the request) are still chapter errors.
            if (ex is OperationCanceledException && cancellationToken.IsCancellationRequested) {
                throw;
            }
            if (onChapterError != null) {
                onChapterError(chapterUrl, ex);
            }
        }
    };

    // Fast method: crawls the chapters in parallel, batch by batch.
    Func<Task> fastCrawl = async () => {
        const int chaptersOfBigBook = 39;
        const int normalDelayMin = 456, normalDelayMax = 1234;
        const int mediumDelayMin = 2345, mediumDelayMax = 4321;
        const int longDelayMin = 3456, longDelayMax = 5678;
        const int batchSize = 7;

        int batchStart = 0;
        bool isCompleted = false;
        while (!isCompleted) {
            // both lists are indexed below, so never go beyond the shorter one
            int crawlableCount = Math.Min(book.Chapters.Count, book.ChapterUrls.Count);
            int batchEnd = batchStart + batchSize;

            List<Task> fetchingTasks = new List<Task>();
            for (int index = batchStart; index < batchEnd; index++) {
                if (index >= crawlableCount) {
                    isCompleted = true;
                    break;
                }

                string chapterUrl = book.ChapterUrls[index];
                if (!isCrawlable(chapterUrl)) {
                    continue;
                }

                // Capture the position itself: looking it up by URL is O(n) and picks the wrong chapter
                // when the same URL appears more than once.
                int chapterIndex = index;
                fetchingTasks.Add(Task.Run(async () => {
                    int delay = book.ChapterUrls.Count > chaptersOfBigBook
                        ? Utility.GetRandomNumber(mediumDelayMin, mediumDelayMax)
                        : Utility.GetRandomNumber(normalDelayMin, normalDelayMax);
                    await Task.Delay(delay, cancellationToken);
                    await crawlChapter(chapterIndex, chapterUrl, chapterIndex, false);
                }, cancellationToken));
            }
            await Task.WhenAll(fetchingTasks);

            // go next
            if (!isCompleted) {
                batchStart = batchEnd;
                // NOTE(review): the long pause only happens when the next batch is a full one - kept as is.
                if (batchStart + batchSize <= crawlableCount) {
                    await Task.Delay(Utility.GetRandomNumber(longDelayMin, longDelayMax), cancellationToken);
                }
            }
        }
    };

    // Slow method: crawls the chapters one by one, with a delay before each and periodic longer pauses.
    Func<Task> slowCrawl = async () => {
        const int chaptersOfLargeBook = 69, mediumPausePointOfLargeBook = 6, longPausePointOfLargeBook = 29;
        const int chaptersOfBigBook = 29, mediumPausePointOfBigBook = 3, longPausePointOfBigBook = 14;
        const int normalDelayMin = 456, normalDelayMax = 890, mediumDelay = 4321;
        const int longDelayOfBigBook = 7890, longDelayOfLargeBook = 15431;

        int totalChapters = 0;
        for (int index = 0; index < book.ChapterUrls.Count; index++) {
            if (isCrawlable(book.ChapterUrls[index])) {
                totalChapters++;
            }
        }

        // the pause points only depend on the size of the book
        int mediumPauseInterval = totalChapters > chaptersOfBigBook ? mediumPausePointOfLargeBook : mediumPausePointOfBigBook;
        int longPauseInterval = 0, longDelay = 0;
        if (totalChapters > chaptersOfLargeBook) {
            longPauseInterval = longPausePointOfLargeBook + 1;
            longDelay = longDelayOfLargeBook;
        }
        else if (totalChapters > chaptersOfBigBook) {
            longPauseInterval = longPausePointOfBigBook + 1;
            longDelay = longDelayOfBigBook;
        }

        // counts the chapters that were actually crawled (skipped URLs do not count)
        int chapterCounter = 0;
        for (int chapterIndex = 0; chapterIndex < book.ChapterUrls.Count; chapterIndex++) {
            string chapterUrl = book.ChapterUrls[chapterIndex];
            if (!isCrawlable(chapterUrl)) {
                continue;
            }

            int delay = chapterCounter >= mediumPauseInterval && chapterCounter % mediumPauseInterval == 0
                ? mediumDelay
                : Utility.GetRandomNumber(normalDelayMin, normalDelayMax);
            if (longPauseInterval > 0 && chapterCounter >= longPauseInterval && chapterCounter % longPauseInterval == 0) {
                if (onProcess != null) {
                    onProcess(waitingMessage);
                }
                delay = longDelay;
            }
            await Task.Delay(delay, cancellationToken);

            await crawlChapter(chapterIndex, chapterUrl, chapterIndex + 1, true);
            chapterCounter++;
        }
    };

    // fetch chapters
    bool useFastMethod = crawlMethod == (int)CrawMethods.Fast;
    if (!useFastMethod && crawlMethod != (int)CrawMethods.Slow) {
        useFastMethod = Utility.GetRandomNumber() % 7 == 0;
    }

    if (useFastMethod) {
        await fastCrawl();
    }
    else {
        await slowCrawl();
    }

    // download media files
    List<Task> downloadingTasks = new List<Task>();
    string folderPath = (string.IsNullOrWhiteSpace(folder) ? "" : folder + "\\") + Utils.MediaFolder;

    if (!string.IsNullOrWhiteSpace(book.Cover) && !book.Cover.StartsWith(Utils.MediaUri, StringComparison.Ordinal)) {
        string filename = Utils.GetFilename(book.Cover);
        book.MediaFiles.Add(filename);

        // the referrer of the cover is the URL of the first chapter without its chapter parameter
        string referUri = book.ChapterUrls.Count > 0 ? book.ChapterUrls[0] : ISach.ReferUri;
        int chapterParameterPosition = referUri.IndexOf("&chapter=", StringComparison.Ordinal);
        if (chapterParameterPosition > 0) {
            referUri = referUri.Substring(0, chapterParameterPosition);
        }

        downloadingTasks.Add(Utils.DownloadFileAsync(book.Cover, referUri, folderPath, book.PermanentID, cancellationToken, onDownloadFileCompleted, onDownloadFileError));
        book.Cover = Utils.MediaUri + filename;
    }

    for (int index = 0; index < book.Chapters.Count; index++) {
        // data[0] is read as the normalized chapter and data[1] as the list of URIs of its media files
        object[] data = Utils.NormalizeMediaFiles(book.Chapters[index]);
        if (data == null || data.Length < 1) {
            continue;
        }

        string normalizedChapter = data[0] as string;
        if (normalizedChapter != null) {
            book.Chapters[index] = normalizedChapter;
        }

        List<string> fileUris = data.Length > 1 ? data[1] as List<string> : null;
        if (fileUris == null) {
            continue;
        }

        foreach (string fileUri in fileUris) {
            if (string.IsNullOrWhiteSpace(fileUri) || fileUri.StartsWith(Utils.MediaUri, StringComparison.Ordinal)) {
                continue;
            }

            // Absolute URIs are used as they are; only relative ones are resolved against the site.
            // (Prefixing "/" to an absolute URI would produce an invalid address.)
            bool isAbsolute = fileUri.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || fileUri.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
            string uri = isAbsolute
                ? fileUri
                : siteUri + (fileUri.StartsWith("/", StringComparison.Ordinal) ? "" : "/") + fileUri;

            string filename = Utils.GetFilename(uri);
            if (book.MediaFiles.Contains(filename)) {
                continue;
            }

            book.MediaFiles.Add(filename);
            downloadingTasks.Add(Utils.DownloadFileAsync(uri, ISach.ReferUri, folderPath, book.PermanentID, cancellationToken, onDownloadFileCompleted, onDownloadFileError));
        }
    }
    await Task.WhenAll(downloadingTasks);

    // normalize TOC
    book.NormalizeTOCs();

    // return information
    return book;
}