Exemple #1
0
        /// <summary>
        /// Fetches the content of the chapters of <paramref name="book"/>, downloads the cover and the media files
        /// referenced by the fetched chapters, then normalizes the table of contents of the book.
        /// </summary>
        /// <param name="book">The book to fill. Its <c>Chapters</c>, <c>TOCs</c>, <c>Cover</c> and <c>MediaFiles</c> are updated in place.</param>
        /// <param name="folder">Base folder for downloaded files; <c>Utils.MediaFolder</c> is appended to it. May be blank.</param>
        /// <param name="cancellationToken">Token passed to every delay, chapter request and file download.</param>
        /// <param name="onProcess">Optional callback that receives a progress message before a long pause of the slow crawl.</param>
        /// <param name="onChapterCompleted">
        /// Optional callback invoked per fetched chapter with the chapter URL and the fetched contents, to which
        /// two extra items are appended: the chapter index and the total number of chapters (both as strings).
        /// </param>
        /// <param name="onChapterError">Optional callback invoked with the chapter URL and the exception when a chapter fails.</param>
        /// <param name="onDownloadFileCompleted">Callback handed over to <c>Utils.DownloadFileAsync</c>.</param>
        /// <param name="onDownloadFileError">Callback handed over to <c>Utils.DownloadFileAsync</c>.</param>
        /// <param name="crawlMethod">
        /// <c>(int)CrawMethods.Fast</c> or <c>(int)CrawMethods.Slow</c>; for any other value the fast method is
        /// picked when <c>Utility.GetRandomNumber()</c> is a multiple of 7, otherwise the slow method is used.
        /// </param>
        /// <returns>The same <paramref name="book"/> instance.</returns>
        /// <exception cref="OperationCanceledException">When <paramref name="cancellationToken"/> is cancelled while crawling.</exception>
        public static async Task<Book> FetchChapters(Book book, string folder, CancellationToken cancellationToken, Action<string> onProcess,
                                                     Action<string, List<string>> onChapterCompleted, Action<string, Exception> onChapterError,
                                                     Action<string, string> onDownloadFileCompleted, Action<string, Exception> onDownloadFileError,
                                                     int crawlMethod)
        {
            const string siteUri = "http://isach.info";

            // only non-empty URLs that belong to the site are crawled
            Func<string, bool> isChapterUrl = candidate =>
                !string.IsNullOrEmpty(candidate) && candidate.StartsWith(siteUri, StringComparison.Ordinal);

            // the referrer of a chapter is the URL of the previous chapter, or the source URI of the book when there is none
            Func<int, string> resolveReferUri = position =>
            {
                string previousUrl = position > 0 && position < book.ChapterUrls.Count
                    ? book.ChapterUrls[position - 1]
                    : book.SourceUri;
                return string.IsNullOrEmpty(previousUrl) ? book.SourceUri : previousUrl;
            };

            // builds the stored HTML of a chapter from its fetched parts: [0] is the title, [1] is the body
            Func<List<string>, string> formatChapter = parts =>
                (!string.IsNullOrWhiteSpace(parts[0]) ? "<h1>" + parts[0] + "</h1>" : "")
                + (string.IsNullOrEmpty(parts[1]) ? "--(empty)--" : parts[1]);

            // fetch chapters in batches, all chapters of a batch are requested concurrently
            Func<Task> fastCrawl = async () =>
            {
                int chaptersOfBigBook = 39;
                int normalDelayMin = 456, normalDelayMax = 1234;
                int mediumDelayMin = 2345, mediumDelayMax = 4321, longDelayMin = 3456, longDelayMax = 5678;

                int step = 7, start = 0;
                int end = start + step;

                bool isCompleted = false;
                while (!isCompleted)
                {
                    List<Task> fetchingTasks = new List<Task>();
                    for (int index = start; index < end; index++)
                    {
                        // both lists are indexed below, so stop at whichever ends first
                        if (index >= book.Chapters.Count || index >= book.ChapterUrls.Count)
                        {
                            isCompleted = true;
                            break;
                        }

                        string chapterUrl = book.ChapterUrls[index];
                        if (!isChapterUrl(chapterUrl))
                        {
                            continue;
                        }

                        string referUri = resolveReferUri(index);

                        // copy the loop variable: it is shared by all closures created in this loop, and using the
                        // position directly (instead of searching the URL again) stays correct with duplicated URLs
                        int chapterIndex = index;

                        fetchingTasks.Add(Task.Run(async () =>
                        {
                            int delay = book.ChapterUrls.Count > chaptersOfBigBook
                                ? Utility.GetRandomNumber(mediumDelayMin, mediumDelayMax)
                                : Utility.GetRandomNumber(normalDelayMin, normalDelayMax);
                            await Task.Delay(delay, cancellationToken);

                            try
                            {
                                List<string> contents = await ISach.GetChapter(chapterUrl, referUri, cancellationToken);
                                if (contents == null || contents.Count < 2)
                                {
                                    // routed to onChapterError by the catch below
                                    throw new InvalidOperationException("No content was returned for the chapter: " + chapterUrl);
                                }

                                if (!string.IsNullOrEmpty(contents[0]) || !string.IsNullOrEmpty(contents[1]))
                                {
                                    // fall back to the title of the table of contents when the page has none
                                    if (string.IsNullOrWhiteSpace(contents[0]) && book.TOCs != null && book.TOCs.Count > chapterIndex)
                                    {
                                        contents[0] = book.GetTOCItem(chapterIndex);
                                    }
                                    book.Chapters[chapterIndex] = formatChapter(contents);
                                }

                                if (onChapterCompleted != null)
                                {
                                    // NOTE(review): this path reports a zero-based index while the slow path reports
                                    // a one-based index; kept as-is because callers may rely on it
                                    contents.Add(chapterIndex.ToString());
                                    contents.Add(book.Chapters.Count.ToString());
                                    onChapterCompleted(chapterUrl, contents);
                                }
                            }
                            catch (Exception ex)
                            {
                                // a requested cancellation must stop the crawl instead of being reported as a chapter error
                                if (ex is OperationCanceledException && cancellationToken.IsCancellationRequested)
                                {
                                    throw;
                                }

                                if (onChapterError != null)
                                {
                                    onChapterError(chapterUrl, ex);
                                }
                            }
                        }, cancellationToken));
                    }
                    await Task.WhenAll(fetchingTasks);

                    // go next
                    if (!isCompleted)
                    {
                        start += step;
                        end   += step;

                        // pause between batches only while a full batch remains
                        if (end <= book.Chapters.Count)
                        {
                            await Task.Delay(Utility.GetRandomNumber(longDelayMin, longDelayMax), cancellationToken);
                        }
                    }
                }
            };

            // fetch chapters one by one, with pauses that get longer for bigger books
            Func<Task> slowCrawl = async () =>
            {
                int chaptersOfLargeBook = 69, mediumPausePointOfLargeBook = 6, longPausePointOfLargeBook = 29;
                int chaptersOfBigBook = 29, mediumPausePointOfBigBook = 3, longPausePointOfBigBook = 14;
                int normalDelayMin = 456, normalDelayMax = 890, mediumDelay = 4321, longDelayOfBigBook = 7890, longDelayOfLargeBook = 15431;

                // count of chapters that will really be fetched, used to select the pause settings
                int totalChapters = 0;
                foreach (string url in book.ChapterUrls)
                {
                    if (isChapterUrl(url))
                    {
                        totalChapters++;
                    }
                }

                // counts fetch attempts (successful or not); skipped URLs are not counted
                int chapterCounter = 0;
                for (int chapterIndex = 0; chapterIndex < book.ChapterUrls.Count; chapterIndex++)
                {
                    string chapterUrl = book.ChapterUrls[chapterIndex];
                    if (!isChapterUrl(chapterUrl))
                    {
                        continue;
                    }

                    // medium pause at every 'number' attempts, short random pause otherwise
                    int number = totalChapters > chaptersOfBigBook ? mediumPausePointOfLargeBook : mediumPausePointOfBigBook;
                    int delay  = chapterCounter > (number - 1) && chapterCounter % number == 0
                        ? mediumDelay
                        : Utility.GetRandomNumber(normalDelayMin, normalDelayMax);

                    // long pause settings, only for books above the 'big' / 'large' thresholds
                    int longPausePoint = -1, longDelay = 0;
                    if (totalChapters > chaptersOfLargeBook)
                    {
                        longPausePoint = longPausePointOfLargeBook;
                        longDelay      = longDelayOfLargeBook;
                    }
                    else if (totalChapters > chaptersOfBigBook)
                    {
                        longPausePoint = longPausePointOfBigBook;
                        longDelay      = longDelayOfBigBook;
                    }

                    if (longPausePoint >= 0 && chapterCounter > longPausePoint && chapterCounter % (longPausePoint + 1) == 0)
                    {
                        if (onProcess != null)
                        {
                            onProcess("\r\n" + "..... Wait for few seconds before continue with more chapters......." + "\r\n");
                        }
                        delay = longDelay;
                    }
                    await Task.Delay(delay, cancellationToken);

                    try
                    {
                        string referUri = resolveReferUri(chapterIndex);

                        List<string> contents = await ISach.GetChapter(chapterUrl, referUri, cancellationToken);

                        cancellationToken.ThrowIfCancellationRequested();

                        if (contents == null || contents.Count < 2)
                        {
                            // routed to onChapterError by the catch below
                            throw new InvalidOperationException("No content was returned for the chapter: " + chapterUrl);
                        }

                        if (!string.IsNullOrEmpty(contents[0]) || !string.IsNullOrEmpty(contents[1]))
                        {
                            string title  = contents[0];
                            bool   hasTOC = book.TOCs != null && book.TOCs.Count > chapterIndex;
                            if (string.IsNullOrWhiteSpace(title) && hasTOC)
                            {
                                // fall back to the title of the table of contents when the page has none
                                contents[0] = book.GetTOCItem(chapterIndex);
                            }
                            else if (hasTOC && book.TOCs[chapterIndex].IndexOf(title, StringComparison.OrdinalIgnoreCase) < 0)
                            {
                                // the fetched title replaces a TOC entry that does not contain it
                                book.TOCs[chapterIndex] = title;
                            }
                            book.Chapters[chapterIndex] = formatChapter(contents);
                        }

                        if (onChapterCompleted != null)
                        {
                            contents.Add((chapterIndex + 1).ToString());
                            contents.Add(book.Chapters.Count.ToString());
                            onChapterCompleted(chapterUrl, contents);
                        }
                    }
                    catch (Exception ex)
                    {
                        // a requested cancellation must stop the crawl instead of being reported as a chapter error
                        if (ex is OperationCanceledException && cancellationToken.IsCancellationRequested)
                        {
                            throw;
                        }

                        if (onChapterError != null)
                        {
                            onChapterError(chapterUrl, ex);
                        }
                    }
                    chapterCounter++;
                }
            };

            // select the crawl method: explicit Fast/Slow, otherwise pick one at random
            bool useFastMethod = crawlMethod.Equals((int)CrawMethods.Fast);
            if (!useFastMethod && !crawlMethod.Equals((int)CrawMethods.Slow))
            {
                useFastMethod = Utility.GetRandomNumber() % 7 == 0;
            }

            if (useFastMethod)
            {
                await fastCrawl();
            }
            else
            {
                await slowCrawl();
            }

            // download media files
            List<Task> downloadingTasks = new List<Task>();
            string     folderPath       = (string.IsNullOrWhiteSpace(folder) ? "" : folder + "\\") + Utils.MediaFolder;

            // cover image: skipped when it already points to the media location
            if (!string.IsNullOrWhiteSpace(book.Cover) && !book.Cover.StartsWith(Utils.MediaUri, StringComparison.Ordinal))
            {
                string filename = Utils.GetFilename(book.Cover);
                book.MediaFiles.Add(filename);

                // the referrer is the URL of the first chapter without its "&chapter=" part
                string referUri      = book.ChapterUrls.Count > 0 ? book.ChapterUrls[0] : ISach.ReferUri;
                int    chapterMarker = referUri.IndexOf("&chapter=", StringComparison.Ordinal);
                if (chapterMarker > 0)
                {
                    referUri = referUri.Substring(0, chapterMarker);
                }
                downloadingTasks.Add(Utils.DownloadFileAsync(book.Cover, referUri, folderPath, book.PermanentID, cancellationToken, onDownloadFileCompleted, onDownloadFileError));

                book.Cover = Utils.MediaUri + filename;
            }

            // media files of chapters: data[0] is the normalized chapter, data[1] is the list of file URIs
            for (int index = 0; index < book.Chapters.Count; index++)
            {
                object[] data = Utils.NormalizeMediaFiles(book.Chapters[index]);
                if (data == null || data.Length < 1)
                {
                    continue;
                }

                book.Chapters[index] = data[0] as string;

                List<string> fileUris = data.Length > 1 ? data[1] as List<string> : null;
                if (fileUris == null)
                {
                    continue;
                }

                foreach (string fileUri in fileUris)
                {
                    if (string.IsNullOrEmpty(fileUri) || fileUri.StartsWith(Utils.MediaUri, StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // absolute URIs are used as-is, relative ones are resolved against the site
                    bool isAbsolute = fileUri.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                      || fileUri.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                    string uri = isAbsolute
                        ? fileUri
                        : siteUri + (fileUri.StartsWith("/", StringComparison.Ordinal) ? "" : "/") + fileUri;

                    string filename = Utils.GetFilename(uri);
                    if (book.MediaFiles.Contains(filename))
                    {
                        continue;
                    }

                    book.MediaFiles.Add(filename);
                    downloadingTasks.Add(Utils.DownloadFileAsync(uri, ISach.ReferUri, folderPath, book.PermanentID, cancellationToken, onDownloadFileCompleted, onDownloadFileError));
                }
            }
            await Task.WhenAll(downloadingTasks);

            // normalize TOC
            book.NormalizeTOCs();

            // return information
            return book;
        }