Пример #1
0
        /// <summary>
        /// Parses the book at <paramref name="uri"/>, fetches all of its chapters and reports progress
        /// through the supplied callbacks. Every callback is optional (may be null).
        /// </summary>
        /// <param name="id">Permanent identifier to assign to the book; when null or blank, a new one is taken from <c>Utility.GetUUID()</c>.</param>
        /// <param name="uri">Address of the book, handed to <c>ISach.ParseBook</c>.</param>
        /// <param name="folder">Folder handed to <c>ISach.FetchChapters</c>.</param>
        /// <param name="cancellationToken">Token checked on entry and passed to the parsing/fetching calls.</param>
        /// <param name="onProcess">Receives progress messages, including the total elapsed time at the end.</param>
        /// <param name="onParsed">Called with the book once its information has been parsed.</param>
        /// <param name="onCompleted">Called with the book after all chapters have been fetched.</param>
        /// <param name="onError">Called when parsing throws or the parsed title is blank; when supplied, the method returns null instead of throwing.</param>
        /// <param name="onChapterCompleted">Passed through to <c>ISach.FetchChapters</c>.</param>
        /// <param name="onChapterError">Passed through to <c>ISach.FetchChapters</c>.</param>
        /// <param name="onDownloadFileCompleted">Passed through to <c>ISach.FetchChapters</c>.</param>
        /// <param name="onDownloadFileError">Passed through to <c>ISach.FetchChapters</c>.</param>
        /// <param name="crawlMethod">Crawl method selector, passed through to <c>ISach.FetchChapters</c>.</param>
        /// <returns>The fetched book, or null when parsing failed (or the title was blank) and <paramref name="onError"/> was supplied.</returns>
        public static async Task <Book> GetBook(string id, string uri, string folder, CancellationToken cancellationToken,
                                                Action <string> onProcess, Action <Book> onParsed, Action <Book> onCompleted, Action <Book, Exception> onError,
                                                Action <string, List <string> > onChapterCompleted, Action <string, Exception> onChapterError,
                                                Action <string, string> onDownloadFileCompleted, Action <string, Exception> onDownloadFileError,
                                                int crawlMethod)
        {
            cancellationToken.ThrowIfCancellationRequested();
            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();

            // parse book
            Book book = null;

            try
            {
                book = await ISach.ParseBook(uri, cancellationToken);

                book.PermanentID = string.IsNullOrWhiteSpace(id) ? Utility.GetUUID() : id;
                if (string.IsNullOrWhiteSpace(book.Title) && onError != null)
                {
                    onError(book, new InformationInvalidException("The book is invalid"));
                    return(null);
                }
                else if (onParsed != null)
                {
                    onParsed(book);
                }
            }
            catch (Exception)
            {
                if (onError == null)
                {
                    // nobody to report to: rethrow, keeping the original stack trace intact
                    throw;
                }

                throw_free_report:
                onError(book, ex_placeholder());
                return(null);
            }

            // fetch chapters
            book = await ISach.FetchChapters(book, folder, cancellationToken, onProcess, onChapterCompleted, onChapterError, onDownloadFileCompleted, onDownloadFileError, crawlMethod);

            stopwatch.Stop();
            if (onProcess != null)
            {
                onProcess("..... Total times for processing: " + stopwatch.GetElapsedTimes());
            }

            // callback when done
            if (onCompleted != null)
            {
                onCompleted(book);
            }

            return(book);
        }
Пример #2
0
        /// <summary>
        /// Builds a <c>BookSelf</c> request for one listing page and forwards it to the
        /// <c>ISach.GetBookSelf</c> overload that takes a prepared bookself.
        /// </summary>
        /// <param name="urlPattern">Listing URL pattern; when null or blank, the default isach.info story listing pattern is used.</param>
        /// <param name="parameters">URL parameters; when null or empty, a new list is used. The page number is appended to the list stored on the bookself.</param>
        /// <param name="currentPage">Requested page; values below 1 are treated as 1.</param>
        /// <param name="totalPages">Total number of pages, stored as given.</param>
        /// <param name="cancellationToken">Token passed to the underlying call.</param>
        /// <param name="onCompleted">Completion callback passed to the underlying call.</param>
        /// <param name="onError">Error callback passed to the underlying call.</param>
        /// <returns>The result of the underlying <c>ISach.GetBookSelf</c> call.</returns>
        public static async Task <BookSelf> GetBookSelf(string urlPattern, List <string> parameters, int currentPage, int totalPages, CancellationToken cancellationToken, Action <BookSelf> onCompleted, Action <BookSelf, Exception> onError)
        {
            // pages are 1-based: anything lower is clamped to the first page
            int page = Math.Max(currentPage, 1);

            BookSelf bookself = new BookSelf();

            if (string.IsNullOrWhiteSpace(urlPattern))
            {
                bookself.UrlPattern = "http://isach.info/mobile/story.php?list=story&order=created_date&page={0}";
            }
            else
            {
                bookself.UrlPattern = urlPattern.Trim();
            }

            if (parameters == null || parameters.Count < 1)
            {
                bookself.UrlParameters = new List <string>();
            }
            else
            {
                bookself.UrlParameters = parameters;
            }
            bookself.UrlParameters.Add(page.ToString());

            bookself.CurrentPage = page;
            bookself.TotalPages  = totalPages;

            BookSelf result = await ISach.GetBookSelf(bookself, cancellationToken, onCompleted, onError);
            return(result);
        }
Пример #3
0
        /// <summary>
        /// Extracts the title and the body of a chapter from the HTML of a chapter page by scanning
        /// for known markers (navigator divs, "ms_chapter", "ms_text", dropcap markup).
        /// </summary>
        /// <param name="html">HTML of the chapter page.</param>
        /// <returns>A list of two items: the chapter title at index 0 and the chapter body at index 1 (either may be empty).</returns>
        public static List <string> ParseChapter(string html)
        {
            // locate the top navigator; fall back to the mobile navigator, then to the story detail div
            int start = html.IndexOf("<div class='chapter_navigator'>", StringComparison.OrdinalIgnoreCase);

            if (start < 0)
            {
                start = html.IndexOf("<div class='mobile_chapter_navigator'>", StringComparison.OrdinalIgnoreCase) > 0
                                                        ? html.IndexOf("<div class='mobile_chapter_navigator'>", StringComparison.OrdinalIgnoreCase)
                                                        : html.IndexOf("<div id='story_detail'", StringComparison.OrdinalIgnoreCase);
            }
            // the title is the text between the '>' that follows "ms_chapter" and the next "</div>"
            // (when no navigator was found, start is -1 and the search begins at index 0)
            start = html.IndexOf("ms_chapter", start + 1, StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf(">", start + 1, StringComparison.OrdinalIgnoreCase);
            int end = start < 0 ? -1 : html.IndexOf("</div>", start + 1, StringComparison.OrdinalIgnoreCase);

            string title = (start > -1 && end > -1 ? html.Substring(start + 1, end - start - 1).Trim() : "").GetNormalized();

            // collapse runs of spaces into a single space
            while (title.IndexOf("  ") > -1)
            {
                title = title.Replace("  ", " ");
            }

            if (!title.Equals(""))
            {
                // title found: the body starts at the next <div after it
                start = html.IndexOf("<div", start + 1, StringComparison.OrdinalIgnoreCase);
                // discard "titles" that are really dropcap markup or the literal text "null"
                if (title.IndexOf("<div id='dropcap", StringComparison.OrdinalIgnoreCase) > -1 || title.IndexOf("<div id ='dropcap", StringComparison.OrdinalIgnoreCase) > -1)
                {
                    title = "";
                }
                else if (title.ToLower().Equals("null"))
                {
                    title = "";
                }
            }
            else
            {
                // no title: try to start the body at the first dropcap span
                start = html.IndexOf("<span class='dropcap", start + 1, StringComparison.OrdinalIgnoreCase);
                if (start < 0)
                {
                    if (html.StartsWith("<div class='ms_text"))
                    {
                        start = 0;
                    }
                    else
                    {
                        // otherwise use "ms_chapter" if present, else the 50px spacer div when it comes before
                        // the first ms_text div; then start after the closing </div> of that marker,
                        // or at the first ms_text div when neither marker was found
                        start = html.IndexOf("ms_chapter", start + 1, StringComparison.OrdinalIgnoreCase) > 0
                                                                        ? html.IndexOf("ms_chapter", start + 1, StringComparison.OrdinalIgnoreCase)
                                                                        : html.IndexOf("<div style='height: 50px;'></div>", end + 1, StringComparison.OrdinalIgnoreCase) < html.IndexOf("<div class='ms_text'>", end + 1, StringComparison.OrdinalIgnoreCase)
                                                                                ? html.IndexOf("<div style='height: 50px;'></div>", end + 1, StringComparison.OrdinalIgnoreCase)
                                                                                : -1;
                        start = start < 0 ? html.IndexOf("<div class='ms_text'>", end + 1, StringComparison.OrdinalIgnoreCase) : html.IndexOf("</div>", start + 1, StringComparison.OrdinalIgnoreCase) + 6;
                    }
                }
            }

            // the body ends at the first of: 50px spacer div, bottom navigator, mobile navigator, closing </form>
            end = html.IndexOf("<div style='height: 50px;'></div>", start + 1, StringComparison.OrdinalIgnoreCase);
            if (end < 0)
            {
                end = html.IndexOf("<div class='navigator_bottom'>", start + 1, StringComparison.OrdinalIgnoreCase);
                if (end < 0)
                {
                    end = html.IndexOf("<div class='mobile_chapter_navigator'>", start + 1, StringComparison.OrdinalIgnoreCase);
                }
                if (end < 0)
                {
                    end = html.IndexOf("</form>", start + 1, StringComparison.OrdinalIgnoreCase);
                }
            }

            string body = start > -1 && end > -1 ? html.Substring(start, end - start).Trim() : "";

            // turn div markup into paragraph markup
            // (Replace with a StringComparison first argument is a helper defined elsewhere — presumably a case-insensitive replace; confirm)
            body = body.Replace(StringComparison.OrdinalIgnoreCase, "<div class='ms_text'>", "<p>").Replace(StringComparison.OrdinalIgnoreCase, "<div", "<p").Replace(StringComparison.OrdinalIgnoreCase, "</div>", "</p>");

            // a body that begins with a bare dropcap span gets an opening paragraph tag
            if (body.StartsWith("<span class='dropcap", StringComparison.OrdinalIgnoreCase))
            {
                body = "<p>" + body;
            }

            // walk the paragraphs; rewrite each one that contains dropcap markup
            start = body.IndexOf("<p", StringComparison.OrdinalIgnoreCase);
            end   = body.IndexOf("</p>", start + 1, StringComparison.OrdinalIgnoreCase);
            while (start > -1 && end > -1)
            {
                int dropcap = body.IndexOf("'dropcap", start + 1, StringComparison.OrdinalIgnoreCase);
                if (dropcap > -1 && dropcap < end)
                {
                    // cut the whole paragraph (including its closing </p>) out of the body
                    string paragraph = body.Substring(start, end - start + 4);
                    body = body.Remove(start, end - start + 4);

                    string dropcapChar = "";
                    dropcap = paragraph.IndexOf("class=");
                    if (dropcap > 0)
                    {
                        // read the quoted value of the class attribute and keep only its last character
                        dropcap    += 7;
                        dropcapChar = paragraph.Substring(dropcap - 1, 1);
                        end         = paragraph.IndexOf(dropcapChar, dropcap + 1);
                        dropcapChar = paragraph.Substring(dropcap, end - dropcap);
                        dropcapChar = dropcapChar[dropcapChar.Length - 1].ToString();
                    }
                    // strip the p and span tags; if no text remains, fall back to the character taken from the class name
                    paragraph = Utility.RemoveTag(Utility.RemoveTag(paragraph, "p"), "span").Trim();
                    if (paragraph.Equals(""))
                    {
                        paragraph = dropcapChar;
                    }
                    // put the cleaned text back where the paragraph was (no closing </p> is re-added here)
                    body = body.Insert(start, (body.StartsWith("<p>") ? "" : "<p>") + paragraph);
                }

                start = body.IndexOf("<p", start + 1, StringComparison.OrdinalIgnoreCase);
                end   = body.IndexOf("</p>", start + 1, StringComparison.OrdinalIgnoreCase);
            }

            // drop line breaks, then run the shared body normalization
            body = ISach.NormalizeBody(body.Replace(" \n", "").Replace("\r", "").Replace("\n", ""));

            // no title so far: when the body opens with a "Quyển"/"Phần"/"Chương" heading paragraph,
            // promote that first paragraph to be the title and remove it from the body
            if (title.Equals("") &&
                (body.StartsWith("<p>Quyển ", StringComparison.OrdinalIgnoreCase) ||
                 body.StartsWith("<p>Phần ", StringComparison.OrdinalIgnoreCase) || body.StartsWith("<p>Chương ", StringComparison.OrdinalIgnoreCase)))
            {
                start = 0;
                end   = body.IndexOf("</p>") + 4;
                title = Utility.RemoveTag(body.Substring(0, end - start), "p").Trim();
                body  = body.Remove(0, end - start);
            }

            return(new List <string>()
            {
                title, body
            });
        }
Пример #4
0
 /// <summary>
 /// Downloads a chapter page and parses it with <c>ISach.ParseChapter</c>.
 /// </summary>
 /// <param name="uri">Address of the chapter page; a doubled slash after "/mobile" is collapsed before the request.</param>
 /// <param name="referUri">Referer address passed to the page download.</param>
 /// <param name="cancellationToken">Token passed to the page download.</param>
 /// <returns>The list produced by <c>ISach.ParseChapter</c> for the downloaded HTML.</returns>
 public static async Task <List <string> > GetChapter(string uri, string referUri, CancellationToken cancellationToken)
 {
     string pageUri = uri.Replace("/mobile//", "/mobile/");
     string html    = await Utility.GetWebPageAsync(pageUri, referUri, Utility.SpiderUserAgent, cancellationToken);

     return(ISach.ParseChapter(html));
 }
Пример #5
0
        /// <summary>
        /// Downloads the content of every chapter listed in <c>book.ChapterUrls</c> (using either the fast,
        /// batched crawl or the slow, sequential crawl), then downloads the cover and the media files
        /// referenced by the chapters, and finally normalizes the table of contents.
        /// </summary>
        /// <param name="book">Book whose <c>ChapterUrls</c> are fetched; its <c>Chapters</c>, <c>TOCs</c>, <c>Cover</c> and <c>MediaFiles</c> are updated in place.</param>
        /// <param name="folder">Base folder for downloaded media; <c>Utils.MediaFolder</c> is appended to it.</param>
        /// <param name="cancellationToken">Token passed to delays, page requests and file downloads.</param>
        /// <param name="onProcess">Optional; receives progress messages (used by the slow crawl before long pauses).</param>
        /// <param name="onChapterCompleted">Optional; called per chapter with its URL and the parsed contents, to which an index and the chapter count are appended.</param>
        /// <param name="onChapterError">Optional; called with the chapter URL and the exception when fetching a chapter fails.</param>
        /// <param name="onDownloadFileCompleted">Optional; passed to <c>Utils.DownloadFileAsync</c>.</param>
        /// <param name="onDownloadFileError">Optional; passed to <c>Utils.DownloadFileAsync</c>.</param>
        /// <param name="crawlMethod">Compared against <c>CrawMethods.Fast</c> and <c>CrawMethods.Slow</c>; any other value picks the fast crawl when <c>Utility.GetRandomNumber() % 7 == 0</c> and the slow crawl otherwise.</param>
        /// <returns>The same <paramref name="book"/> instance, updated.</returns>
        public static async Task <Book> FetchChapters(Book book, string folder, CancellationToken cancellationToken, Action <string> onProcess,
                                                      Action <string, List <string> > onChapterCompleted, Action <string, Exception> onChapterError,
                                                      Action <string, string> onDownloadFileCompleted, Action <string, Exception> onDownloadFileError,
                                                      int crawlMethod)
        {
            // fast crawl: fetch chapters concurrently in batches of 'step', pausing between batches
            Func <Task> fastCrawl = async() =>
            {
                int chaptersOfBigBook = 39;
                int normalDelayMin = 456, normalDelayMax = 1234;
                int mediumDelayMin = 2345, mediumDelayMax = 4321, longDelayMin = 3456, longDelayMax = 5678;

                int step = 7, start = 0;
                int end = start + step;

                bool isCompleted = false;
                while (!isCompleted)
                {
                    List <Task> fetchingTasks = new List <Task>();
                    for (int index = start; index < end; index++)
                    {
                        // bound by ChapterUrls (the list actually indexed below): a book may hold
                        // chapters without URLs, in which case there is nothing to crawl
                        if (index >= book.ChapterUrls.Count)
                        {
                            isCompleted = true;
                            break;
                        }

                        string chapterUrl = book.ChapterUrls[index];
                        if (chapterUrl.Equals("") || !chapterUrl.StartsWith("http://isach.info"))
                        {
                            continue;
                        }

                        string referUri = index > 0 && index < book.ChapterUrls.Count ? book.ChapterUrls[index - 1] : book.SourceUri;
                        if (referUri.Equals(""))
                        {
                            referUri = book.SourceUri;
                        }

                        // per-iteration copy for the closure: the loop variable itself is shared by all
                        // iterations, and looking the index up by URL would be wrong for duplicate URLs
                        int chapterIndex = index;

                        fetchingTasks.Add(Task.Run(async() =>
                        {
                            int delay = book.ChapterUrls.Count > chaptersOfBigBook
                                        ? Utility.GetRandomNumber(mediumDelayMin, mediumDelayMax)
                                        : Utility.GetRandomNumber(normalDelayMin, normalDelayMax);
                            await Task.Delay(delay, cancellationToken);

                            try
                            {
                                List <string> contents = await ISach.GetChapter(chapterUrl, referUri, cancellationToken);
                                if (contents != null && (!contents[0].Equals("") || !contents[1].Equals("")))
                                {
                                    // no title parsed from the page: fall back to the TOC entry
                                    string title = contents[0];
                                    if (string.IsNullOrWhiteSpace(title) && book.TOCs != null && book.TOCs.Count > chapterIndex)
                                    {
                                        title       = book.GetTOCItem(chapterIndex);
                                        contents[0] = title;
                                    }
                                    book.Chapters[chapterIndex] = (!string.IsNullOrWhiteSpace(contents[0]) ? "<h1>" + contents[0] + "</h1>" : "")
                                                                  + (contents[1].Equals("") ? "--(empty)--" : contents[1]);
                                }

                                if (onChapterCompleted != null)
                                {
                                    // NOTE(review): the fast crawl reports a 0-based index here while the slow crawl reports index + 1
                                    contents.Add(chapterIndex.ToString());
                                    contents.Add(book.Chapters.Count.ToString());
                                    onChapterCompleted(chapterUrl, contents);
                                }
                            }
                            catch (Exception ex)
                            {
                                if (onChapterError != null)
                                {
                                    onChapterError(chapterUrl, ex);
                                }
                            }
                        }, cancellationToken));
                    }
                    await Task.WhenAll(fetchingTasks);

                    // go next
                    if (!isCompleted)
                    {
                        start += step;
                        end   += step;
                        if (end <= book.Chapters.Count)
                        {
                            await Task.Delay(Utility.GetRandomNumber(longDelayMin, longDelayMax), cancellationToken);
                        }
                    }
                }
            };

            // slow crawl: fetch chapters one by one, with a delay before each request and
            // longer pauses at fixed intervals that depend on the size of the book
            Func <Task> slowCrawl = async() =>
            {
                int chaptersOfLargeBook = 69, mediumPausePointOfLargeBook = 6, longPausePointOfLargeBook = 29;
                int chaptersOfBigBook = 29, mediumPausePointOfBigBook = 3, longPausePointOfBigBook = 14;
                int normalDelayMin = 456, normalDelayMax = 890, mediumDelay = 4321, longDelayOfBigBook = 7890, longDelayOfLargeBook = 15431;

                // count only the chapters that will actually be requested
                int chapterCounter = 0, totalChapters = 0;
                for (int index = 0; index < book.ChapterUrls.Count; index++)
                {
                    if (!book.ChapterUrls[index].Equals("") && book.ChapterUrls[index].StartsWith("http://isach.info"))
                    {
                        totalChapters++;
                    }
                }

                int chapterIndex = -1;
                while (chapterIndex < book.ChapterUrls.Count)
                {
                    chapterIndex++;
                    string chapterUrl = chapterIndex < book.ChapterUrls.Count ? book.ChapterUrls[chapterIndex] : "";
                    if (chapterUrl.Equals("") || !chapterUrl.StartsWith("http://isach.info"))
                    {
                        continue;
                    }

                    // medium pause every 'number' chapters, otherwise a short random delay
                    int number = totalChapters > chaptersOfBigBook ? mediumPausePointOfLargeBook : mediumPausePointOfBigBook;
                    int delay  = chapterCounter > (number - 1) && chapterCounter % number == 0 ? mediumDelay : Utility.GetRandomNumber(normalDelayMin, normalDelayMax);
                    if (totalChapters > chaptersOfLargeBook)
                    {
                        if (chapterCounter > longPausePointOfLargeBook && chapterCounter % (longPausePointOfLargeBook + 1) == 0)
                        {
                            if (onProcess != null)
                            {
                                onProcess("\r\n" + "..... Wait for few seconds before continue with more chapters......." + "\r\n");
                            }
                            delay = longDelayOfLargeBook;
                        }
                    }
                    else if (totalChapters > chaptersOfBigBook)
                    {
                        if (chapterCounter > longPausePointOfBigBook && chapterCounter % (longPausePointOfBigBook + 1) == 0)
                        {
                            if (onProcess != null)
                            {
                                onProcess("\r\n" + "..... Wait for few seconds before continue with more chapters......." + "\r\n");
                            }
                            delay = longDelayOfBigBook;
                        }
                    }
                    await Task.Delay(delay, cancellationToken);

                    try
                    {
                        string referUri = chapterIndex > 0 && chapterIndex < book.ChapterUrls.Count ? book.ChapterUrls[chapterIndex - 1] : book.SourceUri;
                        if (referUri.Equals(""))
                        {
                            referUri = book.SourceUri;
                        }

                        List <string> contents = await ISach.GetChapter(chapterUrl, referUri, cancellationToken);

                        cancellationToken.ThrowIfCancellationRequested();

                        if (contents != null && (!contents[0].Equals("") || !contents[1].Equals("")))
                        {
                            // no title parsed: use the TOC entry; otherwise update the TOC entry
                            // when it does not contain the parsed title
                            string title = contents[0];
                            if (string.IsNullOrWhiteSpace(title) && book.TOCs != null && book.TOCs.Count > chapterIndex)
                            {
                                title       = book.GetTOCItem(chapterIndex);
                                contents[0] = title;
                            }
                            else if (book.TOCs != null && book.TOCs.Count > chapterIndex && book.TOCs[chapterIndex].IndexOf(title, StringComparison.OrdinalIgnoreCase) < 0)
                            {
                                book.TOCs[chapterIndex] = title;
                            }
                            book.Chapters[chapterIndex] = (!string.IsNullOrWhiteSpace(contents[0]) ? "<h1>" + contents[0] + "</h1>" : "")
                                                          + (contents[1].Equals("") ? "--(empty)--" : contents[1]);
                        }

                        if (onChapterCompleted != null)
                        {
                            contents.Add((chapterIndex + 1).ToString());
                            contents.Add(book.Chapters.Count.ToString());
                            onChapterCompleted(chapterUrl, contents);
                        }
                    }
                    catch (Exception ex)
                    {
                        if (onChapterError != null)
                        {
                            onChapterError(chapterUrl, ex);
                        }
                    }
                    chapterCounter++;
                }
            };

            // pick the crawl method: explicit Fast/Slow, otherwise fast one time out of seven
            bool useFastMethod = crawlMethod.Equals((int)CrawMethods.Fast);

            if (!useFastMethod && !crawlMethod.Equals((int)CrawMethods.Slow))
            {
                useFastMethod = Utility.GetRandomNumber() % 7 == 0;
            }

            if (useFastMethod)
            {
                await fastCrawl();
            }
            else
            {
                await slowCrawl();
            }

            // download media files
            List <Task> downloadingTasks = new List <Task>();
            string      folderPath       = (string.IsNullOrWhiteSpace(folder) ? "" : folder + "\\") + Utils.MediaFolder;

            // cover: download it and point the book at the local media URI
            if (!string.IsNullOrWhiteSpace(book.Cover) && !book.Cover.StartsWith(Utils.MediaUri))
            {
                string filename = Utils.GetFilename(book.Cover);
                book.MediaFiles.Add(filename);

                // referer is the first chapter URL without its "&chapter=" part
                string referUri = book.ChapterUrls.Count > 0 ? book.ChapterUrls[0] : ISach.ReferUri;
                if (referUri.IndexOf("&chapter=") > 0)
                {
                    referUri = referUri.Substring(0, referUri.IndexOf("&chapter="));
                }
                downloadingTasks.Add(Utils.DownloadFileAsync(book.Cover, referUri, folderPath, book.PermanentID, cancellationToken, onDownloadFileCompleted, onDownloadFileError));

                book.Cover = Utils.MediaUri + filename;
            }

            // media referenced inside chapters
            for (int index = 0; index < book.Chapters.Count; index++)
            {
                // data[0] is the rewritten chapter content, data[1] the list of media file URIs found in it
                object[] data = Utils.NormalizeMediaFiles(book.Chapters[index]);
                if (data == null || data.Length < 1)
                {
                    continue;
                }

                book.Chapters[index] = data[0] as string;

                // guard against a result without a (valid) list of files instead of failing on data[1]
                List <string> fileUris = data.Length > 1 ? data[1] as List <string> : null;
                if (fileUris == null)
                {
                    continue;
                }

                foreach (string fileUri in fileUris)
                {
                    if (fileUri.StartsWith(Utils.MediaUri))
                    {
                        continue;
                    }

                    // absolute URIs are used as they are; site-relative ones get the host
                    // (and a leading slash when missing) prepended
                    bool isAbsolute = fileUri.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                      || fileUri.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                    string uri = isAbsolute
                                 ? fileUri
                                 : "http://isach.info" + (!fileUri.StartsWith("/") ? "/" : "") + fileUri;
                    string filename = Utils.GetFilename(uri);
                    if (book.MediaFiles.Contains(filename))
                    {
                        continue;
                    }

                    book.MediaFiles.Add(filename);
                    downloadingTasks.Add(Utils.DownloadFileAsync(uri, ISach.ReferUri, folderPath, book.PermanentID, cancellationToken, onDownloadFileCompleted, onDownloadFileError));
                }
            }
            await Task.WhenAll(downloadingTasks);

            // normalize TOC
            book.NormalizeTOCs();

            // return information
            return(book);
        }
Пример #6
0
        /// <summary>
        /// Downloads the mobile story page of a book on isach.info and parses its information
        /// (title, author, category, original, translator, cover) and its list of chapters.
        /// When no chapter list is found, the page itself is parsed as a single chapter.
        /// </summary>
        /// <param name="uri">Address of the book; only the identity returned by <c>Book.GetIdentity</c> is used to build the request URL.</param>
        /// <param name="cancellationToken">Token passed to the page requests and to the delay before the TOC request.</param>
        /// <returns>A new <c>Book</c> filled with the parsed information.</returns>
        /// <exception cref="InformationNotFoundException">The page says that a login is required to read the book.</exception>
        public static async Task <Book> ParseBook(string uri, CancellationToken cancellationToken)
        {
            // get identity
            string url = "/mobile/story.php?story=" + Book.GetIdentity(uri);

            Book book = new Book();

            book.Source    = "isach.info";
            book.SourceUri = "http://isach.info" + url;

            string html = await Utility.GetWebPageAsync(book.SourceUri, ISach.ReferUri, Utility.SpiderUserAgent, cancellationToken);

            // check permission
            if (html.IndexOf("Để đọc tác phẩm này, được yêu cầu phải đăng nhập", StringComparison.OrdinalIgnoreCase) > 0)
            {
                throw new InformationNotFoundException("Access denied: Để đọc tác phẩm này, được yêu cầu phải đăng nhập");
            }

            // title: text of the first link after "ms_title"
            int start = html.IndexOf("ms_title", StringComparison.OrdinalIgnoreCase);

            start = start < 0 ? -1 : html.IndexOf("<a", start + 1, StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf(">", start + 1, StringComparison.OrdinalIgnoreCase);
            int end = start < 0 ? -1 : html.IndexOf("<", start + 1, StringComparison.OrdinalIgnoreCase);

            if (start > 0 && end > 0)
            {
                book.Title = html.Substring(start + 1, end - start - 1).GetNormalized();
            }

            // author: text of the first link after the "Tác giả:" label
            start = html.IndexOf("Tác giả:", StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf("<a", start + 1, StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf(">", start + 1, StringComparison.OrdinalIgnoreCase);
            end   = start < 0 ? -1 : html.IndexOf("<", start + 1, StringComparison.OrdinalIgnoreCase);
            if (start > 0 && end > 0)
            {
                book.Author = Book.GetAuthor(html.Substring(start + 1, end - start - 1).Trim());
            }

            // category: text of the first link after the "Thể loại:" label
            start = html.IndexOf("Thể loại:", StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf("<a", start + 1, StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf(">", start + 1, StringComparison.OrdinalIgnoreCase);
            end   = start < 0 ? -1 : html.IndexOf("<", start + 1, StringComparison.OrdinalIgnoreCase);
            if (start > 0 && end > 0)
            {
                book.Category = Book.GetCategory(html.Substring(start + 1, end - start - 1)).GetNormalized();
            }

            // original: plain text following the "Nguyên tác:" label (11 = length of the label)
            start = html.IndexOf("Nguyên tác:", StringComparison.OrdinalIgnoreCase);
            end   = start < 0 ? -1 : html.IndexOf("<", start + 1, StringComparison.OrdinalIgnoreCase);
            if (start > 0 && end > 0)
            {
                book.Original = html.Substring(start + 11, end - start - 11).Trim().GetNormalized();
            }

            // translator: text of the first link after the "Dịch giả:" label
            start = html.IndexOf("Dịch giả:", StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf("<a", start + 1, StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf(">", start + 1, StringComparison.OrdinalIgnoreCase);
            end   = start < 0 ? -1 : html.IndexOf("<", start + 1, StringComparison.OrdinalIgnoreCase);
            if (start > 0 && end > 0)
            {
                book.Translator = html.Substring(start + 1, end - start - 1).Trim().GetNormalized();
            }

            // cover image: value of the first src='…' after "ms_image" (5 = length of "src='")
            start = html.IndexOf("ms_image", StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf("src='", start + 1, StringComparison.OrdinalIgnoreCase);
            end   = start < 0 ? -1 : html.IndexOf("'", start + 5, StringComparison.OrdinalIgnoreCase);
            if (start > 0 && end > 0)
            {
                book.Cover = "http://isach.info" + html.Substring(start + 5, end - start - 5).Trim();
            }

            // chapters: when a cover is set, follow the first link that points back at this story
            // and parse the chapter list from that page instead (9 = length of "<a href='")
            if (!string.IsNullOrEmpty(book.Cover))
            {
                start = html.IndexOf("<a href='" + url, StringComparison.OrdinalIgnoreCase);
                end   = start < 0 ? -1 : html.IndexOf("'", start + 9, StringComparison.OrdinalIgnoreCase);
                if (start > -1 && end > -1)
                {
                    string tocUrl = "http://isach.info" + html.Substring(start + 9, end - start - 9).Trim();

                    // short random pause before the second request; honors cancellation
                    await Task.Delay(Utility.GetRandomNumber(123, 432), cancellationToken);

                    // NOTE(review): the referer passed here is the site-relative path, not an absolute URL — confirm this is intended
                    html = await Utility.GetWebPageAsync(tocUrl, url, Utility.SpiderUserAgent, cancellationToken);
                }
            }

            // the chapter list spans from the first <div after "ms_chapter" (or after the c0000 div) to "</form>"
            start = html.IndexOf("ms_chapter", StringComparison.OrdinalIgnoreCase);
            if (start < 0)
            {
                start = html.IndexOf("<div id='c0000", StringComparison.OrdinalIgnoreCase);
            }
            start = start < 0 ? -1 : html.IndexOf("<div", start + 1, StringComparison.OrdinalIgnoreCase);
            end   = start < 0 ? -1 : html.IndexOf("</form>", start + 1, StringComparison.OrdinalIgnoreCase);

            if (start < 0 || end < 0)
            {
                // no chapter list: treat the page as a single chapter
                List <string> contents = ISach.ParseChapter(html);
                book.Chapters.Add((!string.IsNullOrWhiteSpace(contents[0]) ? "<h1>" + contents[0] + "</h1>" + "\n" : "") + contents[1]);
            }
            else
            {
                // every link in the list is one chapter: href = chapter URL, link text = TOC entry
                html  = html.Substring(start, end - start).Trim();
                start = html.IndexOf("<a href='", StringComparison.OrdinalIgnoreCase);
                while (start > -1)
                {
                    end = html.IndexOf("'", start + 9, StringComparison.OrdinalIgnoreCase);
                    if (end < 0)
                    {
                        // unterminated href (truncated markup): nothing more can be parsed
                        break;
                    }

                    string chapterUrl = html.Substring(start + 9, end - start - 9).Trim();
                    while (chapterUrl.StartsWith("/"))
                    {
                        chapterUrl = chapterUrl.Right(chapterUrl.Length - 1);
                    }
                    chapterUrl = (!chapterUrl.StartsWith("http://isach.info") ? "http://isach.info/mobile/" : "") + chapterUrl;
                    if (chapterUrl.IndexOf("&chapter=") < 0)
                    {
                        chapterUrl += "&chapter=0001";
                    }

                    // the URL is stored as the chapter placeholder until the content is fetched
                    book.Chapters.Add(chapterUrl);
                    book.ChapterUrls.Add(chapterUrl);

                    // link text: from just after the '>' closing the opening tag up to the next '<'
                    int textStart = html.IndexOf(">", start + 1, StringComparison.OrdinalIgnoreCase) + 1;
                    int textEnd   = textStart < 1 || textStart + 1 > html.Length
                                    ? -1
                                    : html.IndexOf("<", textStart + 1, StringComparison.OrdinalIgnoreCase);
                    if (textEnd < 0)
                    {
                        // unterminated anchor: keep Chapters/ChapterUrls/TOCs aligned and stop,
                        // instead of throwing or scanning the same anchor again
                        book.TOCs.Add("");
                        break;
                    }
                    book.TOCs.Add(html.Substring(textStart, textEnd - textStart).GetNormalized());

                    start = html.IndexOf("<a href='", textStart + 1, StringComparison.OrdinalIgnoreCase);
                }
            }

            // still nothing usable: parse whatever HTML is at hand as a single chapter
            if (book.ChapterUrls.Count < 1 && (book.Chapters.Count < 1 || book.Chapters[0].Equals("")))
            {
                List <string> contents = ISach.ParseChapter(html);
                book.Chapters.Add((!string.IsNullOrWhiteSpace(contents[0]) ? "<h1>" + contents[0] + "</h1>" + "\n" : "") + contents[1]);
            }

            return(book);
        }
Пример #7
0
 /// <summary>
 /// Convenience overload: forwards to the two-argument <c>ISach.NormalizeBody</c> with -1 as the second argument.
 /// </summary>
 /// <param name="input">Text to normalize.</param>
 /// <returns>The value returned by the two-argument overload.</returns>
 public static string NormalizeBody(string input)
 {
     string normalized = ISach.NormalizeBody(input, -1);

     return(normalized);
 }