Пример #1
0
 /// <summary>
 /// Downloads a single chapter page and hands its HTML to <c>ISach.ParseChapter</c>.
 /// </summary>
 /// <param name="uri">Address of the chapter page; every "/mobile//" in it is collapsed to "/mobile/" before the request is made.</param>
 /// <param name="referUri">Forwarded unchanged as the second argument of <c>Utility.GetWebPageAsync</c> (presumably the HTTP referer - confirm against that helper).</param>
 /// <param name="cancellationToken">Forwarded to the download call.</param>
 /// <returns>The list produced by <c>ISach.ParseChapter</c> for the downloaded page.</returns>
 public static async Task <List <string> > GetChapter(string uri, string referUri, CancellationToken cancellationToken)
 {
     // Normalise the doubled slash that can appear after "/mobile" in chapter links.
     string chapterUri = uri.Replace("/mobile//", "/mobile/");

     string pageHtml = await Utility.GetWebPageAsync(chapterUri, referUri, Utility.SpiderUserAgent, cancellationToken);

     return ISach.ParseChapter(pageHtml);
 }
Пример #2
0
        /// <summary>
        /// Downloads the isach.info mobile story page identified by <paramref name="uri"/> and scrapes it into a <c>Book</c>.
        /// </summary>
        /// <remarks>
        /// Title, author, category, original title, translator and cover are read from the story page.
        /// When a cover was found and the page links to a URL starting with the story path, that linked page is
        /// downloaded (after a short random delay) and used for the chapter scan instead.
        /// If a chapter list block is found, every link in it is added to <c>Chapters</c> and <c>ChapterUrls</c>
        /// (as a URL) and its text to <c>TOCs</c>; otherwise the page itself is parsed as one chapter.
        /// </remarks>
        /// <param name="uri">Any URI from which <c>Book.GetIdentity</c> can derive the story identity.</param>
        /// <param name="cancellationToken">Forwarded to both downloads and to the delay between them.</param>
        /// <returns>The populated book.</returns>
        /// <exception cref="InformationNotFoundException">The page contains the site's "login required" message.</exception>
        public static async Task <Book> ParseBook(string uri, CancellationToken cancellationToken)
        {
            const string host = "http://isach.info";
            const string hrefOpen = "<a href='";

            // get identity
            string url = "/mobile/story.php?story=" + Book.GetIdentity(uri);

            Book book = new Book();

            book.Source    = "isach.info";
            book.SourceUri = host + url;

            string html = await Utility.GetWebPageAsync(book.SourceUri, ISach.ReferUri, Utility.SpiderUserAgent, cancellationToken);

            // check permission (>= 0: a match at the very start of the page is still a match)
            if (html.IndexOf("Để đọc tác phẩm này, được yêu cầu phải đăng nhập", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                throw new InformationNotFoundException("Access denied: Để đọc tác phẩm này, được yêu cầu phải đăng nhập");
            }

            // title
            string linkText = ExtractLinkTextAfter(html, "ms_title");
            if (linkText != null)
            {
                book.Title = linkText.GetNormalized();
            }

            // author
            linkText = ExtractLinkTextAfter(html, "Tác giả:");
            if (linkText != null)
            {
                book.Author = Book.GetAuthor(linkText.Trim());
            }

            // category
            linkText = ExtractLinkTextAfter(html, "Thể loại:");
            if (linkText != null)
            {
                book.Category = Book.GetCategory(linkText).GetNormalized();
            }

            // original: plain text that runs from the label up to the next tag
            const string originalLabel = "Nguyên tác:";
            int start = html.IndexOf(originalLabel, StringComparison.OrdinalIgnoreCase);
            int end   = start < 0 ? -1 : html.IndexOf("<", start + originalLabel.Length, StringComparison.OrdinalIgnoreCase);
            if (end >= 0)
            {
                int valueStart = start + originalLabel.Length;
                book.Original = html.Substring(valueStart, end - valueStart).Trim().GetNormalized();
            }

            // translator
            linkText = ExtractLinkTextAfter(html, "Dịch giả:");
            if (linkText != null)
            {
                book.Translator = linkText.Trim().GetNormalized();
            }

            // cover image: value of the first single-quoted src attribute after "ms_image"
            start = html.IndexOf("ms_image", StringComparison.OrdinalIgnoreCase);
            start = start < 0 ? -1 : html.IndexOf("src='", start + 1, StringComparison.OrdinalIgnoreCase);
            end   = start < 0 ? -1 : html.IndexOf("'", start + 5, StringComparison.OrdinalIgnoreCase);
            if (end >= 0)
            {
                book.Cover = host + html.Substring(start + 5, end - start - 5).Trim();
            }

            // chapters: when a cover was found, follow the first link that starts with the story path
            // (IsNullOrEmpty so that a Cover that was never assigned does not throw here)
            if (!string.IsNullOrEmpty(book.Cover))
            {
                start = html.IndexOf(hrefOpen + url, StringComparison.OrdinalIgnoreCase);
                end   = start < 0 ? -1 : html.IndexOf("'", start + hrefOpen.Length, StringComparison.OrdinalIgnoreCase);
                if (end > -1)
                {
                    string tocUrl = host + html.Substring(start + hrefOpen.Length, end - start - hrefOpen.Length).Trim();

                    // Throttle between the two requests; the token lets a cancelled crawl stop immediately.
                    await Task.Delay(Utility.GetRandomNumber(123, 432), cancellationToken);

                    // NOTE(review): the second argument is the relative story path, not an absolute URL - confirm this is intended.
                    html = await Utility.GetWebPageAsync(tocUrl, url, Utility.SpiderUserAgent, cancellationToken);
                }
            }

            // Locate the chapter list: from the first <div after "ms_chapter" (or after "<div id='c0000") up to "</form>".
            start = html.IndexOf("ms_chapter", StringComparison.OrdinalIgnoreCase);
            if (start < 0)
            {
                start = html.IndexOf("<div id='c0000", StringComparison.OrdinalIgnoreCase);
            }
            start = start < 0 ? -1 : html.IndexOf("<div", start + 1, StringComparison.OrdinalIgnoreCase);
            end   = start < 0 ? -1 : html.IndexOf("</form>", start + 1, StringComparison.OrdinalIgnoreCase);

            if (end < 0)
            {
                // No chapter list: treat the page itself as the only chapter.
                book.Chapters.Add(RenderSingleChapter(html));
            }
            else
            {
                // From here on html is only the chapter list block (the fallback below relies on that).
                html = html.Substring(start, end - start).Trim();

                int anchor = html.IndexOf(hrefOpen, StringComparison.OrdinalIgnoreCase);
                while (anchor > -1)
                {
                    int hrefStart = anchor + hrefOpen.Length;
                    int hrefEnd   = html.IndexOf("'", hrefStart, StringComparison.OrdinalIgnoreCase);
                    int tagEnd    = hrefEnd < 0 ? -1 : html.IndexOf(">", hrefEnd + 1, StringComparison.OrdinalIgnoreCase);
                    int textStart = tagEnd < 0 ? -1 : tagEnd + 1;

                    // Search from textStart itself so that an empty link text yields "" instead of the following tag.
                    int textEnd = textStart < 0 ? -1 : html.IndexOf("<", textStart, StringComparison.OrdinalIgnoreCase);
                    if (textEnd < 0)
                    {
                        // Truncated markup: stop before adding a half-parsed entry so the three lists stay aligned
                        // (and so the scan cannot restart from the top of the block).
                        break;
                    }

                    string chapterUrl = html.Substring(hrefStart, hrefEnd - hrefStart).Trim().TrimStart('/');
                    if (!chapterUrl.StartsWith(host, StringComparison.Ordinal))
                    {
                        chapterUrl = host + "/mobile/" + chapterUrl;
                    }
                    if (chapterUrl.IndexOf("&chapter=", StringComparison.Ordinal) < 0)
                    {
                        chapterUrl += "&chapter=0001";
                    }

                    // Chapters receives the URL here, not chapter content.
                    book.Chapters.Add(chapterUrl);
                    book.ChapterUrls.Add(chapterUrl);
                    book.TOCs.Add(html.Substring(textStart, textEnd - textStart).GetNormalized());

                    // textEnd is always past the current anchor, so the scan strictly advances.
                    anchor = html.IndexOf(hrefOpen, textEnd, StringComparison.OrdinalIgnoreCase);
                }
            }

            // Fallback kept as in the original: no chapter links and no (non-empty) first chapter.
            if (book.ChapterUrls.Count < 1 && (book.Chapters.Count < 1 || book.Chapters[0].Equals("")))
            {
                book.Chapters.Add(RenderSingleChapter(html));
            }

            return(book);
        }

        /// <summary>
        /// Returns the text between the end of the first "&lt;a" tag found after <paramref name="marker"/> and the next "&lt;", or null when the marker, the link or the closing "&lt;" is missing.
        /// </summary>
        private static string ExtractLinkTextAfter(string html, string marker)
        {
            int pos = html.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
            pos = pos < 0 ? -1 : html.IndexOf("<a", pos + 1, StringComparison.OrdinalIgnoreCase);
            pos = pos < 0 ? -1 : html.IndexOf(">", pos + 1, StringComparison.OrdinalIgnoreCase);
            int close = pos < 0 ? -1 : html.IndexOf("<", pos + 1, StringComparison.OrdinalIgnoreCase);

            return close < 0 ? null : html.Substring(pos + 1, close - pos - 1);
        }

        /// <summary>
        /// Runs <c>ISach.ParseChapter</c> on <paramref name="html"/> and joins its first two items: item 0 wrapped in an h1 line when it is not blank, followed by item 1.
        /// </summary>
        private static string RenderSingleChapter(string html)
        {
            // NOTE(review): assumes ParseChapter always returns at least two items - confirm against its implementation.
            List <string> contents = ISach.ParseChapter(html);
            string heading = !string.IsNullOrWhiteSpace(contents[0]) ? "<h1>" + contents[0] + "</h1>" + "\n" : "";

            return heading + contents[1];
        }