/// <summary>
/// Downloads a chapter page and passes the resulting HTML to <c>ISach.ParseChapter</c>.
/// </summary>
/// <param name="uri">Address of the chapter page; every "/mobile//" occurrence is replaced by "/mobile/" before the request is made.</param>
/// <param name="referUri">Forwarded unchanged as the second argument of <c>Utility.GetWebPageAsync</c>.</param>
/// <param name="cancellationToken">Token forwarded to the download call.</param>
/// <returns>The list produced by <c>ISach.ParseChapter</c> for the downloaded HTML.</returns>
public static async Task <List <string> > GetChapter(string uri, string referUri, CancellationToken cancellationToken)
{
	// collapse the doubled slash that can appear after the "mobile" path segment
	string chapterUri = uri.Replace("/mobile//", "/mobile/");

	string chapterHtml = await Utility.GetWebPageAsync(chapterUri, referUri, Utility.SpiderUserAgent, cancellationToken);

	return ISach.ParseChapter(chapterHtml);
}
/// <summary>
/// Downloads the isach.info mobile story page for a book and fills a new <c>Book</c> from markers found
/// in the HTML: title, author, category, original title, translator, cover image and the chapter listing.
/// </summary>
/// <remarks>
/// When a cover was found and the page links to a table-of-contents page, that page is downloaded (after a short
/// random pause) and used for the chapter listing instead. When no chapter listing can be located, the HTML is
/// handed to <c>ISach.ParseChapter</c> and stored as a single chapter. A malformed anchor inside the chapter
/// listing ends the scan; entries collected before it are kept.
/// </remarks>
/// <param name="uri">Value passed to <c>Book.GetIdentity</c>; the result is appended to "/mobile/story.php?story=".</param>
/// <param name="cancellationToken">Token forwarded to both page downloads and to the pause before the table-of-contents request.</param>
/// <returns>The populated <c>Book</c>.</returns>
/// <exception cref="InformationNotFoundException">The downloaded page contains the "login required" notice.</exception>
public static async Task <Book> ParseBook(string uri, CancellationToken cancellationToken)
{
	const string host = "http://isach.info";
	const string anchorOpen = "<a href='";
	const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;

	// get identity
	string url = "/mobile/story.php?story=" + Book.GetIdentity(uri);

	Book book = new Book();
	book.Source = "isach.info";
	book.SourceUri = host + url;

	string html = await Utility.GetWebPageAsync(book.SourceUri, ISach.ReferUri, Utility.SpiderUserAgent, cancellationToken);

	// check permission
	if (html.IndexOf("Để đọc tác phẩm này, được yêu cầu phải đăng nhập", ignoreCase) > 0)
	{
		throw new InformationNotFoundException("Access denied: Để đọc tác phẩm này, được yêu cầu phải đăng nhập");
	}

	string linkText;

	// title
	if (TryReadLinkText(html, "ms_title", out linkText))
	{
		book.Title = linkText.GetNormalized();
	}

	// author
	if (TryReadLinkText(html, "Tác giả:", out linkText))
	{
		book.Author = Book.GetAuthor(linkText.Trim());
	}

	// category
	if (TryReadLinkText(html, "Thể loại:", out linkText))
	{
		book.Category = Book.GetCategory(linkText).GetNormalized();
	}

	// original (plain text that follows the label, up to the next tag)
	const string originalLabel = "Nguyên tác:";
	int start = html.IndexOf(originalLabel, ignoreCase);
	int end = start < 0 ? -1 : html.IndexOf("<", start + 1, ignoreCase);
	if (start > 0 && end > 0)
	{
		int valueStart = start + originalLabel.Length;
		book.Original = html.Substring(valueStart, end - valueStart).Trim().GetNormalized();
	}

	// translator
	if (TryReadLinkText(html, "Dịch giả:", out linkText))
	{
		book.Translator = linkText.Trim().GetNormalized();
	}

	// cover image
	const string srcOpen = "src='";
	start = html.IndexOf("ms_image", ignoreCase);
	start = start < 0 ? -1 : html.IndexOf(srcOpen, start + 1, ignoreCase);
	end = start < 0 ? -1 : html.IndexOf("'", start + srcOpen.Length, ignoreCase);
	if (start > 0 && end > 0)
	{
		int srcStart = start + srcOpen.Length;
		book.Cover = host + html.Substring(srcStart, end - srcStart).Trim();
	}

	// chapters: when a cover was found, look for a link back to this story and load it as the TOC page
	if (!string.IsNullOrEmpty(book.Cover))
	{
		start = html.IndexOf(anchorOpen + url, ignoreCase);
		end = start < 0 ? -1 : html.IndexOf("'", start + anchorOpen.Length, ignoreCase);
		if (start > -1 && end > -1)
		{
			int hrefStart = start + anchorOpen.Length;
			string tocUrl = host + html.Substring(hrefStart, end - hrefStart).Trim();

			// pause between the two requests; the token lets a cancellation interrupt the wait
			await Task.Delay(Utility.GetRandomNumber(123, 432), cancellationToken);

			// NOTE(review): the referer passed here is the relative url, not book.SourceUri - confirm this is intended
			html = await Utility.GetWebPageAsync(tocUrl, url, Utility.SpiderUserAgent, cancellationToken);
		}
	}

	// locate the chapter listing: first <div> after the marker, up to the closing </form>
	start = html.IndexOf("ms_chapter", ignoreCase);
	if (start < 0)
	{
		start = html.IndexOf("<div id='c0000", ignoreCase);
	}
	start = start < 0 ? -1 : html.IndexOf("<div", start + 1, ignoreCase);
	end = start < 0 ? -1 : html.IndexOf("</form>", start + 1, ignoreCase);

	if (start < 0 || end < 0)
	{
		// no listing: treat the page itself as the only chapter
		book.Chapters.Add(ComposeSingleChapter(html));
	}
	else
	{
		html = html.Substring(start, end - start).Trim();

		int anchor = html.IndexOf(anchorOpen, ignoreCase);
		while (anchor > -1)
		{
			// href value
			int hrefStart = anchor + anchorOpen.Length;
			int hrefEnd = html.IndexOf("'", hrefStart, ignoreCase);
			if (hrefEnd < 0)
			{
				break; // unterminated href: stop instead of failing inside Substring
			}

			// anchor text
			int textStart = html.IndexOf(">", anchor + 1, ignoreCase);
			if (textStart < 0 || textStart + 1 >= html.Length)
			{
				break; // tag never closes (or closes at the very end): nothing left to read
			}
			textStart++;
			int textEnd = html.IndexOf("<", textStart + 1, ignoreCase);
			if (textEnd < 0)
			{
				break; // anchor text is not followed by any tag
			}

			// normalise the chapter address to an absolute one
			string chapterUrl = html.Substring(hrefStart, hrefEnd - hrefStart).Trim().TrimStart('/');
			if (!chapterUrl.StartsWith(host, StringComparison.Ordinal))
			{
				chapterUrl = host + "/mobile/" + chapterUrl;
			}
			if (chapterUrl.IndexOf("&chapter=", StringComparison.Ordinal) < 0)
			{
				chapterUrl += "&chapter=0001";
			}

			// the address is stored both as the chapter entry and as the chapter URL; all three lists grow together
			book.Chapters.Add(chapterUrl);
			book.ChapterUrls.Add(chapterUrl);
			book.TOCs.Add(html.Substring(textStart, textEnd - textStart).GetNormalized());

			anchor = html.IndexOf(anchorOpen, textStart + 1, ignoreCase);
		}
	}

	// nothing usable collected so far: fall back to parsing the current HTML as one chapter
	if (book.ChapterUrls.Count < 1 && (book.Chapters.Count < 1 || book.Chapters[0].Equals("")))
	{
		book.Chapters.Add(ComposeSingleChapter(html));
	}

	return book;
}

/// <summary>
/// Finds <paramref name="marker"/>, then the next "&lt;a" after it, and returns the text between that tag's "&gt;" and the following "&lt;".
/// </summary>
/// <returns><c>true</c> when every step of the scan succeeded; otherwise <c>false</c> with <paramref name="text"/> set to an empty string.</returns>
private static bool TryReadLinkText(string html, string marker, out string text)
{
	text = "";

	int position = html.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
	if (position < 0)
	{
		return false;
	}

	position = html.IndexOf("<a", position + 1, StringComparison.OrdinalIgnoreCase);
	if (position < 0)
	{
		return false;
	}

	position = html.IndexOf(">", position + 1, StringComparison.OrdinalIgnoreCase);
	if (position < 0)
	{
		return false;
	}

	int close = html.IndexOf("<", position + 1, StringComparison.OrdinalIgnoreCase);
	if (close < 0)
	{
		return false;
	}

	text = html.Substring(position + 1, close - position - 1);
	return true;
}

/// <summary>
/// Runs <c>ISach.ParseChapter</c> on <paramref name="html"/> and joins its first two items: item 0 wrapped in an h1 element (when not blank) followed by item 1.
/// </summary>
private static string ComposeSingleChapter(string html)
{
	List <string> contents = ISach.ParseChapter(html);
	string heading = !string.IsNullOrWhiteSpace(contents[0])
		? "<h1>" + contents[0] + "</h1>" + "\n"
		: "";
	return heading + contents[1];
}