/// <summary>
/// Requests <paramref name="bookLink"/>, deserializes the JSON response and maps it to a
/// <see cref="BookChapterDto"/> containing the book details and its chapter list.
/// </summary>
/// <param name="bookLink">URL that is requested; it is also copied into the result's <c>BookLink</c>.</param>
/// <returns>
/// The mapped book information, or <c>null</c> when the response body is empty, deserializes to
/// nothing, or carries no <c>Data</c> payload.
/// </returns>
/// <exception cref="UserFriendlyException">
/// The response reports a negative <c>Code</c>; the exception message is the response's <c>Msg</c>.
/// </exception>
public BookChapterDto GetBookChapters(string bookLink)
{
    string jsonRes = Utils.HttpHelper.Get(bookLink);

    // An empty body cannot be deserialized into a usable object; report it the same way
    // as a response without data instead of failing with a null-related exception.
    if (string.IsNullOrWhiteSpace(jsonRes))
    {
        return null;
    }

    var jsonObj = JsonConvert.DeserializeObject<BookChapterResModel>(jsonRes);
    if (jsonObj == null)
    {
        // e.g. the literal JSON value "null"
        return null;
    }

    if (jsonObj.Code < 0)
    {
        throw new UserFriendlyException(jsonObj.Msg);
    }

    if (jsonObj.Data == null)
    {
        return null;
    }

    var responseModel = new BookChapterDto()
    {
        BookName = jsonObj.Data.Book_Name,
        BookLink = bookLink,
        Author = jsonObj.Data.Author,
        // The response model exposes no status value here, so a placeholder is used.
        Status = "--",
        Last_Update_Time = jsonObj.Data.Update_Time.ToString("yyyy-MM-dd HH:mm:ss"),
        Last_Update_ChapterName = jsonObj.Data.Last_Update_ChapterName,
        Last_Update_ChapterLink = jsonObj.Data.Last_Update_ChapterLink,
        Intro = jsonObj.Data.Book_Intro
    };

    // Chapterlist is only assigned when the response actually contains chapters.
    if (jsonObj.Data.Chapters != null && jsonObj.Data.Chapters.Any())
    {
        responseModel.Chapterlist = jsonObj.Data.Chapters
            .Select(s => new BookChapterDto.ChapterlistModel()
            {
                ChapterName = s.Name,
                // Each chapter link points at the content endpoint, addressed by chapter id.
                ChapterLink = $"{SpiderRemoteUrl}/Book/LiteratureForeign/GetContent?id={s.Id}"
            })
            .ToList();
    }

    return responseModel;
}
/// <summary>
/// Loads a book's introduction page and parses the book details and chapter list from its HTML.
/// </summary>
/// <param name="bookLink">Absolute URL of the book's introduction page.</param>
/// <returns>
/// The book information parsed from the page. The chapter list is empty when the page
/// contains no chapter links.
/// </returns>
/// <exception cref="UserFriendlyException">
/// The page could not be loaded after one retry, or the expected elements
/// (the <c>info</c> block with its title and four paragraphs, or the <c>intro</c> block) are missing.
/// </exception>
public BookChapterDto GetBookChapters(string bookLink)
{
    // Loads the page with a newly created, gzip-configured client on every attempt.
    Func<HtmlDocument> loadPage = () =>
    {
        var webClient = new HtmlWeb();
        SetGZipHeader(webClient);
        return webClient.Load(bookLink);
    };

    HtmlDocument doc;
    try
    {
        try
        {
            doc = loadPage();
        }
        catch
        {
            // The first request failed: wait two seconds, then retry exactly once.
            Thread.Sleep(2000);
            doc = loadPage();
        }
    }
    catch (Exception ex)
    {
        throw new UserFriendlyException($"抓取网站请求失败,{ex.Message}。请退出后重试");
    }

    // Links on the page are prefixed with scheme and host of the requested URL.
    var uri = new Uri(bookLink);
    var domain = $"{uri.Scheme}://{uri.Host}";

    // Four paragraphs are read below: author, status, last update time and latest chapter.
    var infoNodes = doc.DocumentNode.SelectNodes("//div[@id='info']/p");
    if (infoNodes == null || infoNodes.Count < 4)
    {
        throw new UserFriendlyException("解析网页异常,请重试");
    }

    var titleNode = doc.DocumentNode.SelectSingleNode("//div[@id='info']/h1");
    var introNode = doc.DocumentNode.SelectSingleNode("//div[@id='intro']");
    var latestChapterHref = infoNodes[3].ChildNodes["a"]?.Attributes["href"];
    if (titleNode == null || introNode == null || latestChapterHref == null)
    {
        // Report an unexpected page layout as a parse failure rather than a null dereference.
        throw new UserFriendlyException("解析网页异常,请重试");
    }

    // Chapter directory. SelectNodes yields null when nothing matches, which is treated
    // as a book without chapters.
    var chapterList = new List<BookChapterDto.ChapterlistModel>();
    var chapterAnchors = doc.DocumentNode.SelectNodes("//div[@id='list']/dl/dd/a");
    if (chapterAnchors != null)
    {
        foreach (var anchor in chapterAnchors)
        {
            var href = anchor.Attributes["href"];
            if (href == null)
            {
                // An anchor without a target cannot be turned into a chapter link.
                continue;
            }

            chapterList.Add(new BookChapterDto.ChapterlistModel()
            {
                ChapterName = anchor.InnerText,
                ChapterLink = domain + href.Value.Trim()
            });
        }
    }

    // Removes the text before the first ':' together with that colon (every occurrence of it).
    Func<string, string> stripLabel = text => text.Replace(text.Split(':')[0] + ":", string.Empty);

    // Book information
    return new BookChapterDto()
    {
        BookName = titleNode.InnerText.Trim(),
        BookLink = bookLink,
        Author = stripLabel(infoNodes[0].InnerText).Trim(),
        // The status paragraph also carries navigation text, which is removed.
        Status = stripLabel(infoNodes[1].InnerText).Replace(",加入书架,直达底部", string.Empty),
        Last_Update_Time = stripLabel(infoNodes[2].InnerText),
        Last_Update_ChapterName = stripLabel(infoNodes[3].InnerText).Trim(),
        Last_Update_ChapterLink = domain + latestChapterHref.Value.Trim(),
        Intro = introNode.InnerText.Replace(" ", "").Trim(),
        Chapterlist = chapterList
    };
}