/// <summary>
/// Book introduction: copies the introduction markup from the page into
/// the book info's <c>Summery</c> field.
/// </summary>
/// <remarks>
/// The intro inside the "all hidden" span is preferred; when that node is absent
/// the first intro under the related-info block is used instead. If neither
/// exists the summary is set to an empty string. Exceptions raised while
/// querying or assigning are logged and not rethrown.
/// </remarks>
/// <param name="doc">Parsed HTML document to read the introduction from.</param>
public void AnalyContent(HtmlDocument doc)
{
    EBookInfo bookInfo = _bookDetailData.DouBanBookInfo;

    try
    {
        // The second query only runs when the first one finds nothing.
        HtmlNode introNode =
            doc.DocumentNode.SelectSingleNode("//div[@class='related_info']//span[@class='all hidden']//div[@class='intro']")
            ?? doc.DocumentNode.SelectSingleNode("//div[@class='related_info']//div[@class='intro']");

        bookInfo.Summery = introNode != null ? introNode.InnerHtml.Trim() : "";
    }
    catch (Exception ex)
    {
        NLogUtil.ErrorTxt("BookDetailCrawler AnalyContent:" + ex.Message);
    }
}
/// <summary>
/// Handles a span nested in a span: an info label whose value is a link node
/// located two siblings after the label (<c>node.NextSibling.NextSibling</c>).
/// </summary>
/// <remarks>
/// Depending on the label text this fills the author record (name, country, code,
/// source URL) or the translator / series / producer fields of the book info.
/// Unknown labels are ignored. When the value node is missing the method returns
/// without changing anything; when the value node has no <c>href</c> attribute the
/// URL is recorded as an empty string.
/// </remarks>
/// <param name="node">Label node whose trimmed inner text selects the field to fill.</param>
private void AnalyInfo_SpanChild(HtmlNode node)
{
    EBookInfo bi = _bookDetailData.DouBanBookInfo;
    EPerson author = _bookDetailData.Author;
    string name = node.InnerText.Trim();

    // The value sits two siblings after the label; either hop may be absent.
    HtmlNode separator = node.NextSibling;
    HtmlNode ci = separator != null ? separator.NextSibling : null;
    if (ci == null)
    {
        return;
    }

    // The attribute lookup yields null when the value node carries no href.
    var href = ci.Attributes["href"];
    string url = "";
    if (href != null)
    {
        url = href.Value;
        if (!url.StartsWith(DouBanBookPrefix, StringComparison.Ordinal))
        {
            url = DouBanBookPrefix + url;
        }
    }

    switch (name)
    {
        case "作者:":
        case "作者":
        {
            string authorName = ci.InnerText.Trim();
            string country;
            string personName;
            if (TrySplitAuthorCountry(authorName, out country, out personName))
            {
                author.Country = country;
                author.Name = personName;
            }
            else
            {
                // No (well-formed) bracketed prefix: keep the whole text as the name
                // and fall back to the default country.
                author.Name = authorName;
                author.Country = "中国";
            }

            author.Code = GenCodeHelper.Person_Code(author.Name.Trim());
            bi.AuthorCode = author.Code;
            author.SourceUrl = url;
            break;
        }

        case "译者:":
        case "译者":
            bi.Translater = ci.InnerText;
            bi.TranslaterUrl = url;
            break;

        case "丛书":
        case "丛书:":
            bi.Series = ci.InnerText;
            bi.SeriesUrl = url;
            break;

        case "出品方":
        case "出品方:":
            bi.Producer = ci.InnerText;
            bi.ProducerUrl = url;
            break;
    }
}

/// <summary>
/// Splits an author string of the form <c>[country]name</c> (also accepting
/// <c>&lt;&gt;</c>, <c>()</c> and <c>【】</c> as brackets) into its two parts.
/// </summary>
/// <param name="authorName">Trimmed author text.</param>
/// <param name="country">Text between the brackets; <c>null</c> when the method returns false.</param>
/// <param name="personName">Text after the closing bracket; <c>null</c> when the method returns false.</param>
/// <returns>
/// True when the text starts with a supported opening bracket and contains the
/// matching closing bracket; otherwise false.
/// </returns>
private static bool TrySplitAuthorCountry(string authorName, out string country, out string personName)
{
    country = null;
    personName = null;

    if (string.IsNullOrEmpty(authorName))
    {
        return false;
    }

    char closing;
    switch (authorName[0])
    {
        case '[':
            closing = ']';
            break;
        case '<':
            closing = '>';
            break;
        case '(':
            closing = ')';
            break;
        case '【':
            closing = '】';
            break;
        default:
            return false;
    }

    // A missing closing bracket would make Substring throw, so report "no prefix".
    int end = authorName.IndexOf(closing);
    if (end < 0)
    {
        return false;
    }

    country = authorName.Substring(1, end - 1);
    personName = authorName.Substring(end + 1);
    return true;
}
/// <summary>
/// Reads one info label node and stores its value in the matching field of the
/// book info.
/// </summary>
/// <remarks>
/// A label that contains a nested span is passed, via that span, to
/// <c>AnalyInfo_SpanChild</c>. Otherwise the value is the trimmed inner text of
/// the label's next sibling; a label without a next sibling yields an empty value
/// instead of throwing. Producer, series and author labels are delegated to
/// <c>AnalyInfo_SpanChild</c> with the label node itself. Unknown labels are ignored.
/// </remarks>
/// <param name="node">Label node whose trimmed inner text selects the field to fill.</param>
private void AnalyInfo(HtmlNode node)
{
    EBookInfo bi = _bookDetailData.DouBanBookInfo;

    // Nested-span labels never use the sibling text, so branch before reading it.
    HtmlNode cNode = node.SelectSingleNode(".//span");
    if (cNode != null)
    {
        AnalyInfo_SpanChild(cNode);
        return;
    }

    string name = node.InnerText.Trim();

    // NextSibling is null when the label is the last child of its parent.
    HtmlNode valueNode = node.NextSibling;
    string infoValue = valueNode != null ? valueNode.InnerText.Trim() : "";

    switch (name)
    {
        case "出版社:":
            bi.Publisher = infoValue;
            break;

        case "副标题:":
            bi.SubTitle = infoValue;
            break;

        case "原作名:":
            bi.OrigTitle = infoValue;
            break;

        case "出版年:":
            bi.PublishDate = infoValue;
            break;

        case "页数:":
            bi.PageCount = infoValue;
            break;

        case "定价:":
            bi.Pricing = infoValue;
            break;

        case "装帧:":
            bi.Makeup = infoValue;
            break;

        case "ISBN:":
            bi.ISBN = infoValue;
            break;

        // These labels are parsed by the span-child handler rather than
        // stored from the plain sibling text.
        case "出品方:":
        case "丛书:":
        case "作者:":
            AnalyInfo_SpanChild(node);
            break;
    }
}