/// <summary>
/// Builds a book record from the cells of one row of a book listing.
/// </summary>
/// <remarks>
/// Expected row layout (one <c>li</c> per cell):
/// <code>
/// [0] &lt;li class="li1"&gt;&lt;a href="/Book/LN/20.aspx"&gt;category&lt;/a&gt;&lt;/li&gt;
/// [1] &lt;li class="li2"&gt;&lt;a href="/Html/Book/0/252/Index.shtml"&gt;[catalogue]&lt;/a&gt; &lt;a href="/Book/252/Index.aspx"&gt;book title&lt;/a&gt;&lt;/li&gt;
/// [2] &lt;li class="li3"&gt;&lt;a href="/Html/Book/0/252/49702.shtml"&gt;latest chapter&lt;/a&gt;&lt;/li&gt;
/// [3] &lt;li class="li4"&gt;date&lt;/li&gt;
/// [4] &lt;li class="li5"&gt;&lt;a href="/Author/WB/252.aspx"&gt;author&lt;/a&gt;&lt;/li&gt;
/// [5] &lt;li class="li6"&gt;status&lt;/li&gt;
/// </code>
/// </remarks>
/// <param name="els">Cells of one listing row, in document order.</param>
/// <param name="url">URL of the page being collected; stored unchanged in 采集URL.</param>
/// <param name="baseUri">Base URI used to turn relative hrefs into absolute URLs.</param>
/// <returns>
/// The populated record. When the row has fewer than six cells, or its second cell is not
/// an <c>li</c>, a record with no fields set is returned.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="els"/> is null.</exception>
public AbstractBookInfo GetBookInfoByHtmlNode(IEnumerable<HtmlAgilityPack.HtmlNode> els, string url, Uri baseUri)
{
    if (els == null)
    {
        throw new ArgumentNullException("els");
    }

    BookInfo86zw_com bookinfo = new BookInfo86zw_com();

    // Materialize once: the sequence is indexed several times below and may be lazy.
    List<HtmlAgilityPack.HtmlNode> cells = els.ToList();

    // Cells 0..5 are read below, so six cells are required. The count is checked
    // before cells[1] is touched so that a short row cannot throw.
    if (cells.Count < 6 || cells[1].Name != "li")
    {
        return bookinfo;
    }

    // Plain-text fields. Cell 3 (the date) is not read.
    bookinfo.类别 = cells[0].InnerText;
    bookinfo.最新章节 = cells[2].InnerText;
    bookinfo.作者 = cells[4].InnerText;
    bookinfo.状态 = cells[5].InnerText;
    bookinfo.采集URL = url;

    // The second cell carries two links: [0] the catalogue page, [1] the book's
    // intro page, whose link text is the book title.
    HtmlAgilityPack.HtmlNode titleCell = cells[1];
    List<HtmlAgilityPack.HtmlNode> anchors = titleCell.ChildNodes.Where(p => p.Name == "a").ToList();

    if (anchors.Count >= 2)
    {
        HtmlAgilityPack.HtmlAttribute introHref = anchors[1].Attributes["href"];
        if (introHref != null)
        {
            bookinfo.小说简介URL = ResolveHrefOrRaw(introHref.Value, baseUri);
        }
    }

    // Catalogue URL and title are only filled when the cell has at least two child nodes.
    if (titleCell.ChildNodes.Count >= 2)
    {
        if (anchors.Count >= 1)
        {
            HtmlAgilityPack.HtmlAttribute catalogueHref = anchors[0].Attributes["href"];
            if (catalogueHref != null)
            {
                bookinfo.小说目录URL = ResolveHrefOrRaw(catalogueHref.Value, baseUri);
            }
        }

        if (anchors.Count >= 2)
        {
            bookinfo.小说名称 = anchors[1].InnerText;
        }
    }

    return bookinfo;
}

/// <summary>
/// Resolves <paramref name="href"/> against <paramref name="baseUri"/>; when resolution
/// fails the failure is written to the debug output and the raw href is returned.
/// </summary>
private static string ResolveHrefOrRaw(string href, Uri baseUri)
{
    try
    {
        return new Uri(baseUri, href).ToString();
    }
    catch (Exception ex)
    {
        // Best effort by design: a bad link must not abort parsing of the row.
        System.Diagnostics.Debug.WriteLine(DateTime.Now + ex.Message + "|||||" + ex.StackTrace);
        return href;
    }
}
/// <summary>
/// Parses the book list contained in <c>listdiv</c> and synchronizes it with the 书名表 table:
/// books already present (matched on title with the 《》 marks stripped, plus author) get
/// their 最后更新时间 refreshed; all others are converted and queued for insertion through
/// <c>Skybot.Cache.RecordsCacheManager.Instance.Tygdb</c>.
/// </summary>
void 获取中文更新()
{
    HtmlAgilityPack.HtmlDocument dom = new HtmlAgilityPack.HtmlDocument();
    dom.LoadHtml(listdiv.InnerHtml);

    // Candidate elements collected by the analyser while walking the document.
    List<PossiblyResultElement> possiblyResultElements = new List<PossiblyResultElement>();
    SingleListPageAnalyse.AnalyseMaxATagNearest(
        dom.DocumentNode,
        possiblyResultElements,
        0,
        new PossiblyResultElement()
        {
            ParentPossiblyResult = null,
            CurrnetHtmlElement = dom.DocumentNode,
            LayerIndex = -1,
            ContainTagNum = 0
        });

    // Drop the first "li" candidate before reading rows.
    // NOTE(review): presumably this is the header row of the list — confirm.
    // Guarded so that a page without any "li" no longer throws.
    int firstLiIndex = possiblyResultElements.FindIndex(p => p.CurrnetHtmlElement.Name == "li");
    if (firstLiIndex >= 0)
    {
        possiblyResultElements.RemoveAt(firstLiIndex);
    }

    // Each remaining "li" is one book row made of seven spans:
    //   [0] <span class="fl">[category]</span>
    //   [1] <span class="sm"><a href="/xkzw3226/" target="_blank">book title</a></span>
    //   [2] <span class="zj"><a href="/xkzw3226/5162956.html" ...>latest chapter</a></span>
    //   [3] <span class="zz">author</span>
    //   [4] <span class="zs">word count</span>
    //   [5] <span class="sj">2013-05-12</span>
    //   [6] <span class="zt">status</span>
    List<Skybot.Collections.Sites.BookInfo86zw_com> list = new List<Skybot.Collections.Sites.BookInfo86zw_com>();
    foreach (var item in possiblyResultElements.Where(p => p.CurrnetHtmlElement.Name == "li"))
    {
        if (!item.CurrnetHtmlElement.HasChildNodes)
        {
            continue;
        }

        // SelectNodes returns null when nothing matches; index 6 is read below.
        var els = item.CurrnetHtmlElement.SelectNodes("span");
        if (els == null || els.Count < 7)
        {
            System.Diagnostics.Debug.WriteLine(DateTime.Now + " skipped list row without the expected 7 spans");
            continue;
        }

        var titleLink = els[1].Element("a");
        var titleHref = titleLink == null ? null : titleLink.Attributes["href"];
        if (titleHref == null)
        {
            System.Diagnostics.Debug.WriteLine(DateTime.Now + " skipped list row without a title link");
            continue;
        }

        Skybot.Collections.Sites.BookInfo86zw_com bookITEM = new Skybot.Collections.Sites.BookInfo86zw_com();
        bookITEM.类别 = els[0].InnerText.Replace("[", "").Replace("]", "").Trim();
        bookITEM.小说名称 = titleLink.InnerText.Replace("\r\n", "").Trim();
        // NOTE(review): the sample href above starts with "/", so this concatenation yields
        // "http://www.xkzw.org//..."; left unchanged because the value is persisted — confirm
        // before normalizing.
        bookITEM.小说目录URL = "http://www.xkzw.org/" + titleHref.Value;
        bookITEM.最新章节 = els[2].InnerText;
        bookITEM.作者 = els[3].InnerText;
        bookITEM.更新 = DateTime.Now.ToString();
        bookITEM.采集URL = bookITEM.小说目录URL;
        bookITEM.状态 = els[6].InnerText;
        bookITEM.小说简介URL = null;
        list.Add(bookITEM);
    }

    // Update existing books or queue new ones. The using block disposes the context
    // (and with it the connection), so no explicit Close/Dispose is needed.
    using (TygModel.Entities tygdb = new TygModel.Entities())
    {
        foreach (var item in list)
        {
            string key = item.小说名称 + "|" + item.作者;

            // Materialize once so the query is not executed twice (count + enumeration).
            var matches = tygdb.书名表
                .Where(p => p.书名.Replace("》", "").Replace("《", "").Trim() + "|" + p.作者名称 == key)
                .ToList();

            if (matches.Count > 0)
            {
                foreach (var bookItem in matches)
                {
                    bookItem.最后更新时间 = DateTime.Now;
                }
            }
            else
            {
                // New records go through the cache manager's context, not tygdb.
                var bok = item.Convert();
                Skybot.Cache.RecordsCacheManager.Instance.Tygdb.AddTo书名表(bok);
            }
        }

        tygdb.SaveChanges();
    }

    // Flush the inserts queued on the cache manager's context.
    Skybot.Cache.RecordsCacheManager.Instance.Tygdb.SaveChanges();
}