Esempio n. 1
0
        /// <summary>
        /// Builds a book record from the <c>li</c> cells of a single row of a book list page.
        /// </summary>
        /// <param name="els">
        /// Cells of one row, in document order: [0] category, [1] catalog link followed by title link,
        /// [2] latest chapter, [3] date, [4] author, [5] status.
        /// </param>
        /// <param name="url">URL of the page being collected; stored on the record as-is.</param>
        /// <param name="baseUri">Base URI used to turn the relative links of the row into absolute ones.</param>
        /// <returns>
        /// A new record. It is returned unfilled when the row has fewer than five cells or when its
        /// second cell is not an <c>li</c> element.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="els"/> is null.</exception>
        public AbstractBookInfo GetBookInfoByHtmlNode(IEnumerable <HtmlAgilityPack.HtmlNode> els, string url, Uri baseUri)
        {
            BookInfo86zw_com bookinfo = new BookInfo86zw_com();

            // Materialize once so the sequence is not re-enumerated for every index access.
            List<HtmlAgilityPack.HtmlNode> cells = els.ToList();

            // Check the length before touching cells[1]; short rows yield the unfilled record.
            if (cells.Count < 5 || cells[1].Name != "li")
            {
                return bookinfo;
            }

            // Expected row layout (one <li> per column):
            //   li1: category link
            //   li2: "[catalog]" link followed by the title link
            //   li3: latest chapter link
            //   li4: update date (not stored)
            //   li5: author link
            //   li6: status
            bookinfo.类别   = cells[0].InnerText;
            bookinfo.最新章节 = cells[2].InnerText;
            bookinfo.作者   = cells[4].InnerText;
            if (cells.Count > 5)
            {
                // The status column is only read when the row really has a sixth cell.
                bookinfo.状态 = cells[5].InnerText;
            }
            bookinfo.采集URL = url;

            HtmlAgilityPack.HtmlNode titleCell = cells[1];
            List<HtmlAgilityPack.HtmlNode> anchors = titleCell.ChildNodes.Where(p => p.Name == "a").ToList();
            HtmlAgilityPack.HtmlNode catalogAnchor = anchors.Count > 0 ? anchors[0] : null;
            HtmlAgilityPack.HtmlNode titleAnchor   = anchors.Count > 1 ? anchors[1] : null;

            // The second anchor points at the book's introduction page.
            string introUrl = ResolveHref(titleAnchor, baseUri);
            if (introUrl != null)
            {
                bookinfo.小说简介URL = introUrl;
            }

            if (titleCell.ChildNodes.Count >= 2)
            {
                // The first anchor points at the chapter catalog.
                string catalogUrl = ResolveHref(catalogAnchor, baseUri);
                if (catalogUrl != null)
                {
                    bookinfo.小说目录URL = catalogUrl;
                }

                // The text of the second anchor is the book title.
                if (titleAnchor != null)
                {
                    bookinfo.小说名称 = titleAnchor.InnerText;
                }
            }

            return bookinfo;
        }

        /// <summary>
        /// Reads the <c>href</c> of <paramref name="anchor"/> and resolves it against <paramref name="baseUri"/>.
        /// </summary>
        /// <param name="anchor">Anchor node to read; may be null.</param>
        /// <param name="baseUri">Base URI the <c>href</c> value is resolved against.</param>
        /// <returns>
        /// The absolute URL; the raw <c>href</c> value when it cannot be resolved; or null when the
        /// anchor or its <c>href</c> attribute is missing.
        /// </returns>
        private static string ResolveHref(HtmlAgilityPack.HtmlNode anchor, Uri baseUri)
        {
            if (anchor == null)
            {
                return null;
            }

            HtmlAgilityPack.HtmlAttribute href = anchor.Attributes["href"];
            if (href == null)
            {
                return null;
            }

            string raw = href.Value;
            try
            {
                return new Uri(baseUri, raw).ToString();
            }
            catch (Exception ex)
            {
                // Best effort: keep the unresolved value and only trace the failure.
                System.Diagnostics.Debug.WriteLine(DateTime.Now + ex.Message + "|||||" + ex.StackTrace);
                return raw;
            }
        }
Esempio n. 2
0
    /// <summary>
    /// Parses the book list held in <c>listdiv</c> and synchronizes it with the book table: books that
    /// already exist get their last-update time refreshed, unknown books are added as new records.
    /// </summary>
    /// <remarks>
    /// Updates are saved through a short-lived <c>TygModel.Entities</c> context, while new books are added
    /// to, and saved through, the context exposed by <c>Skybot.Cache.RecordsCacheManager.Instance.Tygdb</c>.
    /// </remarks>
    void 获取中文更新()
    {
        HtmlAgilityPack.HtmlDocument dom = new HtmlAgilityPack.HtmlDocument();
        dom.LoadHtml(listdiv.InnerHtml);

        // Candidate elements collected by the analyser.
        List<PossiblyResultElement> possiblyResultElements = new List<PossiblyResultElement>();

        // Walk the document starting from the root node.
        SingleListPageAnalyse.AnalyseMaxATagNearest(dom.DocumentNode, possiblyResultElements, 0, new PossiblyResultElement()
        {
            ParentPossiblyResult = null,
            CurrnetHtmlElement   = dom.DocumentNode,
            LayerIndex           = -1,
            ContainTagNum        = 0
        });

        // Deferred query: it is evaluated again after the removal below.
        var liCandidates = possiblyResultElements.Where(c => c.CurrnetHtmlElement.Name == "li");

        // The first "li" candidate is dropped; nothing to drop when the analyser found none.
        if (liCandidates.Any())
        {
            possiblyResultElements.Remove(liCandidates.First());
        }

        // Remaining "li" candidates are the data rows.
        var rows = liCandidates.ToList();

        List<Skybot.Collections.Sites.BookInfo86zw_com> list = new List<Skybot.Collections.Sites.BookInfo86zw_com>();

        foreach (var row in rows)
        {
            if (!row.CurrnetHtmlElement.HasChildNodes)
            {
                continue;
            }

            // Expected spans per row, by class:
            //   [0] fl: category in square brackets   [1] sm: title link
            //   [2] zj: latest chapter link           [3] zz: author
            //   [4] zs: word count                    [5] sj: date
            //   [6] zt: status
            var els = row.CurrnetHtmlElement.SelectNodes("span");

            // SelectNodes yields null when nothing matches; rows without all seven spans are skipped
            // instead of aborting the whole update.
            if (els == null || els.Count < 7)
            {
                continue;
            }

            var titleLink = els[1].Element("a");
            if (titleLink == null || titleLink.Attributes["href"] == null)
            {
                continue;
            }

            Skybot.Collections.Sites.BookInfo86zw_com bookITEM = new Skybot.Collections.Sites.BookInfo86zw_com();
            bookITEM.类别      = els[0].InnerText.Replace("[", "").Replace("]", "").Trim();
            bookITEM.小说名称    = titleLink.InnerText.Replace("\r\n", "").Trim();
            // NOTE(review): hrefs in the sample markup start with "/", which gives "...org//..." here.
            // Left unchanged because stored URLs may rely on this form - confirm before normalizing.
            bookITEM.小说目录URL = "http://www.xkzw.org/" + titleLink.Attributes["href"].Value;
            bookITEM.最新章节    = els[2].InnerText;
            bookITEM.作者      = els[3].InnerText;
            bookITEM.更新      = DateTime.Now.ToString();
            bookITEM.采集URL   = bookITEM.小说目录URL;
            bookITEM.状态      = els[6].InnerText;
            bookITEM.小说简介URL = null;
            list.Add(bookITEM);
        }

        using (TygModel.Entities tygdb = new TygModel.Entities())
        {
            // Refresh existing books, queue unknown ones for insertion.
            foreach (var item in list)
            {
                string key = item.小说名称 + "|" + item.作者;

                // Materialized once so the database is queried a single time per book.
                var matches = tygdb.书名表
                                   .Where(p => p.书名.Replace("》", "").Replace("《", "").Trim() + "|" + p.作者名称 == key)
                                   .ToList();

                if (matches.Count > 0)
                {
                    foreach (var bookItem in matches)
                    {
                        bookItem.最后更新时间 = DateTime.Now;
                    }
                }
                else
                {
                    // New records go to the shared cache context, which is saved separately below.
                    Skybot.Cache.RecordsCacheManager.Instance.Tygdb.AddTo书名表(item.Convert());
                }
            }

            tygdb.SaveChanges();
        }   // the using block disposes the context, so no explicit Close/Dispose is needed

        Skybot.Cache.RecordsCacheManager.Instance.Tygdb.SaveChanges();
    }