예제 #1
0
    /// <summary>
    /// Page load handler: downloads the page at top.baidu.com/buzz.php?p=book, collects
    /// the book titles found in its table rows, and stamps the current time on every
    /// 书名表 record whose trimmed title is among them.
    /// </summary>
    /// <param name="sender">Event source (not used).</param>
    /// <param name="e">Event data (not used).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        // Titles scraped from the page. A set keeps the per-record membership test
        // below constant-time instead of scanning a list for every database row.
        HashSet<string> bookNames = new HashSet<string>();

        HtmlAgilityPack.HtmlDocument dom = new HtmlAgilityPack.HtmlDocument();
        dom.LoadHtml("http://top.baidu.com/buzz.php?p=book".GetWeb());

        // Candidate elements collected by the analyser.
        List<PossiblyResultElement> possiblyResultElements = new List<PossiblyResultElement>();

        // Walk the document starting at the root node.
        SingleListPageAnalyse.AnalyseMaxATagNearest(dom.DocumentNode, possiblyResultElements, 0, new PossiblyResultElement()
        {
            ParentPossiblyResult = null,
            CurrnetHtmlElement   = dom.DocumentNode,
            LayerIndex           = -1,
            ContainTagNum        = 0
        });

        // Keep only the <tr> elements among the candidates.
        var tableRows = possiblyResultElements.Where(p => p.CurrnetHtmlElement.Name == "tr");

        foreach (var row in tableRows)
        {
            if (!row.CurrnetHtmlElement.HasChildNodes)
            {
                continue;
            }

            // Materialize once so the filter is not re-evaluated for each index access.
            var cells = row.CurrnetHtmlElement.ChildNodes.Where(p => p.HasChildNodes).ToList();

            // Two cells are read below; rows with fewer are skipped instead of throwing.
            if (cells.Count < 2)
            {
                continue;
            }

            // A row counts when its first cell is a <th> whose text parses as a number
            // (presumably the rank); the second cell's text is taken as the title.
            double rank;
            if (cells[0].Name == "th" && double.TryParse(cells[0].InnerText, out rank))
            {
                bookNames.Add(cells[1].InnerText);
            }
        }

        // Nothing scraped means nothing to update; avoid enumerating the whole table.
        if (bookNames.Count == 0)
        {
            return;
        }

        // The using block guarantees disposal even if enumeration or SaveChanges throws.
        using (TygModel.Entities tntity = new TygModel.Entities())
        {
            // 6000 -- presumably seconds, if Entities is an EF ObjectContext; TODO confirm.
            tntity.CommandTimeout = 60 * 100;

            // Refresh the last-update time of every record whose title was scraped.
            foreach (var k in tntity.书名表)
            {
                // Records with a null title cannot match and must not crash the loop.
                if (k.书名 != null && bookNames.Contains(k.书名.Trim()))
                {
                    k.最后更新时间 = DateTime.Now;
                }
            }

            // Commit the updates.
            tntity.SaveChanges();
        }
    }
예제 #2
0
    /// <summary>
    /// Parses the book list contained in <c>listdiv</c>, then for each listed book either
    /// refreshes the last-update time of the matching 书名表 rows or, when no row matches
    /// on "title|author", adds a new row through the cache manager's context.
    /// </summary>
    void 获取中文更新()
    {
        HtmlAgilityPack.HtmlDocument dom = new HtmlAgilityPack.HtmlDocument();
        dom.LoadHtml(listdiv.InnerHtml);

        // Candidate elements collected by the analyser.
        List<PossiblyResultElement> possiblyResultElements = new List<PossiblyResultElement>();

        // Walk the document starting at the root node.
        SingleListPageAnalyse.AnalyseMaxATagNearest(dom.DocumentNode, possiblyResultElements, 0, new PossiblyResultElement()
        {
            ParentPossiblyResult = null,
            CurrnetHtmlElement   = dom.DocumentNode,
            LayerIndex           = -1,
            ContainTagNum        = 0
        });

        // Drop the first <li> candidate (presumably the list's header row -- TODO confirm).
        // When the markup contains no <li> at all there is simply nothing to drop.
        int firstLiIndex = possiblyResultElements.FindIndex(p => p.CurrnetHtmlElement.Name == "li");
        if (firstLiIndex >= 0)
        {
            possiblyResultElements.RemoveAt(firstLiIndex);
        }

        // The remaining <li> candidates are the book entries.
        var listItems = possiblyResultElements.Where(p => p.CurrnetHtmlElement.Name == "li");

        List<Skybot.Collections.Sites.BookInfo86zw_com> list = new List<Skybot.Collections.Sites.BookInfo86zw_com>();

        foreach (var item in listItems)
        {
            if (!item.CurrnetHtmlElement.HasChildNodes)
            {
                continue;
            }

            // Expected entry markup:
            // <span class="fl">[category]</span>
            // <span class="sm"><a href="/xkzw3226/" target="_blank">title</a></span>
            // <span class="zj"><a href="/xkzw3226/5162956.html" title="..." target="_blank">latest chapter</a></span>
            // <span class="zz">author</span> <span class="zs">word count</span>
            // <span class="sj">2013-05-12</span> <span class="zt">status</span>
            var els = item.CurrnetHtmlElement.SelectNodes("span");

            // SelectNodes yields null when nothing matches, and index 6 is the highest
            // one read below; entries that do not fit the layout are skipped.
            if (els == null || els.Count < 7)
            {
                continue;
            }

            var link = els[1].Element("a");
            if (link == null || link.Attributes["href"] == null)
            {
                continue;
            }

            Skybot.Collections.Sites.BookInfo86zw_com bookITEM = new Skybot.Collections.Sites.BookInfo86zw_com();
            bookITEM.类别      = els[0].InnerText.Replace("[", "").Replace("]", "").Trim();
            bookITEM.小说名称    = link.InnerText.Replace("\r\n", "").Trim();
            bookITEM.小说目录URL = "http://www.xkzw.org/" + link.Attributes["href"].Value;
            bookITEM.最新章节    = els[2].InnerText;
            bookITEM.作者      = els[3].InnerText;
            bookITEM.更新      = DateTime.Now.ToString();
            bookITEM.采集URL   = bookITEM.小说目录URL;
            bookITEM.状态      = els[6].InnerText;
            bookITEM.小说简介URL = null;
            list.Add(bookITEM);
        }

        using (TygModel.Entities tygdb = new TygModel.Entities())
        {
            // Update existing books, or queue new ones for insertion.
            foreach (var item in list)
            {
                string key = item.小说名称 + "|" + item.作者;

                // Run the query once; counting and then enumerating the IQueryable
                // would hit the database twice per book.
                var matches = tygdb.书名表
                    .Where(p => p.书名.Replace("》", "").Replace("《", "").Trim() + "|" + p.作者名称 == key)
                    .ToList();

                if (matches.Count > 0)
                {
                    foreach (var bookItem in matches)
                    {
                        bookItem.最后更新时间 = DateTime.Now;
                    }
                }
                else
                {
                    // New rows go through the cache manager's context, not tygdb.
                    Skybot.Cache.RecordsCacheManager.Instance.Tygdb.AddTo书名表(item.Convert());
                }
            }

            tygdb.SaveChanges();
        }

        // Persist the newly added rows after tygdb has been saved and disposed.
        Skybot.Cache.RecordsCacheManager.Instance.Tygdb.SaveChanges();
    }