Exemplo n.º 1
0
        /// <summary>
        /// Fetches chapter content for a keyword. For each configured chapter rule it runs a
        /// Google search restricted to the rule's domain, follows the first result link that the
        /// book rule's <c>ChapterUrl</c> pattern finds, and extracts the text with the rule's
        /// <c>Content</c> pattern.
        /// </summary>
        /// <param name="key">
        /// Search keyword; it is passed through <c>toUtf8String()</c> before being placed in the query string.
        /// </param>
        /// <returns>
        /// The first extracted content that matches the rule's <c>CheckSuccess</c> pattern, or an
        /// empty string when the key is null/empty, the rule files yield no rules, or no rule
        /// produces acceptable content.
        /// </returns>
        public string GetChapterContent(string key)
        {
            // Nothing to search for: skip the file reads and network round trips.
            if (string.IsNullOrEmpty(key))
            {
                return "";
            }

            // Both rule files are read from the process's current directory.
            string baseDirectory = System.Environment.CurrentDirectory;

            List<ChapterRule> chapterRules = (List<ChapterRule>)Voodoo.IO.XML.DeSerialize(
                typeof(List<ChapterRule>),
                Voodoo.IO.File.Read(System.IO.Path.Combine(baseDirectory, "ChapterRule.xml")));

            BookInfoRule bookRule = (BookInfoRule)Voodoo.IO.XML.DeSerialize(
                typeof(BookInfoRule),
                Voodoo.IO.File.Read(System.IO.Path.Combine(baseDirectory, "BookInfoRule.xml")));

            // Without rules there is nothing to try; report "not found" instead of faulting.
            if (chapterRules == null || chapterRules.Count == 0 || bookRule == null)
            {
                return "";
            }

            // The result-link pattern does not depend on the chapter rule, so build it once.
            Regex resultLinkPattern = new Regex(bookRule.ChapterUrl, RegexOptions.None);

            foreach (ChapterRule rule in chapterRules)
            {
                // Search Google, restricted to this rule's domain.
                string searchUrl = string.Format(
                    "https://www.google.com/search?hl=zh-CN&newwindow=1&safe=strict&q=site%3A{0}+{1}&oq=site%3A{0}+{1}&aq=f&aqi=&aql=&gs_sm=12&gs_upl=6999l11935l0l12589l3l3l0l0l0l0l0l0ll1l0",
                    rule.Domain,
                    key.toUtf8String());

                string searchHtml = Voodoo.Net.Url.GetHtml(searchUrl, "utf-8");

                // Only the first result link is tried for each rule.
                Match linkMatch = resultLinkPattern.Match(searchHtml);
                if (!linkMatch.Success)
                {
                    continue;
                }

                string contentUrl  = linkMatch.Groups["url"].Value;
                string contentHtml = Voodoo.Net.Url.GetHtml(contentUrl, rule.CharSet);

                Match contentMatch = new Regex(rule.Content, RegexOptions.None).Match(contentHtml);
                if (!contentMatch.Success)
                {
                    continue;
                }

                // Accept the extracted text only if it passes the rule's sanity-check pattern.
                string content = contentMatch.Groups["content"].Value;
                if (Regex.IsMatch(content, rule.CheckSuccess))
                {
                    return content;
                }
            }

            return "";
        }
Exemplo n.º 2
0
        // Form load handler: fills a BookInfoRule with hard-coded settings and serializes it
        // to C:\BookInfoRule.xml.
        private void Form2_Load(object sender, EventArgs e)
        {
            var rule = new BookInfoRule();

            // Source-site settings: search endpoints, page URL templates and scraping patterns.
            rule.CharSet = "UTF-8";
            rule.SearchRefer = "http://sosu.qidian.com/searchresult.aspx?searchkey={0}&searchtype=%E4%B9%A6%E5%90%8D";
            rule.SearchPageUrl = "http://sosu.qidian.com/ajax/search.ashx?method=getbooksearchlist&searchtype=%E4%B9%A6%E5%90%8D&searchkey={0}";

            // NOTE(review): BookInfoUrl is assigned twice in a row, so the URL template below is
            // immediately replaced by the book-info regex. The second assignment was probably
            // meant for a different property - confirm against BookInfoRule.
            rule.BookInfoUrl = "http://www.qidian.com/Book/{0}.aspx";
            rule.BookInfoUrl = "<div class=\"title\">[\\s\\S]*?<h1>(?<title>[\\s\\S]*?)</h1>[\\s]*?<b>小说作者:</b>[\\s\\S]*?\">(?<author>[\\s\\S]*?)</a>[\\s\\S]*?<b>总字数:</b>(?<charcount>[\\s\\S]*?)</td>[\\s\\S]*?<div class=\"txt\">(?<intro>[\\s\\S]*?)<span[\\s\\S]*?<b>小说类别:</b><a href=\"[\\s\\S]*?target=\"_blank\">(?<class>[\\s\\S]*?)</a>[\\s\\S]*?<b>写作进程:</b>(?<status>[\\s\\S]*?)</td>";

            rule.ChapterListUrl = "http://www.qidian.com/BookReader/{0}.aspx";
            rule.ChapterTitle = "<li style='width:.*?%;'><a [\\s\\S]*?>(?<title>.*?)</a>";

            // Google settings; ChapterUrl captures the link of a result entry in group "url".
            rule.GoogleDomain = "http://www.google.com/";
            rule.GoogleCharSet = "UTF-8";
            rule.ChapterUrl = "<h3 class=\"r\"><a href=\"(?<url>.*?)\" [\\s\\S]*?>[\\s\\S]*?</a></h3>";
            rule.TargetSite = "http://localhost/";

            // "m"-prefixed settings, all pointing at shouda8.com.
            rule.mDomain = "http://www.shouda8.com/";
            rule.mSearchPageUrl = "http://www.shouda8.com/EBook/index.aspx";
            rule.mSearchPar = "searchkey={0}&SearchClass=1";
            rule.mUrl = "<td><a target=\"_blank\" href=\"(?<url>.*?)\" class=\"bookname\">[\\s\\S]*?</a> <span class=\"booksort\">";
            rule.mChapter = "<div class=\"chapter_list_chapter\"><a href=\"(?<url>.*?)\" [\\s\\S]*?>(?<title>.*?)</a></div>";
            rule.mContent = "<div id=\"chapter_content\">(?<content>[\\s\\S]*?)</div>";
            rule.mCharSet = "gb2312";

            Voodoo.IO.XML.SaveSerialize(rule, "C:\\BookInfoRule.xml");

            // Disabled seed data: if re-enabled, this writes the ChapterRule list to C:\ChapterRule.xml.
            // NOTE(review): the slzww.com entry captures into a group named "key", while every
            // other entry uses "content" - confirm before re-enabling.
            //
            //var chapterRules = new List<ChapterRule>
            //{
            //    new ChapterRule { SiteName = "手打吧", Domain = "shouda8.com", CharSet = "gb2312",
            //        Content = "<div id=\"chapter_content\">(?<content>[\\s\\S]*?)</div>",
            //        CheckSuccess = "[\\s\\S]{1000,}?" },
            //    new ChapterRule { SiteName = "75小说", Domain = "75dr.com", CharSet = "gbk",
            //        Content = "<td id=\"table_container\">(?<content>[\\s\\S]*?)</td>",
            //        CheckSuccess = "[\\s\\S]{1000,}?" },
            //    new ChapterRule { SiteName = "35小说网", Domain = "xiaoshuo555.cn", CharSet = "gbk",
            //        Content = "<div id=\"content\" align=center>(?<content>[\\s\\S]*?)</div>",
            //        CheckSuccess = "[\\s\\S]{1000,}?" },
            //    new ChapterRule { SiteName = "思路中文网", Domain = "cilook.cn", CharSet = "gb2312",
            //        Content = "<div align=\"left\" id=\"content\">(?<content>[\\s\\S]*?)</div>",
            //        CheckSuccess = "[\\s\\S]{1000,}?" },
            //    new ChapterRule { SiteName = "五五文学", Domain = "55wx.com", CharSet = "gb2312",
            //        Content = "<table width=\"100%\"  border=\"0\" align=\"center\" cellpadding=\"0\" cellspacing=\"0\" class=\"text\" id=\"fontsize\" style=\"font-size:14px;\">[\\s\\S]*?<tr>[\\s\\S]*?<td>(?<content>[\\s\\S]*?)</td>",
            //        CheckSuccess = "[\\s\\S]{1000,}?" },
            //    new ChapterRule { SiteName = "思路中文网", Domain = "slzww.com", CharSet = "gbk",
            //        Content = "<div id=\"content\">(?<key>[\\s\\S]*?)</div>",
            //        CheckSuccess = "[\\s\\S]{1000,}?" },
            //};
            //Voodoo.IO.XML.SaveSerialize(chapterRules, "C:\\ChapterRule.xml");
        }