Пример #1
0
        /// <summary>
        /// Parses a category listing page and collects the comics found under the
        /// <c>div</c> whose class is <c>cComicList</c>.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> carrying the queue of parsed comics, the next-page URL
        /// and an empty pages dictionary. The queue is empty when the list markup is not found.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            CategoryCollect category   = new CategoryCollect();
            var             comicQueue = new Queue <BasicComicInfo>();
            HtmlDocument    document   = new HtmlDocument();

            document.LoadHtml(cateGoryStr);

            // HtmlAgilityPack returns null (not an empty result) when an XPath query matches
            // nothing, so both lookups are checked before being dereferenced.
            HtmlNode           node    = document.DocumentNode.SelectSingleNode("//div[@class='cComicList']");
            HtmlNodeCollection collect = node == null ? null : node.SelectNodes("./li/a");

            if (collect != null)
            {
                foreach (HtmlNode anchor in collect)
                {
                    HtmlNode img = anchor.SelectSingleNode("./img");

                    if (img == null)
                    {
                        // Entry without a cover image element: skip it instead of aborting the whole page.
                        continue;
                    }

                    BasicComicInfo basic = new BasicComicInfo();
                    basic.ComicHref   = hostAttach + anchor.Attributes["href"].Value;
                    basic.ComicName   = anchor.Attributes["title"].Value;
                    basic.ComicImgUrl = img.Attributes["src"].Value;
                    comicQueue.Enqueue(basic);
                }
            }

            // Original author's note: this pattern could still be refined; left as-is for now.
            Regex regex = new Regex(@"href='(?<href>[\w:/.]*)'>\s*下一页");

            // When the pattern does not match, the group value is empty and the URL is just hostAttach.
            category.NextPageUrl     = hostAttach + regex.Match(cateGoryStr).Groups["href"].Value;
            category.ComicQueue      = comicQueue;
            category.PagesCollection = new Dictionary <string, string>();
            return(category);
        }
Пример #2
0
        /// <summary>
        /// Gets the comics listed on a category page by running regular expressions over the
        /// <c>book-list</c> fragment returned by <c>AnalyseTool.GetTag</c>.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics, their count and the next-page URL
        /// (<c>homePage</c> followed by the matched link, or <c>homePage</c> alone when nothing matches).
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            var entry    = new BasicComicInfo();
            var result   = new CategoryCollect();
            var listHtml = AnalyseTool.GetTag(cateGoryStr, @"<div class=""book-list"">", "</div>");

            result.Count = 0;

            Regex pattern = new Regex(@"href=""(?<href>[/\w]*)""\stitle=""(?<title>[\w\&!!,s]*)""><img\s*(data-src|src)=""(?<url>[:\w./]*)""");
            var   pending = new Queue <BasicComicInfo>();

            // One match per comic entry: link, title and cover image URL.
            MatchCollection hits = pattern.Matches(listHtml);

            for (int k = 0; k < hits.Count; k++)
            {
                Match hit = hits[k];

                entry = new BasicComicInfo
                {
                    ComicHref   = host + hit.Groups["href"].Value,
                    ComicName   = hit.Groups["title"].Value,
                    ComicImgUrl = hit.Groups["url"].Value
                };
                pending.Enqueue(entry);
            }

            // The pager fragment is extracted here but not consulted afterwards: the next-page
            // pattern below is applied to the full page source.
            var pageInfo = AnalyseTool.GetTag(cateGoryStr, @"<div class=""pager-cont"">", "</div>");

            pattern = new Regex(@"href=""(?<href>[/\w._]*)""\s*\w+=""[\w-:;]*"">下一页");
            Match nextLink = pattern.Match(cateGoryStr);

            result.NextPageUrl = homePage + nextLink.Groups["href"].Value;
            result.Count       = pending.Count;
            result.ComicQueue  = pending;
            return(result);
        }
Пример #3
0
        // Original author's note: the XPath paths and tags used here still need to be revised.
        /// <summary>
        /// Parses a category listing page: every <c>li</c> of the <c>liemh htmls indliemh</c> list
        /// becomes one comic entry, and the pagination wrapper supplies the next-page link.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics and their count.
        /// <c>NextPageUrl</c> is only assigned when a link with class <c>next</c> is present.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            HtmlDocument           doc         = new HtmlDocument();
            CategoryCollect        cateCollect = new CategoryCollect();
            Queue <BasicComicInfo> comicQueue  = new Queue <BasicComicInfo>();

            doc.LoadHtml(cateGoryStr);

            // SelectNodes yields null when nothing matches, so it must be checked before iterating.
            HtmlNodeCollection collect = doc.DocumentNode.SelectNodes("//ul[@class='liemh htmls indliemh']/li");

            if (collect != null)
            {
                foreach (HtmlNode temp in collect)
                {
                    HtmlNode anchor = temp.SelectSingleNode("./a");
                    HtmlNode img    = temp.SelectSingleNode("./a/img");

                    if (anchor == null || img == null)
                    {
                        // List item without the expected link/image structure: skip it.
                        continue;
                    }

                    BasicComicInfo basicInfo = new BasicComicInfo();
                    basicInfo.ComicName   = anchor.Attributes["title"].Value;
                    basicInfo.ComicHref   = anchor.Attributes["href"].Value;
                    basicInfo.ComicImgUrl = img.Attributes["src"].Value;
                    comicQueue.Enqueue(basicInfo);
                }
            }

            // A page without a pagination wrapper or without a "next" link leaves NextPageUrl
            // unassigned instead of throwing.
            HtmlNode           pager     = doc.DocumentNode.SelectSingleNode("//div[@class='pagination-wrapper']");
            HtmlNodeCollection nextLinks = pager == null ? null : pager.SelectNodes("./a[@class='next']");

            if (nextLinks != null && nextLinks.Count > 0 && nextLinks[0].Attributes["href"] != null)
            {
                cateCollect.NextPageUrl = hostName + nextLinks[0].Attributes["href"].Value;
            }

            cateCollect.Count      = comicQueue.Count;
            cateCollect.ComicQueue = comicQueue;
            return(cateCollect);
        }
Пример #4
0
        /// <summary>
        /// Parses a category listing page built from <c>mh-item</c> blocks and collects the comics
        /// together with the pagination links.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics, their count and the pagination
        /// dictionary (keyed by each link's <c>data-index</c> value). <c>NextPageUrl</c> is only
        /// assigned when the dictionary holds an entry for the page after the active one.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            Queue <BasicComicInfo>      queue       = new Queue <BasicComicInfo>();
            Dictionary <string, string> dict        = new Dictionary <string, string>();
            CategoryCollect             collectInfo = new CategoryCollect();
            HtmlDocument                doc         = new HtmlDocument();

            doc.LoadHtml(cateGoryStr);

            // SelectNodes yields null when nothing matches, so it must be checked before iterating.
            HtmlNodeCollection collect = doc.DocumentNode.SelectNodes("//div[@class='box-body']/ul[@class='mh-list col7']/li/div[@class='mh-item']");

            if (collect != null)
            {
                foreach (HtmlNode temp in collect)
                {
                    HtmlNode titleLink = temp.SelectSingleNode("./div[@class='mh-item-detali']/h2[@class='title']/a");
                    HtmlNode cover     = temp.SelectSingleNode("./p[@class='mh-cover']");

                    if (titleLink == null || cover == null)
                    {
                        // Item without the expected title/cover structure: skip it.
                        continue;
                    }

                    BasicComicInfo basic = new BasicComicInfo();
                    basic.ComicName = titleLink.Attributes["title"].Value;
                    basic.ComicHref = hostAttach + titleLink.Attributes["href"].Value;

                    // The cover URL is the text between the first "(" and the first ")" of the
                    // inline style. It is only extracted when that span exists; otherwise
                    // ComicImgUrl is left unassigned rather than throwing.
                    string style = cover.Attributes["style"].Value;
                    int    open  = style.IndexOf("(");
                    int    close = style.IndexOf(")");

                    if (close > open)
                    {
                        basic.ComicImgUrl = style.Substring(open + 1, close - open - 1);
                    }

                    queue.Enqueue(basic);
                }
            }

            HtmlNode pager = doc.DocumentNode.SelectSingleNode("//div[@class='page-pagination pull-right mt20']");
            collect = pager == null ? null : pager.SelectNodes("./ul/li/a");

            var count = 0;
            var index = 0;

            if (collect != null)
            {
                foreach (HtmlNode temp in collect)
                {
                    count++;

                    var indexAttr = temp.Attributes["data-index"];
                    var hrefAttr  = temp.Attributes["href"];

                    if (indexAttr != null && hrefAttr != null && dict.ContainsKey(indexAttr.Value) == false)
                    {
                        dict.Add(indexAttr.Value, hostAttach + hrefAttr.Value);
                    }

                    // NOTE(review): the active page is tracked by link position while the dictionary
                    // is keyed by data-index; presumably the two coincide on the target site - confirm.
                    if (temp.Attributes["class"] != null && temp.Attributes["class"].Value == "active")
                    {
                        index = count;
                    }
                }
            }

            // On the last page (or without pagination) there is no following entry; leave
            // NextPageUrl unassigned instead of raising KeyNotFoundException.
            string nextUrl;

            if (dict.TryGetValue((index + 1).ToString(), out nextUrl))
            {
                collectInfo.NextPageUrl = nextUrl;
            }

            collectInfo.ComicQueue      = queue;
            collectInfo.Count           = queue.Count;
            collectInfo.PagesCollection = dict;
            return(collectInfo);
        }
Пример #5
0
        /// <summary>
        /// Parses a category listing page whose entries live in <c>recommendedpic allfloatleft</c>
        /// blocks (falling back to <c>recommendedpicl center</c> blocks) and works out the next-page URL.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics, their count and the next-page URL.
        /// The hard-coded fallback URL is used when the navigation block, or the "全部" link inside it,
        /// cannot be found.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            HtmlDocument           doc         = new HtmlDocument();
            CategoryCollect        cateCollect = new CategoryCollect();
            Queue <BasicComicInfo> comicQueue  = new Queue <BasicComicInfo>();

            doc.LoadHtml(cateGoryStr);

            HtmlNodeCollection collect = doc.DocumentNode.SelectNodes("//div[@class='recommendedpic allfloatleft']");

            if (collect == null)
            {
                collect = doc.DocumentNode.SelectNodes("//div[@class='recommendedpicl center']");
            }

            // Both queries may come back null (no match), so the loop is guarded.
            if (collect != null)
            {
                foreach (HtmlNode temp in collect)
                {
                    HtmlNode anchor = temp.SelectSingleNode("./a");
                    HtmlNode img    = temp.SelectSingleNode("./a/img");

                    if (anchor == null || img == null)
                    {
                        // Block without the expected link/image structure: skip it.
                        continue;
                    }

                    BasicComicInfo basicInfo = new BasicComicInfo();
                    basicInfo.ComicName   = anchor.Attributes["title"].Value;
                    basicInfo.ComicHref   = anchor.Attributes["href"].Value;
                    basicInfo.ComicImgUrl = hostName + img.Attributes["src"].Value;
                    comicQueue.Enqueue(basicInfo);
                }
            }

            // Default used whenever the category directory cannot be derived from the page.
            string   nextPageUrl = "http://www.dmzx.com/zuixin/";
            HtmlNode hrefNode    = doc.DocumentNode.SelectSingleNode("//div[@class='gray reminderguild']");

            if (hrefNode != null)
            {
                Regex regex     = new Regex(@"href=""(?<href>[\w_\.]*)""\s*title=""下一页""");
                Regex regexhref = new Regex(@"href=""(?<href>[\w\-\./]*)""\s*target=""_self"">全部");

                string href  = regexhref.Match(hrefNode.OuterHtml).Groups["href"].Value;
                int    slash = href.LastIndexOf('/');

                // Without a "/" (e.g. the "全部" link was not matched and href is empty) the
                // directory prefix cannot be cut out; keep the fallback rather than letting
                // Substring throw on a negative length.
                if (slash >= 0)
                {
                    nextPageUrl = hostName + href.Substring(0, slash) + "/" + regex.Match(cateGoryStr).Groups["href"].Value;
                }
            }

            cateCollect.NextPageUrl = nextPageUrl;
            cateCollect.Count       = comicQueue.Count;
            cateCollect.ComicQueue  = comicQueue;
            return(cateCollect);
        }
Пример #6
0
        /// <summary>
        /// Parses a category listing page laid out as <c>grid-row clearfix</c> lists and collects
        /// the comics plus the pagination links.
        /// </summary>
        /// <param name="response">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics, their count and the pagination
        /// dictionary (keyed by link text). <c>NextPageUrl</c> is only assigned when the current page
        /// number can be read from the header and the dictionary holds an entry for the following page.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string response)
        {
            HtmlDocument                doc      = new HtmlDocument();
            CategoryCollect             cateInfo = new CategoryCollect();
            Queue <BasicComicInfo>      queue    = new Queue <BasicComicInfo>();
            Dictionary <string, string> pageDict = new Dictionary <string, string>();

            doc.LoadHtml(response);

            // SelectNodes yields null when nothing matches, so each result is checked before iterating.
            HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//ul[@class='grid-row clearfix']/li | //ul[@class='grid-row clearfix first']/li");

            if (collection != null)
            {
                foreach (HtmlNode node in collection)
                {
                    HtmlNode titleLink = node.SelectSingleNode("./p/a");
                    HtmlNode img       = node.SelectSingleNode("./a/img");

                    if (titleLink == null || img == null)
                    {
                        // List item without the expected title/image structure: skip it.
                        continue;
                    }

                    BasicComicInfo basicInfo = new BasicComicInfo();
                    basicInfo.ComicName   = titleLink.InnerText;
                    basicInfo.ComicHref   = titleLink.Attributes["href"].Value;
                    basicInfo.ComicImgUrl = img.Attributes["src"].Value;
                    queue.Enqueue(basicInfo);
                }
            }

            collection = doc.DocumentNode.SelectNodes("//ul[@class='pagination']/li/a");

            if (collection != null)
            {
                foreach (HtmlNode node in collection)
                {
                    if (node.Attributes["href"] != null && pageDict.ContainsKey(node.InnerText) == false)
                    {
                        pageDict.Add(node.InnerText, hostName + node.Attributes["href"].Value);
                    }
                }
            }

            cateInfo.ComicQueue      = queue;
            cateInfo.Count           = queue.Count;
            cateInfo.PagesCollection = pageDict;

            // The header span is read as "<current>/<rest>"; the part before "/" is the current page.
            HtmlNode head = doc.DocumentNode.SelectSingleNode("//div[@class='head']/span");

            if (head != null)
            {
                string temp  = head.InnerText;
                int    slash = temp.IndexOf("/");
                int    currentPage;
                string nextUrl;

                // TryParse/TryGetValue: an unexpected header or the last page simply leaves
                // NextPageUrl unassigned instead of throwing.
                if (slash >= 0 &&
                    int.TryParse(temp.Substring(0, slash), out currentPage) &&
                    pageDict.TryGetValue((currentPage + 1).ToString(), out nextUrl))
                {
                    cateInfo.NextPageUrl = nextUrl;
                }
            }

            return(cateInfo);
        }
Пример #7
0
        /// <summary>
        /// Parses a category listing page whose entries are the links under <c>div.nag.cf</c> and
        /// picks up the <c>nextpostslink</c> anchor as the next page.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics and their count.
        /// <c>NextPageUrl</c> is only assigned when a next-page anchor exists.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            Queue <BasicComicInfo> queue   = new Queue <BasicComicInfo>();
            CategoryCollect        collect = new CategoryCollect();
            HtmlDocument           doc     = new HtmlDocument();

            doc.LoadHtml(cateGoryStr);

            // SelectNodes yields null when nothing matches, so it must be checked before iterating.
            HtmlNodeCollection nodeCollect = doc.DocumentNode.SelectNodes("//div[@class='nag cf']/div/div/a");

            if (nodeCollect != null)
            {
                foreach (HtmlNode temp in nodeCollect)
                {
                    HtmlNode img = temp.SelectSingleNode("./span/img");

                    if (img == null)
                    {
                        // Link without a cover image element: skip it.
                        continue;
                    }

                    BasicComicInfo basicInfo = new BasicComicInfo();
                    basicInfo.ComicHref   = temp.Attributes["href"].Value;
                    basicInfo.ComicName   = temp.Attributes["title"].Value;
                    basicInfo.ComicImgUrl = img.Attributes["src"].Value;
                    queue.Enqueue(basicInfo);
                }
            }

            collect.ComicQueue = queue;
            collect.Count      = queue.Count;

            HtmlNode next = doc.DocumentNode.SelectSingleNode("//a[@class='nextpostslink']");

            if (next != null && next.Attributes["href"] != null)
            {
                collect.NextPageUrl = next.Attributes["href"].Value;
            }

            return(collect);
        }
Пример #8
0
        /// <summary>
        /// Extracts the comics of a category page with regular expressions applied to the fragment
        /// between the <c>cComicList</c> and <c>cComicPageChange2</c> markers.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics, the next-page URL
        /// (<c>hostName</c> followed by the matched link) and an empty pages dictionary.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            CategoryCollect result   = new CategoryCollect();
            var             fragment = AnalyseTool.GetTag(cateGoryStr, @"<div class=""cComicList""", @"<div class=""cComicPageChange2");
            Regex           pattern  = new Regex(@"href=['""](?<href>[/\w-]*)['""]\s*class=['""][\w_]*['""]\stitle=['""](?<title>[\w-!!\s,。]*)['""]><img\salt=['""][\w-!,。\s,]*['""]\s*src=['""](?<url>[\w:/.-]*)");
            var             entry    = new BasicComicInfo();
            var             pending  = new Queue <BasicComicInfo>();

            // One match per comic entry: link, title and cover image URL.
            MatchCollection hits = pattern.Matches(fragment);

            for (int k = 0; k < hits.Count; k++)
            {
                Match hit = hits[k];

                entry = new BasicComicInfo
                {
                    ComicHref   = hostName + hit.Groups["href"].Value,
                    ComicName   = hit.Groups["title"].Value,
                    ComicImgUrl = hit.Groups["url"].Value
                };
                pending.Enqueue(entry);
            }

            // The "下一页" (next page) link is searched for inside the page-change span only.
            fragment = AnalyseTool.GetTag(cateGoryStr, "<span class='cPageChangeLink'>", "</span>");
            pattern  = new Regex(@"<a\s*href='(?<url>[\w-/]*)'>下一页");
            Match nextLink = pattern.Match(fragment);

            result.NextPageUrl     = hostName + nextLink.Groups["url"].Value;
            result.ComicQueue      = pending;
            result.PagesCollection = new Dictionary <string, string>();
            return(result);
        }
Пример #9
0
        /// <summary>
        /// Parses a category listing page: the items of the <c>list_con_li clearfix</c> list become
        /// comic entries and the <c>pagination</c> links fill the pages dictionary.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> whose queue holds the parsed comics. When the page cannot
        /// be loaded or contains no list, the result is returned with an empty queue.
        /// <c>NextPageUrl</c> is only assigned when a "下一页" pagination link exists.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            CategoryCollect        cateCollect = new CategoryCollect();
            Queue <BasicComicInfo> queue       = new Queue <BasicComicInfo>();

            cateCollect.ComicQueue = queue;

            HtmlNode mainNode = GetMainNode(cateGoryStr);

            if (mainNode == null)
            {
                return(cateCollect);
            }

            HtmlNode node = mainNode.SelectSingleNode("//ul[@class='list_con_li clearfix']");

            if (node == null)
            {
                return(cateCollect);
            }

            HtmlNodeCollection collect = node.SelectNodes("./li");

            if (collect == null)
            {
                return(cateCollect);
            }

            foreach (HtmlNode nodeTemp in collect)
            {
                HtmlNode anchor = nodeTemp.SelectSingleNode("./a");
                HtmlNode img    = nodeTemp.SelectSingleNode("./a/img");

                if (anchor == null || img == null)
                {
                    // List item without the expected link/image structure: skip it.
                    continue;
                }

                BasicComicInfo comicInfo = new BasicComicInfo();
                comicInfo.ComicHref   = anchor.Attributes["href"].Value;
                comicInfo.ComicName   = img.Attributes["alt"].Value;
                comicInfo.ComicImgUrl = img.Attributes["src"].Value;
                queue.Enqueue(comicInfo);
            }

            // NOTE(review): the previous version parsed //span[@class='comi_num']/em into a local
            // that was never used, and threw when that element was missing or non-numeric. The dead
            // parse was removed; presumably it was meant to feed a count property - confirm.

            collect = mainNode.SelectNodes("//ul[@class='pagination']/li/a");
            cateCollect.PagesCollection = new Dictionary <string, string>();

            if (collect != null)
            {
                foreach (HtmlNode temp in collect)
                {
                    string key = temp.InnerText;

                    if (temp.Attributes["href"] != null && cateCollect.PagesCollection.ContainsKey(key) == false)
                    {
                        cateCollect.PagesCollection.Add(key, hostName + temp.Attributes["href"].Value);
                    }
                }

                string nextUrl;

                if (cateCollect.PagesCollection.TryGetValue("下一页", out nextUrl))
                {
                    cateCollect.NextPageUrl = nextUrl;
                }
            }

            return(cateCollect);
        }
Пример #10
0
        /// <summary>
        /// Parses a category listing page made of <c>mh-item</c> blocks and collects the comics and
        /// the pagination links.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics, <c>ComicTotalCount</c> set to the
        /// number of parsed entries, and the pagination dictionary keyed by link text.
        /// <c>LastPageUrl</c> / <c>NextPageUrl</c> are only assigned when the computed page key exists
        /// in that dictionary.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            CategoryCollect        collect = new CategoryCollect();
            Queue <BasicComicInfo> queue   = new Queue <BasicComicInfo>();

            collect.PagesCollection = new Dictionary <string, string>();
            collect.ComicQueue      = queue;

            HtmlNode mainNode = GetMainNode(cateGoryStr);

            if (mainNode == null)
            {
                return(collect);
            }

            HtmlNodeCollection nodes = mainNode.SelectNodes("//div[@class='mh-item']");

            if (nodes != null)
            {
                foreach (HtmlNode temp in nodes)
                {
                    HtmlNode anchor    = temp.SelectSingleNode("./a");
                    HtmlNode titleLink = temp.SelectSingleNode("./div/h2/a");
                    HtmlNode img       = temp.SelectSingleNode("./a/img");

                    if (anchor == null || titleLink == null || img == null)
                    {
                        // Item without the expected link/title/image structure: skip it.
                        continue;
                    }

                    BasicComicInfo basicComicInfo = new BasicComicInfo();
                    basicComicInfo.ComicHref   = host + anchor.Attributes["href"].Value;
                    basicComicInfo.ComicName   = titleLink.InnerText;
                    basicComicInfo.ComicImgUrl = img.Attributes["src"].Value;
                    queue.Enqueue(basicComicInfo);
                }
            }

            int curIndex = 0;
            int count    = 1;

            nodes = mainNode.SelectNodes("//div[@class='page-pagination']/ul/li");

            if (nodes != null)
            {
                // Original author's note: this part is problematic - the page-pagination section
                // is not always found.
                foreach (HtmlNode temp in nodes)
                {
                    HtmlNode node = temp.SelectSingleNode("./a");

                    if (node == null || node.Attributes["href"] == null)
                    {
                        continue;
                    }

                    // The <li> text and the <a> text are both checked so that Add can never hit a
                    // duplicate key.
                    if (!collect.PagesCollection.ContainsKey(temp.InnerText) &&
                        !collect.PagesCollection.ContainsKey(node.InnerText))
                    {
                        // A link carrying any class attribute is treated as the current page.
                        if (node.Attributes["class"] != null)
                        {
                            curIndex = count;
                        }

                        collect.PagesCollection.Add(node.InnerText, host + node.Attributes["href"].Value);
                        count++;
                    }
                }
            }

            collect.ComicTotalCount = queue.Count;

            // NOTE(review): Count is not assigned anywhere in this method, so the "collect.Count > 0"
            // conditions depend on whatever CategoryCollect provides by default - confirm whether
            // ComicTotalCount was intended.
            string url;
            string lastKey = (curIndex - 1 > 0 && collect.Count > 0)
                             ? (curIndex - 1).ToString()
                             : curIndex.ToString();

            // Lookups go through TryGetValue: when pagination is missing or the key is absent the
            // URL stays unassigned instead of raising KeyNotFoundException.
            if (collect.PagesCollection.TryGetValue(lastKey, out url))
            {
                collect.LastPageUrl = url;
            }

            string nextKey = (curIndex + 1 < collect.PagesCollection.Count && collect.Count > 0)
                             ? (curIndex + 1).ToString()
                             : (collect.PagesCollection.Count - 1).ToString();

            if (collect.PagesCollection.TryGetValue(nextKey, out url))
            {
                collect.NextPageUrl = url;
            }

            return(collect);
        }
Пример #11
0
        /// <summary>
        /// Parses a category listing page: the <c>li</c> items under <c>div.cComicList</c> become
        /// comic entries, the <c>cPageChangeLink</c> anchors supply the previous/next page URLs and
        /// the first <c>b</c> element of <c>div.cComicPageChange</c> supplies the count.
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics. When the page cannot be loaded
        /// the untouched result object is returned. <c>LastPageUrl</c>, <c>NextPageUrl</c> and
        /// <c>Count</c> are only assigned when the corresponding markup is present and valid.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            CategoryCollect retCollect = new CategoryCollect();
            var             mainNode   = GetMainNode(cateGoryStr);

            if (mainNode == null)
            {
                return(retCollect);
            }

            Queue <BasicComicInfo> queue   = new Queue <BasicComicInfo>();
            HtmlNodeCollection     collect = mainNode.SelectNodes("//div[@class='cComicList']/li");

            // SelectNodes yields null when nothing matches, so it must be checked before iterating.
            if (collect != null)
            {
                foreach (HtmlNode nodeTemp in collect)
                {
                    HtmlNode anchor = nodeTemp.SelectSingleNode("./a");
                    HtmlNode img    = nodeTemp.SelectSingleNode("./a/img");

                    if (anchor == null || img == null)
                    {
                        // List item without the expected link/image structure: skip it.
                        continue;
                    }

                    BasicComicInfo info = new BasicComicInfo();
                    info.ComicHref   = hostName + anchor.Attributes["href"].Value;
                    info.ComicName   = anchor.Attributes["title"].Value;
                    info.ComicImgUrl = img.Attributes["src"].Value;
                    queue.Enqueue(info);
                }
            }

            collect = mainNode.SelectNodes("//span[@class='cPageChangeLink']/a");

            if (collect != null)
            {
                Dictionary <string, string> dict = new Dictionary <string, string>();

                foreach (var i in collect)
                {
                    string key = i.InnerText;

                    if (dict.ContainsKey(key) == false && i.Attributes["href"] != null)
                    {
                        dict.Add(key, hostName + i.Attributes["href"].Value);
                    }
                }

                string url;

                // "上一页" = previous page, "下一页" = next page.
                if (dict.TryGetValue("上一页", out url))
                {
                    retCollect.LastPageUrl = url;
                }

                if (dict.TryGetValue("下一页", out url))
                {
                    retCollect.NextPageUrl = url;
                }
            }

            HtmlNode node = mainNode.SelectSingleNode("//div[@class='cComicPageChange']");

            if (node != null)
            {
                HtmlNodeCollection bold = node.SelectNodes("./b");
                int                total;

                // Count is only assigned when a <b> element exists and its text is an integer;
                // a missing or non-numeric value no longer throws.
                if (bold != null && bold.Count > 0 && int.TryParse(bold[0].InnerText, out total))
                {
                    retCollect.Count = total;
                }
            }

            retCollect.ComicQueue = queue;

            return(retCollect);
        }
Пример #12
0
        /// <summary>
        /// Parses a category listing page: each <c>li.list-comic</c> becomes a comic entry and the
        /// <c>pagination</c> list items fill the pages dictionary (keyed by link text).
        /// </summary>
        /// <param name="cateGoryStr">HTML source of the category page.</param>
        /// <returns>
        /// A <see cref="CategoryCollect"/> with the parsed comics, their count and the pages
        /// dictionary. <c>NextPageUrl</c> / <c>LastPageUrl</c> are assigned only when the dictionary
        /// has an entry for the page after / before the active one.
        /// </returns>
        public override CategoryCollect FindComicByCategory(string cateGoryStr)
        {
            HtmlDocument page = new HtmlDocument();
            CategoryCollect result = new CategoryCollect();
            Queue <BasicComicInfo> comics = new Queue <BasicComicInfo>();

            page.LoadHtml(cateGoryStr);

            Dictionary <string, string> pages = new Dictionary <string, string>();

            HtmlNodeCollection items = page.DocumentNode.SelectNodes("//li[@class='list-comic']");

            if (items != null)
            {
                for (int k = 0; k < items.Count; k++)
                {
                    HtmlNode item = items[k];

                    // Link first, then the name (apostrophes stripped), then the cover image.
                    BasicComicInfo entry = new BasicComicInfo
                    {
                        ComicHref   = item.SelectSingleNode("./a").Attributes["href"].Value,
                        ComicName   = item.SelectSingleNode("./a/mip-img").Attributes["alt"].Value.Replace("'", ""),
                        ComicImgUrl = item.SelectSingleNode("./a/mip-img").Attributes["src"].Value
                    };
                    comics.Enqueue(entry);
                }
            }

            int activePage = 0;

            HtmlNodeCollection pagerItems = page.DocumentNode.SelectNodes("//ul[@class='pagination']/li");

            if (pagerItems != null)
            {
                for (int k = 0; k < pagerItems.Count; k++)
                {
                    HtmlNode pagerItem = pagerItems[k];
                    HtmlNode link      = pagerItem.SelectSingleNode("./a");

                    // Items without a link carry no page information.
                    if (link == null)
                    {
                        continue;
                    }

                    string label     = link.InnerText;
                    var    classAttr = pagerItem.Attributes["class"];

                    // The list item flagged "active" names the page currently shown.
                    if (classAttr != null && classAttr.Value == "active")
                    {
                        activePage = Convert.ToInt32(label);
                    }

                    if (!pages.ContainsKey(label))
                    {
                        pages.Add(label, link.Attributes["href"].Value);
                    }
                }
            }

            result.ComicQueue      = comics;
            result.Count           = comics.Count;
            result.PagesCollection = pages;

            string url;

            if (pages.TryGetValue((activePage + 1).ToString(), out url))
            {
                result.NextPageUrl = url;
            }

            if (pages.TryGetValue((activePage - 1).ToString(), out url))
            {
                result.LastPageUrl = url;
            }

            return(result);
        }