/// <summary>
/// Enumerates the comics (link, title and cover image) listed on a category page.
/// </summary>
/// <param name="cateGoryStr">HTML markup of the category page.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> holding the queued comics, the next-page URL and an
/// empty page dictionary. The queue is empty when the comic list is not present.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var category = new CategoryCollect();
    var comicQueue = new Queue<BasicComicInfo>();

    var document = new HtmlDocument();
    document.LoadHtml(cateGoryStr);

    // HtmlAgilityPack returns null (not an empty result) when an XPath matches nothing,
    // so both lookups are checked before the links are enumerated.
    HtmlNode listNode = document.DocumentNode.SelectSingleNode("//div[@class='cComicList']");
    HtmlNodeCollection links = listNode == null ? null : listNode.SelectNodes("./li/a");
    if (links != null)
    {
        foreach (HtmlNode link in links)
        {
            var basic = new BasicComicInfo();
            basic.ComicHref = hostAttach + link.Attributes["href"].Value;
            basic.ComicName = link.Attributes["title"].Value;
            basic.ComicImgUrl = link.SelectSingleNode("./img").Attributes["src"].Value;
            comicQueue.Enqueue(basic);
        }
    }

    // Could still be refined; left untouched for now.
    var regex = new Regex(@"href='(?<href>[\w:/.]*)'>\s*下一页");

    // When the pattern does not match, the captured value is empty and only the
    // hostAttach prefix is stored.
    category.NextPageUrl = hostAttach + regex.Match(cateGoryStr).Groups["href"].Value;
    category.ComicQueue = comicQueue;
    category.PagesCollection = new Dictionary<string, string>();
    return category;
}
/// <summary>
/// Collects the comics listed on a category page.
/// </summary>
/// <param name="cateGoryStr">HTML markup of the category page.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics, their count and the
/// next-page URL.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var comicCollect = new CategoryCollect();
    var comicQueue = new Queue<BasicComicInfo>();

    // Only the "book-list" fragment is scanned for comic entries.
    string bookList = AnalyseTool.GetTag(cateGoryStr, @"<div class=""book-list"">", "</div>");
    if (bookList == null)
    {
        // GetTag's behaviour for a missing tag is not visible here; Regex.Matches
        // throws on null input, so fall back to an empty fragment.
        bookList = string.Empty;
    }

    var itemRegex = new Regex(@"href=""(?<href>[/\w]*)""\stitle=""(?<title>[\w\&!!,s]*)""><img\s*(data-src|src)=""(?<url>[:\w./]*)""");
    foreach (Match match in itemRegex.Matches(bookList))
    {
        var basicInfo = new BasicComicInfo();
        basicInfo.ComicHref = host + match.Groups["href"].Value;
        basicInfo.ComicName = match.Groups["title"].Value;
        basicInfo.ComicImgUrl = match.Groups["url"].Value;
        comicQueue.Enqueue(basicInfo);
    }

    // The "next page" link is searched for in the whole page markup.
    var nextRegex = new Regex(@"href=""(?<href>[/\w._]*)""\s*\w+=""[\w-:;]*"">下一页");
    comicCollect.NextPageUrl = homePage + nextRegex.Match(cateGoryStr).Groups["href"].Value;

    comicCollect.Count = comicQueue.Count;
    comicCollect.ComicQueue = comicQueue;
    return comicCollect;
}
// TODO: the paths and tags used below still need to be revised.
/// <summary>
/// Collects the comics listed on a category page and the link to the next page.
/// </summary>
/// <param name="cateGoryStr">HTML markup of the category page.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics and their count.
/// <c>NextPageUrl</c> is only assigned when a "next" link is present.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var cateCollect = new CategoryCollect();
    var comicQueue = new Queue<BasicComicInfo>();

    var doc = new HtmlDocument();
    doc.LoadHtml(cateGoryStr);

    // SelectNodes returns null when nothing matches, so guard before enumerating.
    HtmlNodeCollection items = doc.DocumentNode.SelectNodes("//ul[@class='liemh htmls indliemh']/li");
    if (items != null)
    {
        foreach (HtmlNode item in items)
        {
            HtmlNode link = item.SelectSingleNode("./a");

            var basicInfo = new BasicComicInfo();
            basicInfo.ComicName = link.Attributes["title"].Value;
            basicInfo.ComicHref = link.Attributes["href"].Value;
            basicInfo.ComicImgUrl = item.SelectSingleNode("./a/img").Attributes["src"].Value;
            comicQueue.Enqueue(basicInfo);
        }
    }

    // Either the pager or its "next" anchor can be absent; in that case no
    // next-page URL is assigned instead of dereferencing null.
    HtmlNode pager = doc.DocumentNode.SelectSingleNode("//div[@class='pagination-wrapper']");
    HtmlNodeCollection nextLinks = pager == null ? null : pager.SelectNodes("./a[@class='next']");
    if (nextLinks != null && nextLinks.Count > 0)
    {
        cateCollect.NextPageUrl = hostName + nextLinks[0].Attributes["href"].Value;
    }

    cateCollect.Count = comicQueue.Count;
    cateCollect.ComicQueue = comicQueue;
    return cateCollect;
}
/// <summary>
/// Collects the comics listed on a category page together with the page links
/// found in its pager.
/// </summary>
/// <param name="cateGoryStr">HTML markup of the category page.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics, their count and a dictionary
/// mapping each pager link's <c>data-index</c> to its URL. <c>NextPageUrl</c> is only
/// assigned when the dictionary contains the key following the active link.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var queue = new Queue<BasicComicInfo>();
    var dict = new Dictionary<string, string>();
    var collectInfo = new CategoryCollect();

    var doc = new HtmlDocument();
    doc.LoadHtml(cateGoryStr);

    // SelectNodes returns null when nothing matches, so guard before enumerating.
    HtmlNodeCollection items = doc.DocumentNode.SelectNodes("//div[@class='box-body']/ul[@class='mh-list col7']/li/div[@class='mh-item']");
    if (items != null)
    {
        foreach (HtmlNode item in items)
        {
            HtmlNode titleLink = item.SelectSingleNode("./div[@class='mh-item-detali']/h2[@class='title']/a");

            var basic = new BasicComicInfo();
            basic.ComicName = titleLink.Attributes["title"].Value;
            basic.ComicHref = hostAttach + titleLink.Attributes["href"].Value;

            // The cover URL is the text between the first '(' and the first ')' of the
            // style attribute; it is only extracted when both are present in that order.
            string style = item.SelectSingleNode("./p[@class='mh-cover']").Attributes["style"].Value;
            int open = style.IndexOf('(');
            int close = style.IndexOf(')');
            if (open >= 0 && close > open)
            {
                basic.ComicImgUrl = style.Substring(open + 1, close - open - 1);
            }

            queue.Enqueue(basic);
        }
    }

    int position = 0;        // 1-based position of the link currently being visited
    int activePosition = 0;  // position of the link whose class is "active" (0 if none)

    HtmlNode pager = doc.DocumentNode.SelectSingleNode("//div[@class='page-pagination pull-right mt20']");
    HtmlNodeCollection pageLinks = pager == null ? null : pager.SelectNodes("./ul/li/a");
    if (pageLinks != null)
    {
        foreach (HtmlNode link in pageLinks)
        {
            position++;

            var dataIndex = link.Attributes["data-index"];
            if (dataIndex != null && !dict.ContainsKey(dataIndex.Value))
            {
                dict.Add(dataIndex.Value, hostAttach + link.Attributes["href"].Value);
            }

            var cssClass = link.Attributes["class"];
            if (cssClass != null && cssClass.Value == "active")
            {
                activePosition = position;
            }
        }
    }

    // NOTE(review): the lookup key is (position of the active link + 1) while the
    // dictionary is keyed by data-index; this assumes the two numbering schemes
    // line up - confirm against the site's markup.
    string nextUrl;
    if (dict.TryGetValue((activePosition + 1).ToString(), out nextUrl))
    {
        collectInfo.NextPageUrl = nextUrl;
    }

    collectInfo.ComicQueue = queue;
    collectInfo.Count = queue.Count;
    collectInfo.PagesCollection = dict;
    return collectInfo;
}
/// <summary>
/// Collects the comics listed on a category page and builds the next-page URL.
/// </summary>
/// <param name="cateGoryStr">HTML markup of the category page.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics, their count and the next-page
/// URL. A fixed default URL is used when the category directory cannot be determined
/// from the page.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var cateCollect = new CategoryCollect();
    var comicQueue = new Queue<BasicComicInfo>();

    var doc = new HtmlDocument();
    doc.LoadHtml(cateGoryStr);

    // Two container classes are tried; SelectNodes returns null when nothing matches,
    // so the second attempt can still come back null.
    HtmlNodeCollection items = doc.DocumentNode.SelectNodes("//div[@class='recommendedpic allfloatleft']");
    if (items == null)
    {
        items = doc.DocumentNode.SelectNodes("//div[@class='recommendedpicl center']");
    }

    if (items != null)
    {
        foreach (HtmlNode item in items)
        {
            HtmlNode link = item.SelectSingleNode("./a");

            var basicInfo = new BasicComicInfo();
            basicInfo.ComicName = link.Attributes["title"].Value;
            basicInfo.ComicHref = link.Attributes["href"].Value;
            basicInfo.ComicImgUrl = hostName + item.SelectSingleNode("./a/img").Attributes["src"].Value;
            comicQueue.Enqueue(basicInfo);
        }
    }

    var nextRegex = new Regex(@"href=""(?<href>[\w_\.]*)""\s*title=""下一页""");
    var allRegex = new Regex(@"href=""(?<href>[\w\-\./]*)""\s*target=""_self"">全部");

    // The directory part of the "全部" link is used as the prefix for the next-page
    // file name. It stays null when the link is missing or contains no '/'.
    string directory = null;
    HtmlNode hrefNode = doc.DocumentNode.SelectSingleNode("//div[@class='gray reminderguild']");
    if (hrefNode != null)
    {
        string allHref = allRegex.Match(hrefNode.OuterHtml).Groups["href"].Value;
        int lastSlash = allHref.LastIndexOf('/');
        if (lastSlash >= 0)
        {
            directory = allHref.Substring(0, lastSlash) + "/";
        }
    }

    if (directory != null)
    {
        cateCollect.NextPageUrl = hostName + directory + nextRegex.Match(cateGoryStr).Groups["href"].Value;
    }
    else
    {
        cateCollect.NextPageUrl = "http://www.dmzx.com/zuixin/";
    }

    cateCollect.Count = comicQueue.Count;
    cateCollect.ComicQueue = comicQueue;
    return cateCollect;
}
/// <summary>
/// Collects the comics listed on a category page together with its pager links.
/// </summary>
/// <param name="response">HTML markup of the category page.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics, their count and a dictionary
/// mapping each pager link's text to its URL. <c>NextPageUrl</c> is only assigned when
/// the current page number can be read and the following page is in the dictionary.
/// </returns>
public override CategoryCollect FindComicByCategory(string response)
{
    var cateInfo = new CategoryCollect();
    var queue = new Queue<BasicComicInfo>();
    var pageDict = new Dictionary<string, string>();

    var doc = new HtmlDocument();
    doc.LoadHtml(response);

    // SelectNodes returns null when nothing matches, so guard before enumerating.
    HtmlNodeCollection items = doc.DocumentNode.SelectNodes("//ul[@class='grid-row clearfix']/li | //ul[@class='grid-row clearfix first']/li");
    if (items != null)
    {
        foreach (HtmlNode item in items)
        {
            HtmlNode titleLink = item.SelectSingleNode("./p/a");

            var basicInfo = new BasicComicInfo();
            basicInfo.ComicName = titleLink.InnerText;
            basicInfo.ComicHref = titleLink.Attributes["href"].Value;
            basicInfo.ComicImgUrl = item.SelectSingleNode("./a/img").Attributes["src"].Value;
            queue.Enqueue(basicInfo);
        }
    }

    HtmlNodeCollection pageLinks = doc.DocumentNode.SelectNodes("//ul[@class='pagination']/li/a");
    if (pageLinks != null)
    {
        foreach (HtmlNode link in pageLinks)
        {
            // Links without an href cannot be followed and are skipped.
            var href = link.Attributes["href"];
            if (href != null && !pageDict.ContainsKey(link.InnerText))
            {
                pageDict.Add(link.InnerText, hostName + href.Value);
            }
        }
    }

    // The header span is read as "<current>/<...>"; the text before the '/' is
    // parsed as the current page number.
    HtmlNode head = doc.DocumentNode.SelectSingleNode("//div[@class='head']/span");
    if (head != null)
    {
        string headText = head.InnerText;
        int slash = headText.IndexOf('/');
        int currentPage;
        string nextUrl;
        if (slash >= 0
            && int.TryParse(headText.Substring(0, slash), out currentPage)
            && pageDict.TryGetValue((currentPage + 1).ToString(), out nextUrl))
        {
            cateInfo.NextPageUrl = nextUrl;
        }
    }

    cateInfo.ComicQueue = queue;
    cateInfo.Count = queue.Count;
    cateInfo.PagesCollection = pageDict;
    return cateInfo;
}
/// <summary>
/// Collects the comics listed on a category page and the link to the next page.
/// </summary>
/// <param name="cateGoryStr">HTML markup of the category page.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics and their count.
/// <c>NextPageUrl</c> is only assigned when a "nextpostslink" anchor exists.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var collect = new CategoryCollect();
    var queue = new Queue<BasicComicInfo>();

    var doc = new HtmlDocument();
    doc.LoadHtml(cateGoryStr);

    // SelectNodes returns null when nothing matches, so guard before enumerating.
    HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//div[@class='nag cf']/div/div/a");
    if (links != null)
    {
        foreach (HtmlNode link in links)
        {
            HtmlNode image = link.SelectSingleNode("./span/img");

            var basicInfo = new BasicComicInfo();
            basicInfo.ComicHref = link.Attributes["href"].Value;
            basicInfo.ComicName = link.Attributes["title"].Value;
            basicInfo.ComicImgUrl = image.Attributes["src"].Value;
            queue.Enqueue(basicInfo);
        }
    }

    collect.ComicQueue = queue;
    collect.Count = queue.Count;

    HtmlNode nextLink = doc.DocumentNode.SelectSingleNode("//a[@class='nextpostslink']");
    if (nextLink != null)
    {
        collect.NextPageUrl = nextLink.Attributes["href"].Value;
    }

    return collect;
}
/// <summary>
/// Collects the comics listed on a category page using regular expressions.
/// </summary>
/// <param name="cateGoryStr">HTML markup of the category page.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics, the next-page URL and an
/// empty page dictionary.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var cateCollect = new CategoryCollect();
    var comicQueue = new Queue<BasicComicInfo>();

    // Comic entries are searched for between the list container and the pager container.
    string listHtml = AnalyseTool.GetTag(cateGoryStr, @"<div class=""cComicList""", @"<div class=""cComicPageChange2");
    if (listHtml == null)
    {
        // GetTag's behaviour for a missing tag is not visible here; Regex.Matches
        // throws on null input, so fall back to an empty fragment.
        listHtml = string.Empty;
    }

    var itemRegex = new Regex(@"href=['""](?<href>[/\w-]*)['""]\s*class=['""][\w_]*['""]\stitle=['""](?<title>[\w-!!\s,。]*)['""]><img\salt=['""][\w-!,。\s,]*['""]\s*src=['""](?<url>[\w:/.-]*)");
    foreach (Match match in itemRegex.Matches(listHtml))
    {
        var basicInfo = new BasicComicInfo();
        basicInfo.ComicHref = hostName + match.Groups["href"].Value;
        basicInfo.ComicName = match.Groups["title"].Value;
        basicInfo.ComicImgUrl = match.Groups["url"].Value;
        comicQueue.Enqueue(basicInfo);
    }

    // The "next page" link is searched for inside the page-change span only.
    string pagerHtml = AnalyseTool.GetTag(cateGoryStr, "<span class='cPageChangeLink'>", "</span>");
    if (pagerHtml == null)
    {
        pagerHtml = string.Empty;
    }

    var nextRegex = new Regex(@"<a\s*href='(?<url>[\w-/]*)'>下一页");
    cateCollect.NextPageUrl = hostName + nextRegex.Match(pagerHtml).Groups["url"].Value;

    cateCollect.ComicQueue = comicQueue;
    cateCollect.PagesCollection = new Dictionary<string, string>();
    return cateCollect;
}
/// <summary>
/// Collects the comics listed on a category page together with its pager links.
/// </summary>
/// <param name="cateGoryStr">Category page content handed to <c>GetMainNode</c>.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> whose queue holds the comics found. When the comic
/// list is missing the result is returned early with an empty queue and without a page
/// dictionary. <c>NextPageUrl</c> is only assigned when a "下一页" link exists.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var cateCollect = new CategoryCollect();
    var queue = new Queue<BasicComicInfo>();
    cateCollect.ComicQueue = queue;

    HtmlNode mainNode = GetMainNode(cateGoryStr);
    if (mainNode == null)
    {
        return cateCollect;
    }

    HtmlNode listNode = mainNode.SelectSingleNode("//ul[@class='list_con_li clearfix']");
    if (listNode == null)
    {
        return cateCollect;
    }

    HtmlNodeCollection items = listNode.SelectNodes("./li");
    if (items == null)
    {
        return cateCollect;
    }

    foreach (HtmlNode item in items)
    {
        HtmlNode image = item.SelectSingleNode("./a/img");

        var comicInfo = new BasicComicInfo();
        comicInfo.ComicHref = item.SelectSingleNode("./a").Attributes["href"].Value;
        comicInfo.ComicName = image.Attributes["alt"].Value;
        comicInfo.ComicImgUrl = image.Attributes["src"].Value;
        queue.Enqueue(comicInfo);
    }

    // NOTE(review): the page also carries a number in span.comi_num/em. It is not read
    // here because nothing stored or consumed it; confirm whether it should populate a
    // count on the result.

    var pages = new Dictionary<string, string>();
    cateCollect.PagesCollection = pages;

    HtmlNodeCollection pageLinks = mainNode.SelectNodes("//ul[@class='pagination']/li/a");
    if (pageLinks != null)
    {
        foreach (HtmlNode link in pageLinks)
        {
            string key = link.InnerText;
            if (!pages.ContainsKey(key))
            {
                pages.Add(key, hostName + link.Attributes["href"].Value);
            }
        }

        string nextUrl;
        if (pages.TryGetValue("下一页", out nextUrl))
        {
            cateCollect.NextPageUrl = nextUrl;
        }
    }

    return cateCollect;
}
/// <summary>
/// Collects the comics listed on a category page together with its pager links, and
/// derives the previous/next page URLs from the pager.
/// </summary>
/// <param name="cateGoryStr">Category page content handed to <c>GetMainNode</c>.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics, <c>ComicTotalCount</c> set to
/// the number of queued comics, and a dictionary mapping pager link text to URL.
/// <c>LastPageUrl</c> and <c>NextPageUrl</c> are only assigned when the computed key is
/// present in that dictionary.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var collect = new CategoryCollect();
    var queue = new Queue<BasicComicInfo>();

    HtmlNode mainNode = GetMainNode(cateGoryStr);

    // SelectNodes returns null when nothing matches, so guard before enumerating.
    HtmlNodeCollection items = mainNode == null ? null : mainNode.SelectNodes("//div[@class='mh-item']");
    if (items != null)
    {
        foreach (HtmlNode item in items)
        {
            var basicComicInfo = new BasicComicInfo();
            basicComicInfo.ComicHref = host + item.SelectSingleNode("./a").Attributes["href"].Value;
            basicComicInfo.ComicName = item.SelectSingleNode("./div/h2/a").InnerText;
            basicComicInfo.ComicImgUrl = item.SelectSingleNode("./a/img").Attributes["src"].Value;
            queue.Enqueue(basicComicInfo);
        }
    }

    int curIndex = 0;  // 1-based ordinal of the last added link that carries a class attribute
    int count = 1;     // ordinal that the next added link will receive

    var pages = new Dictionary<string, string>();
    collect.PagesCollection = pages;

    // Known issue (noted by the original author): the page-pagination section
    // cannot be found, so everything below must cope with an empty dictionary.
    HtmlNodeCollection pageItems = mainNode == null ? null : mainNode.SelectNodes("//div[@class='page-pagination']/ul/li");
    if (pageItems != null)
    {
        foreach (HtmlNode pageItem in pageItems)
        {
            HtmlNode link = pageItem.SelectSingleNode("./a");
            if (link == null)
            {
                continue;
            }

            // The duplicate check uses the same text that is used as the key,
            // so Add can never see an existing key.
            string key = link.InnerText;
            if (pages.ContainsKey(key))
            {
                continue;
            }

            if (link.Attributes["class"] != null)
            {
                curIndex = count;
            }

            pages.Add(key, host + link.Attributes["href"].Value);
            count++;
        }
    }

    collect.ComicTotalCount = queue.Count;
    collect.ComicQueue = queue;

    // NOTE(review): collect.Count is tested below but never assigned in this method;
    // confirm whether it is meant to reflect queue.Count.
    string url;

    string previousKey = (curIndex - 1 > 0 && collect.Count > 0)
        ? (curIndex - 1).ToString()
        : curIndex.ToString();
    if (pages.TryGetValue(previousKey, out url))
    {
        collect.LastPageUrl = url;
    }

    string nextKey = (curIndex + 1 < pages.Count && collect.Count > 0)
        ? (curIndex + 1).ToString()
        : (pages.Count - 1).ToString();
    if (pages.TryGetValue(nextKey, out url))
    {
        collect.NextPageUrl = url;
    }

    return collect;
}
/// <summary>
/// Collects the comics listed on a category page, the previous/next page links and the
/// number shown in the page-change block.
/// </summary>
/// <param name="cateGoryStr">Category page content handed to <c>GetMainNode</c>.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics. <c>LastPageUrl</c> and
/// <c>NextPageUrl</c> are assigned when "上一页" / "下一页" links exist, and <c>Count</c>
/// when the first bold element of the page-change block holds an integer. An empty
/// result is returned when <c>GetMainNode</c> yields null.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var retCollect = new CategoryCollect();

    HtmlNode mainNode = GetMainNode(cateGoryStr);
    if (mainNode == null)
    {
        return retCollect;
    }

    var queue = new Queue<BasicComicInfo>();

    // SelectNodes returns null when nothing matches, so guard before enumerating.
    HtmlNodeCollection items = mainNode.SelectNodes("//div[@class='cComicList']/li");
    if (items != null)
    {
        foreach (HtmlNode item in items)
        {
            HtmlNode link = item.SelectSingleNode("./a");

            var info = new BasicComicInfo();
            info.ComicHref = hostName + link.Attributes["href"].Value;
            info.ComicName = link.Attributes["title"].Value;
            info.ComicImgUrl = item.SelectSingleNode("./a/img").Attributes["src"].Value;
            queue.Enqueue(info);
        }
    }

    HtmlNodeCollection pageLinks = mainNode.SelectNodes("//span[@class='cPageChangeLink']/a");
    if (pageLinks != null)
    {
        var dict = new Dictionary<string, string>();
        foreach (HtmlNode pageLink in pageLinks)
        {
            string key = pageLink.InnerText;
            var href = pageLink.Attributes["href"];
            if (href != null && !dict.ContainsKey(key))
            {
                dict.Add(key, hostName + href.Value);
            }
        }

        string url;
        if (dict.TryGetValue("上一页", out url))
        {
            retCollect.LastPageUrl = url;
        }

        if (dict.TryGetValue("下一页", out url))
        {
            retCollect.NextPageUrl = url;
        }
    }

    HtmlNode pageChange = mainNode.SelectSingleNode("//div[@class='cComicPageChange']");
    if (pageChange != null)
    {
        // Count is left untouched when the bold element is missing or not numeric.
        HtmlNodeCollection boldNodes = pageChange.SelectNodes("./b");
        int total;
        if (boldNodes != null && boldNodes.Count > 0 && int.TryParse(boldNodes[0].InnerText, out total))
        {
            retCollect.Count = total;
        }
    }

    retCollect.ComicQueue = queue;
    return retCollect;
}
/// <summary>
/// Collects the comics listed on a category page together with its pager links.
/// </summary>
/// <param name="cateGoryStr">HTML markup of the category page.</param>
/// <returns>
/// A <see cref="CategoryCollect"/> with the queued comics, their count and a dictionary
/// mapping pager link text to its href. <c>NextPageUrl</c> / <c>LastPageUrl</c> are
/// assigned when the page after / before the active one is in that dictionary.
/// </returns>
public override CategoryCollect FindComicByCategory(string cateGoryStr)
{
    var document = new HtmlDocument();
    var result = new CategoryCollect();
    var comics = new Queue<BasicComicInfo>();
    document.LoadHtml(cateGoryStr);
    var pages = new Dictionary<string, string>();

    HtmlNodeCollection comicNodes = document.DocumentNode.SelectNodes("//li[@class='list-comic']");
    if (comicNodes != null)
    {
        foreach (HtmlNode comicNode in comicNodes)
        {
            var comic = new BasicComicInfo();
            comic.ComicHref = comicNode.SelectSingleNode("./a").Attributes["href"].Value;

            // The same image element supplies both the title (alt) and the cover (src).
            HtmlNode image = comicNode.SelectSingleNode("./a/mip-img");
            comic.ComicName = image.Attributes["alt"].Value.Replace("'", "");
            comic.ComicImgUrl = image.Attributes["src"].Value;

            comics.Enqueue(comic);
        }
    }

    int activePage = 0;
    HtmlNodeCollection pageItems = document.DocumentNode.SelectNodes("//ul[@class='pagination']/li");
    if (pageItems != null)
    {
        foreach (HtmlNode pageItem in pageItems)
        {
            // Entries without a link carry no page URL and are skipped.
            HtmlNode anchor = pageItem.SelectSingleNode("./a");
            if (anchor == null)
            {
                continue;
            }

            string label = anchor.InnerText;

            // The entry whose class is "active" gives the current page number.
            var cssClass = pageItem.Attributes["class"];
            if (cssClass != null && cssClass.Value == "active")
            {
                activePage = Convert.ToInt32(label);
            }

            if (!pages.ContainsKey(label))
            {
                pages.Add(label, anchor.Attributes["href"].Value);
            }
        }
    }

    result.ComicQueue = comics;
    result.Count = comics.Count;
    result.PagesCollection = pages;

    string url;
    if (pages.TryGetValue((activePage + 1).ToString(), out url))
    {
        result.NextPageUrl = url;
    }

    if (pages.TryGetValue((activePage - 1).ToString(), out url))
    {
        result.LastPageUrl = url;
    }

    return result;
}