/// <summary>
/// Walks the search-result pages of an Amazon (amazon.cn) category and locates
/// the image link and title link of every product entry on each page.
/// </summary>
/// <param name="url">
/// Category url; its "node" query parameter identifies the category to crawl.
/// Nothing happens when the url is null/empty or carries no "node" value.
/// </param>
/// <remarks>
/// The located links are neither stored nor returned by this method.
/// </remarks>
public static void AmazonProductList(string url)
{
    // http://www.amazon.cn/gp/site-directory/ref=sa_menu_fullstore
    if (string.IsNullOrEmpty(url))
    {
        return;
    }

    // The category is carried by the "node" query parameter of the url.
    var query = UrlHelper.GetQueryString(new Uri(url).Query);
    string categoryNode = query["node"];
    if (string.IsNullOrEmpty(categoryNode))
    {
        return;
    }

    string urlTemplate = "http://www.amazon.cn/s/ref=?rh=n:{0}&page={1}";

    // Page 1 is always fetched: it also supplies the total page count.
    var firstRequest = new HttpClient(string.Format(urlTemplate, categoryNode, 1));
    firstRequest.Timeout = 30000;
    var document = HtmlAgilityPackHelper.GetHtmlDocument(
        HttpUtility.HtmlDecode(firstRequest.Request()));

    // The text of the "pagnDisabled" span is read as the number of pages.
    var pagerNode = document.DocumentNode.SelectSingleNode("//span[@class='pagnDisabled']");
    int pageCount;
    if (pagerNode == null || !int.TryParse(pagerNode.InnerText, out pageCount))
    {
        return;
    }

    for (int page = 1; page <= pageCount; page++)
    {
        // Page 1 is already loaded; every later page is downloaded on demand.
        if (page > 1)
        {
            var request = new HttpClient(string.Format(urlTemplate, categoryNode, page));
            request.Timeout = 30000;
            document = HtmlAgilityPackHelper.GetHtmlDocument(
                HttpUtility.HtmlDecode(request.Request()));
        }

        var products = document.DocumentNode.SelectNodes(
            "//div[@class='result product'] | //div[@class='result lastRow product']");
        if (products == null)
        {
            // A page without product entries ends the whole crawl.
            return;
        }

        foreach (HtmlNode product in products)
        {
            // Located but not used any further in this method.
            var imageLink = product.SelectSingleNode("div[@class='image']/a");
            var titleLink = product.SelectSingleNode("div[@class='data']/h3[@class='title']/a");
        }
    }
}
/// <summary>
/// Crawls every page of a Suning category listing and stores each product whose
/// link is not yet known to <c>DataAccess</c>.
/// </summary>
/// <param name="obj">
/// The category list url as a string. Its "ci" query parameter supplies the
/// (numeric) category id.
/// </param>
/// <returns>
/// Boxed <c>true</c> once all pages have been processed. Boxed <c>false</c> when
/// <paramref name="obj"/> is not a non-empty absolute url, when "ci" is missing
/// or not an integer, when the first page comes back empty, or when the page
/// count cannot be read from the pager.
/// </returns>
public static object SuNingProductList(object obj)
{
    var url = obj as string;
    if (string.IsNullOrEmpty(url))
    {
        return false;
    }

    // A malformed url is reported as a failure instead of throwing.
    Uri uri;
    if (!Uri.TryCreate(url, UriKind.Absolute, out uri))
    {
        return false;
    }

    // The category is carried by the "ci" query parameter of the url.
    NameValueCollection nameValue = UrlHelper.GetQueryString(uri.Query);
    string cid = nameValue["ci"];
    if (string.IsNullOrEmpty(cid))
    {
        return false;
    }

    // Validate the id once, before any request, instead of parsing it for
    // every product (where a bad value would throw mid-crawl).
    int categoryId;
    if (!int.TryParse(cid, out categoryId))
    {
        return false;
    }

    string urlTemplate = "http://search.suning.com/emall/strd.do?ci={0}&cityId=9017&cp={1}&il=0&si=5&st=14&iy=-1";
    const string pagerXPath = "/html[1]/body[1]/div[1]/div[7]/div[2]/div[8]/a";
    const string productXPath = "/html[1]/body[1]/div[1]/div[7]/div[2]/div[6]/ul[1]/li";

    // Page index ("cp") is zero-based; page 0 also supplies the page count.
    var hcFirst = new HttpClient(string.Format(urlTemplate, cid, 0));
    hcFirst.Timeout = 30000;
    var htmlFirst = hcFirst.Request();
    if (string.IsNullOrEmpty(htmlFirst))
    {
        return false;
    }
    var htmlDocument = HtmlAgilityPackHelper.GetHtmlDocument(htmlFirst);

    // The last page number is read from the second-to-last pager link
    // (presumably the final link is a "next" button - TODO confirm), so at
    // least two links are required.
    var pageContainer = htmlDocument.DocumentNode.SelectNodes(pagerXPath);
    if (pageContainer == null || pageContainer.Count < 2)
    {
        return false;
    }

    int lastPageNumber;
    if (!int.TryParse(pageContainer[pageContainer.Count - 2].InnerText, out lastPageNumber))
    {
        return false;
    }

    for (int pageIndex = 0; pageIndex < lastPageNumber; pageIndex++)
    {
        // Page 0 is already loaded; every later page is downloaded on demand.
        if (pageIndex != 0)
        {
            var hc = new HttpClient(string.Format(urlTemplate, cid, pageIndex));
            hc.Timeout = 30000;
            var html = hc.Request();
            if (string.IsNullOrEmpty(html))
            {
                // Nothing to parse for this page; move on to the next one.
                continue;
            }
            htmlDocument = HtmlAgilityPackHelper.GetHtmlDocument(html);
        }

        // SelectNodes yields null when nothing matches; skip such a page.
        var productLis = htmlDocument.DocumentNode.SelectNodes(productXPath);
        if (productLis == null)
        {
            continue;
        }

        foreach (var htmlNode in productLis)
        {
            var aNode = htmlNode.SelectSingleNode("a");
            if (aNode == null)
            {
                continue;
            }

            // Product name and link; an entry lacking either is skipped.
            var titleAttribute = aNode.Attributes["title"];
            var hrefAttribute = aNode.Attributes["href"];
            if (titleAttribute == null
                || hrefAttribute == null
                || string.IsNullOrEmpty(hrefAttribute.Value))
            {
                continue;
            }
            var name = titleAttribute.Value;
            var href = hrefAttribute.Value;

            // Picture url, taken from the "src2" attribute of the image;
            // stays empty when the image or the attribute is absent.
            var picUrl = string.Empty;
            var imageNode = aNode.SelectSingleNode("img");
            if (imageNode != null
                && imageNode.Attributes["src2"] != null
                && !string.IsNullOrEmpty(imageNode.Attributes["src2"].Value))
            {
                picUrl = imageNode.Attributes["src2"].Value;
            }

            // Comment count; stays 0 when the node is absent or not numeric.
            int commentNum = 0;
            var commentNode = htmlNode.SelectSingleNode("div[1]/div[1]/p[1]/a[1]/i[1]");
            if (commentNode != null)
            {
                int.TryParse(commentNode.InnerText, out commentNum);
            }

            if (!DataAccess.IsExistUrl(href))
            {
                DataAccess.InsertProduct(name, href, categoryId, commentNum, picUrl);
            }
        }
    }

    return true;
}
/// <summary>
/// Crawls every page of a Dangdang category listing and stores each product
/// whose link is not yet known to <c>DataAccess</c>.
/// </summary>
/// <param name="categoryUrl">
/// Url of the first category page. Its "cat" query parameter supplies the
/// (numeric) category id. Nothing happens when the url is null, empty, not an
/// absolute url, or carries no integer "cat" value.
/// </param>
public static void DangDangProductList(string categoryUrl)
{
    if (string.IsNullOrEmpty(categoryUrl))
    {
        return;
    }

    // A malformed url ends the crawl quietly, like every other failure here.
    Uri uri;
    if (!Uri.TryCreate(categoryUrl, UriKind.Absolute, out uri))
    {
        return;
    }

    // The category is carried by the "cat" query parameter of the url.
    NameValueCollection nameValue = UrlHelper.GetQueryString(uri.Query);
    string cid = nameValue["cat"];
    if (string.IsNullOrEmpty(cid))
    {
        return;
    }

    // Validate the id once, before any request, instead of parsing it for
    // every product (where a bad value would throw mid-crawl).
    int categoryId;
    if (!int.TryParse(cid, out categoryId))
    {
        return;
    }

    var hcFirst = new HttpClient(categoryUrl);
    hcFirst.Timeout = 30000;
    var htmlFirst = hcFirst.Request();
    if (string.IsNullOrEmpty(htmlFirst))
    {
        return;
    }
    var htmlDocument = HtmlAgilityPackHelper.GetHtmlDocument(htmlFirst);

    // The pager on the first page reads like "1 / 19".
    var maxPageNode = htmlDocument.DocumentNode.SelectSingleNode("/html[1]/body[1]/div[2]/div[3]/div[5]/div[1]/div[2]/span[1]");
    if (maxPageNode == null || string.IsNullOrEmpty(maxPageNode.InnerText))
    {
        return;
    }

    // Take whatever follows the last '/' as the page count, so that short or
    // differently spaced pager text cannot throw or be cut at a wrong offset.
    var pagerText = maxPageNode.InnerText;
    var slashIndex = pagerText.LastIndexOf('/');
    if (slashIndex < 0)
    {
        return;
    }

    int maxPageNumber;
    if (!int.TryParse(pagerText.Substring(slashIndex + 1), out maxPageNumber))
    {
        return;
    }

    // Url template for pages 2..n.
    var tempUrl = "http://category.dangdang.com/all/?category_id={0}&page_index={1}";

    for (int pageIndex = 1; pageIndex <= maxPageNumber; pageIndex++)
    {
        // Page 1 is already loaded; every later page is downloaded on demand.
        if (pageIndex != 1)
        {
            var hc = new HttpClient(string.Format(tempUrl, cid, pageIndex));
            hc.Timeout = 30000;
            var html = hc.Request();
            if (string.IsNullOrEmpty(html))
            {
                // Skip this page; otherwise the previous page's document
                // would be parsed a second time.
                continue;
            }
            htmlDocument = HtmlAgilityPackHelper.GetHtmlDocument(html);
        }

        var productList = htmlDocument.DocumentNode.SelectNodes("/html[1]/body[1]/div[2]/div[3]/div[7]/div");
        if (productList == null || !productList.Any())
        {
            // A page without product entries ends the whole crawl.
            return;
        }

        foreach (HtmlNode htmlNode in productList)
        {
            // Only entries whose class contains "listitem" are products.
            var classAttribute = htmlNode.Attributes["class"];
            if (classAttribute == null || !classAttribute.Value.Contains("listitem"))
            {
                continue;
            }

            // Link to the product page; an entry without one is skipped.
            var a = htmlNode.SelectSingleNode("p[1]/a[1]");
            if (a == null
                || a.Attributes["href"] == null
                || string.IsNullOrEmpty(a.Attributes["href"].Value))
            {
                continue;
            }
            var productUrl = a.Attributes["href"].Value;

            // Product name; an entry without a title node is skipped.
            var titleNode = htmlNode.SelectSingleNode("p[3]/a[1]");
            if (titleNode == null)
            {
                continue;
            }

            // List-page picture; stays empty when the image or its "src"
            // attribute is absent.
            var productImageUrl = string.Empty;
            var image = a.SelectSingleNode("img[1]");
            if (image != null && image.Attributes["src"] != null)
            {
                productImageUrl = image.Attributes["src"].Value;
            }

            // Comment count: the characters between the first character and
            // "条" (presumably text like "(123条评论)" - TODO confirm). Stays 0
            // when the node is absent or the text does not fit.
            var commentCount = 0;
            var commentNodes = htmlNode.CssSelect("p.starlevel");
            var commentNode = commentNodes == null ? null : commentNodes.FirstOrDefault();
            if (commentNode != null && !string.IsNullOrEmpty(commentNode.InnerText))
            {
                var commentText = commentNode.InnerText;
                var index = commentText.IndexOf("条");
                if (index > 0)
                {
                    int.TryParse(commentText.Substring(1, index - 1), out commentCount);
                }
            }

            if (!DataAccess.IsExistUrl(productUrl))
            {
                DataAccess.InsertProduct(titleNode.InnerText, productUrl, categoryId, commentCount, productImageUrl);
            }
        }
    }
}