示例#1
0
        /// <summary>
        /// Walks the search-result pages of an Amazon.cn category and locates the
        /// image and title link nodes of every listed product.
        /// </summary>
        /// <param name="url">
        /// A category URL whose query string carries a <c>node</c> value,
        /// e.g. a link taken from http://www.amazon.cn/gp/site-directory/ref=sa_menu_fullstore.
        /// </param>
        /// <remarks>
        /// The located nodes are not stored or returned by this method; it only
        /// performs the page requests and the node lookups.
        /// </remarks>
        public static void AmazonProductList(string url)
        {
            if (string.IsNullOrEmpty(url))
            {
                return;
            }

            // The category identifier is taken from the "node" query parameter.
            NameValueCollection query = UrlHelper.GetQueryString(new Uri(url).Query);
            string categoryNode       = query["node"];

            if (string.IsNullOrEmpty(categoryNode))
            {
                return;
            }

            string pageUrlFormat = "http://www.amazon.cn/s/ref=?rh=n:{0}&page={1}";

            // Page 1 is always requested first because it also supplies the page count.
            var firstClient = new HttpClient(string.Format(pageUrlFormat, categoryNode, 1));
            firstClient.Timeout = 30000;
            var document = HtmlAgilityPackHelper.GetHtmlDocument(HttpUtility.HtmlDecode(firstClient.Request()));

            // The text of the span with class "pagnDisabled" is parsed as the last page number.
            var lastPageSpan = document.DocumentNode.SelectSingleNode("//span[@class='pagnDisabled']");
            int pageCount;

            if (lastPageSpan == null || !int.TryParse(lastPageSpan.InnerText, out pageCount))
            {
                return;
            }

            for (int pageIndex = 1; pageIndex <= pageCount; pageIndex++)
            {
                // The document for page 1 is already loaded; later pages are fetched on demand.
                if (pageIndex > 1)
                {
                    var pageClient = new HttpClient(string.Format(pageUrlFormat, categoryNode, pageIndex));
                    pageClient.Timeout = 30000;
                    document = HtmlAgilityPackHelper.GetHtmlDocument(HttpUtility.HtmlDecode(pageClient.Request()));
                }

                var products = document.DocumentNode.SelectNodes(
                    "//div[@class='result product'] | //div[@class='result lastRow product']");

                // A page without product nodes ends the whole walk.
                if (products == null)
                {
                    return;
                }

                foreach (HtmlNode product in products)
                {
                    var imageLink = product.SelectSingleNode("div[@class='image']/a");
                    var titleLink = product.SelectSingleNode("div[@class='data']/h3[@class='title']/a");
                }
            }
        }
示例#2
0
        /// <summary>
        /// Crawls every page of a Suning category listing and stores each product
        /// whose link is not already recorded.
        /// </summary>
        /// <param name="obj">
        /// The category URL as a <see cref="string"/>; its query string must carry a
        /// numeric <c>ci</c> (category id) value. Any other argument is rejected.
        /// </param>
        /// <returns>
        /// A boxed <c>bool</c>: <c>false</c> when the argument is unusable or the page
        /// count cannot be read from the first page, otherwise <c>true</c>.
        /// </returns>
        public static object SuNingProductList(object obj)
        {
            var url = obj as string;

            if (string.IsNullOrEmpty(url))
            {
                return false;
            }

            Uri uri = new Uri(url);
            NameValueCollection nameValue = UrlHelper.GetQueryString(uri.Query);
            // The category id is taken from the "ci" query parameter.
            string cid = nameValue["ci"];

            // Products are stored with an integer category id, so a missing or
            // non-numeric value is rejected up front instead of failing mid-crawl.
            int categoryId;
            if (string.IsNullOrEmpty(cid) || !int.TryParse(cid, out categoryId))
            {
                return false;
            }

            string urlTemplate = "http://search.suning.com/emall/strd.do?ci={0}&cityId=9017&cp={1}&il=0&si=5&st=14&iy=-1";

            // The "cp" page parameter is zero-based: the first page is cp=0.
            var hcFirst = new HttpClient(string.Format(urlTemplate, cid, 0));
            hcFirst.Timeout = 30000;
            var htmlDocument = HtmlAgilityPackHelper.GetHtmlDocument(hcFirst.Request());

            // Find the pager links; the page count is read from the second-to-last
            // link, so at least two links must be present.
            var pageContainer = htmlDocument.DocumentNode.SelectNodes("/html[1]/body[1]/div[1]/div[7]/div[2]/div[8]/a");

            if (pageContainer == null || pageContainer.Count < 2)
            {
                return false;
            }

            int lastPageNumber;
            if (!int.TryParse(pageContainer[pageContainer.Count - 2].InnerText, out lastPageNumber))
            {
                return false;
            }

            for (int i = 0; i < lastPageNumber; i++)
            {
                // Page 0 is already loaded; every other page is fetched here.
                if (i != 0)
                {
                    var hc = new HttpClient(string.Format(urlTemplate, cid, i));
                    hc.Timeout = 30000;
                    htmlDocument = HtmlAgilityPackHelper.GetHtmlDocument(hc.Request());
                }

                SaveSuNingProducts(htmlDocument.DocumentNode, categoryId);
            }

            return true;
        }

        /// <summary>
        /// Reads the product entries of one Suning listing page and inserts those
        /// whose link is not stored yet.
        /// </summary>
        /// <param name="documentNode">Root node of the parsed listing page.</param>
        /// <param name="categoryId">Category id recorded with every inserted product.</param>
        private static void SaveSuNingProducts(HtmlNode documentNode, int categoryId)
        {
            var productLis = documentNode.SelectNodes("/html[1]/body[1]/div[1]/div[7]/div[2]/div[6]/ul[1]/li");

            // No matching entries (e.g. an empty or differently laid out page): nothing to store.
            if (productLis == null)
            {
                return;
            }

            foreach (var htmlNode in productLis)
            {
                var aNode = htmlNode.SelectSingleNode("a");
                if (aNode == null)
                {
                    continue;
                }

                // An entry without a title or a link is treated as malformed and skipped.
                var titleAttribute = aNode.Attributes["title"];
                var hrefAttribute  = aNode.Attributes["href"];
                if (titleAttribute == null || hrefAttribute == null || string.IsNullOrEmpty(hrefAttribute.Value))
                {
                    continue;
                }

                // Product name and product link.
                var name = titleAttribute.Value;
                var href = hrefAttribute.Value;

                // Picture URL, read from the "src2" attribute of the image; empty when absent.
                var picUrl    = string.Empty;
                var imageNode = aNode.SelectSingleNode("img");
                if (imageNode != null && imageNode.Attributes["src2"] != null && !string.IsNullOrEmpty(imageNode.Attributes["src2"].Value))
                {
                    picUrl = imageNode.Attributes["src2"].Value;
                }

                // Number of comments; stays 0 when the node is missing or not numeric.
                int commentNum  = 0;
                var commentNode = htmlNode.SelectSingleNode("div[1]/div[1]/p[1]/a[1]/i[1]");
                if (commentNode != null)
                {
                    int.TryParse(commentNode.InnerText, out commentNum);
                }

                if (!DataAccess.IsExistUrl(href))
                {
                    DataAccess.InsertProduct(name, href, categoryId, commentNum, picUrl);
                }
            }
        }
示例#3
0
        /// <summary>
        /// Crawls every page of a Dangdang category listing and stores each product
        /// whose link is not already recorded.
        /// </summary>
        /// <param name="categoryUrl">
        /// URL of the first listing page; its query string must carry a numeric
        /// <c>cat</c> (category id) value. A null or empty URL is ignored.
        /// </param>
        public static void DangDangProductList(string categoryUrl)
        {
            if (string.IsNullOrEmpty(categoryUrl))
            {
                return;
            }

            Uri uri = new Uri(categoryUrl);
            NameValueCollection nameValue = UrlHelper.GetQueryString(uri.Query);
            // The category id is taken from the "cat" query parameter.
            string cid = nameValue["cat"];

            // Products are stored with an integer category id, so a missing or
            // non-numeric value is rejected up front instead of failing mid-crawl.
            int categoryId;
            if (string.IsNullOrEmpty(cid) || !int.TryParse(cid, out categoryId))
            {
                return;
            }

            var hcFirst = new HttpClient(categoryUrl);
            hcFirst.Timeout = 30000;
            var htmlDocument = HtmlAgilityPackHelper.GetHtmlDocument(hcFirst.Request());

            // The pager text on the first page looks like "1&nbsp;/&nbsp;19".
            var maxPageNode = htmlDocument.DocumentNode.SelectSingleNode("/html[1]/body[1]/div[2]/div[3]/div[5]/div[1]/div[2]/span[1]");

            if (maxPageNode == null || string.IsNullOrEmpty(maxPageNode.InnerText))
            {
                return;
            }

            // The total page count is whatever follows the last slash, so the
            // parse does not depend on the exact length of the leading part.
            var pagerText  = maxPageNode.InnerText;
            var slashIndex = pagerText.LastIndexOf('/');
            if (slashIndex < 0)
            {
                return;
            }

            var maxPageString = pagerText.Substring(slashIndex + 1).Replace("&nbsp;", string.Empty).Trim();
            int maxPageNumber;
            if (!int.TryParse(maxPageString, out maxPageNumber))
            {
                return;
            }

            // URL template for the pages after the first one.
            var tempUrl = "http://category.dangdang.com/all/?category_id={0}&page_index={1}";

            for (int j = 1; j <= maxPageNumber; j++)
            {
                // Page 1 is already loaded; every other page is fetched here.
                if (j != 1)
                {
                    var hc = new HttpClient(string.Format(tempUrl, cid, j));
                    hc.Timeout = 30000;
                    var html = hc.Request();
                    if (string.IsNullOrEmpty(html))
                    {
                        // Nothing came back: skip this page rather than
                        // processing the previous page's document again.
                        continue;
                    }
                    htmlDocument = HtmlAgilityPackHelper.GetHtmlDocument(html);
                }

                var productList =
                    htmlDocument.DocumentNode.SelectNodes("/html[1]/body[1]/div[2]/div[3]/div[7]/div");
                // A page without any entries ends the whole crawl.
                if (productList == null || !productList.Any())
                {
                    return;
                }

                foreach (HtmlNode htmlNode in productList)
                {
                    // Only divs whose class contains "listitem" are product entries.
                    var classAttribute = htmlNode.Attributes["class"];
                    if (classAttribute == null || !classAttribute.Value.Contains("listitem"))
                    {
                        continue;
                    }

                    // Link to the product page; entries without one are skipped.
                    var a = htmlNode.SelectSingleNode("p[1]/a[1]");
                    if (a == null || a.Attributes["href"] == null || string.IsNullOrEmpty(a.Attributes["href"].Value))
                    {
                        continue;
                    }
                    var productUrl = a.Attributes["href"].Value;

                    // Product name; an entry without a title node is treated as malformed and skipped.
                    var titleNode = htmlNode.SelectSingleNode("p[3]/a[1]");
                    if (titleNode == null)
                    {
                        continue;
                    }

                    // Listing-page picture of the product; empty when absent.
                    var productImageUrl = string.Empty;
                    var image           = a.SelectSingleNode("img[1]");
                    if (image != null && image.Attributes["src"] != null)
                    {
                        productImageUrl = image.Attributes["src"].Value;
                    }

                    // Comment count: the text between the first character and "条".
                    // NOTE(review): the skipped first character is presumably an
                    // opening bracket - confirm against the live markup.
                    var      commentCount = 0;
                    HtmlNode commentNode  = null;
                    var      commentNodes = htmlNode.CssSelect("p.starlevel");
                    if (commentNodes != null)
                    {
                        commentNode = commentNodes.FirstOrDefault();
                    }
                    if (commentNode != null && !string.IsNullOrEmpty(commentNode.InnerText))
                    {
                        var commentText = commentNode.InnerText;
                        var index       = commentText.IndexOf("条", StringComparison.Ordinal);
                        if (index > 0)
                        {
                            int.TryParse(commentText.Substring(1, index - 1), out commentCount);
                        }
                    }

                    if (!DataAccess.IsExistUrl(productUrl))
                    {
                        DataAccess.InsertProduct(titleNode.InnerText, productUrl, categoryId, commentCount, productImageUrl);
                    }
                }
            }
        }