コード例 #1
0
        /// <summary>
        ///     Parses the product entries out of a listing page.
        /// </summary>
        /// <param name="htmlSource">The HTML source of the listing page.</param>
        /// <param name="listOnly">
        ///     When <c>true</c>, only the list itself is parsed and <c>UpdateResultsPrices</c> is not
        ///     called (the original author notes that prices require additional network access).
        /// </param>
        /// <returns>One result per <c>ul/li</c> node whose link contains a numeric SKU.</returns>
        private IResut[] ParseCurrentItems(string htmlSource, bool listOnly = false)
        {
            const string SkuIdKey = "ProductSku";

            var products  = new List <IResut>();
            var listItems = HtmlDocumentHelper.CreateNavigator(htmlSource).Select(@"//ul/li");

            foreach (XPathNavigator listItem in listItems)
            {
                // Prefer the 'jDesc' block; fall back to 'jTitle' when it yields no title.
                var name = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jDesc']/a/text()");
                var link = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jDesc']//@href");
                if (string.IsNullOrEmpty(name))
                {
                    name = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jTitle']/a/text()");
                    link = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jTitle']//@href");
                }

                var image = HtmlDocumentHelper.GetNodeValue(listItem, ".//div[@class='jPic']//@original");

                // The SKU is the run of digits between the last '/' and ".html" in the product link.
                // A successful match always holds at least one digit, so no separate empty check is needed.
                var skuMatch = Regex.Match(link, @"(?<=/)\d+(?=\.html)");
                if (!skuMatch.Success)
                {
                    continue;
                }

                // Review data.
                var reviewCount = ParseComments(listItem);

                IResut product = new Resut();
                product[SkuIdKey]          = skuMatch.Value;
                product["ShopId"]          = ShopUrl;
                product["ProductName"]     = name;
                product["ProductUrl"]      = link;
                product["ProductImage"]    = image;
                product["ProductComments"] = reviewCount;

                products.Add(product);
            }

            if (listOnly)
            {
                return products.ToArray();
            }

            this.UpdateResultsPrices(products, SkuIdKey);
            return products.ToArray();
        }
コード例 #2
0
        /// <summary>
        ///     Parses the information of all products on the current page.
        /// </summary>
        /// <param name="htmlSource">The HTML source of the listing page.</param>
        /// <param name="listOnly">
        ///     When <c>true</c>, only the list itself is parsed and <c>UpdateResultsPrices</c> is not called.
        /// </param>
        /// <returns>One result per <c>ul/li</c> node whose link contains a numeric SKU.</returns>
        private IResut[] ParseCurrentItems(string htmlSource, bool listOnly = false)
        {
            const string SkuIdKey   = "ProductSku";
            var          resultList = new List <IResut>();

            // XPath navigator over the whole page.
            var navigator = HtmlDocumentHelper.CreateNavigator(htmlSource);
            var iterator  = navigator.Select(@"//ul/li");

            foreach (XPathNavigator item in iterator)
            {
                // Prefer the 'jDesc' block; fall back to 'jTitle' when it yields no title.
                var title = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jDesc']/a/text()");
                var href  = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jDesc']//@href");
                if (string.IsNullOrEmpty(title))
                {
                    title = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jTitle']/a/text()");
                    href  = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jTitle']//@href");
                }

                // Prefer the 'original' attribute and fall back to 'src'. The fallback must be
                // resolved relative to this item, not the whole page, otherwise the image is
                // looked up in the whole page instead of in this item's own markup.
                var imgSrc = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jPic']//@original");
                if (string.IsNullOrEmpty(imgSrc))
                {
                    imgSrc = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jPic']//@src");
                }

                // The SKU is the run of digits between a '/' and the literal ".html".
                // The dot is escaped so that it cannot match an arbitrary character.
                var skuMatchResults = Regex.Match(href, @"(?<=/)\d+(?=\.html)");
                var sku             = skuMatchResults.Success ? skuMatchResults.Value : string.Empty;

                if (string.IsNullOrEmpty(sku))
                {
                    continue;
                }

                // Review data.
                var comments = ParseComments(item);

                IResut resut = new Resut();

                resut[SkuIdKey]          = sku;
                resut["ShopUrl"]         = this.ShopUrl;
                resut["ProductName"]     = title;
                resut["ProductUrl"]      = href;
                resut["ProductImage"]    = imgSrc;
                resut["ProductComments"] = comments;
                resultList.Add(resut);
            }

            if (!listOnly)
            {
                this.UpdateResultsPrices(resultList, SkuIdKey);
            }

            return resultList.ToArray();
        }
コード例 #3
0
        /// <summary>
        ///     Builds the URL of the AJAX search request.
        /// </summary>
        /// <param name="webContent">
        ///     Page content; must contain a <c>var params = {...}</c> fragment, and is also queried
        ///     for the <c>shop_id</c> and <c>vender_id</c> inputs.
        /// </param>
        /// <param name="renderStructure">Render values looked up by their <c>m_render_*</c> keys.</param>
        /// <returns>The base URL combined with the query collection by <c>Url.CombinUrl</c>.</returns>
        /// <exception cref="System.NotSupportedException">
        ///     The search parameters cannot be parsed from the page.
        /// </exception>
        private string BuildAjaxSearchUrl(string webContent, IDictionary <string, string> renderStructure)
        {
            var paramsMatch = Regex.Match(webContent, "(?<=var params = ){[^}]+}");
            if (!paramsMatch.Success)
            {
                throw new NotSupportedException("无法从页面中解析出搜索参数");
            }

            // Sample of the matched fragment:
            // {"appId":"435517","orderBy":"5","direction":"0","categoryId":"0","pageSize":"24","venderId":"1000004373","isGlobalSearch":"0","maxPrice":"0","pagePrototypeId":"17","pageNo":"1","shopId":"1000004373","minPrice":"0"}
            var searchParams  = JObject.Parse(paramsMatch.Value);
            var pageNavigator = HtmlDocumentHelper.CreateNavigator(webContent);

            // Three value sources: the parsed JSON, the render structure, and the page's input elements.
            System.Func <string, string> fromJson = name =>
                                                    JsonHelper.TryReadJobjectValue(searchParams, name, (string)null);

            System.Func <string, string> fromRender = name =>
            {
                string found;
                renderStructure.TryGetValue(name, out found);
                return found;
            };

            System.Func <string, string> fromInput = id =>
                                                     HtmlDocumentHelper.GetNodeValue(pageNavigator, $@"//input[@id='{id}']/@value");

            var query = Url.CreateQueryCollection();

            query["appId"]            = fromJson("appId");
            query["orderBy"]          = "5";
            query["pageNo"]           = "1";
            query["direction"]        = "1";
            query["categoryId"]       = fromJson("categoryId");
            query["pageSize"]         = "24";
            query["pagePrototypeId"]  = fromJson("pagePrototypeId");
            query["pageInstanceId"]   = fromRender("m_render_pageInstance_id");
            query["moduleInstanceId"] = fromRender("m_render_instance_id");
            query["prototypeId"]      = fromRender("m_render_prototype_id");
            query["templateId"]       = fromRender("m_render_template_id");
            query["layoutInstanceId"] = fromRender("m_render_layout_instance_id");
            query["origin"]           = fromRender("m_render_origin");
            query["shopId"]           = fromInput("shop_id");
            query["venderId"]         = fromInput("vender_id");

            // No "callback" parameter is sent; per the original author's note the endpoint
            // then returns a plain JSON structure.
            query["_"] = $"{JsCodeHelper.GetDateTime()}";

            string endpoint;
            if (renderStructure["m_render_is_search"] == "true")
            {
                endpoint = "http://module-jshop.jd.com/module/getModuleHtml.html";
            }
            else
            {
                endpoint = "http://mall.jd.com/view/getModuleHtml.html";
            }

            return Url.CombinUrl(endpoint, query);
        }
コード例 #4
0
        /// <summary>
        ///     Handles the special case: builds an <c>advance_search</c> URL from the hidden
        ///     inputs of the shop page and downloads it.
        /// </summary>
        /// <param name="shopUrl">URL of the shop page to read the hidden inputs from.</param>
        /// <returns>The content fetched from the built search URL.</returns>
        private string GetSpecialSearchPageContent(string shopUrl)
        {
            var shopPage = this.GetWebContent(shopUrl);

            var appId    = HtmlDocumentHelper.GetNodeValue(shopPage, "//input[@id='pageInstance_appId']/@value");
            var venderId = HtmlDocumentHelper.GetNodeValue(shopPage, "//input[@id='vender_id']/@value");
            var shopId   = HtmlDocumentHelper.GetNodeValue(shopPage, "//input[@id='shop_id']/@value");

            var advancedSearchUrl = $"http://mall.jd.com/advance_search-{appId}-{venderId}-{shopId}-5-0-0-1-1-24.html";

            // Record the search URL as the current URL before fetching it.
            this.CurrentUrl = advancedSearchUrl;

            return this.GetWebContent(advancedSearchUrl);
        }
コード例 #5
0
        /// <summary>
        ///     Builds the URL of the AJAX search request.
        /// </summary>
        /// <param name="webContent">
        ///     Page content; must contain a <c>var params = {...}</c> fragment, and is also queried
        ///     for the <c>shop_id</c> and <c>vender_id</c> inputs.
        /// </param>
        /// <param name="renderStructure">Render values looked up by their <c>m_render_*</c> keys.</param>
        /// <returns>The base URL combined with the query collection by <c>Url.CombinUrl</c>.</returns>
        /// <exception cref="System.NotSupportedException">
        ///     The search parameters cannot be parsed from the page.
        /// </exception>
        private string BuildAjaxSearchUrl(string webContent, IDictionary <string, string> renderStructure)
        {
            var matchResults = Regex.Match(webContent, "(?<=var params = ){[^}]+}");

            if (!matchResults.Success)
            {
                throw new NotSupportedException("无法从页面中解析出搜索参数");
            }

            var jObject   = JObject.Parse(matchResults.Value);
            var navigator = HtmlDocumentHelper.CreateNavigator(webContent);

            // Three value sources: the parsed JSON, the render structure, and the page's input elements.
            System.Func <string, string> readJsonFunction = key => JsonHelper.TryReadJobjectValue(jObject, key, (string)null);

            System.Func <string, string> readHtmlFunc = key =>
            {
                string value;
                renderStructure.TryGetValue(key, out value);
                return value;
            };

            System.Func <string, string> readInputFunc = key =>
                                                         HtmlDocumentHelper.GetNodeValue(navigator, $@"//input[@id='{key}']/@value");

            var collection = Url.CreateQueryCollection();

            collection[@"appId"]            = readJsonFunction(@"appId");
            collection[@"orderBy"]          = "5";
            collection[@"pageNo"]           = "1";
            collection[@"direction"]        = "1";
            collection[@"categoryId"]       = readJsonFunction(@"categoryId");
            collection[@"pageSize"]         = @"24";
            collection[@"pagePrototypeId"]  = readJsonFunction(@"pagePrototypeId");
            collection[@"pageInstanceId"]   = readHtmlFunc(@"m_render_pageInstance_id");
            collection[@"moduleInstanceId"] = readHtmlFunc(@"m_render_instance_id");
            collection[@"prototypeId"]      = readHtmlFunc(@"m_render_prototype_id");
            collection[@"templateId"]       = readHtmlFunc(@"m_render_template_id");
            collection[@"layoutInstanceId"] = readHtmlFunc(@"m_render_layout_instance_id");
            collection[@"origin"]           = readHtmlFunc(@"m_render_origin");

            // The shop id comes from the 'shop_id' input ('m_render_origin' is a render-structure
            // key, not an input id), and the query key is spelled 'venderId'.
            collection[@"shopId"]   = readInputFunc(@"shop_id");
            collection[@"venderId"] = readInputFunc(@"vender_id");

            collection[@"_"] = $"{JsCodeHelper.GetDateTime()}";

            var baseUrl = renderStructure[@"m_render_is_search"] == "true"
                            ? @"http://module-jshop.jd.com/module/getModuleHtml.html"
                            : @"http://mall.jd.com/view/getModuleHtml.html";

            return Url.CombinUrl(baseUrl, collection);
        }
コード例 #6
0
        /// <summary>
        ///     Parses the number of reviews out of a product list item.
        /// </summary>
        /// <param name="item">Navigator positioned on the product list item.</param>
        /// <returns>
        ///     The digits preceding "人评价" in the item's text; otherwise the digits enclosed in
        ///     parentheses in the <c>span[@class='evaluate']</c> value; otherwise <c>"-1"</c>.
        /// </returns>
        private static string ParseComments(XPathNavigator item)
        {
            var countMatch = Regex.Match(item.Value, @"\d+(?= *人评价)");

            if (!countMatch.Success)
            {
                // Fallback format; the original author notes that no live example of it has been found yet.
                var evaluateText = HtmlDocumentHelper.GetNodeValue(item, ".//span[@class='evaluate']");
                countMatch = Regex.Match(evaluateText, @"(?<=\()\d+(?=\))");
            }

            return countMatch.Success ? countMatch.Value : "-1";
        }
コード例 #7
0
        /// <summary>
        ///     Returns the content of the shop's search page.
        /// </summary>
        /// <param name="shopUrl">URL of the shop page to read <c>pageInstance_appId</c> from.</param>
        /// <returns>The content fetched from the built <c>view_search</c> URL.</returns>
        private string GetSearchPageContent(string shopUrl)
        {
            var shopPage = this.GetWebContent(shopUrl);

            // Reads the value of e.g. <input type="hidden" value="504028" id="pageInstance_appId"/>.
            var appId = HtmlDocumentHelper.GetNodeValue(shopPage, "//input[@id='pageInstance_appId']/@value");

            // URL layout, per the original author's notes:
            //   view_search-<shop page id>-0-<sort type>-<sort direction>-<page size>-<page number>.html
            //   sort type:      5 = sales, 4 = price, 3 = favorites, 2 = time
            //   sort direction: 1 = descending, 0 = ascending
            //   page size:      at most 24
            //   page number:    starts at 1
            //   example:        http://mall.jd.com//view_search-337310-0-5-0-24-5.html
            return this.GetWebContent($"http://mall.jd.com/view_search-{appId}-0-5-0-24-1.html");
        }
コード例 #8
0
 /// <summary>
 /// Looks up the <c>value</c> attribute of the <c>input</c> element whose id is <paramref name="key"/>.
 /// </summary>
 /// <param name="navigator">Navigator the XPath query is evaluated against.</param>
 /// <param name="key">The id of the <c>input</c> element.</param>
 /// <returns>Whatever <c>HtmlDocumentHelper.GetNodeValue</c> returns for the query.</returns>
 public string ReadInputFunc(XPathNavigator navigator, string key)
 {
     var inputValueXPath = $@"//input[@id='{key}']/@value";

     return HtmlDocumentHelper.GetNodeValue(navigator, inputValueXPath);
 }