/// <summary>
        /// Builds the single result record for the judgement page that is currently loaded.
        /// </summary>
        /// <returns>
        /// A one-element array whose record holds the document id, the page URL and the title,
        /// publication date, content and content HTML extracted from <c>HtmlSource</c>.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            // Run the extractors first, in a fixed order, before the record is assembled.
            var title       = GetJudgementsTitle(HtmlSource);
            var pubDate     = GetJudgementsPubDate(HtmlSource);
            var content     = GetJudgementsContent(HtmlSource);
            var contentHtml = GetJudgementsContentHtml(HtmlSource);

            // The document id is whatever follows the first '=' in the URL (to the end of the line).
            var docId = Regex.Match(CurrentUrl, @"(?<==).*").Value;

            var record = new Resut();
            record["DocId"]       = docId;
            record["Url"]         = CurrentUrl;
            record["Title"]       = title;
            record["PubDate"]     = pubDate;
            record["Content"]     = content;
            record["ContentHtml"] = contentHtml;

            return new IResut[] { record };
        }
Пример #2
0
        /// <summary>
        /// Turns every dictionary returned by <c>GetAllResultListDic</c> into a result record.
        /// </summary>
        /// <returns>One record per dictionary, in the order the dictionaries were returned.</returns>
        protected override IResut[] ParseCurrentItems()
        {
            var records = new List<IResut>();
            var rows    = GetAllResultListDic();

            // Taken once so that every record produced by this call carries the same check time.
            var stamp = System.DateTime.Now;

            foreach (var row in rows)
            {
                IResut record = new Resut
                {
                    ["Name"]       = _name,
                    ["Identifier"] = _identifier,
                    ["UserName"]   = row["UserName"],
                    ["CaseTime"]   = row["CaseTime"],
                    ["CaseId"]     = row["CaseId"],
                    ["Id"]         = row["Id"],
                    ["Type"]       = row["Type"],
                    ["State"]      = row["State"],
                    // "View" is always written as an empty string here.
                    ["View"]       = string.Empty,
                    ["CheckTime"]  = stamp
                };

                records.Add(record);
            }

            return records.ToArray();
        }
        /// <summary>
        /// Parses the page configuration JSON found in <c>HtmlSource</c> and converts each
        /// product dictionary derived from it into a result record.
        /// </summary>
        /// <returns>One record per dictionary returned by <c>GetInfoDicList</c>.</returns>
        protected override IResut[] ParseCurrentItems()
        {
            // Fields copied one-to-one from each dictionary, in output order.
            var fieldNames = new[]
            {
                "searchKeyword",
                "productId",
                "productName",
                "productPrice",
                "PaymentAcount",
                "productType",
                "shopName",
                "userMemberId",
                "location",
                "productPosition",
                "positionType",
                "pageIndex"
            };

            var pageConfig = JObject.Parse(GetPageConfig(HtmlSource));
            var infoList   = GetInfoDicList(pageConfig);
            var records    = new List<IResut>();

            foreach (var info in infoList)
            {
                var record = new Resut();

                foreach (var fieldName in fieldNames)
                {
                    record.Add(fieldName, info[fieldName]);
                }

                records.Add(record);
            }

            return records.ToArray();
        }
        /// <summary>
        /// Builds one feedback record per entry extracted from <c>HtmlSource</c>.
        /// </summary>
        /// <returns>
        /// One record per element of the name/VIP-level list. Each record also carries the shop id
        /// and the values found at the same index in the other extracted lists.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            var records = new List<IResut>();

            // Each helper extracts one column of values from the page.
            // NOTE(review): the columns are indexed in lockstep below, so every list is assumed to
            // hold at least as many entries as the name/VIP-level list - confirm against the helpers.
            var buyers     = GetNameVipLevelList(HtmlSource);
            var products   = GetProductNameList(HtmlSource);
            var totals     = GetTotalPriceList(HtmlSource);
            var dates      = GetFeedBackDateList(HtmlSource);
            var comments   = GetFeedBackContentList(HtmlSource);
            var stars      = GetStarMList(HtmlSource);
            var productIds = GetProductId(HtmlSource);

            var rowCount = buyers.Count;
            var row      = 0;

            while (row < rowCount)
            {
                var record = new Resut();

                record["shopId"]          = _shopId;
                record["nameVipLevel"]    = buyers[row];
                record["productName"]     = products[row];
                record["totalPrice"]      = totals[row];
                record["feedBackDate"]    = dates[row];
                record["feedBackContent"] = comments[row];
                record["starM"]           = stars[row];
                record["productId"]       = productIds[row];

                records.Add(record);
                row++;
            }

            return records.ToArray();
        }
Пример #5
0
        /// <summary>
        /// Converts every item returned by <c>GetItemList</c> into a result record.
        /// </summary>
        /// <returns>
        /// One record per item. All fields come from <c>GetInformationByJToken</c> except
        /// <c>nick</c>, which is taken from the <c>_nick</c> field.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            var records = new List<IResut>();

            foreach (var token in GetItemList())
            {
                var info   = GetInformationByJToken(token);
                var record = new Resut();

                record.Add("imageUrl", info["imageUrl"]);
                record.Add("title", info["title"]);
                record.Add("price", info["price"]);
                record.Add("curPrice", info["curPrice"]);
                record.Add("vipPrice", info["vipPrice"]);
                record.Add("tradeNum", info["tradeNum"]);
                record.Add("nick", _nick);
                record.Add("sellerId", info["sellerId"]);
                record.Add("itemId", info["itemId"]);
                record.Add("loc", info["loc"]);
                record.Add("storeLink", info["storeLink"]);
                record.Add("href", info["href"]);
                record.Add("commend", info["commend"]);
                record.Add("commendHref", info["commendHref"]);

                records.Add(record);
            }

            return records.ToArray();
        }
        /// <summary>
        /// Parses the document ids of the given page, but only while the page date is later
        /// than <c>_date</c>.
        /// </summary>
        /// <param name="htmlSource">HTML of the page to parse.</param>
        /// <returns>
        /// One record per document id when <c>_date</c> is earlier than the date returned by
        /// <c>GetOneDate</c>; otherwise an empty array, and <c>_done</c> is set to 1.
        /// </returns>
        protected override IResut[] ParseCurrentItems(string htmlSource)
        {
            var records  = new List<IResut>();
            var ids      = GetDocId(htmlSource);
            var pageDate = GetOneDate(htmlSource);

            // The page date is echoed to the console on every call.
            Console.WriteLine(pageDate);

            // Cut-off reached: _date is not earlier than the page date.
            if (DateTime.Compare(_date, DateTime.Parse(pageDate)) >= 0)
            {
                _done = 1;
                return records.ToArray();
            }

            foreach (var id in ids)
            {
                // Every record repeats the query parameters held by the base class's page info.
                IResut record = new Resut
                {
                    ["DocId"]       = id,
                    ["CaseType"]    = base._pageInfo.CaseType,
                    ["ReasonLevel"] = base._pageInfo.ReasonLevel,
                    ["ReasonKey"]   = base._pageInfo.ReasonKey,
                    ["CourtLevel"]  = base._pageInfo.CourtLevel,
                    ["CourtKey"]    = base._pageInfo.CourtKey,
                    ["Year"]        = base._pageInfo.Year
                };

                records.Add(record);
            }

            return records.ToArray();
        }
Пример #7
0
        /// <summary>
        /// Parses the products out of a listing page.
        /// </summary>
        /// <param name="htmlSource">The HTML source.</param>
        /// <param name="listOnly">
        /// When <c>true</c>, only the list is parsed; prices and other content that needs a
        /// further network request are not resolved (<c>UpdateResultsPrices</c> is skipped).
        /// </param>
        /// <returns>One record per list item from which a SKU could be extracted.</returns>
        private IResut[] ParseCurrentItems(string htmlSource, bool listOnly = false)
        {
            const string SkuField = "ProductSku";

            var records = new List<IResut>();
            var page    = HtmlDocumentHelper.CreateNavigator(htmlSource);

            foreach (XPathNavigator node in page.Select(@"//ul/li"))
            {
                // Title and link come from the jDesc block; jTitle is the fallback when jDesc
                // yields no title.
                var name = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jDesc']/a/text()");
                var link = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jDesc']//@href");

                if (string.IsNullOrEmpty(name))
                {
                    name = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jTitle']/a/text()");
                    link = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jTitle']//@href");
                }

                var image = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jPic']//@original");

                // The SKU is the numeric file name of the link (".../12345.html"); items
                // without one are skipped.
                var skuMatch = Regex.Match(link, @"(?<=/)\d+(?=\.html)");
                if (!skuMatch.Success || string.IsNullOrEmpty(skuMatch.Value))
                {
                    continue;
                }

                // Review data attached to the item.
                var reviews = ParseComments(node);

                IResut record = new Resut
                {
                    [SkuField]          = skuMatch.Value,
                    ["ShopId"]          = ShopUrl,
                    ["ProductName"]     = name,
                    ["ProductUrl"]      = link,
                    ["ProductImage"]    = image,
                    ["ProductComments"] = reviews
                };

                records.Add(record);
            }

            if (listOnly)
            {
                return records.ToArray();
            }

            this.UpdateResultsPrices(records, SkuField);

            return records.ToArray();
        }
Пример #8
0
        /// <summary>
        /// Parses the information of every product on the current page.
        /// </summary>
        /// <param name="htmlSource">HTML of the listing page.</param>
        /// <param name="listOnly">
        /// When <c>true</c>, only the list is parsed and <c>UpdateResultsPrices</c> is not called.
        /// </param>
        /// <returns>One record per list item from which a SKU could be extracted.</returns>
        private IResut[] ParseCurrentItems(string htmlSource, bool listOnly = false)
        {
            const string SkuIdKey   = "ProductSku";
            var          resultList = new List<IResut>();

            // XPath navigator over the page; every <li> under a <ul> is a candidate product.
            var navigator = HtmlDocumentHelper.CreateNavigator(htmlSource);
            var iterator  = navigator.Select(@"//ul/li");

            foreach (XPathNavigator item in iterator)
            {
                // Title and link come from the jDesc block; jTitle is the fallback when jDesc
                // yields no title.
                var title = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jDesc']/a/text()");
                var href  = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jDesc']//@href");
                if (string.IsNullOrEmpty(title))
                {
                    title = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jTitle']/a/text()");
                    href  = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jTitle']//@href");
                }

                // Prefer the @original attribute of the picture and fall back to @src.
                // The fallback is evaluated against the current item, like every other lookup in
                // this loop: the XPath is relative ('.//'), so running it against the whole page
                // would not be tied to this product.
                var imgSrc = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jPic']//@original");
                if (string.IsNullOrEmpty(imgSrc))
                {
                    imgSrc = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jPic']//@src");
                }

                // The SKU is the numeric file name of the link (".../12345.html"). The dot is
                // escaped so that it only matches a literal '.'.
                var skuMatchResults = Regex.Match(href, @"(?<=/)\d+(?=\.html)");
                var sku             = skuMatchResults.Success ? skuMatchResults.Value : string.Empty;

                if (string.IsNullOrEmpty(sku))
                {
                    continue;
                }

                // Review data attached to the item.
                var comments = ParseComments(item);

                IResut resut = new Resut();

                resut[SkuIdKey]          = sku;
                resut["ShopUrl"]         = this.ShopUrl;
                resut["ProductName"]     = title;
                resut["ProductUrl"]      = href;
                resut["ProductImage"]    = imgSrc;
                resut["ProductComments"] = comments;
                resultList.Add(resut);
            }

            if (!listOnly)
            {
                this.UpdateResultsPrices(resultList, SkuIdKey);
            }

            return resultList.ToArray();
        }
Пример #9
0
        /// <summary>
        /// Starts parsing the given web content.
        /// </summary>
        /// <param name="webContent">Content of the web page.</param>
        /// <returns>Nothing at present: the method always throws before returning.</returns>
        /// <exception cref="NotImplementedException">
        /// Always thrown, after <c>ParseShopScoreResult</c> has been run on a fresh record.
        /// </exception>
        public IResut Parse(string webContent)
        {
            IResut scoreRecord   = new Resut();
            var    pageNavigator = HtmlDocumentHelper.CreateNavigator(webContent);

            // Only the shop-score part exists so far; the filled record is not returned yet.
            ParseShopScoreResult(scoreRecord, pageNavigator);

            throw new NotImplementedException();
        }
Пример #10
0
        /// <summary>
        /// Collects the deals of the current page from two places: the HTML of the
        /// <c>normal_dealbox</c> block and the JSON assigned to <c>window.setDeals</c>.
        /// </summary>
        /// <returns>The HTML-derived records followed by the JSON-derived records.</returns>
        /// <exception cref="InvalidOperationException">
        /// The name, URL, price and list-price matches found in the HTML differ in number.
        /// </exception>
        protected override IResut[] ParseCurrentItems()
        {
            var resultList = new List<IResut>();

            // Restrict the HTML scan to the deal box, up to the "page_bottom" div.
            // NOTE(review): the lookahead expects a space after "class=" - confirm this matches
            // the live markup; if it does not, partHtml is empty and no HTML records are produced.
            var partHtml = Regex.Match(HtmlSource, @"<div id=""normal_dealbox""[\s\S]*?(?=<div class= ""page_div clear area page_bottom"">)").Value;
            var productNameCollection = Regex.Matches(partHtml, "(?<=title=\").*?(?=\")");
            var urlCollection         = Regex.Matches(partHtml, @"(?<=<h3>\s*<a target=""_blank"" href="")[\S]*?(?="")");
            var priceCollection       = Regex.Matches(partHtml, "(?<=<em><b>¥</b>).*?(?=</em>)");
            var maxPriceCollection    = Regex.Matches(partHtml, "(?<=<del class=\"list_price\">¥).*?(?=</del>)");
            var count = productNameCollection.Count;

            // The four match lists are read in lockstep, so they must line up exactly.
            if (count != urlCollection.Count || count != priceCollection.Count || count != maxPriceCollection.Count)
            {
                throw new InvalidOperationException("开始的条数不匹配");
            }

            for (var i = 0; i < count; i++)
            {
                var resut = new Resut
                {
                    ["ProductName"] = productNameCollection[i].Value,
                    ["Url"]         = urlCollection[i].Value,
                    // Promotional price.
                    ["Price"] = priceCollection[i].Value,
                    // Highest (list) price.
                    ["MaxPrice"] = maxPriceCollection[i].Value
                };
                resultList.Add(resut);
            }

            // Second source: the object literal assigned to window.setDeals. The dot is escaped
            // so that it only matches a literal '.'.
            var jsonString = Regex.Match(HtmlSource, @"(?<=window\.setDeals = ){.*}(?=;)").Value;
            var jObject    = JObject.Parse(jsonString);

            // "deals" is re-parsed from its text form rather than cast directly.
            var jArray = JArray.Parse(jObject["deals"].ToString());

            foreach (var jToken in jArray)
            {
                var urlName     = jToken["url_name"].ToString();
                var id          = jToken["id"].ToString();
                var title       = jToken["title"].ToString();
                var wuxianPrice = jToken["wuxian_price"].ToString();
                var listPrice   = jToken["list_price"].ToString();
                var resut       = new Resut
                {
                    ["ProductName"] = title,
                    ["Url"]         = $"//out.zhe800.com/ju/deal/{urlName}_{id}",
                    // Promotional price.
                    ["Price"] = wuxianPrice,
                    // Highest (list) price.
                    ["MaxPrice"] = listPrice
                };
                resultList.Add(resut);
            }

            return resultList.ToArray();
        }
Пример #11
0
        /// <summary>
        /// Builds a single record from the licence fields found in <c>HtmlSource</c>.
        /// </summary>
        /// <returns>
        /// A one-element array whose record holds every key/value pair returned by
        /// <c>GetLicenceDic</c>, plus a <c>Url</c> entry with the current URL.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            var licences = GetLicenceDic(HtmlSource);
            var record   = new Resut();

            // Copy the licence entries first; "Url" is appended last.
            foreach (var entry in licences)
            {
                record.Add(entry.Key, entry.Value);
            }

            record.Add("Url", CurrentUrl);

            IResut[] output = { record };
            return output;
        }
Пример #12
0
        /// <summary>
        /// Builds one record per item returned by <c>GetItemList</c> for the current URL,
        /// recording where each product appears in the list.
        /// </summary>
        /// <returns>
        /// One record per item with the search keyword (<c>_q</c>), the product id and position
        /// type reported by <c>GetInformationByJToken</c>, the current page, and the item's
        /// 1-based position within this call's item list.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            var resultList    = new List<IResut>();
            var itemList      = GetItemList(CurrentUrl);
            var searchKeyword = _q;

            // 1-based position of the item within the list returned for this call.
            var position = 1;

            foreach (var item in itemList)
            {
                var dic = GetInformationByJToken(item);

                IResut resut = new Resut()
                {
                    { "SearchKeyword", searchKeyword },
                    { "ProductId", dic["ProductId"] },
                    { "PositionType", dic["PositionType"] },
                    { "PageIndex", CurrentPage },
                    { "ProductPosition", position.ToString() }
                };

                resultList.Add(resut);
                position++;
            }

            return resultList.ToArray();
        }
Пример #13
0
        /// <summary>
        /// Converts a <c>JObject</c> into a result record.
        /// </summary>
        /// <param name="jObject">The JSON object whose properties are copied.</param>
        /// <returns>
        /// A record with one entry per property, keyed by property name. Each value is the
        /// property's string value, or an empty string when that value is null.
        /// </returns>
        private IResut ConvertToResult(JObject jObject)
        {
            IResut record = new Resut();

            foreach (var property in jObject.Properties())
            {
                var token = property.Value;

                // NOTE(review): Value<string>() looks intended for scalar tokens; nested objects
                // or arrays presumably fail to convert - confirm against the callers' input.
                var text = token != null ? token.Value<string>() : null;

                record[property.Name] = text ?? string.Empty;
            }

            return record;
        }
Пример #14
0
        /// <summary>
        /// Parses the page configuration JSON found in <c>HtmlSource</c> and converts each shop
        /// dictionary derived from it into a result record.
        /// </summary>
        /// <returns>One record per dictionary returned by <c>GetInfoDicList</c>.</returns>
        protected override IResut[] ParseCurrentItems()
        {
            // Fields copied one-to-one from each dictionary, in output order.
            var fieldNames = new[]
            {
                "ShopId",
                "ShopName",
                "ShopUrl",
                "ShopLogoUrl",
                "MarketName",
                "ShopRank",
                "Location",
                "InCountry",
                "InProvince",
                "InCity",
                "BossNickName",
                "EncryptedUserId",
                "MainBiz",
                "MainIndustry",
                "SaleCount",
                "ProductCount",
                "GoodCommentCount",
                "GoodCommentRate",
                "Comment_MatchDescrip",
                "Comment_MatchDescripRate",
                "Comment_ServiceStatue",
                "Comment_ServiceStatueRate",
                "Comment_ShipSpeed",
                "Comment_ShipSpeedRate",
                "Attribute_BuyProtect",
                "Attribute_GlobalBuy",
                "Attribute_GoldenSale",
                "SearchKeyword"
            };

            var pageConfig = JObject.Parse(GetPageConfig(HtmlSource));
            var shops      = GetInfoDicList(pageConfig);
            var records    = new List<IResut>();

            foreach (var shop in shops)
            {
                var record = new Resut();

                foreach (var fieldName in fieldNames)
                {
                    record[fieldName] = shop[fieldName];
                }

                records.Add(record);
            }

            return records.ToArray();
        }
Пример #15
0
        /// <summary>
        /// Walks the announcement cells (<c>td.Font9</c>) of the current list page, downloads each
        /// linked announcement and turns it into a result record.
        /// </summary>
        /// <returns>
        /// One record per announcement processed. Processing stops at the first cell without a
        /// link or date, or at the first announcement older than <c>_gatherDays</c> days; in the
        /// latter case <c>_urlQueue</c> is cleared as well. An empty array is returned when the
        /// page contains no matching cells.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            List<IResut>       resultList         = new List<IResut>();
            HtmlNode           htmlNode           = HtmlAgilityPackHelper.GetDocumentNodeByHtml(HtmlSource);
            HtmlNodeCollection htmlNodeCollection = htmlNode.SelectNodes("//td[@class='Font9']");

            // SelectNodes yields null, not an empty collection, when nothing matches.
            if (htmlNodeCollection == null)
            {
                return resultList.ToArray();
            }

            foreach (HtmlNode node in htmlNodeCollection)
            {
                string url            = node.SelectSingleNode("./a[@class='five']")?.Attributes["href"]?.Value;
                string dateTimeString = Regex.Match(node.InnerText, @"\d+-\d+-\d+").Value;
                if (string.IsNullOrEmpty(url) || string.IsNullOrEmpty(dateTimeString))
                {
                    break;
                }

                url = $"http://www.ccgp-shandong.gov.cn{url}";

                // Entries older than the gathering window end the whole crawl: the pending URL
                // queue is emptied and the remaining cells are ignored.
                DateTime dateTime = Convert.ToDateTime(dateTimeString);
                int      days     = (DateTime.Now - dateTime).Days;
                if (days > _gatherDays)
                {
                    _urlQueue.Clear();
                    break;
                }

                string   html        = _httpHelper.GetHtmlByGet(url);
                HtmlNode htmlNode2   = HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);
                string   title       = htmlNode2.SelectSingleNode("//div[@align='center']")?.InnerText;
                string   publisher   = Regex.Match(html, "(?<=发布人[::]).*(?=</td>)").Value;
                string   publishTime = Regex.Match(html, "(?<=发布时间[::]).*(?=</td>)").Value;

                // Normalise the publish time to the current culture's default format.
                // NOTE(review): this throws when the publish-time pattern does not match.
                publishTime = Convert.ToDateTime(publishTime).ToString(CultureInfo.CurrentCulture);

                // The announcement body is kept as raw HTML; a missing table gives an empty
                // string instead of a null dereference.
                string content = htmlNode2.SelectSingleNode("//table//tr[2]/td[2]/table")?.OuterHtml ?? string.Empty;

                Resut resut = new Resut()
                {
                    ["url"]         = url,
                    ["title"]       = title,
                    ["content"]     = content,
                    ["publisher"]   = publisher,
                    ["publishTime"] = publishTime
                };

                resultList.Add(resut);
            }

            return resultList.ToArray();
        }
Пример #16
0
        /// <summary>
        /// Builds the single record for the organisation identified by <c>_orgId</c>.
        /// </summary>
        /// <returns>
        /// A one-element array whose record holds the keyword (<c>_keyWord</c>) and the results
        /// of <c>GetBasicInfo</c>, <c>GetManagerInfo</c> and <c>GetEquityHtml</c>.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            // The three lookups run in this order before the record is assembled.
            var basicInfo   = GetBasicInfo(_orgId);
            var managerInfo = GetManagerInfo(_orgId);
            var equityHtml  = GetEquityHtml(_orgId);

            var record = new Resut();
            record.Add("keyWord", _keyWord);
            record.Add("basic", basicInfo);
            record.Add("manage", managerInfo);
            record.Add("equity", equityHtml);

            return new IResut[] { record };
        }
Пример #17
0
        /// <summary>
        /// Parses the document ids contained in the given page.
        /// </summary>
        /// <param name="htmlSource">HTML of the page to parse.</param>
        /// <returns>
        /// One record per document id; each record repeats the query parameters held in
        /// <c>_pageInfo</c>.
        /// </returns>
        protected virtual IResut[] ParseCurrentItems(string htmlSource)
        {
            var records = new List<IResut>();

            foreach (var id in GetDocId(htmlSource))
            {
                IResut record = new Resut
                {
                    ["DocId"]       = id,
                    ["CaseType"]    = _pageInfo.CaseType,
                    ["ReasonLevel"] = _pageInfo.ReasonLevel,
                    ["ReasonKey"]   = _pageInfo.ReasonKey,
                    ["CourtLevel"]  = _pageInfo.CourtLevel,
                    ["CourtKey"]    = _pageInfo.CourtKey,
                    ["Year"]        = _pageInfo.Year
                };

                records.Add(record);
            }

            return records.ToArray();
        }
Пример #18
0
        /// <summary>
        /// Parses the items of the current activity page.
        /// </summary>
        /// <returns>
        /// One record per item returned by <c>GetItemList</c> for the current URL. All fields come
        /// from <c>GetInformationByJson</c> except <c>type</c>, which is the string form of
        /// <c>_acvivityEnum</c>.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            var resultList = new List<IResut>();
            var itemList   = GetItemList(CurrentUrl);

            foreach (var item in itemList)
            {
                var dic = GetInformationByJson(item);

                IResut resut = new Resut()
                {
                    { "itemId", dic["itemId"] },
                    { "title", dic["title"] },
                    { "reservePrice", dic["reservePrice"] },
                    { "discountPrice", dic["discountPrice"] },
                    { "discount", dic["discount"] },
                    { "currentSellOut", dic["currentSellOut"] },
                    { "quantity", dic["quantity"] },
                    { "currentQuantity", dic["currentQuantity"] },
                    { "activityStartTime", dic["activityStartTime"] },
                    { "activityEndTime", dic["activityEndTime"] },
                    { "shopId", dic["shopId"] },
                    { "shopName", dic["shopName"] },
                    { "type", _acvivityEnum.ToString() }
                };

                resultList.Add(resut);
            }

            return resultList.ToArray();
        }
Пример #19
0
        /// <summary>
        /// Parses the items of the current activity page.
        /// </summary>
        /// <returns>
        /// One record per item returned by <c>GetItemList</c> for the current URL. All fields come
        /// from <c>GetInformationByJson</c> except <c>type</c>, which is the string form of
        /// <c>_acvivityEnum</c>.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            var resultList = new List<IResut>();
            var itemList   = GetItemList(CurrentUrl);

            foreach (var item in itemList)
            {
                var dic = GetInformationByJson(item);

                IResut resut = new Resut()
                {
                    { "title", dic["title"] },
                    { "description", dic["description"] },
                    { "itemId", dic["itemId"] },
                    { "ostimeText", dic["ostimeText"] },
                    { "leftTime", dic["leftTime"] },
                    { "totalStock", dic["totalStock"] },
                    { "soldAmount", dic["soldAmount"] },
                    { "itemUrl", dic["itemUrl"] },
                    { "actPrice", dic["actPrice"] },
                    { "origPrice", dic["origPrice"] },
                    { "discount", dic["discount"] },
                    { "type", _acvivityEnum.ToString() }
                };

                resultList.Add(resut);
            }

            return resultList.ToArray();
        }
Пример #20
0
        /// <summary>
        /// Parses the document ids of the given page and downloads the judgement behind each one.
        /// </summary>
        /// <param name="htmlSource">HTML of the list page that contains the document ids.</param>
        /// <returns>
        /// One record per document id, holding the id, the content URL, the case type, area and
        /// year from <c>_pageInfo</c>, and the title, publication date and content extracted from
        /// the downloaded page.
        /// </returns>
        protected IResut[] ParseCurrentItems(string htmlSource)
        {
            // Document ids are GUID-like strings, e.g. efea2774-b647-11e3-84e9-5cf3fc0c2c18.
            // The content of each document is served by CreateContentJS.aspx?DocID=<id>.

            var resultList = new List<IResut>();
            var docIds     = GetDocId(htmlSource);

            foreach (var docId in docIds)
            {
                var url = $"http://wenshu.court.gov.cn/CreateContentJS/CreateContentJS.aspx?DocID={docId}";

                // One extra request per document: the list page itself only carries the ids.
                var htmlString        = base.GetWebContent(url);
                var judgementsTitle   = GetJudgementsTitle(htmlString);
                var judgementsPubDate = GetJudgementsPubDate(htmlString);
                var judgementContent  = GetJudgementsContent(htmlString);

                IResut resut = new Resut();

                resut["DocId"]   = docId;
                resut["Url"]     = url;
                resut["Reason"]  = _pageInfo.CaseType;
                resut["Court"]   = _pageInfo.Area;
                resut["Year"]    = _pageInfo.Year;
                resut["Title"]   = judgementsTitle;
                resut["PubDate"] = judgementsPubDate;
                resut["Content"] = judgementContent;

                resultList.Add(resut);
            }

            return resultList.ToArray();
        }
Пример #21
0
        /// <summary>
        /// Downloads the current URL and parses every product item listed in its
        /// <c>ui-box-body</c> block.
        /// </summary>
        /// <returns>
        /// One record per <c>li.item</c> node, holding the shop id, product name, URL and id, the
        /// current and old prices and the order count; an empty array when the page has no such
        /// nodes.
        /// </returns>
        protected override IResut[] ParseCurrentItems()
        {
            // A number with an optional fractional part. The fraction is optional as a whole so
            // that single-digit amounts such as "5" are matched too.
            const string PricePattern = @"\d+(?:\.\d+)?";

            var resultList         = new List<IResut>();
            var html               = _httpHelper.GetHtmlByGet(CurrentUrl);
            var documentNode       = HtmlAgilityPack.HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);
            var htmlNodeCollection = documentNode.SelectNodes("//div[@class='ui-box-body']//li[@class='item']");

            // SelectNodes yields null, not an empty collection, when nothing matches.
            if (htmlNodeCollection == null)
            {
                return resultList.ToArray();
            }

            foreach (var htmlNode in htmlNodeCollection)
            {
                IResut resut = new Resut();

                // The detail link supplies both the display name and the product URL; a missing
                // link or a link without href gives empty strings.
                var detailLink  = htmlNode.SelectSingleNode(".//div[@class='detail']//a");
                var productName = HttpUtility.HtmlDecode(detailLink?.InnerText ?? string.Empty);
                var productUrl  = detailLink?.Attributes["href"]?.Value ?? string.Empty;

                // The id is the part after the underscore in the "<digits>_<digits>" URL segment.
                var productId = Regex.Match(Regex.Match(productUrl, @"\d+_\d+").Value, @"(?<=_)\d+").Value;
                var price     = Regex.Match(htmlNode.SelectSingleNode(".//b")?.InnerText ?? string.Empty, PricePattern).Value;
                var priceOld  = Regex.Match(htmlNode.SelectSingleNode(".//del")?.InnerText ?? string.Empty, PricePattern).Value;
                var orderNum  = Regex.Match(htmlNode.SelectSingleNode(".//div[@class='recent-order']")?.InnerText ?? string.Empty, @"\d+").Value;

                resut.Add("shopId", _shopId);
                resut.Add("productName", productName);
                resut.Add("productUrl", $"https:{productUrl}");
                resut.Add("productId", productId);
                resut.Add("price", FormatNumber(price));
                resut.Add("priceOld", FormatNumber(priceOld));
                resut.Add("orderNum", FormatNumber(orderNum));

                resultList.Add(resut);
            }

            return resultList.ToArray();
        }
Пример #22
0
        /// <summary>
        /// Parses the current shop page (HtmlSource) into a single result row.
        /// </summary>
        /// <remarks>
        /// When the shop name indicates an error page, only the error notice is filled in.
        /// Otherwise the shop search page is queried (first by shop name, then by shop keeper)
        /// and the entry matching the user id supplies the rating, sales and location fields.
        /// If no entry is found the row is still emitted, with an error notice.
        /// </remarks>
        /// <returns>An array containing exactly one result.</returns>
        protected override IResut[] ParseCurrentItems()
        {
            var    stringEmpty               = string.Empty;
            var    errorNotice               = stringEmpty;
            var    bossNickName              = stringEmpty;
            string goodCommentCount          = null;
            var    mainIndustry              = stringEmpty;
            var    Comment_MatchDescrip      = stringEmpty;
            var    Comment_MatchDescripRate  = stringEmpty;
            var    Comment_ServiceStatue     = stringEmpty;
            var    Comment_ServiceStatueRate = stringEmpty;
            var    Comment_ShipSpeed         = stringEmpty;
            var    Comment_ShipSpeedRate     = stringEmpty;
            var    marginCharge              = stringEmpty;
            var    shopRank                  = stringEmpty;
            var    location                  = stringEmpty;
            var    saleCount                 = stringEmpty;
            var    productCount              = stringEmpty;
            var    goodCommentRate           = stringEmpty;
            var    mainBiz                   = stringEmpty;
            var    shopAge                   = "0";
            var    intDefault                = 0;
            // Sentinel start date. Built directly instead of DateTime.Parse so the value
            // does not depend on the current culture or its calendar.
            var    dateDefault               = new DateTime(1990, 1, 1, 0, 0, 0);
            var    shopStartDate             = dateDefault;

            var resultList = new List <IResut>();
            var shopId     = GetShopId(HtmlSource);
            var userId     = GetUserId(HtmlSource);
            var shopName   = GetShopName(HtmlSource);

            _shopType = GetShopType(HtmlSource);
            if (string.IsNullOrEmpty(shopName) || shopName.Equals("店铺浏览"))
            {
                // No usable shop name: use the notice found on the page, or a generic one.
                errorNotice = GetErrorNotice(HtmlSource);
                if (string.IsNullOrEmpty(errorNotice))
                {
                    errorNotice = "不支持的店铺类型";
                }
            }
            else if (shopName.Equals("旺铺关闭页面"))
            {
                errorNotice = "店铺不符合开通条件";
            }
            else if (shopName.Contains("页面找不到了"))
            {
                errorNotice = "页面找不到了";
            }
            else
            {
                marginCharge  = GetMarginCharge(HtmlSource);
                shopAge       = GetShopAge(HtmlSource, _shopType);
                shopStartDate = GetShopStartDate(HtmlSource, _shopType);

                // URL-encode the shop name; otherwise special characters such as '#' break the query string.
                var shopNameEncoding = System.Web.HttpUtility.UrlEncode(shopName);
                var url        = $"https://shopsearch.taobao.com/search?app=shopsearch&q={shopNameEncoding}";
                var htmlString = GetWebContent(url);
                // Pick the search entry that belongs to this user id.
                var tempToken = GetContentJsonStringByUserId(htmlString, userId);

                if (tempToken == null)
                {
                    // Searching by shop name found nothing: retry the search with the shop keeper.
                    var shopKeeper = GetShopKeeper(HtmlSource, _shopType);
                    url =
                        $"https://shopsearch.taobao.com/search?app=shopsearch&q={System.Web.HttpUtility.UrlEncode(shopKeeper)}";
                    htmlString = GetWebContent(url);
                    tempToken  = GetContentJsonStringByUserId(htmlString, userId);
                }

                if (tempToken != null)
                {
                    var tempString = tempToken.ToString();
                    bossNickName              = tempToken["nick"]?.ToString();
                    goodCommentCount          = GetGoodCommentCount(tempString);
                    mainIndustry              = GetMainIndustry(tempString);
                    Comment_MatchDescrip      = GetComment_MatchDescrip(tempString);
                    Comment_MatchDescripRate  = GetComment_MatchDescripRate(tempString);
                    Comment_ServiceStatue     = GetComment_ServiceStatue(tempString);
                    Comment_ServiceStatueRate = GetComment_ServiceStatueRate(tempString);
                    Comment_ShipSpeed         = GetComment_ShipSpeed(tempString);
                    Comment_ShipSpeedRate     = GetComment_ShipSpeedRate(tempString);
                    shopRank        = tempToken["shopIcon"]?["iconClass"]?.ToString();
                    location        = tempToken["provcity"]?.ToString();
                    saleCount       = tempToken["totalsold"]?.ToString();
                    productCount    = tempToken["procnt"]?.ToString();
                    // Stored without the percent sign.
                    goodCommentRate = tempToken["goodratePercent"]?.ToString().Replace("%", "");
                    mainBiz         = tempToken["mainAuction"]?.ToString();
                }
                else
                {
                    errorNotice = "店铺存在但搜索不到";
                }
            }

            // Any error resets the shop type that is reported in the DayMonitor column.
            if (!string.IsNullOrEmpty(errorNotice))
            {
                _shopType = "0";
            }

            Resut resut = new Resut
            {
                // Shop id
                ["ShopId"] = shopId,
                // Shop name
                ["ShopName"] = shopName,
                // Secondary shop name (same value here)
                ["ShopName2"] = shopName,
                // Wangwang account
                ["BossName"] = bossNickName,
                // Wangwang nickname
                ["BossNickName"] = bossNickName,
                // Company name
                ["CompanyName"] = stringEmpty,
                // Shop opening date
                ["ShopStartDate"] = shopStartDate,
                // Shop age
                ["ShpAgeNum"] = shopAge,
                // Crawl entry URL
                ["ShopUrl"] = GetUrlFormat(CurrentUrl),
                // Number of positive reviews
                ["GoodCommentCount"] = goodCommentCount,
                // Main industry
                ["MainIndustry"] = mainIndustry,
                // "Matches description" score
                ["Comment_MatchDescrip"] = GetIntDefault(Comment_MatchDescrip),
                // "Matches description" rate
                ["Comment_MatchDescripRate"] = GetIntDefault(Comment_MatchDescripRate),
                // Service attitude score
                ["Comment_ServiceStatue"] = GetIntDefault(Comment_ServiceStatue),
                // Service attitude rate
                ["Comment_ServiceStatueRate"] = GetIntDefault(Comment_ServiceStatueRate),
                // Shipping service score
                ["Comment_ShipSpeed"] = GetIntDefault(Comment_ShipSpeed),
                // Shipping service rate
                ["Comment_ShipSpeedRate"] = GetIntDefault(Comment_ShipSpeedRate),
                // Deposit
                ["MarginCharge"] = GetIntDefault(marginCharge),
                // Shop rank
                ["ShopRank"] = shopRank,
                // Location
                ["Location"] = location,
                // Units sold
                ["SaleCount"] = GetIntDefault(saleCount),
                // Number of products
                ["ProductCount"] = GetIntDefault(productCount),
                // Positive review rate
                ["GoodCommentRate"] = GetIntDefault(goodCommentRate),
                // Main products
                ["MainBiz"] = mainBiz,
                // Shop type ("0" when an error was detected)
                ["DayMonitor"] = _shopType,
                ["Loaned"]     = intDefault,
                ["targetuid"]  = _targetUid,
                // Current shop status
                ["Error_Notice"] = errorNotice
            };

            resultList.Add(resut);
            return(resultList.ToArray());
        }
Пример #23
0
        /// <summary>
        /// Parses the current shop page (HtmlSource) into a single result row.
        /// </summary>
        /// <remarks>
        /// Shops whose name marks them as delisted, closed or missing are still emitted,
        /// with the status in <c>Error_Notice</c>, <c>DayMonitor</c> set to "0" and no rating lookup.
        /// </remarks>
        /// <returns>An array containing exactly one result.</returns>
        /// <exception cref="Exception">
        /// Thrown, after a 30 second pause, when the generic portal title is returned instead of a shop name.
        /// </exception>
        protected override IResut[] ParseCurrentItems()
        {
            var resultList = new List <IResut>();

            var shopId   = GetShopId(HtmlSource);
            var shopName = GetShopName(HtmlSource);

            // The generic portal title is treated as a sign of being blocked: log, pause, then fail.
            if (shopName.Equals("1688.com,阿里巴巴打造的全球最大的采购批发平台"))
            {
                SendLog("发现被屏蔽,暂停30s");
                Thread.Sleep(30000);
                throw new Exception("被屏蔽了!");
            }
            var shopName2    = GetShopName2(HtmlSource);
            var bossNickName = GetBossName(HtmlSource);
            var shpAgeNum    = GetShpAgeNum(HtmlSource);

            shpAgeNum = GetIntDefault(shpAgeNum);
            var mainBiz      = GetMainBiz(HtmlSource);
            var location     = GetLocation(HtmlSource);
            var productCount = GetProductCount(HtmlSource);
            var shopRank     = GetShopRank(HtmlSource);

            // Shop status derived from the name, evaluated once and reused below.
            var isDelisted        = shopName.Equals("违规下架");
            var isClosedOrMissing = shopName.Contains("旺铺关闭") || shopName.Equals("404-阿里巴巴");

            // Ratings are only fetched for shops that are still reachable.
            IDictionary <string, string> dicComment = !isClosedOrMissing && !isDelisted ? GetComment() : new Dictionary <string, string>();

            var stringEmpty = string.Empty;
            var intDefault  = 0;
            // Sentinel opening date. Built directly instead of DateTime.Parse so the value
            // does not depend on the current culture or its calendar.
            var dateDefault = new DateTime(1990, 1, 1, 0, 0, 0);

            var errorNotice = stringEmpty;

            if (isDelisted)
            {
                errorNotice = "违规下架";
            }
            else if (isClosedOrMissing)
            {
                // The page title itself describes the problem.
                errorNotice = shopName;
            }

            var dayMonitor = string.IsNullOrEmpty(errorNotice) ? "1" : "0";

            var resut = new Resut
            {
                // Shop id
                ["ShopId"] = shopId,
                // Shop name
                ["ShopName"] = shopName,
                // Secondary shop name
                ["ShopName2"] = shopName2,
                // Wangwang account
                ["BossName"] = bossNickName,
                // Wangwang nickname
                ["BossNickName"] = bossNickName,
                // Company name
                ["CompanyName"] = shopName,
                // Shop opening date (always the sentinel here)
                ["ShopStartDate"] = dateDefault,
                // Shop age
                ["ShpAgeNum"] = shpAgeNum,
                // Crawl entry URL
                ["ShopUrl"] = GetUrlFormat(CurrentUrl),
                // Number of positive reviews
                ["GoodCommentCount"] = intDefault,
                // Main industry
                ["MainIndustry"] = stringEmpty,
                // "Matches description" score
                ["Comment_MatchDescrip"] = intDefault,
                // "Matches description" rate
                ["Comment_MatchDescripRate"] = dicComment.ContainsKey("Comment_MatchDescripRate") ? GetIntDefault(dicComment["Comment_MatchDescripRate"]) : "0",
                // Service attitude score
                ["Comment_ServiceStatue"] = intDefault,
                // Service attitude rate
                ["Comment_ServiceStatueRate"] = dicComment.ContainsKey("Comment_ServiceStatueRate") ? GetIntDefault(dicComment["Comment_ServiceStatueRate"]) : "0",
                // Shipping service score
                ["Comment_ShipSpeed"] = intDefault,
                // Shipping service rate
                ["Comment_ShipSpeedRate"] = dicComment.ContainsKey("Comment_ShipSpeedRate") ? GetIntDefault(dicComment["Comment_ShipSpeedRate"]) : "0",
                // Deposit
                ["MarginCharge"] = intDefault,
                // Shop rank
                ["ShopRank"] = shopRank,
                // Location
                ["Location"] = location,
                // Units sold
                ["SaleCount"] = intDefault,
                // Number of products
                ["ProductCount"] = GetIntDefault(productCount),
                // Positive review rate
                ["GoodCommentRate"] = intDefault,
                // Main products
                ["MainBiz"]    = mainBiz,
                ["DayMonitor"] = dayMonitor,
                ["Loaned"]     = intDefault,
                ["targetuid"]  = stringEmpty,
                // Current shop status
                ["Error_Notice"] = errorNotice
            };

            resultList.Add(resut);
            return(resultList.ToArray());
        }
Пример #24
0
        /// <summary>
        /// Builds the single result row for the current shop.
        /// </summary>
        /// <remarks>
        /// When the existence flag read from HtmlSource equals "0", every field except the
        /// shop id and the flag itself is left blank. Otherwise the fields are read from
        /// HtmlSource and from the mobile detail page of the shop.
        /// </remarks>
        /// <returns>An array containing exactly one result.</returns>
        protected override IResut[] ParseCurrentItems()
        {
            var existsFlag = GetShopIsExist(HtmlSource);

            if (existsFlag.Equals("0"))
            {
                // Columns that stay blank for a shop that does not exist, in output order.
                var blankKeys = new[]
                {
                    "ShopName", "Location", "Phone", "IllegalRecord",
                    "Comment_General", "Comment_GeneralRate",
                    "Comment_MatchDescrip", "Comment_MatchDescripRate",
                    "Comment_ServiceStatue", "Comment_ServiceStatueRate",
                    "Comment_ShipSpeed", "Comment_ShipSpeedRate",
                    "Comment_ProductDescrip", "Comment_ProductDescripRate",
                    "Comment_ReturnGoods", "Comment_ReturnGoodsRate",
                    "Servece_AfterSales", "Servece_AfterSalesRate",
                    "Service_TradeDispute", "Service_TradeDisputeRate",
                    "Service_ReturnRepair", "Service_ReturnRepairRate",
                    "FollowNumber", "ProductsNum", "NewProducts", "PromotionNum",
                    "OpenTime"
                };

                var placeholder = new Resut();
                placeholder["ShopId"]       = _shopId;
                placeholder["BrandProfile"] = string.Empty;
                placeholder["ShopIsExist"]  = existsFlag;
                foreach (var key in blankKeys)
                {
                    placeholder[key] = string.Empty;
                }

                return new IResut[] { placeholder };
            }

            // Fields available on the already downloaded page.
            var profileLink = GetBrandProfile(HtmlSource);
            var name        = GetShopName(HtmlSource);
            var place       = GetLocation(HtmlSource);
            var telephone   = GetPhone(HtmlSource);
            var violations  = GetIllegalRecord(HtmlSource);
            var ratings     = GetShopComment(HtmlSource);
            var services    = GetShopService(HtmlSource);

            // Followers, product counters and opening time come from the mobile detail page.
            var detailUrl  = $"http://shop.m.jd.com/detail/detail?shopId={_shopId}";
            var detailHtml = base.GetWebContent(detailUrl);
            var followers  = GetFollowNumber(detailHtml);
            var counters   = GetTotalNumDic(detailHtml);
            var opened     = GetOpenTime(detailHtml);

            var row = new Resut
            {
                // Shop id
                ["ShopId"] = _shopId,
                // Brand profile link
                ["BrandProfile"] = profileLink,
                // Existence flag
                ["ShopIsExist"] = existsFlag,
                // Company name
                ["ShopName"] = name,
                // Location
                ["Location"] = place,
                // Phone
                ["Phone"] = telephone,
                // Number of violations
                ["IllegalRecord"] = violations,
                // Overall score and its rate
                ["Comment_General"]     = ratings["Comment_General"],
                ["Comment_GeneralRate"] = ratings["Comment_GeneralRate"],
                // Quality satisfaction and its rate
                ["Comment_MatchDescrip"]     = ratings["Comment_MatchDescrip"],
                ["Comment_MatchDescripRate"] = ratings["Comment_MatchDescripRate"],
                // Service attitude satisfaction and its rate
                ["Comment_ServiceStatue"]     = ratings["Comment_ServiceStatue"],
                ["Comment_ServiceStatueRate"] = ratings["Comment_ServiceStatueRate"],
                // Shipping speed satisfaction and its rate
                ["Comment_ShipSpeed"]     = ratings["Comment_ShipSpeed"],
                ["Comment_ShipSpeedRate"] = ratings["Comment_ShipSpeedRate"],
                // Product description satisfaction and its rate
                ["Comment_ProductDescrip"]     = ratings["Comment_ProductDescrip"],
                ["Comment_ProductDescripRate"] = ratings["Comment_ProductDescripRate"],
                // Return/exchange handling satisfaction and its rate
                ["Comment_ReturnGoods"]     = ratings["Comment_ReturnGoods"],
                ["Comment_ReturnGoodsRate"] = ratings["Comment_ReturnGoodsRate"],
                // After-sales handling time and its rate
                ["Servece_AfterSales"]     = services["Servece_AfterSales"],
                ["Servece_AfterSalesRate"] = services["Servece_AfterSalesRate"],
                // Trade dispute rate and its comparison rate
                ["Service_TradeDispute"]     = services["Service_TradeDispute"],
                ["Service_TradeDisputeRate"] = services["Service_TradeDisputeRate"],
                // Return/repair rate and its comparison rate
                ["Service_ReturnRepair"]     = services["Service_ReturnRepair"],
                ["Service_ReturnRepairRate"] = services["Service_ReturnRepairRate"],
                // Followers
                ["FollowNumber"] = followers,
                // All products
                ["ProductsNum"] = counters["ProductsNum"],
                // New arrivals
                ["NewProducts"] = counters["NewProductsNum"],
                // Promotions
                ["PromotionNum"] = counters["PromotionNum"],
                // Opening time
                ["OpenTime"] = opened
            };

            return new IResut[] { row };
        }
Пример #25
0
        /// <summary>
        /// Extracts the products listed in HtmlSource, one result per product id that has
        /// not been seen before (tracked in _hashTable).
        /// </summary>
        /// <remarks>
        /// Two markup variants are handled. When _isInHtml is set, attribute values are plain
        /// (<c>class='pagination'</c>); otherwise they are wrapped in backslash-escaped quotes
        /// (<c>class='\"pagination\"'</c>). In both variants the sibling divs are walked in order:
        /// item rows are parsed, and the walk stops at the pagination div or at the "comboHd"
        /// div (which also clears the URL queue).
        /// Missing nodes or attributes cause the affected row or product to be skipped.
        /// </remarks>
        /// <returns>The newly found products; empty when the expected container is not present.</returns>
        protected override IResut[] ParseCurrentItems()
        {
            var resultList = new List<IResut>();

            // Helpers for the escaped variant: strip the surrounding \" ... \" from attribute values.
            Newtonsoft.Json.Serialization.Func<string, string> getFormatProductId = productId => Regex.Match(productId, @"(?<=\\"").*(?=\\"")").Value;
            Newtonsoft.Json.Serialization.Func<string, string> getFormatProductName = productName => productName.Trim();
            Newtonsoft.Json.Serialization.Func<string, string> getFormatProductUrl = productUrl => $"https:{Regex.Match(productUrl, @"(?<=\\"").*(?=\\"")").Value}";

            // The original author noted that XPath matches()/ends-with() failed here with a
            // "namespace manager or XsltContext needed" error, hence the contains()-based tests below.
            var docmentNode = HtmlAgilityPackHelper.GetDocumentNodeByHtml(HtmlSource);

            if (_isInHtml)
            {
                // NOTE(review): the last fallback selects <dl> nodes, which the loop below then
                // filters by their own class attribute; confirm it can actually yield rows.
                var htmlNodeCollection = docmentNode.SelectNodes(@"//div[@class='pagination']/parent::div/child::div")
                                         ?? docmentNode.SelectNodes(@"//div[@class='comboHd']/parent::div/child::div")
                                         ?? docmentNode.SelectNodes(@"//div[contains(@class,'item') and contains(@class,'line1')]//dl");

                // SelectNodes yields null when nothing matches.
                if (htmlNodeCollection == null)
                {
                    return resultList.ToArray();
                }

                foreach (var htmlNode in htmlNodeCollection)
                {
                    var attributes = htmlNode.Attributes["class"]?.Value ?? string.Empty;
                    // Stop here: the recommended products that follow are not wanted.
                    if (attributes == @"pagination")
                    {
                        break;
                    }
                    if (attributes == @"comboHd")
                    {
                        // Clear the queue.
                        _urlQueue.Clear();
                        break;
                    }
                    if (!attributes.Contains(@"item") || !attributes.Contains(@"line1"))
                    {
                        continue;
                    }

                    var htmlNodeDls = htmlNode.SelectNodes(".//dl");
                    if (htmlNodeDls == null)
                    {
                        continue;
                    }

                    foreach (var htmlNodeDl in htmlNodeDls)
                    {
                        var detailNode =
                                htmlNodeDl.SelectSingleNode(
                                    @".//dd[@class='detail']//a[@class='item-name J_TGoldData']");
                        var productUrl = detailNode?.Attributes["href"]?.Value;
                        if (productUrl == null)
                        {
                            // No detail link: nothing to identify the product by.
                            continue;
                        }

                        var productName = getFormatProductName(detailNode.InnerText);
                        var productId = Regex.Match(productUrl, @"(?<=id=)\d+").Value;
                        // Only emit products whose id has not been seen yet.
                        if (_hashTable.ContainsKey(productId))
                        {
                            continue;
                        }
                        _hashTable.Add(productId, null);

                        var price =
                            htmlNodeDl.SelectSingleNode(@".//span[@class='c-price']")?.InnerText.Trim();
                        // This variant has no maximum price.
                        string maxPrice = null;
                        var saleNum =
                            htmlNodeDl.SelectSingleNode(@".//span[@class='sale-num']")?.InnerText.Trim();
                        var comment = htmlNodeDl.SelectSingleNode(@".//h4/a/span")?.InnerText;
                        comment = comment == null ? null : Regex.Match(comment, @"\d+").Value;
                        var resut = new Resut
                        {
                            ["productId"] = productId,
                            ["productName"] = productName,
                            ["productUrl"] = productUrl,
                            ["shopId"] = _shopUrl,
                            ["shopName"] = _shopName,
                            ["price"] = price,
                            ["maxPrice"] = maxPrice,
                            ["saleNum"] = saleNum,
                            ["comment"] = comment
                        };

                        resultList.Add(resut);
                    }
                }
            }
            else
            {
                var htmlNodeCollection = docmentNode.SelectNodes(
                    @"//div[@class='\""pagination\""']/parent::div/child::div")
                                         ??
                                         docmentNode.SelectNodes(@"//div[@class='\""comboHd\""']/parent::div/child::div");

                // SelectNodes yields null when nothing matches.
                if (htmlNodeCollection == null)
                {
                    return resultList.ToArray();
                }

                foreach (var htmlNode in htmlNodeCollection)
                {
                    var attributes = htmlNode.Attributes["class"]?.Value ?? string.Empty;
                    // Stop here: the recommended products that follow are not wanted.
                    if (attributes == @"\""pagination\""")
                    {
                        break;
                    }
                    if (attributes == @"\""comboHd\""")
                    {
                        // Clear the queue.
                        _urlQueue.Clear();
                        break;
                    }
                    if (!attributes.Contains(@"\""item") || !attributes.Contains(@"line1\"""))
                    {
                        continue;
                    }

                    var htmlNodeDls = htmlNode.SelectNodes(".//dl");
                    if (htmlNodeDls == null)
                    {
                        continue;
                    }

                    foreach (var htmlNodeDl in htmlNodeDls)
                    {
                        var rawProductId = htmlNodeDl.Attributes["data-id"]?.Value;
                        if (rawProductId == null)
                        {
                            continue;
                        }

                        var productId = getFormatProductId(rawProductId);
                        // Only emit products whose id has not been seen yet.
                        if (_hashTable.ContainsKey(productId))
                        {
                            continue;
                        }

                        var detailNode =
                            htmlNodeDl.SelectSingleNode(
                                @".//dd[@class='\""detail\""']//a[@class='\""item-name']");
                        var rawProductUrl = detailNode?.Attributes["href"]?.Value;
                        if (rawProductUrl == null)
                        {
                            // Leave the id unmarked so that a later, complete listing can still emit it.
                            continue;
                        }
                        _hashTable.Add(productId, null);

                        var productName = getFormatProductName(detailNode.InnerText);
                        var productUrl = getFormatProductUrl(rawProductUrl);
                        var price =
                            htmlNodeDl.SelectSingleNode(@".//span[@class='\""c-price\""']")?.InnerText.Trim();
                        var maxPrice =
                            htmlNodeDl.SelectSingleNode(@".//span[@class='\""s-price\""']")?.InnerText.Trim();
                        var saleNum =
                            htmlNodeDl.SelectSingleNode(@".//span[@class='\""sale-num\""']")?.InnerText.Trim();
                        var comment = htmlNodeDl.SelectSingleNode(@".//div[@class='\""title\""']")?.InnerText;
                        comment = comment == null ? null : Regex.Match(comment, @"\d+").Value;
                        var resut = new Resut
                        {
                            ["productId"] = productId,
                            ["productName"] = productName,
                            ["productUrl"] = productUrl,
                            ["shopId"] = _shopUrl,
                            ["shopName"] = _shopName,
                            ["price"] = price,
                            ["maxPrice"] = maxPrice,
                            ["saleNum"] = saleNum,
                            ["comment"] = comment
                        };

                        resultList.Add(resut);
                    }
                }
            }

            return resultList.ToArray();
        }
Пример #26
0
        /// <summary>
        /// Builds the single result row describing the current product.
        /// </summary>
        /// <remarks>
        /// Fields are read partly from _html and partly from HtmlSource; the review counters
        /// come from GetCommentDic().
        /// </remarks>
        /// <returns>An array containing exactly one result.</returns>
        protected override IResut[] ParseCurrentItems()
        {
            const string delistedNotice = "该商品已下柜,非常抱歉!";

            // The regular name lookup may come back empty; fall back to the special-page lookup.
            var name = this.GetProductName(_html);
            if (name.Equals(string.Empty))
            {
                name = this.GetSpecialProductName(HtmlSource);
            }

            var colour       = this.GetSelectColor(HtmlSource);
            var image        = this.GetImgSrc(HtmlSource);
            var reminder     = this.GetWarmReminder(HtmlSource);
            var priceText    = this.GetProductPrice(_html);
            var installment  = this.GetWhiteBar(_html);
            var serviceText  = this.GetService(_html);
            var promotion    = this.GetDiscount(_html);
            var activity     = this.GetProductActivity(_html);
            var availability = this.ProductIsExist(_html);

            if (name.Equals(string.Empty))
            {
                // Still no name after the fallback: report the product as missing.
                availability = "产品不存在!";
            }
            else if (!availability.Equals(delistedNotice))
            {
                // Not delisted: fetch the stock / delivery text. A delisted notice is kept as is.
                availability = this.GetIsExist(_html);
            }

            var counts = this.GetCommentDic();

            var row = new Resut
            {
                // Product id
                ["ProductId"] = this._productId,
                // Product name
                ["ProductName"] = name,
                // Selected colour
                ["SelectColor"] = colour,
                // Image link
                ["ImgSrc"] = image,
                // Reminder text
                ["WarmReminder"] = reminder,
                // Price
                ["ProductPrice"] = priceText,
                // "White bar" field
                ["WhiteBar"] = installment,
                // Service
                ["Service"] = serviceText,
                // Promotion
                ["Discount"] = promotion,
                // Product activity
                ["ProductActivity"] = activity,
                // Availability and estimated delivery
                ["IsExist"] = availability,
                // All reviews
                ["AllCnt"] = counts["allCnt"],
                // Positive reviews
                ["GoodCnt"] = counts["goodCnt"],
                // Neutral reviews
                ["NormalCnt"] = counts["normalCnt"],
                // Negative reviews
                ["BadCnt"] = counts["badCnt"],
                // Reviews with pictures
                ["PictureCnt"] = counts["pictureCnt"]
            };

            var resultList = new List <IResut>();
            resultList.Add(row);
            return(resultList.ToArray());
        }
        /// <summary>
        /// Scrapes every product page queued for <c>CurrentUrl</c> and returns one result row per product.
        /// </summary>
        /// <remarks>
        /// For each URL yielded by <c>GetUrlQueueByUrl</c> the page is downloaded and the title, rating,
        /// order, discount, price, stock and countdown values are extracted with regular expressions.
        /// When a product id is found, the wish-list counter endpoint is queried as well: time-outs on
        /// that request are retried a bounded number of times, and any other failure leaves the
        /// collect count empty (best effort).
        /// </remarks>
        /// <returns>One <c>IResut</c> per URL yielded by <c>GetUrlQueueByUrl</c>, in iteration order.</returns>
        protected override IResut[] ParseCurrentItems()
        {
            // Cap on wish-list download attempts so that an endpoint which keeps timing out
            // cannot stall the whole crawl in an endless retry loop.
            const int maxWishlistAttempts = 5;

            var urlSet     = GetUrlQueueByUrl(CurrentUrl);
            var resultList = new List <IResut>();

            // Loop-invariant: the same extractor delegate is used for every page.
            GetValueByHtmlAndRegex getValueByHtmlAndRegex = new GetValueByHtmlAndRegex(GetResultByHtmlAndRegex);

            foreach (var url in urlSet)
            {
                var html = GetMainWebContent(url, null, ref _cookies, null);

                // Values rendered in the page markup.
                var title        = getValueByHtmlAndRegex(html, "(?<=<title>).*(?=</title>)").Replace("Aliexpress.com : ", "");
                var percentNum   = getValueByHtmlAndRegex(html, "(?<=<span class=\"percent-num\">).*?(?=</span>)");
                var ratingsNum   = GetInt(getValueByHtmlAndRegex(html, "(?<=<span class=\"rantings-num\">).*?(?=</span>)"));
                var orderNum     = GetInt(getValueByHtmlAndRegex(html, "(?<=<span class=\"order-num\" id=\"j-order-num\">).*?(?=</span>)"));
                var discountRage = GetInt(getValueByHtmlAndRegex(html, "(?<=<span class=\"p-discount-rate\">).*?(?=</span>)"));

                // Values assigned in the page's inline script (name="value"; or name=value;).
                var actMinPrice         = getValueByHtmlAndRegex(html, "(?<=actMinPrice=\").*?(?=\";)");
                var actMaxPrice         = getValueByHtmlAndRegex(html, "(?<=actMaxPrice=\").*?(?=\";)");
                var minPrice            = getValueByHtmlAndRegex(html, "(?<=minPrice=\").*?(?=\";)");
                var maxPrice            = getValueByHtmlAndRegex(html, "(?<=maxPrice=\").*?(?=\";)");
                var mobileDiscountPrice = GetDouble(getValueByHtmlAndRegex(html, "(?<=mobileDiscountPrice=\").*?(?=\";)"));
                var productId           = getValueByHtmlAndRegex(html, "(?<=productId=\").*?(?=\";)");
                var totalAvailQuantity  = getValueByHtmlAndRegex(html, @"(?<=totalAvailQuantity=)\d+(?=;)");

                // Wish-list ("collect") count lives behind a separate endpoint keyed by product id.
                string collectNum = string.Empty;
                if (!string.IsNullOrEmpty(productId))
                {
                    var    wishlistUrl  = $"https://us.ae.aliexpress.com/wishlist/wishlist_item_count.htm?itemid={productId}";
                    string wishlistHtml = string.Empty;

                    for (var attempt = 0; attempt < maxWishlistAttempts; attempt++)
                    {
                        try
                        {
                            wishlistHtml = GetMainWebContent(wishlistUrl, null, ref _cookies, null);
                            break;
                        }
                        catch (Exception e)
                        {
                            // Time-outs are recognised by the localized text in the exception dump.
                            var detail   = e.ToString();
                            var timedOut = detail.Contains("操作超时") || detail.Contains("操作已超时");
                            if (!timedOut)
                            {
                                // Best effort: any other failure leaves the wish-list markup empty.
                                break;
                            }
                        }
                    }

                    collectNum = getValueByHtmlAndRegex(wishlistHtml, @"(?<=""num"":)\d+(?=})");
                }

                // Promotion countdown: when the element carries data-hour/minute/second attributes,
                // rewrite it as hour:minute:second with single digits zero-padded.
                var eventTimeLeft = Regex.Match(html, "(?<=class=\"p-eventtime-left\").*?(?=</span>)").Value;
                if (eventTimeLeft.Contains("data-hour") || eventTimeLeft.Contains("data-minute") ||
                    eventTimeLeft.Contains("data-second"))
                {
                    var countdownMarkup = eventTimeLeft;

                    // Reads one attribute value; a missing attribute stays empty (no padding).
                    Func <string, string> readPart = pattern =>
                    {
                        var digits = Regex.Match(countdownMarkup, pattern).Value;
                        return digits.Length == 1 ? $"0{digits}" : digits;
                    };

                    var hour   = readPart(@"(?<=data-hour="")\d+(?="")");
                    var minute = readPart(@"(?<=data-minute="")\d+(?="")");
                    var second = readPart(@"(?<=data-second="")\d+(?="")");

                    eventTimeLeft = $"{hour}:{minute}:{second}";
                }

                IResut resut = new Resut()
                {
                    { "ShopId", _shopId },
                    { "Url", url },
                    { "Title", title },
                    { "PercentNum", percentNum },
                    { "RatingsNum", ratingsNum },
                    { "OrderNum", orderNum },
                    { "DiscountRage", discountRage },
                    { "EventTimeLeft", eventTimeLeft },
                    { "ActMinPrice", actMinPrice },
                    { "ActMaxPrice", actMaxPrice },
                    { "MinPrice", minPrice },
                    { "MaxPrice", maxPrice },
                    { "MobileDiscountPrice", mobileDiscountPrice },
                    { "ProductId", productId },
                    { "TotalAvailQuantity", totalAvailQuantity },
                    { "CollectNum", collectNum }
                };

                resultList.Add(resut);
            }

            return(resultList.ToArray());
        }
Пример #28
0
        /// <summary>
        /// Parses the job list in <c>HtmlSource</c> and returns one result row per listed job posting.
        /// </summary>
        /// <remarks>
        /// Every <c>dl</c> entry under the <c>infolist</c> container is followed to its detail page.
        /// Besides the detail page itself, the method queries the statistics, view-counter, company,
        /// membership and resume-feedback endpoints for each posting. Optional page elements that are
        /// missing yield <c>null</c> or empty field values instead of aborting the parse.
        /// </remarks>
        /// <returns>One <c>IResut</c> per job entry found in the list, in document order.</returns>
        /// <exception cref="Exception">
        /// Thrown when the job list is still missing after three re-downloads of <c>CurrentUrl</c>,
        /// or when a list entry has no usable job link.
        /// </exception>
        protected override IResut[] ParseCurrentItems()
        {
            var resultList = new List <IResut>();

            var htmlNode           = HtmlAgilityPack.HtmlAgilityPackHelper.GetDocumentNodeByHtml(HtmlSource);
            var htmlNodeCollection = htmlNode.SelectNodes("//div[@id='infolist']//dl");

            // The site intermittently answers with a page that lacks the list, so re-download and retry.
            var tryTimes = 0;

            while (htmlNodeCollection == null)
            {
                if (++tryTimes > 3)
                {
                    throw new Exception("htmlNodeCollectionNullException tryTimes more than 3 times");
                }
                Console.WriteLine($"htmlNodeCollectionNullException tryTimes {tryTimes}");
                var html = GetWebContent(CurrentUrl);
                htmlNode           = HtmlAgilityPack.HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);
                htmlNodeCollection = htmlNode.SelectNodes("//div[@id='infolist']//dl");
            }

            foreach (var node in htmlNodeCollection)
            {
                // A link without an href attribute is reported the same way as a missing link.
                var jobUrl = node.SelectSingleNode("./dt/a")?.Attributes["href"]?.Value;
                if (string.IsNullOrEmpty(jobUrl))
                {
                    throw new Exception("jobUrlNullException");
                }

                var html           = _httpHelper.GetHtmlByGet(jobUrl);
                var infoId         = Regex.Match(html, @"(?<=""info[iI]d"":)\d+").Value;
                var userId         = Regex.Match(html, @"(?<=""user[iI]d"":)\d+").Value;
                var statisticsHtml = _httpHelper.GetHtmlByGet($"http://statistics.zp.58.com/position/totalcount/?infoId={infoId}&userId={userId}");
                var htmlNodeR      = HtmlAgilityPack.HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);

                // Basic posting fields from the detail page.
                var jobName            = htmlNodeR.SelectSingleNode("//span[@class='pos_title']")?.InnerText;
                var jobCount           = GetNumber(htmlNodeR.SelectSingleNode("//span[@class='item_condition pad_left_none']")?.InnerText);
                var degreeRequired     = htmlNodeR.SelectSingleNode("//span[@class='item_condition']")?.InnerText;
                var experienceRequired = htmlNodeR.SelectSingleNode("//span[@class='item_condition border_right_None']")?.InnerText.Trim();
                var location           = htmlNodeR.SelectSingleNode("//div[@class='pos-area']/span[1]")?.InnerText.Trim();

                // Salary is rendered under one of two class names; either may be absent.
                var salary = htmlNodeR.SelectSingleNode("//span[@class='pos_salary']")?.InnerText
                             ?? htmlNodeR.SelectSingleNode("//span[@class='pos_salary daiding']")?.InnerText;

                // Regex.Match rejects a null input, so a missing statistics node is treated as empty text.
                var statisticsText = htmlNodeR.SelectSingleNode("//div[@class='pos_base_statistics']/span[1]")?.InnerText ?? string.Empty;
                var jobUpdateDate  = FormatTime(Regex.Match(statisticsText, "(?<=更新[::]).*$").Value.Trim());

                // The view counter request is sent with the job page as referer.
                _httpHelper.Referer = jobUrl;
                var browseCount = Regex.Match(_httpHelper.GetHtmlByGet($"http://jst1.58.com/counter?infoid={infoId}"), @"(?<=total=)\d+").Value;

                var applyCount = Regex.Match(statisticsHtml, @"(?<=""deliveryCount"":)\d+").Value;

                // Company link: url and display name come from the same anchor.
                var companyLink = htmlNodeR.SelectSingleNode("//div[@class='baseInfo_link']/a");
                var companyUrl  = companyLink?.Attributes["href"]?.Value;

                // Without a company url there is nothing to download; contact fields then stay empty.
                var companyHtml   = string.IsNullOrEmpty(companyUrl) ? string.Empty : _httpHelper.GetHtmlByGet(companyUrl);
                var contactPerson = Regex.Match(companyHtml, @"(?<=<li><span>联系人.*</span>[\s]*)[\S]*?(?=[\s]*</li>)").Value;
                var phoneUrl      = Regex.Match(companyHtml, @"(?<=<li><span>联系电话[\s\S]*?</span><img src="")[\S]*(?=""></li>)").Value;
                var phonePic      = _httpHelper.GetImage(phoneUrl);
                var companyName   = companyLink?.InnerText;

                // Membership lookup, e.g.
                // http://zp.service.58.com/api?action=favorite,wltStats&params={"infoUrl":"http://hz.58.com/zptaobao/30334432354220x.shtml","userIds":"13663438612230_0"}
                // infoUrl is the job url up to (not including) its last '?'.
                var infoUrl       = Regex.Match(jobUrl, @".*(?=\?)").Value;
                var memberYearUrl = $"http://zp.service.58.com/api?action=favorite,wltStats&params={{\"infoUrl\":\"{infoUrl}\",\"userIds\":\"{userId}_0\"}}";
                var memberYear    = Regex.Match(_httpHelper.GetHtmlByGet(memberYearUrl), @"(?<=wlt)\d+").Value;

                var mainIndustry       = htmlNodeR.SelectSingleNode("//a[@class='comp_baseInfo_link']")?.InnerText;
                var companyPersonCount = htmlNodeR.SelectSingleNode("//p[@class='comp_baseInfo_scale']")?.InnerText;
                var businessLicense    = htmlNodeR.SelectSingleNode("//div[@class='identify_con clearfix']/span[1]")?.InnerText;
                var realNameLicense    = htmlNodeR.SelectSingleNode("//div[@class='identify_con clearfix']/span[2]")?.InnerText;
                var taobaoShopLicense  = htmlNodeR.SelectSingleNode("//div[@class='identify_con clearfix']/span[3]")?.InnerText;

                // The resume-feedback request is sent with a freshly generated cookie pair.
                _httpHelper.Cookies = $"id58={GetId58()};58tj_uuid={Guid.NewGuid()}";
                var resumeFeedback = Regex.Match(_httpHelper.GetHtmlByGet($"http://jianli.58.com/ajax/getefrate/{userId}"), @"(?<=""efrate"":)\d+").Value;

                var companyJobNumber = Regex.Match(statisticsHtml, @"(?<=""infoCount"":)\d+").Value;

                // A missing membership node yields an empty month value instead of a null dereference.
                var memberMonthText = htmlNodeR.SelectSingleNode("//span[@class='item_num join58_num']")?.InnerText ?? string.Empty;
                var memberMonth     = Regex.Match(memberMonthText, ".*(?=月)").Value;
                var workAddress     = htmlNodeR.SelectSingleNode("//div[@class='pos-area']/span[2]")?.InnerText;
                var jobDescription  = htmlNodeR.SelectSingleNode("//div[@class='des']")?.InnerText;

                var resut = new Resut
                {
                    ["JobUrl"]             = jobUrl,
                    ["JobName"]            = jobName,
                    ["JobCount"]           = jobCount,
                    ["DegreeRequired"]     = degreeRequired,
                    ["ExperienceRequired"] = experienceRequired,
                    ["Location"]           = location,
                    ["Salary"]             = salary,
                    ["JobUpdateDate"]      = jobUpdateDate,
                    ["BrowseCount"]        = browseCount,
                    ["ApplyCount"]         = applyCount,
                    ["Phone_Pic"]          = phonePic,
                    ["ContactPerson"]      = contactPerson,
                    ["CompanyUrl"]         = companyUrl,
                    ["CompanyName"]        = companyName,
                    ["MemberYear"]         = memberYear,
                    ["MainIndustry"]       = mainIndustry,
                    ["CompanyPersonCount"] = companyPersonCount,
                    ["BusinessLicense"]    = businessLicense,
                    ["TaobaoShopLicense"]  = taobaoShopLicense,
                    ["RealNameLicense"]    = realNameLicense,
                    ["ResumeFeedback"]     = resumeFeedback,
                    ["CompanyJobNumber"]   = companyJobNumber,
                    ["MemberMonth"]        = memberMonth,
                    ["WorkAddress"]        = workAddress,
                    ["JobDescription"]     = jobDescription
                };
                resultList.Add(resut);
            }

            return(resultList.ToArray());
        }
        /// <summary>
        /// Builds the single shop-level result row from the store page held in <c>HtmlSource</c>.
        /// </summary>
        /// <remarks>
        /// Store location and shop age are read from the page itself; every other field is looked up
        /// by key in the collection that <c>GetItemList</c> returns for the evaluation-detail markup.
        /// Numeric fields are passed through <c>FormatNumber</c>, and percentage fields through
        /// <c>RemovePercentSign</c> first.
        /// </remarks>
        /// <returns>An array containing exactly one <c>IResut</c>.</returns>
        protected override IResut[] ParseCurrentItems()
        {
            // Page-level values.
            var location   = GetStoreLocation(HtmlSource);
            var age        = GetShopAge(HtmlSource);
            var detailHtml = GetEvaluationDetailHtml(HtmlSource);
            var items      = GetItemList(detailHtml);

            // Seller summary.
            var sellerName      = items["seller"];
            var pastSixMonthPct = items["positiveFeedbackPastSixMonths"];
            var score           = items["feedbackScore"];
            var sellerSince     = items["aliExpressSellerSince"];

            // Detailed seller ratings: value, number of ratings, percentage.
            var descValue   = items["described"];
            var descRatings = items["describedRatings"];
            var descPercent = items["describedPercent"];
            var commValue   = items["communication"];
            var commRatings = items["communicationRatings"];
            var commPercent = items["communicationPercent"];
            var shipValue   = items["shippingSpeed"];
            var shipRatings = items["shippingSpeedRatings"];
            var shipPercent = items["shippingSpeedPercent"];

            // Feedback history: positive counts per period.
            var pos1M  = items["positiveOneMonth"];
            var pos3M  = items["positiveThreeMonths"];
            var pos6M  = items["positiveSixMonths"];
            var pos1Y  = items["positiveOneYear"];
            var posAll = items["positiveOverall"];

            // Feedback history: negative counts per period.
            var neg1M  = items["negativeOneMonth"];
            var neg3M  = items["negativeThreeMonths"];
            var neg6M  = items["negativeSixMonths"];
            var neg1Y  = items["negativeOneYear"];
            var negAll = items["negativeOverall"];

            // Feedback history: neutral counts per period.
            var neu1M  = items["neutralOneMonth"];
            var neu3M  = items["neutralThreeMonths"];
            var neu6M  = items["neutralSixMonths"];
            var neu1Y  = items["neutralOneYear"];
            var neuAll = items["neutralOverAll"];

            // Feedback history: positive-feedback rate per period.
            var rate1M  = items["positiveFeedbackRateOneMonth"];
            var rate3M  = items["positiveFeedbackRateThreeMonths"];
            var rate6M  = items["positiveFeedbackRateSixMonths"];
            var rate1Y  = items["positiveFeedbackRateOneYear"];
            var rateAll = items["positiveFeedbackRateOverall"];

            var row = new Resut
            {
                { "shopId", _shopId },
                { "storeLocation", location },
                { "shopAge", FormatNumber(age) },
                { "seller", sellerName },
                { "positiveFeedbackPastSixMonths", FormatNumber(RemovePercentSign(pastSixMonthPct)) },
                // NOTE(review): FormatNumber is applied twice to this value - confirm whether that is intentional.
                { "feedbackScore", FormatNumber(FormatNumber(score)) },
                { "aliExpressSellerSince", sellerSince },
                { "described", FormatNumber(descValue) },
                { "describedRatings", FormatNumber(descRatings) },
                { "describedPercent", FormatNumber(descPercent) },
                { "communication", FormatNumber(commValue) },
                { "communicationRatings", FormatNumber(commRatings) },
                { "communicationPercent", FormatNumber(commPercent) },
                { "shippingSpeed", FormatNumber(shipValue) },
                { "shippingSpeedRatings", FormatNumber(shipRatings) },
                { "shippingSpeedPercent", FormatNumber(shipPercent) },
                { "positiveOneMonth", FormatNumber(pos1M) },
                { "positiveThreeMonths", FormatNumber(pos3M) },
                { "positiveSixMonths", FormatNumber(pos6M) },
                { "positiveOneYear", FormatNumber(pos1Y) },
                { "positiveOverall", FormatNumber(posAll) },
                { "negativeOneMonth", FormatNumber(neg1M) },
                { "negativeThreeMonths", FormatNumber(neg3M) },
                { "negativeSixMonths", FormatNumber(neg6M) },
                { "negativeOneYear", FormatNumber(neg1Y) },
                { "negativeOverall", FormatNumber(negAll) },
                { "neutralOneMonth", FormatNumber(neu1M) },
                { "neutralThreeMonths", FormatNumber(neu3M) },
                { "neutralSixMonths", FormatNumber(neu6M) },
                { "neutralOneYear", FormatNumber(neu1Y) },
                { "neutralOverAll", FormatNumber(neuAll) },
                { "positiveFeedbackRateOneMonth", FormatNumber(RemovePercentSign(rate1M)) },
                { "positiveFeedbackRateThreeMonths", FormatNumber(RemovePercentSign(rate3M)) },
                { "positiveFeedbackRateSixMonths", FormatNumber(RemovePercentSign(rate6M)) },
                { "positiveFeedbackRateOneYear", FormatNumber(RemovePercentSign(rate1Y)) },
                { "positiveFeedbackRateOverall", FormatNumber(RemovePercentSign(rateAll)) }
            };

            // Exactly one row is produced for a shop page.
            return new IResut[] { row };
        }
Пример #30
0
        /// <summary>
        /// ParseCurrentItems
        /// </summary>
        /// <returns></returns>
        protected override IResut[] ParseCurrentItems()
        {
            var stringEmpty = string.Empty;


            var cookies = stringEmpty;

            var ItemTypeName = Regex.Match(HtmlSource, "(?<=\"itemTypeName\":\").*?(?=\")").Value;

            var CollectionNumber = Regex.Match(HtmlSource, @"(?<=""favcount"":"")\d+(?="")").Value;

            var Starts = Regex.Match(HtmlSource, "(?<=\"starts\":\").*?(?=\")").Value;


            JObject jObjectSellPoint = new JObject();
            var     SellPoint        = stringEmpty;



            var UserId = stringEmpty;


            //旧的需要访问很多链接
            //if (ItemTypeName.ToLower().Equals("tmall"))
            //{
            //    _tmall_displayHtml = GetMainWebContent($"{_tmall_displayUrl}{_productId}", null, ref cookies, "");

            //    //CollectionNumber
            //    var getUrl = Regex.Match(_tmall_displayHtml, "(?<=\"apiBeans\":\").*?(?=\")").Value;
            //    if (!getUrl.Equals(stringEmpty))
            //    {
            //        var callback = $"jsonp{Random.Next(100, 999)}";
            //        var dateTime = new DateTime();
            //        var start = new DateTime(1970, 1, 1, 0, 0, 0, dateTime.Kind);
            //        var t = Convert.ToInt64((DateTime.Now - start).TotalSeconds);
            //        var _ksTS = $"{t}_{Random.Next(100, 999)}";
            //        _tmall_counter3Url = $"https:{getUrl}&callback={callback}&_ksTS={_ksTS}";
            //        _tmall_counter3Html = GetMainWebContent(_tmall_counter3Url, null, ref cookies, "");
            //        CollectionNumber = Regex.Match(_tmall_counter3Html, $@"(?<=ICCP_1_{_productId}"":)\d+").Value;
            //    }
            //    else
            //    {
            //        CollectionNumber = stringEmpty;
            //    }


            //    var spuId = Regex.Match(_tmall_displayHtml, @"(?<=""spuId"":"")\d+(?="")").Value;
            //    var sellerId = Regex.Match(_tmall_displayHtml, @"(?<=""sellerId"":)\d+(?=,)").Value;

            //    _tmall_listTagCloudsHtml = GetMainWebContent($"{_tmall_listTagCloudsUrl}{_productId}", null, ref cookies,
            //        "");
            //    var tagClouds = Regex.Match(_tmall_listTagCloudsHtml, "(?<=\"tagClouds\":).*(?=})").Value;
            //    JArray jArray = JArray.Parse(tagClouds);
            //    var dic = jArray.ToDictionary(jToken => jToken["tag"].ToString(), jToken => jToken["count"].ToString());


            //    _tmall_listDetailRateHtml =
            //        GetMainWebContent($"{_tmall_listDetailRateUrl}itemId={_productId}&spuId={spuId}&sellerId={sellerId}",
            //            null, ref cookies, "");

            //    var PicNum = Regex.Match(_tmall_listDetailRateHtml, @"(?<=""picNum"":)\d+(?=,)").Value;
            //    var Userd = Regex.Match(_tmall_listDetailRateHtml, @"(?<=""used"":)\d+").Value;

            //    _tmall_list_dsr_infoHtml =
            //        GetMainWebContent($"{_tmall_list_dsr_infoUrl}itemId={_productId}&spuId={spuId}&sellerId={sellerId}",
            //            null, ref cookies, "");
            //    var GradeAvg = Regex.Match(_tmall_list_dsr_infoHtml, "(?<=\"gradeAvg\":).*?(?=,)").Value;
            //    var RateTotal = Regex.Match(_tmall_list_dsr_infoHtml, "(?<=\"rateTotal\":).*?(?=,)").Value;


            //    jObjectSellPoint.Add(new JProperty("ItemTypeName", ItemTypeName));
            //    jObjectSellPoint.Add(new JProperty("GradeAvg", GradeAvg));
            //    jObjectSellPoint.Add(new JProperty("RateTotal", RateTotal));
            //    jObjectSellPoint.Add(new JProperty("Userd", Userd));
            //    jObjectSellPoint.Add(new JProperty("PicNum", PicNum));
            //    foreach (var keyValue in dic)
            //    {
            //        jObjectSellPoint.Add(new JProperty(keyValue.Key, keyValue.Value));
            //    }

            //    UserId = Regex.Match(HtmlSource, @"(?<=userId=)\d+(?="")").Value;

            //}
            //else
            //{
            //    ItemTypeName = "taobao";
            //    _taobao_displayHtml = GetMainWebContent($"{_taobao_displayUrl}{_productId}", null, ref cookies, "");

            //    //CollectionNumber
            //    var getUrl = Regex.Match(_taobao_displayHtml, @"(?<=counterApi[\s]*:[\s]*').*?(?=')").Value;
            //    if (!getUrl.Equals(stringEmpty))
            //    {
            //        var callback = $"jsonp{Random.Next(100, 999)}";
            //        var dateTime = new DateTime();
            //        var start = new DateTime(1970, 1, 1, 0, 0, 0, dateTime.Kind);
            //        var t = Convert.ToInt64((DateTime.Now - start).TotalSeconds);
            //        var _ksTS = $"{t}_{Random.Next(100, 999)}";

            //        //_taobao_counter3Url = $"https:{getUrl}&callback={callback}&_ksTS={_ksTS}";
            //        //_taobao_counter3Html = GetMainWebContent(_taobao_counter3Url, null, ref cookies, "");
            //        CollectionNumber = Regex.Match(_taobao_counter3Html, $@"(?<=ICCP_1_{_productId}"":)\d+").Value;

            //        //下面这个可以单独判断 不过讲道理 前面的找不到 这里也找不到了(谁和你讲道理)
            //        _taobao_detailCommonUrl = $"https:{Regex.Match(_taobao_displayHtml, "(?<=data-commonApi = \").*?(?=\")").Value.Replace("&amp;", "&")}";
            //        _taobao_detailCommonHtml = GetMainWebContent(_taobao_detailCommonUrl, null, ref cookies, "");
            //        var Correspond = Regex.Match(_taobao_detailCommonHtml, "(?<=\"correspond\":\").*?(?=\")").Value;
            //        var Total = Regex.Match(_taobao_detailCommonHtml, @"(?<=""totalFull"":)\d+").Value;
            //        var GoodFull = Regex.Match(_taobao_detailCommonHtml, @"(?<=""goodFull"":)\d+").Value;
            //        var Additional = Regex.Match(_taobao_detailCommonHtml, @"(?<=""additional"":)\d+").Value;
            //        var Normal = Regex.Match(_taobao_detailCommonHtml, @"(?<=""normal"":)\d+").Value;
            //        var Pic = Regex.Match(_taobao_detailCommonHtml, @"(?<=""pic"":)\d+").Value;
            //        var Bad = Regex.Match(_taobao_detailCommonHtml, @"(?<=""bad"":)\d+").Value;

            //        jObjectSellPoint.Add(new JProperty("ItemTypeName", ItemTypeName));
            //        jObjectSellPoint.Add(new JProperty("Total", Total));
            //        jObjectSellPoint.Add(new JProperty("GoodFull", GoodFull));
            //        jObjectSellPoint.Add(new JProperty("Normal", Normal));
            //        jObjectSellPoint.Add(new JProperty("Bad", Bad));
            //        jObjectSellPoint.Add(new JProperty("Additional", Additional));
            //        jObjectSellPoint.Add(new JProperty("Pic", Pic));

            //    }
            //    else
            //    {
            //        CollectionNumber = stringEmpty;
            //    }



            //    UserId = Regex.Match(HtmlSource, @"(?<=""userNumId"":"")\d+(?="")").Value;


            //}

            //tmall
            if (ItemTypeName.ToLower().Equals("tmall"))
            {
                var sellerId = Regex.Match(HtmlSource, @"(?<=""sellerId"":"")\d+(?="")").Value;
                _tmall_list_dsr_infoHtml =
                    GetMainWebContent($"{_tmall_list_dsr_infoUrl}itemId={_productId}&sellerId={sellerId}",
                                      null, ref cookies, "");
                var GradeAvg  = Regex.Match(_tmall_list_dsr_infoHtml, "(?<=\"gradeAvg\":).*?(?=,)").Value;
                var RateTotal = Regex.Match(_tmall_list_dsr_infoHtml, "(?<=\"rateTotal\":).*?(?=,)").Value;


                jObjectSellPoint.Add(new JProperty("ItemTypeName", ItemTypeName));
                jObjectSellPoint.Add(new JProperty("GradeAvg", GradeAvg));
                jObjectSellPoint.Add(new JProperty("RateTotal", RateTotal));
            }
            //taobao
            else
            {
                ItemTypeName = "taobao";
                //@"(?<=sellerId=)\d+"
                var userNumId = Regex.Match(HtmlSource, @"(?<=""SELLER_ID"":"")\d+(?="")").Value;
                _taobao_detailCommonUrl  = $"https://rate.taobao.com/detailCommon.htm?userNumId={userNumId}&auctionNumId={_productId}";
                _taobao_detailCommonHtml = GetMainWebContent(_taobao_detailCommonUrl, null, ref cookies, "");
                var Correspond = Regex.Match(_taobao_detailCommonHtml, "(?<=\"correspond\":\").*?(?=\")").Value;
                var Total      = Regex.Match(_taobao_detailCommonHtml, @"(?<=""totalFull"":)\d+").Value;
                var GoodFull   = Regex.Match(_taobao_detailCommonHtml, @"(?<=""goodFull"":)\d+").Value;
                var Additional = Regex.Match(_taobao_detailCommonHtml, @"(?<=""additional"":)\d+").Value;
                var Normal     = Regex.Match(_taobao_detailCommonHtml, @"(?<=""normal"":)\d+").Value;
                var Pic        = Regex.Match(_taobao_detailCommonHtml, @"(?<=""pic"":)\d+").Value;
                var Bad        = Regex.Match(_taobao_detailCommonHtml, @"(?<=""bad"":)\d+").Value;

                jObjectSellPoint.Add(new JProperty("ItemTypeName", ItemTypeName));
                jObjectSellPoint.Add(new JProperty("Total", Total));
                jObjectSellPoint.Add(new JProperty("GoodFull", GoodFull));
                jObjectSellPoint.Add(new JProperty("Normal", Normal));
                jObjectSellPoint.Add(new JProperty("Bad", Bad));
                jObjectSellPoint.Add(new JProperty("Additional", Additional));
                jObjectSellPoint.Add(new JProperty("Pic", Pic));

                UserId = Regex.Match(HtmlSource, @"(?<=""userNumId"":"")\d+(?="")").Value;
            }


            jObjectSellPoint.Add("Starts", Starts);



            //var ProductId = Regex.Match(HtmlSource, @"(?<=""itemId"":"")\d+(?="")").Value;
            var ProductName      = Regex.Match(HtmlSource, "(?<=,\"title\":\").*?(?=\")").Value;
            var ProductStateText = Regex.Match(HtmlSource, "(?<=\"ret\":).*?(?=,)").Value;
            var ProductImageUrl  = Regex.Match(HtmlSource, @"(?<=""picsPath"":\["").*?(?="")").Value;

            if (ProductImageUrl.Equals(string.Empty))
            {
                ProductImageUrl = Regex.Match(HtmlSource, "(?<=\"imgUrl\":\").*?(?=\")").Value;
            }


            //活动数据
            if (!string.IsNullOrEmpty(ProductStateText) && !ProductStateText.Equals("[\"ERRCODE_QUERY_DETAIL_FAIL::宝贝不存在\"]"))
            {
                var typeValue = JObject.Parse(HtmlSource)["data"]["apiStack"];
                //里面是个array[1]
                var typeValueString = JArray.Parse(typeValue.ToString())[0]["value"].ToString();
                typeValueString = Regex.Match(typeValueString, @"(?<=""priceUnits"":\[).*?(?=\])").Value;
                var names = Regex.Matches(typeValueString, "(?<=\"name\":\").*?(?=\")");

                //打折秒杀 限时打折
                var DaZheMiaoSha = 0;
                //淘金币 淘金币价 淘金币
                var TaoJinBi = 0;
                //聚划算
                var JuHuaSuan = 0;
                //天天特价
                var TianTianTeJia = 0;

                if (ProductName.Contains("天天特价"))
                {
                    TianTianTeJia = 1;
                }


                foreach (Match name in names)
                {
                    var value = name.Value;
                    if (value.Equals("限时打折"))
                    {
                        DaZheMiaoSha = 1;
                    }
                    else if (value.Equals("聚划算"))
                    {
                        JuHuaSuan = 1;
                    }
                    else if (value.Equals("天天特价"))
                    {
                        TianTianTeJia = 1;
                    }
                    else if (value.Contains("淘金币"))
                    {
                        TaoJinBi = 1;
                    }
                }
                jObjectSellPoint.Add("DaZheMiaoSha", DaZheMiaoSha);
                jObjectSellPoint.Add("TaoJinBi", TaoJinBi);
                jObjectSellPoint.Add("JuHuaSuan", JuHuaSuan);
                jObjectSellPoint.Add("TianTianTeJia", TianTianTeJia);
            }


            var MianYunFei = 0;

            if (Regex.Match(HtmlSource, @"(?<=\\""subInfos\\"":).*?(?=})").Value.Contains("免运费"))
            {
                MianYunFei = 1;
            }
            jObjectSellPoint.Add("MianYunFei", MianYunFei);

            SellPoint = jObjectSellPoint.ToString();



            var ProductLocation      = Regex.Match(HtmlSource, "(?<=\"location\":\").*?(?=\")").Value;
            var ShopId               = Regex.Match(HtmlSource, @"(?<=""shopId"":"")\d+(?="")").Value;
            var RangePriceMatches    = Regex.Matches(HtmlSource, @"(?<=\\""rangePrice\\"":\\"").*?(?=\\"")");
            var PromotionPriceString = stringEmpty;
            var ReservePriceString   = stringEmpty;

            if (RangePriceMatches.Count >= 2)
            {
                PromotionPriceString = RangePriceMatches[0].Value;
                ReservePriceString   = RangePriceMatches[1].Value;
            }

            double?PromotionPrice;
            double?ReservePrice;
            double?ProductPrice;
            double?ProductPriceMax;

            if (PromotionPriceString.Contains("-"))
            {
                ProductPrice    = StringToDouble(Regex.Match(PromotionPriceString, ".*(?=-)").Value);
                ProductPriceMax = StringToDouble(Regex.Match(PromotionPriceString, "(?<=-).*").Value);
                PromotionPrice  = ProductPrice;
            }
            else
            {
                ProductPrice = ProductPriceMax = PromotionPrice = StringToDouble(PromotionPriceString);
            }

            int?ProductQuantity = StringToInt(Regex.Match(HtmlSource, @"(?<=\\""quantity\\"":\\"")\d+(?=\\"")").Value);

            ReservePrice = StringToDouble(ReservePriceString.Contains("-") ? Regex.Match(ReservePriceString, ".*(?=-)").Value : ReservePriceString);


            int?SellCountMonthly  = StringToInt(Regex.Match(HtmlSource, @"(?<=\\""totalSoldQuantity\\"":\\"")\d+(?=\\"")").Value);
            int?TotalCommentCount = StringToInt(Regex.Match(HtmlSource, @"(?<=""rateCounts"":"")\d+(?="")").Value);

            var      PromotionType      = stringEmpty;
            DateTime?PromotionStartTime = null;
            DateTime?PromotionEndTime   = null;
            var      CategoryId         = Regex.Match(HtmlSource, @"(?<=""categoryId"":"")\d+(?="")").Value;
            var      RootCatId          = stringEmpty;
            var      BrandId            = Regex.Match(HtmlSource, @"(?<=""brandId"":"")\d+(?="")").Value;
            var      Brand = stringEmpty;
            //var UserId = Regex.Match(HtmlSource, @"(?<=userId=)\d+(?="")").Value;
            var SpuId         = stringEmpty;
            var EncryptUserId = stringEmpty;
            var BossNickName  = stringEmpty;
            var FanCount      = Regex.Match(HtmlSource, @"(?<=""fansCount"":"")\d+(?="")").Value;
            var CreditLevel   = Regex.Match(HtmlSource, @"(?<=""creditLevel"":"")\d+(?="")").Value;
            //var ProductDescription = HtmlSource;
            var ProductDescription = stringEmpty;

            jObjectSellPoint.Add(new JProperty("CollectionNumber", CollectionNumber));
            jObjectSellPoint.Add(new JProperty("ShopId", ShopId));
            jObjectSellPoint.Add(new JProperty("FanCount", FanCount));
            jObjectSellPoint.Add(new JProperty("CreditLevel", CreditLevel));

            SellPoint = jObjectSellPoint.ToString();



            if (ProductPrice == null)
            {
                var priceAll = Regex.Matches(HtmlSource, @"(?<=\\""price\\"":\\"").*?(?=\\"")");
                foreach (Match price in priceAll)
                {
                    var    value = price.Value;
                    double?valuePrice;
                    valuePrice = StringToDouble(value.Contains("-") ? Regex.Match(PromotionPriceString, ".*(?=-)").Value : value);

                    if (valuePrice != null)
                    {
                        ProductPrice = ProductPriceMax = ReservePrice = PromotionPrice = valuePrice;
                        break;
                    }
                }

                //double? priceAll = StringToDouble(Regex.Match(HtmlSource, @"(?<=\\""price\\"":\\"").*?(?=\\"")").Value);

                //ProductPrice = ProductPriceMax = ReservePrice = PromotionPrice = priceAll;
            }


            //ProductId itemId
            //ProductName title
            //ProductStateText ret
            //SellPoint evaluateInfo
            //ProductImageUrl imgUrl
            //ProductDescription
            //ProductLocation location
            //ShopId shopId
            //ProductPrice rangePrice
            //ProductPriceMax rangePrice
            //ProductQuantity quantity
            //ReservePrice 价格中的小值
            //SellCountMonthly totalSoldQuantity
            //TotalCommentCount rateCounts
            //PromotionPrice 同productPrice
            //PromotionType 空
            //PromotionStartTime 空
            //PromotionEndTime 空
            //CategoryId categoryId
            //RootCatId 空
            //BrandId brandId
            //Brand 空
            //UserId userId
            //SpuId 空
            //EncryptUserId 空
            //BossNickName 空
            //FanCount fansCount
            //CreditLevel creditLevel
            //Content json内容

            var resultList = new List <IResut>();

            IResut resut = new Resut()
            {
                { "ProductId", _productId },
                { "ProductName", ProductName },
                { "ProductStateText", ProductStateText },
                { "SellPoint", SellPoint },
                { "ProductImageUrl", ProductImageUrl },
                { "ProductDescription", ProductDescription },
                { "ProductLocation", ProductLocation },
                { "ShopId", _shopId },
                { "ProductPrice", ProductPrice },
                { "ProductPriceMax", ProductPriceMax },
                { "ProductQuantity", ProductQuantity },
                { "ReservePrice", ReservePrice },
                { "SellCountMonthly", SellCountMonthly },
                { "TotalCommentCount", TotalCommentCount },
                { "PromotionPrice", PromotionPrice },
                { "PromotionType", PromotionType },
                { "PromotionStartTime", PromotionStartTime },
                { "PromotionEndTime", PromotionEndTime },
                { "CategoryId", CategoryId },
                { "RootCatId", RootCatId },
                { "BrandId", BrandId },
                { "Brand", Brand },
                { "UserId", UserId },
                { "SpuId", SpuId },
                { "EncryptUserId", EncryptUserId },
                { "BossNickName", BossNickName },
                //{"ShopId" ,ShopId},
                //{"FanCount" ,FanCount},
                //{"CreditLevel" ,CreditLevel},
                //{"Content" ,Content}
            };

            resultList.Add(resut);

            return(resultList.ToArray());
        }