/// <summary>
/// Builds the single result record for the judgement document held in <c>HtmlSource</c>.
/// </summary>
/// <returns>A one-element array containing the document's id, URL, title, publication date and content.</returns>
protected override IResut[] ParseCurrentItems()
{
    var title = GetJudgementsTitle(HtmlSource);
    var publishedOn = GetJudgementsPubDate(HtmlSource);
    var plainContent = GetJudgementsContent(HtmlSource);
    var htmlContent = GetJudgementsContentHtml(HtmlSource);

    var record = new Resut();

    // The document id is whatever follows the first '=' in the current URL.
    record["DocId"] = Regex.Match(CurrentUrl, @"(?<==).*").Value;
    record["Url"] = CurrentUrl;
    record["Title"] = title;
    record["PubDate"] = publishedOn;
    record["Content"] = plainContent;
    record["ContentHtml"] = htmlContent;

    return new IResut[] { record };
}
/// <summary>
/// Converts every dictionary returned by <c>GetAllResultListDic</c> into a result record,
/// stamping each one with the same check time.
/// </summary>
/// <returns>One record per source dictionary.</returns>
protected override IResut[] ParseCurrentItems()
{
    // Fields copied verbatim from each source dictionary, in output order.
    string[] copiedKeys = { "UserName", "CaseTime", "CaseId", "Id", "Type", "State" };

    var rows = GetAllResultListDic();
    var checkedAt = System.DateTime.Now;
    var records = new List<IResut>();

    foreach (var row in rows)
    {
        IResut record = new Resut();
        record["Name"] = _name;
        record["Identifier"] = _identifier;
        foreach (var key in copiedKeys)
        {
            record[key] = row[key];
        }

        record["View"] = string.Empty;
        record["CheckTime"] = checkedAt;
        records.Add(record);
    }

    return records.ToArray();
}
/// <summary>
/// Parses the page configuration JSON extracted from <c>HtmlSource</c> and turns every
/// information dictionary derived from it into a result record.
/// </summary>
/// <returns>One record per information dictionary.</returns>
protected override IResut[] ParseCurrentItems()
{
    // Output fields, in the order they are added to each record.
    string[] fieldNames =
    {
        "searchKeyword", "productId", "productName", "productPrice",
        "PaymentAcount", "productType", "shopName", "userMemberId",
        "location", "productPosition", "positionType", "pageIndex"
    };

    var pageConfig = JObject.Parse(GetPageConfig(HtmlSource));
    var rows = GetInfoDicList(pageConfig);
    var records = new List<IResut>();

    foreach (var row in rows)
    {
        var record = new Resut();
        foreach (var fieldName in fieldNames)
        {
            record.Add(fieldName, row[fieldName]);
        }

        records.Add(record);
    }

    return records.ToArray();
}
/// <summary>
/// Extracts the parallel feedback lists from <c>HtmlSource</c> and zips them, position by
/// position, into one record per feedback entry.
/// </summary>
/// <returns>One record per entry of the name/VIP-level list.</returns>
protected override IResut[] ParseCurrentItems()
{
    var buyers = GetNameVipLevelList(HtmlSource);
    var productNames = GetProductNameList(HtmlSource);
    var totalPrices = GetTotalPriceList(HtmlSource);
    var feedbackDates = GetFeedBackDateList(HtmlSource);
    var feedbackTexts = GetFeedBackContentList(HtmlSource);
    var stars = GetStarMList(HtmlSource);
    var productIds = GetProductId(HtmlSource);

    var records = new List<IResut>();

    // The name/VIP-level list drives the iteration; the other lists are indexed with the same position.
    var total = buyers.Count;
    for (var position = 0; position < total; position++)
    {
        var record = new Resut();
        record["shopId"] = _shopId;
        record["nameVipLevel"] = buyers[position];
        record["productName"] = productNames[position];
        record["totalPrice"] = totalPrices[position];
        record["feedBackDate"] = feedbackDates[position];
        record["feedBackContent"] = feedbackTexts[position];
        record["starM"] = stars[position];
        record["productId"] = productIds[position];
        records.Add(record);
    }

    return records.ToArray();
}
/// <summary>
/// Turns every item returned by <c>GetItemList</c> into a result record, using the
/// dictionary produced by <c>GetInformationByJToken</c> plus the seller nick held in <c>_nick</c>.
/// </summary>
/// <returns>One record per item.</returns>
protected override IResut[] ParseCurrentItems()
{
    // Fields copied from the item dictionary before and after the "nick" field, in output order.
    string[] leadingKeys = { "imageUrl", "title", "price", "curPrice", "vipPrice", "tradeNum" };
    string[] trailingKeys = { "sellerId", "itemId", "loc", "storeLink", "href", "commend", "commendHref" };

    var records = new List<IResut>();
    foreach (var entry in GetItemList())
    {
        var info = GetInformationByJToken(entry);
        var record = new Resut();

        foreach (var key in leadingKeys)
        {
            record.Add(key, info[key]);
        }

        record.Add("nick", _nick);

        foreach (var key in trailingKeys)
        {
            record.Add(key, info[key]);
        }

        records.Add(record);
    }

    return records.ToArray();
}
/// <summary>
/// Parses the document ids on the given page, but only while the page date returned by
/// <c>GetOneDate</c> is later than <c>_date</c>; otherwise sets <c>_done</c> to 1.
/// </summary>
/// <param name="htmlSource">HTML of the page to parse.</param>
/// <returns>One record per document id, or an empty array when the date check fails.</returns>
protected override IResut[] ParseCurrentItems(string htmlSource)
{
    var records = new List<IResut>();
    var ids = GetDocId(htmlSource);
    var pageDate = GetOneDate(htmlSource);
    Console.WriteLine(pageDate);

    // _date is not earlier than the page date: flag completion and emit nothing.
    if (DateTime.Compare(_date, DateTime.Parse(pageDate)) >= 0)
    {
        _done = 1;
        return records.ToArray();
    }

    foreach (var id in ids)
    {
        IResut record = new Resut();
        record["DocId"] = id;
        record["CaseType"] = base._pageInfo.CaseType;
        record["ReasonLevel"] = base._pageInfo.ReasonLevel;
        record["ReasonKey"] = base._pageInfo.ReasonKey;
        record["CourtLevel"] = base._pageInfo.CourtLevel;
        record["CourtKey"] = base._pageInfo.CourtKey;
        record["Year"] = base._pageInfo.Year;
        records.Add(record);
    }

    return records.ToArray();
}
/// <summary>
/// Parses the product entries (<c>//ul/li</c>) out of the given HTML.
/// </summary>
/// <param name="htmlSource">The HTML source.</param>
/// <param name="listOnly">
/// When <c>true</c>, only the list is parsed and <c>UpdateResultsPrices</c> is not called.
/// </param>
/// <returns>One record per list entry whose link contains a numeric SKU followed by <c>.html</c>.</returns>
private IResut[] ParseCurrentItems(string htmlSource, bool listOnly = false)
{
    const string SkuIdKey = "ProductSku";

    var products = new List<IResut>();
    var root = HtmlDocumentHelper.CreateNavigator(htmlSource);

    foreach (XPathNavigator node in root.Select(@"//ul/li"))
    {
        var name = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jDesc']/a/text()");
        var link = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jDesc']//@href");

        // Fall back to the jTitle block when the jDesc block carries no title text.
        if (string.IsNullOrEmpty(name))
        {
            name = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jTitle']/a/text()");
            link = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jTitle']//@href");
        }

        var image = HtmlDocumentHelper.GetNodeValue(node, ".//div[@class='jPic']//@original");

        // The SKU is the run of digits between the last '/' and ".html" in the link.
        var skuMatch = Regex.Match(link, @"(?<=/)\d+(?=\.html)");
        if (!skuMatch.Success || string.IsNullOrEmpty(skuMatch.Value))
        {
            continue;
        }

        // Review data attached to this entry.
        var reviews = ParseComments(node);

        IResut product = new Resut();
        product[SkuIdKey] = skuMatch.Value;
        product["ShopId"] = ShopUrl;
        product["ProductName"] = name;
        product["ProductUrl"] = link;
        product["ProductImage"] = image;
        product["ProductComments"] = reviews;
        products.Add(product);
    }

    if (listOnly)
    {
        return products.ToArray();
    }

    this.UpdateResultsPrices(products, SkuIdKey);
    return products.ToArray();
}
/// <summary>
/// Parses all product entries (<c>//ul/li</c>) on the current page.
/// </summary>
/// <param name="htmlSource">HTML of the page to parse.</param>
/// <param name="listOnly">
/// When <c>true</c>, only the list is parsed and <c>UpdateResultsPrices</c> is not called.
/// </param>
/// <returns>One record per list entry whose link contains a numeric SKU followed by <c>.html</c>.</returns>
private IResut[] ParseCurrentItems(string htmlSource, bool listOnly = false)
{
    const string SkuIdKey = "ProductSku";
    var resultList = new List<IResut>();

    // XPath navigator over the whole page.
    var navigator = HtmlDocumentHelper.CreateNavigator(htmlSource);
    var iterator = navigator.Select(@"//ul/li");
    foreach (XPathNavigator item in iterator)
    {
        var title = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jDesc']/a/text()");
        var href = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jDesc']//@href");

        // Fall back to the jTitle block when the jDesc block carries no title text.
        if (string.IsNullOrEmpty(title))
        {
            title = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jTitle']/a/text()");
            href = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jTitle']//@href");
        }

        var imgSrc = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jPic']//@original");
        if (string.IsNullOrEmpty(imgSrc))
        {
            // Look up the fallback @src on the current item, not on the whole page:
            // querying the page would hand every item the first image of the document.
            imgSrc = HtmlDocumentHelper.GetNodeValue(item, ".//div[@class='jPic']//@src");
        }

        // The dot is escaped so only a literal ".html" suffix qualifies; a missing link is
        // treated as "no SKU" instead of throwing inside Regex.Match.
        var skuMatchResults = Regex.Match(href ?? string.Empty, @"(?<=/)\d+(?=\.html)");
        var sku = skuMatchResults.Success ? skuMatchResults.Value : string.Empty;
        if (string.IsNullOrEmpty(sku))
        {
            continue;
        }

        // Review data attached to this entry.
        var comments = ParseComments(item);

        IResut resut = new Resut();
        resut[SkuIdKey] = sku;
        resut["ShopUrl"] = this.ShopUrl;
        resut["ProductName"] = title;
        resut["ProductUrl"] = href;
        resut["ProductImage"] = imgSrc;
        resut["ProductComments"] = comments;
        resultList.Add(resut);
    }

    if (!listOnly)
    {
        this.UpdateResultsPrices(resultList, SkuIdKey);
    }

    return resultList.ToArray();
}
/// <summary>
/// Starts parsing the given web content: builds a navigator over it and runs
/// <c>ParseShopScoreResult</c> against a fresh result object.
/// </summary>
/// <param name="webContent">Content of the web page.</param>
/// <returns>Never returns normally in the current implementation.</returns>
/// <exception cref="NotImplementedException">Always thrown once the shop score has been parsed.</exception>
public IResut Parse(string webContent)
{
    IResut shopResult = new Resut();
    var documentNavigator = HtmlDocumentHelper.CreateNavigator(webContent);

    ParseShopScoreResult(shopResult, documentNavigator);

    // NOTE(review): the populated result is discarded and the method always throws;
    // this looks like an unfinished implementation - confirm before relying on it.
    throw new NotImplementedException();
}
/// <summary>
/// Collects deals from two places in <c>HtmlSource</c>: the statically rendered deal box
/// (scraped with regular expressions) and the JSON assigned to <c>window.setDeals</c>.
/// </summary>
/// <returns>Records for the rendered deals followed by records for the JSON deals.</returns>
/// <exception cref="Exception">Thrown when the four regex result sets differ in size.</exception>
protected override IResut[] ParseCurrentItems()
{
    var deals = new List<IResut>();

    // Part 1: deals rendered directly in the HTML.
    var dealBoxHtml = Regex.Match(HtmlSource, @"<div id=""normal_dealbox""[\s\S]*?(?=<div class= ""page_div clear area page_bottom"">)").Value;
    var names = Regex.Matches(dealBoxHtml, "(?<=title=\").*?(?=\")");
    var links = Regex.Matches(dealBoxHtml, @"(?<=<h3>\s*<a target=""_blank"" href="")[\S]*?(?="")");
    var prices = Regex.Matches(dealBoxHtml, "(?<=<em><b>¥</b>).*?(?=</em>)");
    var listPrices = Regex.Matches(dealBoxHtml, "(?<=<del class=\"list_price\">¥).*?(?=</del>)");

    // All four collections are read by the same index, so they must line up.
    var total = names.Count;
    var aligned = total == links.Count && total == prices.Count && total == listPrices.Count;
    if (!aligned)
    {
        throw new Exception("开始的条数不匹配");
    }

    for (var position = 0; position < total; position++)
    {
        var rendered = new Resut();
        rendered["ProductName"] = names[position].Value;
        rendered["Url"] = links[position].Value;
        // Promotional price.
        rendered["Price"] = prices[position].Value;
        // Maximum (list) price.
        rendered["MaxPrice"] = listPrices[position].Value;
        deals.Add(rendered);
    }

    // Part 2: deals embedded as JSON in the page script.
    var dealsJson = Regex.Match(HtmlSource, "(?<=window.setDeals = ){.*}(?=;)").Value;
    var dealsRoot = JObject.Parse(dealsJson);
    var dealTokens = JArray.Parse(dealsRoot["deals"].ToString());
    foreach (var dealToken in dealTokens)
    {
        var urlName = dealToken["url_name"].ToString();
        var id = dealToken["id"].ToString();
        var title = dealToken["title"].ToString();
        var promoPrice = dealToken["wuxian_price"].ToString();
        var listPrice = dealToken["list_price"].ToString();

        var embedded = new Resut();
        embedded["ProductName"] = title;
        embedded["Url"] = $"//out.zhe800.com/ju/deal/{urlName}_{id}";
        // Promotional price.
        embedded["Price"] = promoPrice;
        // Maximum (list) price.
        embedded["MaxPrice"] = listPrice;
        deals.Add(embedded);
    }

    return deals.ToArray();
}
/// <summary>
/// Copies every key/value pair returned by <c>GetLicenceDic</c> for <c>HtmlSource</c>
/// into one record and appends the current URL.
/// </summary>
/// <returns>A one-element array containing that record.</returns>
protected override IResut[] ParseCurrentItems()
{
    var entries = GetLicenceDic(HtmlSource);

    var record = new Resut();
    foreach (var entry in entries)
    {
        record.Add(entry.Key, entry.Value);
    }

    record.Add("Url", CurrentUrl);

    IResut[] results = { record };
    return results;
}
/// <summary>
/// Turns every item returned by <c>GetItemList</c> for the current URL into a record that
/// captures the search keyword, product id, position type, page index and the item's
/// 1-based position within the list.
/// </summary>
/// <returns>One record per item, in list order.</returns>
protected override IResut[] ParseCurrentItems()
{
    var resultList = new List<IResut>();
    var itemList = GetItemList(CurrentUrl);
    var searchKeyword = _q;

    // 1-based position of the item within the current page.
    var index = 1;
    foreach (var item in itemList)
    {
        var dic = GetInformationByJToken(item);
        IResut resut = new Resut()
        {
            { "SearchKeyword", searchKeyword },
            { "ProductId", dic["ProductId"] },
            { "PositionType", dic["PositionType"] },
            { "PageIndex", CurrentPage },
            { "ProductPosition", index.ToString() }
        };
        resultList.Add(resut);
        index++;
    }

    return resultList.ToArray();
}
/// <summary>
/// Converts a <c>JObject</c> into a result record with one string entry per property.
/// </summary>
/// <param name="jObject">The JSON object whose properties are copied.</param>
/// <returns>
/// A record keyed by property name. Scalar values are converted to strings (null becomes
/// an empty string); nested objects and arrays are stored as their JSON text.
/// </returns>
private IResut ConvertToResult(JObject jObject)
{
    IResut resut = new Resut();
    foreach (var property in jObject.Properties())
    {
        var token = property.Value;
        string text = null;
        if (token is JValue)
        {
            text = token.Value<string>();
        }
        else if (token != null)
        {
            // Value<string>() throws InvalidCastException for containers (objects/arrays),
            // so nested values are kept as their JSON representation instead.
            text = token.ToString();
        }

        resut[property.Name] = text ?? string.Empty;
    }

    return resut;
}
/// <summary>
/// Parses the page configuration JSON extracted from <c>HtmlSource</c> and turns every
/// shop dictionary derived from it into a result record.
/// </summary>
/// <returns>One record per shop dictionary.</returns>
protected override IResut[] ParseCurrentItems()
{
    // Output fields, in the order they are written to each record.
    string[] fieldNames =
    {
        "ShopId", "ShopName", "ShopUrl", "ShopLogoUrl", "MarketName", "ShopRank",
        "Location", "InCountry", "InProvince", "InCity", "BossNickName", "EncryptedUserId",
        "MainBiz", "MainIndustry", "SaleCount", "ProductCount",
        "GoodCommentCount", "GoodCommentRate",
        "Comment_MatchDescrip", "Comment_MatchDescripRate",
        "Comment_ServiceStatue", "Comment_ServiceStatueRate",
        "Comment_ShipSpeed", "Comment_ShipSpeedRate",
        "Attribute_BuyProtect", "Attribute_GlobalBuy", "Attribute_GoldenSale",
        "SearchKeyword"
    };

    var pageConfig = JObject.Parse(GetPageConfig(HtmlSource));
    var rows = GetInfoDicList(pageConfig);
    var records = new List<IResut>();

    foreach (var row in rows)
    {
        var record = new Resut();
        foreach (var fieldName in fieldNames)
        {
            record[fieldName] = row[fieldName];
        }

        records.Add(record);
    }

    return records.ToArray();
}
/// <summary>
/// Walks the announcement cells (<c>td.Font9</c>) of the list page in <c>HtmlSource</c>,
/// downloads each linked detail page and extracts its title, publisher, publish time and content.
/// </summary>
/// <remarks>
/// Iteration stops at the first cell without a link or date. It also stops, and clears
/// <c>_urlQueue</c>, at the first announcement older than <c>_gatherDays</c> days.
/// </remarks>
/// <returns>One record per announcement processed before the loop stopped; empty when the page has no cells.</returns>
protected override IResut[] ParseCurrentItems()
{
    List<IResut> resultList = new List<IResut>();
    HtmlNode htmlNode = HtmlAgilityPackHelper.GetDocumentNodeByHtml(HtmlSource);
    HtmlNodeCollection htmlNodeCollection = htmlNode.SelectNodes("//td[@class='Font9']");

    // SelectNodes returns null (not an empty collection) when nothing matches.
    if (htmlNodeCollection == null)
    {
        return resultList.ToArray();
    }

    foreach (HtmlNode node in htmlNodeCollection)
    {
        string url = node.SelectSingleNode("./a[@class='five']")?.Attributes["href"]?.Value;
        string dateTimeString = Regex.Match(node.InnerText, @"\d+-\d+-\d+").Value;
        if (string.IsNullOrEmpty(url) || string.IsNullOrEmpty(dateTimeString))
        {
            break;
        }

        url = $"http://www.ccgp-shandong.gov.cn{url}";
        DateTime dateTime = Convert.ToDateTime(dateTimeString);
        int days = (DateTime.Now - dateTime).Days;
        if (days > _gatherDays)
        {
            // Outside the gathering window: drop the remaining queued URLs and stop.
            _urlQueue.Clear();
            break;
        }

        string html = _httpHelper.GetHtmlByGet(url);
        HtmlNode htmlNode2 = HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);
        string title = htmlNode2.SelectSingleNode("//div[@align='center']")?.InnerText;
        string publisher = Regex.Match(html, "(?<=发布人[::]).*(?=</td>)").Value;
        string publishTime = Regex.Match(html, "(?<=发布时间[::]).*(?=</td>)").Value;
        publishTime = Convert.ToDateTime(publishTime).ToString(CultureInfo.CurrentCulture);

        // Like the title, the content table may be missing; store an empty string then
        // instead of failing the whole page with a NullReferenceException.
        string content = htmlNode2.SelectSingleNode("//table//tr[2]/td[2]/table")?.OuterHtml ?? string.Empty;

        Resut resut = new Resut()
        {
            ["url"] = url,
            ["title"] = title,
            ["content"] = content,
            ["publisher"] = publisher,
            ["publishTime"] = publishTime
        };
        resultList.Add(resut);
    }

    return resultList.ToArray();
}
/// <summary>
/// Builds a single record for the organisation identified by <c>_orgId</c>, combining the
/// outputs of <c>GetBasicInfo</c>, <c>GetManagerInfo</c> and <c>GetEquityHtml</c> with the keyword.
/// </summary>
/// <returns>A one-element array containing that record.</returns>
protected override IResut[] ParseCurrentItems()
{
    var basicInfo = GetBasicInfo(_orgId);
    var managerInfo = GetManagerInfo(_orgId);
    var equityHtml = GetEquityHtml(_orgId);

    var record = new Resut();
    record.Add("keyWord", _keyWord);
    record.Add("basic", basicInfo);
    record.Add("manage", managerInfo);
    record.Add("equity", equityHtml);

    return new IResut[] { record };
}
/// <summary>
/// Creates one record per document id found in the given page, tagging each record with the
/// query fields held in <c>_pageInfo</c>.
/// </summary>
/// <param name="htmlSource">HTML of the page to parse.</param>
/// <returns>One record per document id.</returns>
protected virtual IResut[] ParseCurrentItems(string htmlSource)
{
    var records = new List<IResut>();

    foreach (var id in GetDocId(htmlSource))
    {
        IResut record = new Resut();
        record["DocId"] = id;
        record["CaseType"] = _pageInfo.CaseType;
        record["ReasonLevel"] = _pageInfo.ReasonLevel;
        record["ReasonKey"] = _pageInfo.ReasonKey;
        record["CourtLevel"] = _pageInfo.CourtLevel;
        record["CourtKey"] = _pageInfo.CourtKey;
        record["Year"] = _pageInfo.Year;
        records.Add(record);
    }

    return records.ToArray();
}
/// <summary>
/// Turns every item returned by <c>GetItemList</c> for the current URL into an activity
/// record, using the dictionary produced by <c>GetInformationByJson</c> and tagging it with
/// the string form of <c>_acvivityEnum</c>.
/// </summary>
/// <returns>One record per item.</returns>
protected override IResut[] ParseCurrentItems()
{
    var resultList = new List<IResut>();
    var itemList = GetItemList(CurrentUrl);
    foreach (var item in itemList)
    {
        var dic = GetInformationByJson(item);
        IResut resut = new Resut()
        {
            { "itemId", dic["itemId"] },
            { "title", dic["title"] },
            { "reservePrice", dic["reservePrice"] },
            { "discountPrice", dic["discountPrice"] },
            { "discount", dic["discount"] },
            { "currentSellOut", dic["currentSellOut"] },
            { "quantity", dic["quantity"] },
            { "currentQuantity", dic["currentQuantity"] },
            { "activityStartTime", dic["activityStartTime"] },
            { "activityEndTime", dic["activityEndTime"] },
            { "shopId", dic["shopId"] },
            { "shopName", dic["shopName"] },
            { "type", _acvivityEnum.ToString() }
        };
        resultList.Add(resut);
    }

    return resultList.ToArray();
}
/// <summary>
/// Turns every item returned by <c>GetItemList</c> for the current URL into an activity
/// record, using the dictionary produced by <c>GetInformationByJson</c> and tagging it with
/// the string form of <c>_acvivityEnum</c>.
/// </summary>
/// <returns>One record per item.</returns>
protected override IResut[] ParseCurrentItems()
{
    var resultList = new List<IResut>();
    var itemList = GetItemList(CurrentUrl);
    foreach (var item in itemList)
    {
        var dic = GetInformationByJson(item);
        IResut resut = new Resut()
        {
            { "title", dic["title"] },
            { "description", dic["description"] },
            { "itemId", dic["itemId"] },
            { "ostimeText", dic["ostimeText"] },
            { "leftTime", dic["leftTime"] },
            { "totalStock", dic["totalStock"] },
            { "soldAmount", dic["soldAmount"] },
            { "itemUrl", dic["itemUrl"] },
            { "actPrice", dic["actPrice"] },
            { "origPrice", dic["origPrice"] },
            { "discount", dic["discount"] },
            { "type", _acvivityEnum.ToString() }
        };
        resultList.Add(resut);
    }

    return resultList.ToArray();
}
/// <summary>
/// For every document id found in the given list page, downloads the document's content
/// page and extracts its title, publication date and content.
/// </summary>
/// <param name="htmlSource">HTML of the list page.</param>
/// <returns>One record per document id, including the query fields held in <c>_pageInfo</c>.</returns>
protected IResut[] ParseCurrentItems(string htmlSource)
{
    var resultList = new List<IResut>();
    var docIds = GetDocId(htmlSource);
    foreach (var docId in docIds)
    {
        // The document body is served by CreateContentJS.aspx, keyed by DocID
        // (e.g. DocID=efea2774-b647-11e3-84e9-5cf3fc0c2c18).
        var url = $"http://wenshu.court.gov.cn/CreateContentJS/CreateContentJS.aspx?DocID={docId}";
        var htmlString = base.GetWebContent(url);

        var judgementsTitle = GetJudgementsTitle(htmlString);
        var judgementsPubDate = GetJudgementsPubDate(htmlString);
        var judgementContent = GetJudgementsContent(htmlString);

        IResut resut = new Resut();
        resut["DocId"] = docId;
        resut["Url"] = url;
        resut["Reason"] = _pageInfo.CaseType;
        resut["Court"] = _pageInfo.Area;
        resut["Year"] = _pageInfo.Year;
        resut["Title"] = judgementsTitle;
        resut["PubDate"] = judgementsPubDate;
        resut["Content"] = judgementContent;
        resultList.Add(resut);
    }

    return resultList.ToArray();
}
/// <summary>
/// Downloads the current URL and extracts one record per product list item
/// (<c>li.item</c> inside <c>div.ui-box-body</c>).
/// </summary>
/// <returns>One record per product item; empty when the page contains no items.</returns>
protected override IResut[] ParseCurrentItems()
{
    var resultList = new List<IResut>();
    var html = _httpHelper.GetHtmlByGet(CurrentUrl);
    var documentNode = HtmlAgilityPack.HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);
    var htmlNodeCollection = documentNode.SelectNodes("//div[@class='ui-box-body']//li[@class='item']");
    if (htmlNodeCollection == null)
    {
        return resultList.ToArray();
    }

    foreach (var htmlNode in htmlNodeCollection)
    {
        IResut resut = new Resut();

        // Resolve the detail link once; both the name and the URL come from it.
        var detailLink = htmlNode.SelectSingleNode(".//div[@class='detail']//a");
        var productName = HttpUtility.HtmlDecode(detailLink?.InnerText ?? string.Empty);

        // An anchor without an href yields an empty URL instead of a NullReferenceException.
        var productUrl = detailLink?.Attributes["href"]?.Value ?? string.Empty;

        // The product id is the number after the underscore in the "<digits>_<digits>" URL segment.
        var productId = Regex.Match(Regex.Match(productUrl, @"\d+_\d+").Value, @"(?<=_)\d+").Value;
        var price = Regex.Match(htmlNode.SelectSingleNode(".//b")?.InnerText ?? string.Empty, @"\d+\.?\d+").Value;
        var priceOld = Regex.Match(htmlNode.SelectSingleNode(".//del")?.InnerText ?? string.Empty, @"\d+\.?\d+").Value;
        var orderNum = Regex.Match(htmlNode.SelectSingleNode(".//div[@class='recent-order']")?.InnerText ?? string.Empty, @"\d+").Value;

        resut.Add("shopId", _shopId);
        resut.Add("productName", productName);
        resut.Add("productUrl", $"https:{productUrl}");
        resut.Add("productId", productId);
        resut.Add("price", FormatNumber(price));
        resut.Add("priceOld", FormatNumber(priceOld));
        resut.Add("orderNum", FormatNumber(orderNum));
        resultList.Add(resut);
    }

    return resultList.ToArray();
}
/// <summary>
/// Parses the shop page in <c>HtmlSource</c> into a single shop record.
/// </summary>
/// <remarks>
/// When the shop name indicates a normal shop, the shop is additionally looked up through the
/// shop search (first by shop name, then by shop keeper) and the entry matching the user id
/// supplies the rating fields. In every other case an error notice is recorded and
/// <c>_shopType</c> is reset to "0".
/// </remarks>
/// <returns>A one-element array holding the shop record.</returns>
protected override IResut[] ParseCurrentItems()
{
    var blank = string.Empty;

    // Defaults that remain in place when the shop cannot be resolved.
    var notice = blank;
    var ownerNick = blank;
    string positiveCount = null;
    var industry = blank;
    var describeScore = blank;
    var describeRate = blank;
    var serviceScore = blank;
    var serviceRate = blank;
    var shippingScore = blank;
    var shippingRate = blank;
    var deposit = blank;
    var rank = blank;
    var region = blank;
    var soldTotal = blank;
    var itemTotal = blank;
    var positiveRate = blank;
    var mainProducts = blank;
    var age = "0";
    var loaned = 0;
    var openedOn = DateTime.Parse("1990-01-01 00:00:00");

    var id = GetShopId(HtmlSource);
    var ownerId = GetUserId(HtmlSource);
    var name = GetShopName(HtmlSource);
    _shopType = GetShopType(HtmlSource);

    if (name.Equals(blank) || name.Equals("店铺浏览"))
    {
        notice = GetErrorNotice(HtmlSource);
        if (notice.Equals(blank))
        {
            notice = "不支持的店铺类型";
        }
    }
    else if (name.Equals("旺铺关闭页面"))
    {
        notice = "店铺不符合开通条件";
    }
    else if (name.Contains("页面找不到了"))
    {
        notice = "页面找不到了";
    }
    else
    {
        deposit = GetMarginCharge(HtmlSource);
        age = GetShopAge(HtmlSource, _shopType);
        openedOn = GetShopStartDate(HtmlSource, _shopType);

        // URL-encode the shop name so characters such as '#' survive inside the query string.
        var encodedName = System.Web.HttpUtility.UrlEncode(name);
        var searchUrl = $"https://shopsearch.taobao.com/search?app=shopsearch&q={encodedName}";
        var searchHtml = GetWebContent(searchUrl);

        // Pick the search entry that belongs to this user id.
        var shopToken = GetContentJsonStringByUserId(searchHtml, ownerId);
        if (shopToken == null)
        {
            // Second attempt: search by shop keeper instead of shop name.
            var keeper = GetShopKeeper(HtmlSource, _shopType);
            searchUrl = $"https://shopsearch.taobao.com/search?app=shopsearch&q={System.Web.HttpUtility.UrlEncode(keeper)}";
            searchHtml = GetWebContent(searchUrl);
            shopToken = GetContentJsonStringByUserId(searchHtml, ownerId);
        }

        if (shopToken == null)
        {
            notice = "店铺存在但搜索不到";
        }
        else
        {
            var shopJson = shopToken.ToString();
            ownerNick = shopToken["nick"]?.ToString();
            positiveCount = GetGoodCommentCount(shopJson);
            industry = GetMainIndustry(shopJson);
            describeScore = GetComment_MatchDescrip(shopJson);
            describeRate = GetComment_MatchDescripRate(shopJson);
            serviceScore = GetComment_ServiceStatue(shopJson);
            serviceRate = GetComment_ServiceStatueRate(shopJson);
            shippingScore = GetComment_ShipSpeed(shopJson);
            shippingRate = GetComment_ShipSpeedRate(shopJson);
            rank = shopToken["shopIcon"]?["iconClass"]?.ToString();
            region = shopToken["provcity"]?.ToString();
            soldTotal = shopToken["totalsold"]?.ToString();
            itemTotal = shopToken["procnt"]?.ToString();
            positiveRate = shopToken["goodratePercent"]?.ToString().Replace("%", "");
            mainProducts = shopToken["mainAuction"]?.ToString();
        }
    }

    // Any error notice resets the shop type.
    if (!string.IsNullOrEmpty(notice))
    {
        _shopType = "0";
    }

    Resut record = new Resut();
    // Shop id.
    record["ShopId"] = id;
    // Shop name (stored twice).
    record["ShopName"] = name;
    record["ShopName2"] = name;
    // Wangwang account and its nickname (same value).
    record["BossName"] = ownerNick;
    record["BossNickName"] = ownerNick;
    // Company name.
    record["CompanyName"] = blank;
    // Shop opening date.
    record["ShopStartDate"] = openedOn;
    record["ShpAgeNum"] = age;
    // Crawl entry URL.
    record["ShopUrl"] = GetUrlFormat(CurrentUrl);
    // Positive review count.
    record["GoodCommentCount"] = positiveCount;
    // Main industry.
    record["MainIndustry"] = industry;
    // "Matches description" score and rate.
    record["Comment_MatchDescrip"] = GetIntDefault(describeScore);
    record["Comment_MatchDescripRate"] = GetIntDefault(describeRate);
    // Service attitude score and rate.
    record["Comment_ServiceStatue"] = GetIntDefault(serviceScore);
    record["Comment_ServiceStatueRate"] = GetIntDefault(serviceRate);
    // Shipping service score and rate.
    record["Comment_ShipSpeed"] = GetIntDefault(shippingScore);
    record["Comment_ShipSpeedRate"] = GetIntDefault(shippingRate);
    // Deposit.
    record["MarginCharge"] = GetIntDefault(deposit);
    // Shop rank.
    record["ShopRank"] = rank;
    // Location.
    record["Location"] = region;
    // Sales count.
    record["SaleCount"] = GetIntDefault(soldTotal);
    // Product count.
    record["ProductCount"] = GetIntDefault(itemTotal);
    // Positive review rate.
    record["GoodCommentRate"] = GetIntDefault(positiveRate);
    // Main products.
    record["MainBiz"] = mainProducts;
    // Shop type.
    record["DayMonitor"] = _shopType;
    record["Loaned"] = loaned;
    record["targetuid"] = _targetUid;
    // Current shop status.
    record["Error_Notice"] = notice;

    return new IResut[] { record };
}
/// <summary>
/// Parses the shop page in <c>HtmlSource</c> into a single shop record.
/// </summary>
/// <remarks>
/// When the page title is the generic portal title the crawler is treated as blocked: a log
/// entry is sent, the thread sleeps for 30 seconds and an exception is thrown. Comment rates
/// are only fetched (via <c>GetComment</c>) for shops that are not closed, missing or delisted.
/// </remarks>
/// <returns>A one-element array holding the shop record.</returns>
/// <exception cref="Exception">Thrown when the blocked page is detected.</exception>
protected override IResut[] ParseCurrentItems()
{
    var id = GetShopId(HtmlSource);
    var name = GetShopName(HtmlSource);
    if (name.Equals("1688.com,阿里巴巴打造的全球最大的采购批发平台"))
    {
        SendLog("发现被屏蔽,暂停30s");
        Thread.Sleep(30000);
        throw new Exception("被屏蔽了!");
    }

    var secondaryName = GetShopName2(HtmlSource);
    var ownerNick = GetBossName(HtmlSource);
    var age = GetIntDefault(GetShpAgeNum(HtmlSource));
    var mainProducts = GetMainBiz(HtmlSource);
    var region = GetLocation(HtmlSource);
    var itemTotal = GetProductCount(HtmlSource);
    var rank = GetShopRank(HtmlSource);

    var isClosed = name.Contains("旺铺关闭");
    var isMissing = name.Equals("404-阿里巴巴");
    var isDelisted = name.Equals("违规下架");

    // Comment rates are only available for shops that are still reachable.
    IDictionary<string, string> rates;
    if (isClosed || isMissing || isDelisted)
    {
        rates = new Dictionary<string, string>();
    }
    else
    {
        rates = GetComment();
    }

    var blank = string.Empty;
    var zero = 0;
    var openedOn = DateTime.Parse("1990-01-01 00:00:00");

    // Error notice: fixed text for delisted shops, the page title for closed or missing shops.
    var notice = blank;
    if (isDelisted)
    {
        notice = "违规下架";
    }
    else if (isClosed || isMissing)
    {
        notice = name;
    }

    var dayMonitor = "1";
    if (!string.IsNullOrEmpty(notice))
    {
        dayMonitor = "0";
    }

    string rawRate;
    var record = new Resut();
    // Shop id.
    record["ShopId"] = id;
    // Shop name and secondary shop name.
    record["ShopName"] = name;
    record["ShopName2"] = secondaryName;
    // Wangwang account and its nickname (same value).
    record["BossName"] = ownerNick;
    record["BossNickName"] = ownerNick;
    // Company name.
    record["CompanyName"] = name;
    // Shop opening date.
    record["ShopStartDate"] = openedOn;
    record["ShpAgeNum"] = age;
    // Crawl entry URL.
    record["ShopUrl"] = GetUrlFormat(CurrentUrl);
    // Positive review count.
    record["GoodCommentCount"] = zero;
    // Main industry.
    record["MainIndustry"] = blank;
    // "Matches description" score and rate.
    record["Comment_MatchDescrip"] = zero;
    record["Comment_MatchDescripRate"] = rates.TryGetValue("Comment_MatchDescripRate", out rawRate)
        ? GetIntDefault(rawRate)
        : "0";
    // Service attitude score and rate.
    record["Comment_ServiceStatue"] = zero;
    record["Comment_ServiceStatueRate"] = rates.TryGetValue("Comment_ServiceStatueRate", out rawRate)
        ? GetIntDefault(rawRate)
        : "0";
    // Shipping service score and rate.
    record["Comment_ShipSpeed"] = zero;
    record["Comment_ShipSpeedRate"] = rates.TryGetValue("Comment_ShipSpeedRate", out rawRate)
        ? GetIntDefault(rawRate)
        : "0";
    // Deposit.
    record["MarginCharge"] = zero;
    // Shop rank.
    record["ShopRank"] = rank;
    // Location.
    record["Location"] = region;
    // Sales count.
    record["SaleCount"] = zero;
    // Product count.
    record["ProductCount"] = GetIntDefault(itemTotal);
    // Positive review rate.
    record["GoodCommentRate"] = zero;
    // Main products.
    record["MainBiz"] = mainProducts;
    record["DayMonitor"] = dayMonitor;
    record["Loaned"] = zero;
    record["targetuid"] = blank;
    // Current shop status.
    record["Error_Notice"] = notice;

    return new IResut[] { record };
}
/// <summary>
/// Parses the shop page in <c>HtmlSource</c> into a single shop record.
/// </summary>
/// <remarks>
/// When <c>GetShopIsExist</c> returns "0", every field except the shop id and the existence
/// flag is left empty. Otherwise the details come from the page itself plus the mobile shop
/// detail page, which is downloaded for the follower count, product totals and opening time.
/// </remarks>
/// <returns>A one-element array holding the shop record.</returns>
protected override IResut[] ParseCurrentItems()
{
    // Rating fields supplied by GetShopComment, in output order.
    string[] commentKeys =
    {
        "Comment_General", "Comment_GeneralRate",
        "Comment_MatchDescrip", "Comment_MatchDescripRate",
        "Comment_ServiceStatue", "Comment_ServiceStatueRate",
        "Comment_ShipSpeed", "Comment_ShipSpeedRate",
        "Comment_ProductDescrip", "Comment_ProductDescripRate",
        "Comment_ReturnGoods", "Comment_ReturnGoodsRate"
    };

    // Service fields supplied by GetShopService, in output order.
    string[] serviceKeys =
    {
        "Servece_AfterSales", "Servece_AfterSalesRate",
        "Service_TradeDispute", "Service_TradeDisputeRate",
        "Service_ReturnRepair", "Service_ReturnRepairRate"
    };

    var existsFlag = GetShopIsExist(HtmlSource);
    Resut record;

    if (existsFlag.Equals("0"))
    {
        // Shop does not exist: emit the full field set with empty values.
        var blank = string.Empty;
        record = new Resut();
        record["ShopId"] = _shopId;
        record["BrandProfile"] = blank;
        record["ShopIsExist"] = existsFlag;
        record["ShopName"] = blank;
        record["Location"] = blank;
        record["Phone"] = blank;
        record["IllegalRecord"] = blank;
        foreach (var key in commentKeys)
        {
            record[key] = blank;
        }

        foreach (var key in serviceKeys)
        {
            record[key] = blank;
        }

        record["FollowNumber"] = blank;
        record["ProductsNum"] = blank;
        record["NewProducts"] = blank;
        record["PromotionNum"] = blank;
        record["OpenTime"] = blank;
    }
    else
    {
        var brandProfile = GetBrandProfile(HtmlSource);
        var name = GetShopName(HtmlSource);
        var region = GetLocation(HtmlSource);
        var phone = GetPhone(HtmlSource);
        var violations = GetIllegalRecord(HtmlSource);
        var comments = GetShopComment(HtmlSource);
        var services = GetShopService(HtmlSource);

        // Follower count, product totals and opening time come from the mobile detail page.
        var detailUrl = $"http://shop.m.jd.com/detail/detail?shopId={_shopId}";
        var detailHtml = base.GetWebContent(detailUrl);
        var followers = GetFollowNumber(detailHtml);
        var totals = GetTotalNumDic(detailHtml);
        var openedOn = GetOpenTime(detailHtml);

        record = new Resut();
        // Shop id.
        record["ShopId"] = _shopId;
        // Brand profile link.
        record["BrandProfile"] = brandProfile;
        // Existence flag.
        record["ShopIsExist"] = existsFlag;
        // Company name.
        record["ShopName"] = name;
        // Location.
        record["Location"] = region;
        // Phone.
        record["Phone"] = phone;
        // Number of violations.
        record["IllegalRecord"] = violations;
        foreach (var key in commentKeys)
        {
            record[key] = comments[key];
        }

        foreach (var key in serviceKeys)
        {
            record[key] = services[key];
        }

        // Follower count.
        record["FollowNumber"] = followers;
        // All products.
        record["ProductsNum"] = totals["ProductsNum"];
        // New arrivals.
        record["NewProducts"] = totals["NewProductsNum"];
        // Promotions.
        record["PromotionNum"] = totals["PromotionNum"];
        // Shop opening time.
        record["OpenTime"] = openedOn;
    }

    return new IResut[] { record };
}
/// <summary>
/// Parses the product entries of the current shop listing page into result rows.
/// </summary>
/// <remarks>
/// Two markups are handled, selected by <c>_isInHtml</c>: plain HTML, and HTML whose attribute
/// values still carry escaped quotes (for example <c>class='\"pagination\"'</c>). In both cases the
/// candidate <c>div</c> elements are walked in order; walking stops at the pagination block
/// (the entries after it are recommended products) or at a <c>comboHd</c> block, which also
/// clears <c>_urlQueue</c>. Products whose id is already in <c>_hashTable</c> are skipped.
/// Nodes or attributes that are missing cause the affected entry to be skipped rather than
/// raising a <c>NullReferenceException</c>.
/// </remarks>
/// <returns>
/// One row per newly seen product; an empty array when no listing container is found.
/// </returns>
protected override IResut[] ParseCurrentItems()
{
    var resultList = new List<IResut>();

    // Formatters for values taken from the escaped-quote markup: the payload sits between \" ... \".
    Newtonsoft.Json.Serialization.Func<string, string> getFormatProductId =
        raw => Regex.Match(raw, @"(?<=\\"").*(?=\\"")").Value;
    Newtonsoft.Json.Serialization.Func<string, string> getFormatProductName =
        raw => raw.Trim();
    Newtonsoft.Json.Serialization.Func<string, string> getFormatProductUrl =
        raw => "https:" + Regex.Match(raw, @"(?<=\\"").*(?=\\"")").Value;

    // Note from the original author: XPath matches()/ends-with() were tried here and rejected
    // with "namespace manager or XsltContext required", hence the contains()-based queries.
    var docmentNode = HtmlAgilityPackHelper.GetDocumentNodeByHtml(HtmlSource);

    if (_isInHtml)
    {
        var htmlNodeCollection =
            docmentNode.SelectNodes(@"//div[@class='pagination']/parent::div/child::div")
            ?? docmentNode.SelectNodes(@"//div[@class='comboHd']/parent::div/child::div")
            ?? docmentNode.SelectNodes(@"//div[contains(@class,'item') and contains(@class,'line1')]//dl");

        // SelectNodes yields null (not an empty collection) when nothing matches.
        if (htmlNodeCollection == null)
        {
            return resultList.ToArray();
        }

        foreach (var htmlNode in htmlNodeCollection)
        {
            var attributes = htmlNode.Attributes["class"]?.Value;
            if (attributes == null)
            {
                continue;
            }

            // Stop here: everything after the pagination block is recommended products.
            if (attributes == @"pagination")
            {
                break;
            }

            if (attributes == @"comboHd")
            {
                // Clear the pending url queue.
                _urlQueue.Clear();
                break;
            }

            if (!(attributes.Contains(@"item") && attributes.Contains(@"line1")))
            {
                continue;
            }

            var htmlNodeDls = htmlNode.SelectNodes(".//dl");
            if (htmlNodeDls == null)
            {
                continue;
            }

            foreach (var htmlNodeDl in htmlNodeDls)
            {
                var detailNode = htmlNodeDl.SelectSingleNode(
                    @".//dd[@class='detail']//a[@class='item-name J_TGoldData']");
                var productUrl = detailNode?.Attributes["href"]?.Value;
                if (productUrl == null)
                {
                    continue;
                }

                var productName = getFormatProductName(detailNode.InnerText);
                var productId = Regex.Match(productUrl, @"(?<=id=)\d+").Value;

                // Only emit products that have not been seen before.
                if (_hashTable.ContainsKey(productId))
                {
                    continue;
                }

                _hashTable.Add(productId, null);

                var price = htmlNodeDl.SelectSingleNode(@".//span[@class='c-price']")?.InnerText.Trim();
                string maxPrice = null;
                var saleNum = htmlNodeDl.SelectSingleNode(@".//span[@class='sale-num']")?.InnerText.Trim();
                var comment = htmlNodeDl.SelectSingleNode(@".//h4/a/span")?.InnerText;
                comment = comment == null ? null : Regex.Match(comment, @"\d+").Value;

                var resut = new Resut
                {
                    ["productId"] = productId,
                    ["productName"] = productName,
                    ["productUrl"] = productUrl,
                    ["shopId"] = _shopUrl,
                    ["shopName"] = _shopName,
                    ["price"] = price,
                    ["maxPrice"] = maxPrice,
                    ["saleNum"] = saleNum,
                    ["comment"] = comment
                };
                resultList.Add(resut);
            }
        }
    }
    else
    {
        var htmlNodeCollection =
            docmentNode.SelectNodes(@"//div[@class='\""pagination\""']/parent::div/child::div")
            ?? docmentNode.SelectNodes(@"//div[@class='\""comboHd\""']/parent::div/child::div");

        // SelectNodes yields null (not an empty collection) when nothing matches.
        if (htmlNodeCollection == null)
        {
            return resultList.ToArray();
        }

        foreach (var htmlNode in htmlNodeCollection)
        {
            var attributes = htmlNode.Attributes["class"]?.Value;
            if (attributes == null)
            {
                continue;
            }

            // Stop here: everything after the pagination block is recommended products.
            if (attributes == @"\""pagination\""")
            {
                break;
            }

            if (attributes == @"\""comboHd\""")
            {
                // Clear the pending url queue.
                _urlQueue.Clear();
                break;
            }

            if (!(attributes.Contains(@"\""item") && attributes.Contains(@"line1\""")))
            {
                continue;
            }

            var htmlNodeDls = htmlNode.SelectNodes(".//dl");
            if (htmlNodeDls == null)
            {
                continue;
            }

            foreach (var htmlNodeDl in htmlNodeDls)
            {
                var rawProductId = htmlNodeDl.Attributes["data-id"]?.Value;
                if (rawProductId == null)
                {
                    continue;
                }

                var productId = getFormatProductId(rawProductId);

                // Only emit products that have not been seen before.
                if (_hashTable.ContainsKey(productId))
                {
                    continue;
                }

                var detailNode = htmlNodeDl.SelectSingleNode(
                    @".//dd[@class='\""detail\""']//a[@class='\""item-name']");
                var rawProductUrl = detailNode?.Attributes["href"]?.Value;
                if (rawProductUrl == null)
                {
                    // Not marked as seen, so a later page with complete markup can still emit it.
                    continue;
                }

                _hashTable.Add(productId, null);

                var productName = getFormatProductName(detailNode.InnerText);
                var productUrl = getFormatProductUrl(rawProductUrl);
                var price = htmlNodeDl.SelectSingleNode(@".//span[@class='\""c-price\""']")?.InnerText.Trim();
                var maxPrice = htmlNodeDl.SelectSingleNode(@".//span[@class='\""s-price\""']")?.InnerText.Trim();
                var saleNum = htmlNodeDl.SelectSingleNode(@".//span[@class='\""sale-num\""']")?.InnerText.Trim();
                var comment = htmlNodeDl.SelectSingleNode(@".//div[@class='\""title\""']")?.InnerText;
                comment = comment == null ? null : Regex.Match(comment, @"\d+").Value;

                var resut = new Resut
                {
                    ["productId"] = productId,
                    ["productName"] = productName,
                    ["productUrl"] = productUrl,
                    ["shopId"] = _shopUrl,
                    ["shopName"] = _shopName,
                    ["price"] = price,
                    ["maxPrice"] = maxPrice,
                    ["saleNum"] = saleNum,
                    ["comment"] = comment
                };
                resultList.Add(resut);
            }
        }
    }

    return resultList.ToArray();
}
/// <summary>
/// Builds the single result row describing the current product.
/// </summary>
/// <remarks>
/// Values are read from <c>_html</c> and <c>HtmlSource</c> through the class's extraction helpers.
/// The availability text is a fixed message when no product name could be extracted, the
/// "removed from shelf" notice when the page reports it, and otherwise whatever
/// <c>GetIsExist</c> returns.
/// </remarks>
/// <returns>An array containing exactly one row.</returns>
protected override IResut[] ParseCurrentItems()
{
    // Product name, with a fallback extractor for pages using the special layout.
    var name = GetProductName(_html);
    if (name.Equals(string.Empty))
    {
        name = GetSpecialProductName(HtmlSource);
    }

    var color = GetSelectColor(HtmlSource);
    var image = GetImgSrc(HtmlSource);
    var reminder = GetWarmReminder(HtmlSource);
    var priceText = GetProductPrice(_html);
    var whiteBarText = GetWhiteBar(_html);
    var serviceText = GetService(_html);
    var discountText = GetDiscount(_html);
    var activityText = GetProductActivity(_html);

    // Availability: keep the "removed" notice as-is, otherwise resolve stock/delivery text.
    const string removedNotice = "该商品已下柜,非常抱歉!";
    var availability = ProductIsExist(_html);
    if (name.Equals(string.Empty))
    {
        availability = "产品不存在!";
    }
    else if (!availability.Equals(removedNotice))
    {
        availability = GetIsExist(_html);
    }

    var comments = GetCommentDic();

    var row = new Resut
    {
        // product id
        ["ProductId"] = _productId,
        // product name
        ["ProductName"] = name,
        // selected color
        ["SelectColor"] = color,
        // image link
        ["ImgSrc"] = image,
        // reminder text
        ["WarmReminder"] = reminder,
        // product price
        ["ProductPrice"] = priceText,
        // "white bar" instalment info
        ["WhiteBar"] = whiteBarText,
        // service
        ["Service"] = serviceText,
        // promotion
        ["Discount"] = discountText,
        // product activity
        ["ProductActivity"] = activityText,
        // availability and estimated delivery
        ["IsExist"] = availability,
        // all reviews
        ["AllCnt"] = comments["allCnt"],
        // positive reviews
        ["GoodCnt"] = comments["goodCnt"],
        // neutral reviews
        ["NormalCnt"] = comments["normalCnt"],
        // negative reviews
        ["BadCnt"] = comments["badCnt"],
        // reviews with pictures
        ["PictureCnt"] = comments["pictureCnt"]
    };

    return new IResut[] { row };
}
/// <summary>
/// Downloads every product page queued for the current URL and extracts one result row per product.
/// </summary>
/// <remarks>
/// For each product the page is fetched with <c>GetMainWebContent</c> and its fields are pulled
/// out with regular expressions. When a product id is found, the wish-list counter endpoint is
/// queried as well; that request is best-effort: timeouts are retried a bounded number of times
/// and any other failure leaves <c>CollectNum</c> empty instead of aborting the row.
/// </remarks>
/// <returns>One row per URL returned by <c>GetUrlQueueByUrl</c>.</returns>
protected override IResut[] ParseCurrentItems()
{
    // Upper bound for the wish-list request so a permanently timing-out endpoint cannot hang the crawl.
    const int maxWishlistAttempts = 3;

    var urlSet = GetUrlQueueByUrl(CurrentUrl);
    var resultList = new List<IResut>();

    // The delegate does not depend on the loop variable, so it is created once.
    GetValueByHtmlAndRegex getValueByHtmlAndRegex = new GetValueByHtmlAndRegex(GetResultByHtmlAndRegex);

    foreach (var url in urlSet)
    {
        var html = GetMainWebContent(url, null, ref _cookies, null);

        var title = getValueByHtmlAndRegex(html, "(?<=<title>).*(?=</title>)").Replace("Aliexpress.com : ", "");
        var percentNum = getValueByHtmlAndRegex(html, "(?<=<span class=\"percent-num\">).*?(?=</span>)");
        var ratingsNum = GetInt(getValueByHtmlAndRegex(html, "(?<=<span class=\"rantings-num\">).*?(?=</span>)"));
        var orderNum = GetInt(getValueByHtmlAndRegex(html, "(?<=<span class=\"order-num\" id=\"j-order-num\">).*?(?=</span>)"));
        var discountRage = GetInt(getValueByHtmlAndRegex(html, "(?<=<span class=\"p-discount-rate\">).*?(?=</span>)"));
        var actMinPrice = getValueByHtmlAndRegex(html, "(?<=actMinPrice=\").*?(?=\";)");
        var actMaxPrice = getValueByHtmlAndRegex(html, "(?<=actMaxPrice=\").*?(?=\";)");
        var minPrice = getValueByHtmlAndRegex(html, "(?<=minPrice=\").*?(?=\";)");
        var maxPrice = getValueByHtmlAndRegex(html, "(?<=maxPrice=\").*?(?=\";)");
        var mobileDiscountPrice = GetDouble(getValueByHtmlAndRegex(html, "(?<=mobileDiscountPrice=\").*?(?=\";)"));
        var productId = getValueByHtmlAndRegex(html, "(?<=productId=\").*?(?=\";)");
        var totalAvailQuantity = getValueByHtmlAndRegex(html, @"(?<=totalAvailQuantity=)\d+(?=;)");

        string collectNum = string.Empty;
        if (!string.IsNullOrEmpty(productId))
        {
            var wishlistUrl = $"https://us.ae.aliexpress.com/wishlist/wishlist_item_count.htm?itemid={productId}";
            string wishlistHtml = string.Empty;

            for (var attempt = 1; attempt <= maxWishlistAttempts; attempt++)
            {
                try
                {
                    wishlistHtml = GetMainWebContent(wishlistUrl, null, ref _cookies, null);
                    break;
                }
                catch (Exception e)
                {
                    // The timeout is recognised by its (localized) message, as in the original code.
                    var details = e.ToString();
                    var isTimeout = details.Contains("操作超时") || details.Contains("操作已超时");
                    Console.WriteLine($"wishlist request failed (attempt {attempt}/{maxWishlistAttempts}): {e.Message}");
                    if (!isTimeout)
                    {
                        // Best effort: a non-timeout failure is not retried and the count stays empty.
                        break;
                    }
                }
            }

            collectNum = getValueByHtmlAndRegex(wishlistHtml, @"(?<=""num"":)\d+(?=})");
        }

        // Remaining event time: rebuilt as hh:mm:ss when the countdown attributes are present.
        var eventTimeLeft = Regex.Match(html, "(?<=class=\"p-eventtime-left\").*?(?=</span>)").Value;
        if (eventTimeLeft.Contains("data-hour") || eventTimeLeft.Contains("data-minute") ||
            eventTimeLeft.Contains("data-second"))
        {
            var hour = Regex.Match(eventTimeLeft, @"(?<=data-hour="")\d+(?="")").Value;
            if (hour.Length == 1)
            {
                hour = $"0{hour}";
            }

            var minute = Regex.Match(eventTimeLeft, @"(?<=data-minute="")\d+(?="")").Value;
            if (minute.Length == 1)
            {
                minute = $"0{minute}";
            }

            var second = Regex.Match(eventTimeLeft, @"(?<=data-second="")\d+(?="")").Value;
            if (second.Length == 1)
            {
                second = $"0{second}";
            }

            eventTimeLeft = $"{hour}:{minute}:{second}";
        }

        IResut resut = new Resut()
        {
            { "ShopId", _shopId },
            { "Url", url },
            { "Title", title },
            { "PercentNum", percentNum },
            { "RatingsNum", ratingsNum },
            { "OrderNum", orderNum },
            { "DiscountRage", discountRage },
            { "EventTimeLeft", eventTimeLeft },
            { "ActMinPrice", actMinPrice },
            { "ActMaxPrice", actMaxPrice },
            { "MinPrice", minPrice },
            { "MaxPrice", maxPrice },
            { "MobileDiscountPrice", mobileDiscountPrice },
            { "ProductId", productId },
            { "TotalAvailQuantity", totalAvailQuantity },
            { "CollectNum", collectNum }
        };
        resultList.Add(resut);
    }

    return resultList.ToArray();
}
/// <summary>
/// Parses the job list in <c>HtmlSource</c> and, for every job, downloads its detail, company and
/// statistics pages to build one result row.
/// </summary>
/// <remarks>
/// The list page sometimes comes back without the expected <c>infolist</c> block; in that case
/// it is re-downloaded up to three times before giving up. Request order is significant because
/// <c>_httpHelper.Referer</c> and <c>_httpHelper.Cookies</c> are changed between calls.
/// Optional page fragments that are missing yield empty or null fields instead of a
/// <c>NullReferenceException</c>/<c>ArgumentNullException</c>.
/// </remarks>
/// <returns>One row per job entry found on the list page.</returns>
/// <exception cref="System.Exception">
/// The job list is still missing after three retries, or a job entry has no detail URL.
/// </exception>
protected override IResut[] ParseCurrentItems()
{
    var resultList = new List<IResut>();

    var htmlNode = HtmlAgilityPack.HtmlAgilityPackHelper.GetDocumentNodeByHtml(HtmlSource);
    var htmlNodeCollection = htmlNode.SelectNodes("//div[@id='infolist']//dl");

    // The site intermittently returns a wrong page, so the list is re-requested a few times.
    var tryTimes = 0;
    while (htmlNodeCollection == null)
    {
        if (++tryTimes > 3)
        {
            throw new Exception("htmlNodeCollectionNullException tryTimes more than 3 times");
        }

        Console.WriteLine($"htmlNodeCollectionNullException tryTimes {tryTimes}");
        var retryHtml = GetWebContent(CurrentUrl);
        htmlNode = HtmlAgilityPack.HtmlAgilityPackHelper.GetDocumentNodeByHtml(retryHtml);
        htmlNodeCollection = htmlNode.SelectNodes("//div[@id='infolist']//dl");
    }

    foreach (var node in htmlNodeCollection)
    {
        // A link without href now reaches the explicit check below instead of a null dereference.
        var jobUrl = node.SelectSingleNode("./dt/a")?.Attributes["href"]?.Value;
        if (string.IsNullOrEmpty(jobUrl))
        {
            throw new Exception("jobUrlNullException");
        }

        var html = _httpHelper.GetHtmlByGet(jobUrl);
        var infoId = Regex.Match(html, @"(?<=""info[iI]d"":)\d+").Value;
        var userId = Regex.Match(html, @"(?<=""user[iI]d"":)\d+").Value;
        var statisticsHtml = _httpHelper.GetHtmlByGet(
            $"http://statistics.zp.58.com/position/totalcount/?infoId={infoId}&userId={userId}");

        var htmlNodeR = HtmlAgilityPack.HtmlAgilityPackHelper.GetDocumentNodeByHtml(html);
        var jobName = htmlNodeR.SelectSingleNode("//span[@class='pos_title']")?.InnerText;
        var jobCount = GetNumber(htmlNodeR.SelectSingleNode("//span[@class='item_condition pad_left_none']")?.InnerText);
        var degreeRequired = htmlNodeR.SelectSingleNode("//span[@class='item_condition']")?.InnerText;
        var experienceRequired = htmlNodeR.SelectSingleNode("//span[@class='item_condition border_right_None']")?.InnerText.Trim();
        var location = htmlNodeR.SelectSingleNode("//div[@class='pos-area']/span[1]")?.InnerText.Trim();

        // The salary uses a different class when it is listed as "to be decided"; either may be absent.
        var salary = htmlNodeR.SelectSingleNode("//span[@class='pos_salary']")?.InnerText
                     ?? htmlNodeR.SelectSingleNode("//span[@class='pos_salary daiding']")?.InnerText;

        // Regex.Match rejects null input, so a missing statistics span is treated as empty text.
        // NOTE(review): the original character class was "[::]"; the duplicated colon suggests a
        // full-width colon was lost, so both the ASCII and the full-width form are accepted here.
        var updateText = htmlNodeR.SelectSingleNode("//div[@class='pos_base_statistics']/span[1]")?.InnerText;
        var jobUpdateDate = FormatTime(Regex.Match(updateText ?? string.Empty, "(?<=更新[:\uFF1A]).*$").Value.Trim());

        _httpHelper.Referer = jobUrl;
        var browseCount = Regex.Match(
            _httpHelper.GetHtmlByGet($"http://jst1.58.com/counter?infoid={infoId}"),
            @"(?<=total=)\d+").Value;
        var applyCount = Regex.Match(statisticsHtml, @"(?<=""deliveryCount"":)\d+").Value;

        var companyLink = htmlNodeR.SelectSingleNode("//div[@class='baseInfo_link']/a");
        var companyUrl = companyLink?.Attributes["href"]?.Value;

        // Without a company link there is nothing to download; the company fields stay empty.
        var companyHtml = string.IsNullOrEmpty(companyUrl)
            ? string.Empty
            : _httpHelper.GetHtmlByGet(companyUrl);
        var contactPerson = Regex.Match(companyHtml, @"(?<=<li><span>联系人.*</span>[\s]*)[\S]*?(?=[\s]*</li>)").Value;
        var phoneUrl = Regex.Match(companyHtml, @"(?<=<li><span>联系电话[\s\S]*?</span><img src="")[\S]*(?=""></li>)").Value;
        var phonePic = _httpHelper.GetImage(phoneUrl);
        var companyName = companyLink?.InnerText;

        // Example request:
        // http://zp.service.58.com/api?action=favorite,wltStats&params={"infoUrl":"http://hz.58.com/zptaobao/30334432354220x.shtml","userIds":"13663438612230_0"}
        // The query parameter is "params"; the original literal contained a mangled "&params".
        var infoUrl = Regex.Match(jobUrl, @".*(?=\?)").Value;
        var memberYearUrl =
            $"http://zp.service.58.com/api?action=favorite,wltStats&params={{\"infoUrl\":\"{infoUrl}\",\"userIds\":\"{userId}_0\"}}";
        var memberYear = Regex.Match(_httpHelper.GetHtmlByGet(memberYearUrl), @"(?<=wlt)\d+").Value;

        var mainIndustry = htmlNodeR.SelectSingleNode("//a[@class='comp_baseInfo_link']")?.InnerText;
        var companyPersonCount = htmlNodeR.SelectSingleNode("//p[@class='comp_baseInfo_scale']")?.InnerText;
        var businessLicense = htmlNodeR.SelectSingleNode("//div[@class='identify_con clearfix']/span[1]")?.InnerText;
        var realNameLicense = htmlNodeR.SelectSingleNode("//div[@class='identify_con clearfix']/span[2]")?.InnerText;
        var taobaoShopLicense = htmlNodeR.SelectSingleNode("//div[@class='identify_con clearfix']/span[3]")?.InnerText;

        _httpHelper.Cookies = $"id58={GetId58()};58tj_uuid={Guid.NewGuid()}";
        var resumeFeedback = Regex.Match(
            _httpHelper.GetHtmlByGet($"http://jianli.58.com/ajax/getefrate/{userId}"),
            @"(?<=""efrate"":)\d+").Value;
        var companyJobNumber = Regex.Match(statisticsHtml, @"(?<=""infoCount"":)\d+").Value;

        var memberMonthText = htmlNodeR.SelectSingleNode("//span[@class='item_num join58_num']")?.InnerText;
        var memberMonth = Regex.Match(memberMonthText ?? string.Empty, ".*(?=月)").Value;

        var workAddress = htmlNodeR.SelectSingleNode("//div[@class='pos-area']/span[2]")?.InnerText;
        var jobDescription = htmlNodeR.SelectSingleNode("//div[@class='des']")?.InnerText;

        var resut = new Resut
        {
            ["JobUrl"] = jobUrl,
            ["JobName"] = jobName,
            ["JobCount"] = jobCount,
            ["DegreeRequired"] = degreeRequired,
            ["ExperienceRequired"] = experienceRequired,
            ["Location"] = location,
            ["Salary"] = salary,
            ["JobUpdateDate"] = jobUpdateDate,
            ["BrowseCount"] = browseCount,
            ["ApplyCount"] = applyCount,
            ["Phone_Pic"] = phonePic,
            ["ContactPerson"] = contactPerson,
            ["CompanyUrl"] = companyUrl,
            ["CompanyName"] = companyName,
            ["MemberYear"] = memberYear,
            ["MainIndustry"] = mainIndustry,
            ["CompanyPersonCount"] = companyPersonCount,
            ["BusinessLicense"] = businessLicense,
            ["TaobaoShopLicense"] = taobaoShopLicense,
            ["RealNameLicense"] = realNameLicense,
            ["ResumeFeedback"] = resumeFeedback,
            ["CompanyJobNumber"] = companyJobNumber,
            ["MemberMonth"] = memberMonth,
            ["WorkAddress"] = workAddress,
            ["JobDescription"] = jobDescription,
        };
        resultList.Add(resut);
    }

    return resultList.ToArray();
}
/// <summary>
/// Builds the single result row holding the store's location, age and feedback figures.
/// </summary>
/// <remarks>
/// Location and shop age are extracted from <c>HtmlSource</c>; every other value is looked up by
/// key in the collection returned by <c>GetItemList</c> for the evaluation-detail fragment.
/// Numeric fields go through <c>FormatNumber</c>, and percentage fields additionally through
/// <c>RemovePercentSign</c>.
/// </remarks>
/// <returns>An array containing exactly one row.</returns>
protected override IResut[] ParseCurrentItems()
{
    var location = GetStoreLocation(HtmlSource);
    var age = GetShopAge(HtmlSource);
    var feedback = GetItemList(GetEvaluationDetailHtml(HtmlSource));

    // Seller summary.
    var sellerName = feedback["seller"];
    var recentPositiveShare = feedback["positiveFeedbackPastSixMonths"];
    var score = feedback["feedbackScore"];
    var sellerSince = feedback["aliExpressSellerSince"];

    // Detailed seller ratings: score, number of ratings, percentage.
    var describedScore = feedback["described"];
    var describedCount = feedback["describedRatings"];
    var describedShare = feedback["describedPercent"];
    var communicationScore = feedback["communication"];
    var communicationCount = feedback["communicationRatings"];
    var communicationShare = feedback["communicationPercent"];
    var shippingScore = feedback["shippingSpeed"];
    var shippingCount = feedback["shippingSpeedRatings"];
    var shippingShare = feedback["shippingSpeedPercent"];

    // Feedback history per period.
    var positive1M = feedback["positiveOneMonth"];
    var positive3M = feedback["positiveThreeMonths"];
    var positive6M = feedback["positiveSixMonths"];
    var positive1Y = feedback["positiveOneYear"];
    var positiveAll = feedback["positiveOverall"];
    var negative1M = feedback["negativeOneMonth"];
    var negative3M = feedback["negativeThreeMonths"];
    var negative6M = feedback["negativeSixMonths"];
    var negative1Y = feedback["negativeOneYear"];
    var negativeAll = feedback["negativeOverall"];
    var neutral1M = feedback["neutralOneMonth"];
    var neutral3M = feedback["neutralThreeMonths"];
    var neutral6M = feedback["neutralSixMonths"];
    var neutral1Y = feedback["neutralOneYear"];
    var neutralAll = feedback["neutralOverAll"];
    var positiveRate1M = feedback["positiveFeedbackRateOneMonth"];
    var positiveRate3M = feedback["positiveFeedbackRateThreeMonths"];
    var positiveRate6M = feedback["positiveFeedbackRateSixMonths"];
    var positiveRate1Y = feedback["positiveFeedbackRateOneYear"];
    var positiveRateAll = feedback["positiveFeedbackRateOverall"];

    IResut row = new Resut()
    {
        { "shopId", _shopId },
        { "storeLocation", location },
        { "shopAge", FormatNumber(age) },
        { "seller", sellerName },
        { "positiveFeedbackPastSixMonths", FormatNumber(RemovePercentSign(recentPositiveShare)) },
        { "feedbackScore", FormatNumber(FormatNumber(score)) },
        { "aliExpressSellerSince", sellerSince },
        { "described", FormatNumber(describedScore) },
        { "describedRatings", FormatNumber(describedCount) },
        { "describedPercent", FormatNumber(describedShare) },
        { "communication", FormatNumber(communicationScore) },
        { "communicationRatings", FormatNumber(communicationCount) },
        { "communicationPercent", FormatNumber(communicationShare) },
        { "shippingSpeed", FormatNumber(shippingScore) },
        { "shippingSpeedRatings", FormatNumber(shippingCount) },
        { "shippingSpeedPercent", FormatNumber(shippingShare) },
        { "positiveOneMonth", FormatNumber(positive1M) },
        { "positiveThreeMonths", FormatNumber(positive3M) },
        { "positiveSixMonths", FormatNumber(positive6M) },
        { "positiveOneYear", FormatNumber(positive1Y) },
        { "positiveOverall", FormatNumber(positiveAll) },
        { "negativeOneMonth", FormatNumber(negative1M) },
        { "negativeThreeMonths", FormatNumber(negative3M) },
        { "negativeSixMonths", FormatNumber(negative6M) },
        { "negativeOneYear", FormatNumber(negative1Y) },
        { "negativeOverall", FormatNumber(negativeAll) },
        { "neutralOneMonth", FormatNumber(neutral1M) },
        { "neutralThreeMonths", FormatNumber(neutral3M) },
        { "neutralSixMonths", FormatNumber(neutral6M) },
        { "neutralOneYear", FormatNumber(neutral1Y) },
        { "neutralOverAll", FormatNumber(neutralAll) },
        { "positiveFeedbackRateOneMonth", FormatNumber(RemovePercentSign(positiveRate1M)) },
        { "positiveFeedbackRateThreeMonths", FormatNumber(RemovePercentSign(positiveRate3M)) },
        { "positiveFeedbackRateSixMonths", FormatNumber(RemovePercentSign(positiveRate6M)) },
        { "positiveFeedbackRateOneYear", FormatNumber(RemovePercentSign(positiveRate1Y)) },
        { "positiveFeedbackRateOverall", FormatNumber(RemovePercentSign(positiveRateAll)) }
    };

    return new IResut[] { row };
}
/// <summary>
/// Builds the single product row for the current item from the JSON payload in <c>HtmlSource</c>.
/// </summary>
/// <remarks>
/// Most fields are extracted from the payload with regular expressions. Rating figures come
/// from one additional request whose endpoint depends on the <c>itemTypeName</c> found in the
/// payload: the Tmall DSR endpoint for "tmall", otherwise the Taobao <c>detailCommon</c> endpoint.
/// Rating, promotion-flag and shop indicators are serialized into the JSON string stored under
/// <c>SellPoint</c>. Prices are taken from the first two <c>rangePrice</c> entries (promotion,
/// then reserve); when no promotion price can be parsed, the first parseable <c>price</c> entry
/// is used for all four price fields. <c>PromotionType</c>, the promotion times,
/// <c>RootCatId</c>, <c>Brand</c>, <c>SpuId</c>, <c>EncryptUserId</c>, <c>BossNickName</c> and
/// <c>ProductDescription</c> are always emitted empty/null.
/// </remarks>
/// <returns>An array containing exactly one row.</returns>
protected override IResut[] ParseCurrentItems()
{
    var stringEmpty = string.Empty;
    var cookies = stringEmpty;

    var itemTypeName = Regex.Match(HtmlSource, "(?<=\"itemTypeName\":\").*?(?=\")").Value;
    var collectionNumber = Regex.Match(HtmlSource, @"(?<=""favcount"":"")\d+(?="")").Value;
    var starts = Regex.Match(HtmlSource, "(?<=\"starts\":\").*?(?=\")").Value;

    JObject jObjectSellPoint = new JObject();
    var userId = stringEmpty;

    // Ordinal comparison: the marker is a machine identifier, not user-facing text.
    if (string.Equals(itemTypeName, "tmall", StringComparison.OrdinalIgnoreCase))
    {
        // Tmall: rating average and total come from the DSR endpoint.
        // NOTE(review): UserId is only populated on the Taobao path below; confirm whether
        // Tmall rows are expected to carry it.
        var sellerId = Regex.Match(HtmlSource, @"(?<=""sellerId"":"")\d+(?="")").Value;
        _tmall_list_dsr_infoHtml = GetMainWebContent(
            $"{_tmall_list_dsr_infoUrl}itemId={_productId}&sellerId={sellerId}", null, ref cookies, "");

        var gradeAvg = Regex.Match(_tmall_list_dsr_infoHtml, "(?<=\"gradeAvg\":).*?(?=,)").Value;
        var rateTotal = Regex.Match(_tmall_list_dsr_infoHtml, "(?<=\"rateTotal\":).*?(?=,)").Value;

        jObjectSellPoint.Add(new JProperty("ItemTypeName", itemTypeName));
        jObjectSellPoint.Add(new JProperty("GradeAvg", gradeAvg));
        jObjectSellPoint.Add(new JProperty("RateTotal", rateTotal));
    }
    else
    {
        // Taobao: review counters come from the detailCommon endpoint.
        itemTypeName = "taobao";
        var userNumId = Regex.Match(HtmlSource, @"(?<=""SELLER_ID"":"")\d+(?="")").Value;
        _taobao_detailCommonUrl =
            $"https://rate.taobao.com/detailCommon.htm?userNumId={userNumId}&auctionNumId={_productId}";
        _taobao_detailCommonHtml = GetMainWebContent(_taobao_detailCommonUrl, null, ref cookies, "");

        var total = Regex.Match(_taobao_detailCommonHtml, @"(?<=""totalFull"":)\d+").Value;
        var goodFull = Regex.Match(_taobao_detailCommonHtml, @"(?<=""goodFull"":)\d+").Value;
        var additional = Regex.Match(_taobao_detailCommonHtml, @"(?<=""additional"":)\d+").Value;
        var normal = Regex.Match(_taobao_detailCommonHtml, @"(?<=""normal"":)\d+").Value;
        var pic = Regex.Match(_taobao_detailCommonHtml, @"(?<=""pic"":)\d+").Value;
        var bad = Regex.Match(_taobao_detailCommonHtml, @"(?<=""bad"":)\d+").Value;

        jObjectSellPoint.Add(new JProperty("ItemTypeName", itemTypeName));
        jObjectSellPoint.Add(new JProperty("Total", total));
        jObjectSellPoint.Add(new JProperty("GoodFull", goodFull));
        jObjectSellPoint.Add(new JProperty("Normal", normal));
        jObjectSellPoint.Add(new JProperty("Bad", bad));
        jObjectSellPoint.Add(new JProperty("Additional", additional));
        jObjectSellPoint.Add(new JProperty("Pic", pic));

        userId = Regex.Match(HtmlSource, @"(?<=""userNumId"":"")\d+(?="")").Value;
    }

    jObjectSellPoint.Add("Starts", starts);

    var productName = Regex.Match(HtmlSource, "(?<=,\"title\":\").*?(?=\")").Value;
    var productStateText = Regex.Match(HtmlSource, "(?<=\"ret\":).*?(?=,)").Value;
    var productImageUrl = Regex.Match(HtmlSource, @"(?<=""picsPath"":\["").*?(?="")").Value;
    if (productImageUrl.Equals(string.Empty))
    {
        productImageUrl = Regex.Match(HtmlSource, "(?<=\"imgUrl\":\").*?(?=\")").Value;
    }

    // Promotion flags, only evaluated when the payload does not report a missing item.
    if (!string.IsNullOrEmpty(productStateText) &&
        !productStateText.Equals("[\"ERRCODE_QUERY_DETAIL_FAIL::宝贝不存在\"]"))
    {
        // data.apiStack is an array with one element whose "value" is itself a JSON string.
        var apiStack = JObject.Parse(HtmlSource)["data"]["apiStack"];
        var priceUnitsText = JArray.Parse(apiStack.ToString())[0]["value"].ToString();
        priceUnitsText = Regex.Match(priceUnitsText, @"(?<=""priceUnits"":\[).*?(?=\])").Value;
        var names = Regex.Matches(priceUnitsText, "(?<=\"name\":\").*?(?=\")");

        var daZheMiaoSha = 0;   // limited-time discount
        var taoJinBi = 0;       // Tao coins
        var juHuaSuan = 0;      // Juhuasuan group deal
        var tianTianTeJia = 0;  // daily special

        if (productName.Contains("天天特价"))
        {
            tianTianTeJia = 1;
        }

        foreach (Match name in names)
        {
            var value = name.Value;
            if (value.Equals("限时打折"))
            {
                daZheMiaoSha = 1;
            }
            else if (value.Equals("聚划算"))
            {
                juHuaSuan = 1;
            }
            else if (value.Equals("天天特价"))
            {
                tianTianTeJia = 1;
            }
            else if (value.Contains("淘金币"))
            {
                taoJinBi = 1;
            }
        }

        jObjectSellPoint.Add("DaZheMiaoSha", daZheMiaoSha);
        jObjectSellPoint.Add("TaoJinBi", taoJinBi);
        jObjectSellPoint.Add("JuHuaSuan", juHuaSuan);
        jObjectSellPoint.Add("TianTianTeJia", tianTianTeJia);
    }

    // Free-shipping flag.
    var mianYunFei = 0;
    if (Regex.Match(HtmlSource, @"(?<=\\""subInfos\\"":).*?(?=})").Value.Contains("免运费"))
    {
        mianYunFei = 1;
    }

    jObjectSellPoint.Add("MianYunFei", mianYunFei);

    var productLocation = Regex.Match(HtmlSource, "(?<=\"location\":\").*?(?=\")").Value;
    var shopId = Regex.Match(HtmlSource, @"(?<=""shopId"":"")\d+(?="")").Value;

    // The first rangePrice entry is the promotion price, the second the reserve price.
    var rangePriceMatches = Regex.Matches(HtmlSource, @"(?<=\\""rangePrice\\"":\\"").*?(?=\\"")");
    var promotionPriceString = stringEmpty;
    var reservePriceString = stringEmpty;
    if (rangePriceMatches.Count >= 2)
    {
        promotionPriceString = rangePriceMatches[0].Value;
        reservePriceString = rangePriceMatches[1].Value;
    }

    double? promotionPrice;
    double? reservePrice;
    double? productPrice;
    double? productPriceMax;
    if (promotionPriceString.Contains("-"))
    {
        // "low-high" range: low is the product/promotion price, high the maximum.
        productPrice = StringToDouble(Regex.Match(promotionPriceString, ".*(?=-)").Value);
        productPriceMax = StringToDouble(Regex.Match(promotionPriceString, "(?<=-).*").Value);
        promotionPrice = productPrice;
    }
    else
    {
        productPrice = productPriceMax = promotionPrice = StringToDouble(promotionPriceString);
    }

    int? productQuantity = StringToInt(Regex.Match(HtmlSource, @"(?<=\\""quantity\\"":\\"")\d+(?=\\"")").Value);
    reservePrice = StringToDouble(reservePriceString.Contains("-")
        ? Regex.Match(reservePriceString, ".*(?=-)").Value
        : reservePriceString);
    int? sellCountMonthly = StringToInt(Regex.Match(HtmlSource, @"(?<=\\""totalSoldQuantity\\"":\\"")\d+(?=\\"")").Value);
    int? totalCommentCount = StringToInt(Regex.Match(HtmlSource, @"(?<=""rateCounts"":"")\d+(?="")").Value);

    var promotionType = stringEmpty;
    DateTime? promotionStartTime = null;
    DateTime? promotionEndTime = null;
    var categoryId = Regex.Match(HtmlSource, @"(?<=""categoryId"":"")\d+(?="")").Value;
    var rootCatId = stringEmpty;
    var brandId = Regex.Match(HtmlSource, @"(?<=""brandId"":"")\d+(?="")").Value;
    var brand = stringEmpty;
    var spuId = stringEmpty;
    var encryptUserId = stringEmpty;
    var bossNickName = stringEmpty;
    var fanCount = Regex.Match(HtmlSource, @"(?<=""fansCount"":"")\d+(?="")").Value;
    var creditLevel = Regex.Match(HtmlSource, @"(?<=""creditLevel"":"")\d+(?="")").Value;
    var productDescription = stringEmpty;

    jObjectSellPoint.Add(new JProperty("CollectionNumber", collectionNumber));
    jObjectSellPoint.Add(new JProperty("ShopId", shopId));
    jObjectSellPoint.Add(new JProperty("FanCount", fanCount));
    jObjectSellPoint.Add(new JProperty("CreditLevel", creditLevel));
    var sellPoint = jObjectSellPoint.ToString();

    if (productPrice == null)
    {
        // Fallback: use the first "price" entry that parses. For a "low-high" range the low
        // bound of that same entry is taken (the original parsed promotionPriceString here,
        // which has already failed to parse on this path, so ranges were never accepted).
        var priceAll = Regex.Matches(HtmlSource, @"(?<=\\""price\\"":\\"").*?(?=\\"")");
        foreach (Match price in priceAll)
        {
            var value = price.Value;
            double? valuePrice = StringToDouble(value.Contains("-")
                ? Regex.Match(value, ".*(?=-)").Value
                : value);
            if (valuePrice != null)
            {
                productPrice = productPriceMax = reservePrice = promotionPrice = valuePrice;
                break;
            }
        }
    }

    // The row's ShopId comes from the _shopId field; the shop id parsed from the payload is
    // only recorded inside SellPoint.
    var resultList = new List<IResut>();
    IResut resut = new Resut()
    {
        { "ProductId", _productId },
        { "ProductName", productName },
        { "ProductStateText", productStateText },
        { "SellPoint", sellPoint },
        { "ProductImageUrl", productImageUrl },
        { "ProductDescription", productDescription },
        { "ProductLocation", productLocation },
        { "ShopId", _shopId },
        { "ProductPrice", productPrice },
        { "ProductPriceMax", productPriceMax },
        { "ProductQuantity", productQuantity },
        { "ReservePrice", reservePrice },
        { "SellCountMonthly", sellCountMonthly },
        { "TotalCommentCount", totalCommentCount },
        { "PromotionPrice", promotionPrice },
        { "PromotionType", promotionType },
        { "PromotionStartTime", promotionStartTime },
        { "PromotionEndTime", promotionEndTime },
        { "CategoryId", categoryId },
        { "RootCatId", rootCatId },
        { "BrandId", brandId },
        { "Brand", brand },
        { "UserId", userId },
        { "SpuId", spuId },
        { "EncryptUserId", encryptUserId },
        { "BossNickName", bossNickName },
    };
    resultList.Add(resut);
    return resultList.ToArray();
}