public void LoadHtml() { this.html = HttpUtils.HttpGet(url); }
public void ParseHTML() { HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument(); doc.LoadHtml(html); HtmlNode docNode = doc.DocumentNode; HtmlNode h1Node = docNode.SelectNodes(".//h1").First(); this.Title = h1Node.InnerText; //HtmlNode producetNode = doc.DocumentNode.SelectSingleNode("//div[@id='j-product-info-sku']"); HtmlNodeCollection dlNodes = doc.DocumentNode.SelectNodes(".//dl[@class='p-property-item']"); if (dlNodes != null && dlNodes.Count > 0) { foreach (HtmlNode dlNode in dlNodes) { HtmlNode dtNode = dlNode.SelectSingleNode(".//dt[@class='p-item-title']"); string propertyTitle = dtNode.InnerText; List <SkuPropertyValue> list = new List <SkuPropertyValue>(); HtmlNodeCollection liNodes = dlNode.SelectNodes(".//li"); foreach (var liNode in liNodes) { SkuPropertyValue skuPropertyValue = new SkuPropertyValue(); skuPropertyValue.Prop = propertyTitle; HtmlNode aNode = liNode.SelectSingleNode(".//a"); skuPropertyValue.Id = aNode.Attributes["data-sku-id"].Value; HtmlNode imgNode = aNode.SelectSingleNode(".//img"); if (imgNode != null) { skuPropertyValue.ImageUrl = imgNode.Attributes["src"].Value; } HtmlNode spanNode = aNode.SelectSingleNode(".//span"); if (spanNode != null) { //Console.WriteLine(spanNode.InnerText); skuPropertyValue.Name = spanNode.InnerText; } list.Add(skuPropertyValue); // for index PropertyDict4Index.Add(skuPropertyValue.Id, skuPropertyValue); } PropertyDict.Add(propertyTitle, list); } } int i = 0; HtmlNodeCollection scriptList = doc.DocumentNode.SelectNodes(".//script"); foreach (HtmlNode item in scriptList) { //Console.WriteLine("===========================" + i); i++; //Console.WriteLine(item.InnerHtml); string script = item.InnerHtml; if (script.Contains("window.runParams.maxPrice")) { //Console.WriteLine(script); string v = script; v = v.Substring(script.IndexOf("runParams.maxPrice=")); v = v.Replace("runParams.maxPrice=", ""); v = v.Substring(0, v.IndexOf(";")); v = v.Replace("\"", ""); this.Price = v; //Console.WriteLine(this.Price); } if (script.Contains("window.runParams.baseCurrencyCode")) { //Console.WriteLine(script); string v = script; v = v.Substring(script.IndexOf("runParams.baseCurrencyCode=")); v = v.Replace("runParams.baseCurrencyCode=", ""); v = v.Substring(0, v.IndexOf(";")); v = v.Replace("\"", ""); this.CurrencyCode = v; //Console.WriteLine(this.CurrencyCode); } if (script.Contains("runParams.imageBigViewURL")) { //Console.WriteLine(script); string v = script; v = v.Substring(script.IndexOf("runParams.imageBigViewURL=")); v = v.Replace("runParams.imageBigViewURL=", ""); v = v.Substring(0, v.IndexOf(";")); v = v.Replace("\"", ""); v = v.Replace("\n", ""); v = v.Replace("\r", ""); v = v.Replace("\t", ""); v = v.Replace("[", ""); v = v.Replace("]", ""); foreach (var imageUrl in v.Split(',')) { this.MainImageList.Add(imageUrl); } //this.CurrencyCode = v; //Console.WriteLine(this.MainImageList); } if (script.Contains("runParams.descUrl")) { //Console.WriteLine(script); string v = script; v = v.Substring(script.IndexOf("runParams.descUrl=")); v = v.Replace("runParams.descUrl=", ""); v = v.Substring(0, v.IndexOf(";")); v = v.Replace("\"", ""); //this.CurrencyCode = v; v = "http:" + v; // Console.WriteLine(v); string descHtml = HttpUtils.HttpGet(v); descHtml = descHtml.Replace("window.productDescription='", ""); descHtml = descHtml.Replace("';", ""); this.DescHtml = descHtml; //Console.WriteLine("descHtml:" + descHtml); HtmlAgilityPack.HtmlDocument descDoc = new HtmlAgilityPack.HtmlDocument(); descDoc.LoadHtml(DescHtml); HtmlNode descDoccNode = descDoc.DocumentNode; HtmlNodeCollection imgList = descDoccNode.SelectNodes(".//img"); foreach (HtmlNode imgNode in imgList) { //Console.WriteLine(imgNode.Attributes["src"].Value); string src = imgNode.Attributes["src"].Value; if (src.StartsWith(@"//")) { src = src.Substring(2); } if (!src.StartsWith("http")) { src = "http://" + src; } this.ContentImageList.Add(src); } } if (script.Contains("var skuProducts=")) { //Console.WriteLine(script); string v = script; v = v.Substring(script.IndexOf("var skuProducts=")); v = v.Replace("var skuProducts=", ""); v = v.Substring(0, v.IndexOf("}}];") + 3); ProductItem[] ProductItems = JsonConvert.DeserializeObject <ProductItem[]>(v); foreach (ProductItem productItem in ProductItems) { //Console.WriteLine("----------------"); SkuItem skuItem = new SkuItem(); skuItem.SkuId = productItem.skuId.ToString(); skuItem.Price = Price; skuItem.Title = Title; skuItem.Propertys = new List <string>(); foreach (var propId in productItem.skuPropIds.Split(',')) { if (propId == null || propId.Length == 0) { continue; } //Console.WriteLine("============propId: " + propId); SkuPropertyValue spv = PropertyDict4Index[propId]; string prop = spv.Name != null ? spv.Name : spv.ImageUrl; //Console.WriteLine(prop); skuItem.Propertys.Add(prop); } ProductSkuItemList.Add(skuItem); } //v = v.Replace("\"", ""); //v = v.Replace("\n", ""); //v = v.Replace("\r", ""); //v = v.Replace("\t", ""); //v = v.Replace("[", ""); //v = v.Replace("]", ""); //this.CurrencyCode = v; //Console.WriteLine(JsonConvert.SerializeObject(ProductSkuItemList)); } } }
public void ParseHTML4NL() { HtmlNode h1Node = docNode.SelectNodes(".//title").First(); this.Title = h1Node.InnerText; try { this.Title = this.Title.Split('|').First(); this.Title = this.Title.Split('&').First(); } catch (Exception e) { Console.WriteLine(e.StackTrace); } HtmlNodeCollection scriptList = docNode.SelectNodes(".//script"); foreach (HtmlNode item in scriptList) { string script = item.InnerHtml; if (script.Contains("window.runParams")) { //Console.WriteLine(script); Console.WriteLine("==========================="); //Console.WriteLine(item.InnerHtml); string v = script; v = v.Substring(script.IndexOf("window.runParams = ")); v = v.Replace("window.runParams = ", ""); v = v.Substring(0, v.IndexOf("var GaData")); v = v.Replace("\n", ""); v = v.Replace("\r", ""); v = v.Replace(" ", ""); v = v.Replace("\t", ""); v = v.Substring(0, v.Length - 1); //this.Price = v; Console.WriteLine(v); NLParser nLParser = JsonConvert.DeserializeObject <NLParser>(v); // 大主图 foreach (var imagePath in nLParser.data.imageModule.imagePathList) { MainImageList.Add(imagePath); } // 下载小图 //foreach (var imagePath in nLParser.data.imageModule.summImagePathList) //{ // MainImageList.Add(imagePath); //} Skumodule skuModule = nLParser.data.skuModule; // 处理属性 if (skuModule.hasSkuProperty) { Productskupropertylist[] plist = skuModule.productSKUPropertyList; foreach (var productProperty in plist) { List <SkuPropertyValue> list = new List <SkuPropertyValue>(); Skupropertyvalue[] skuProps = productProperty.skuPropertyValues; foreach (var skuProp in skuProps) { SkuPropertyValue skuPropertyValue = new SkuPropertyValue(); skuPropertyValue.Id = skuProp.propertyValueId.ToString(); skuPropertyValue.Name = skuProp.propertyValueName; skuPropertyValue.ImageUrl = skuProp.skuPropertyImagePath; // 变体图 if (skuPropertyValue.ImageUrl != null && skuPropertyValue.ImageUrl.Length != 0) { //MainImageList.Add(skuProp.skuPropertyImagePath); string btImage = skuPropertyValue.ImageUrl.Replace("_50x50.jpg", ""); BiantiImageList.Add(btImage); Console.WriteLine("----------------------skuPropertyValue.ImageUrl: " + btImage); } skuPropertyValue.Prop = productProperty.skuPropertyName; list.Add(skuPropertyValue); PropertyDict4Index.Add(skuPropertyValue.Id, skuPropertyValue); } PropertyDict.Add(productProperty.skuPropertyName, list); Console.WriteLine("productProperty.skuPropertyName: " + productProperty.skuPropertyName); } } string descUrl = nLParser.data.descriptionModule.descriptionUrl; this.DescHtml = HttpUtils.HttpGet(descUrl); //Console.WriteLine(descHtml); HtmlAgilityPack.HtmlDocument descDoc = new HtmlAgilityPack.HtmlDocument(); descDoc.LoadHtml(DescHtml); HtmlNode descDoccNode = descDoc.DocumentNode; HtmlNodeCollection imgList = descDoccNode.SelectNodes(".//img"); foreach (HtmlNode imgNode in imgList) { //Console.WriteLine(imgNode.Attributes["src"].Value); string src = imgNode.Attributes["src"].Value; if (src.StartsWith(@"//")) { src = src.Substring(2); } if (!src.StartsWith("http")) { src = "http://" + src; } this.ContentImageList.Add(src); } Price = nLParser.data.priceModule.maxAmount.value.ToString(); CurrencyCode = nLParser.data.priceModule.maxAmount.currency; Skupricelist[] skupricelsit = nLParser.data.skuModule.skuPriceList; foreach (var skuprice in skupricelsit) { SkuItem skuItem = new SkuItem(); skuItem.SkuId = skuprice.skuId.ToString(); skuItem.Price = Price; skuItem.Title = Title; skuItem.Propertys = new List <string>(); foreach (var propId in skuprice.skuPropIds.Split(',')) { if (propId == null || propId.Length == 0) { continue; } //Console.WriteLine("============propId: " + propId); SkuPropertyValue spv = PropertyDict4Index[propId]; string prop = spv.ImageUrl != null ? spv.ImageUrl : spv.Name; //Console.WriteLine(prop); skuItem.Propertys.Add(prop); } ProductSkuItemList.Add(skuItem); } } } }