/// <summary>
/// Downloads the item page for <paramref name="id"/> and scrapes it into a <see cref="ShangPin"/>:
/// the article number (<c>HuoHao</c>), the detail-description URL (<c>DetailJsonUrl</c>),
/// the thumbnail images (<c>TiTu</c>), the colour swatch images (<c>SeTu</c>) and the images
/// referenced by the detail description (<c>NeiRongTu</c>).
/// </summary>
/// <param name="id">Item id; it is appended to <c>baseItemUrl</c> to build the page URL.</param>
/// <returns>
/// The populated item, or <c>null</c> when the article number or the description URL cannot be
/// extracted from the page. In that case <paramref name="id"/> is appended to <c>err.txt</c>.
/// </returns>
static ShangPin getShangPin(string id)
{
    // Leftover debugging override from the original code: id = "522670612044";

    // The article number is only searched for within this many characters of its anchor.
    const int ValueWindow = 60;

    var sp = new ShangPin { Id = id };
    var html = getHtml(baseItemUrl + id);

    // The article number is announced by one of three labels; the first one present wins.
    // Ordinal search is used because the string overload of IndexOf is culture-sensitive by default.
    var labelIndex = html.IndexOf("货号", StringComparison.Ordinal);
    if (labelIndex < 0)
    {
        labelIndex = html.IndexOf("款号", StringComparison.Ordinal);
    }
    if (labelIndex < 0)
    {
        labelIndex = html.IndexOf("型号", StringComparison.Ordinal);
    }

    string huoHao = null;
    if (labelIndex >= 0)
    {
        // When the text "name" sits 7 characters before the label, the value starts 13 characters
        // after the label and ends at the next double quote.
        // NOTE(review): presumably a JSON fragment shaped like "name":"<label>","value":"..." — confirm.
        var quotedForm = labelIndex >= 7
            && string.CompareOrdinal(html, labelIndex - 7, "name", 0, 4) == 0;
        var windowStart = quotedForm ? labelIndex + 13 : labelIndex;

        if (windowStart <= html.Length)
        {
            // Clamp the window so a label close to the end of the page cannot throw.
            var window = html.Substring(windowStart, Math.Min(ValueWindow, html.Length - windowStart));
            if (quotedForm)
            {
                var closingQuote = window.IndexOf('"');
                if (closingQuote >= 0)
                {
                    huoHao = window.Substring(0, closingQuote);
                }
            }
            else
            {
                // Plain markup form: drop spaces, skip the label plus one separator character
                // (3 characters in total) and read up to the next tag.
                var compact = window.Replace(" ", "");
                var tagStart = compact.Length >= 3 ? compact.IndexOf('<', 3) : -1;
                if (tagStart >= 0)
                {
                    huoHao = compact.Substring(3, tagStart - 3);
                }
            }
        }
    }

    // The description URL starts 10 characters after the start of "descUrl" and ends at the next
    // double quote. It is stored with an "https:" prefix.
    string detailUrl = null;
    var descIndex = html.IndexOf("descUrl", StringComparison.Ordinal);
    if (descIndex >= 0 && descIndex + 10 <= html.Length)
    {
        var urlStart = descIndex + 10;
        var urlEnd = html.IndexOf('"', urlStart);
        if (urlEnd >= 0)
        {
            detailUrl = "https:" + html.Substring(urlStart, urlEnd - urlStart);
        }
    }

    if (huoHao == null || detailUrl == null)
    {
        // Record the id of every page that could not be parsed so it can be inspected later.
        File.AppendAllText("err.txt", id + Environment.NewLine);
        return null;
    }

    sp.HuoHao = huoHao;
    sp.DetailJsonUrl = detailUrl;

    CQ doc = html;

    // Thumbnails: everything from the last underscore on is cut off the URL.
    var thumbNo = 1;
    foreach (var img in doc["#J_UlThumb img"].ToList())
    {
        var rawSrc = img.GetAttribute("src");
        if (string.IsNullOrEmpty(rawSrc))
        {
            // Same policy as the swatch loop below: nothing to collect from this element.
            continue;
        }

        var src = "https:" + rawSrc;
        var suffixStart = src.LastIndexOf('_');
        if (suffixStart >= 0)
        {
            src = src.Substring(0, suffixStart);
        }

        sp.TiTu.Add("题图" + thumbNo, src);
        Console.WriteLine("采集到题图:{0}", src);
        thumbNo += 1;
    }

    // Colour swatches: the image URL is the text between the first "(" and the following ")"
    // of the anchor's inline style.
    var swatchNo = 1;
    foreach (var anchor in doc[".tb-sku .J_TSaleProp a"].ToList())
    {
        var style = anchor.GetAttribute("style");
        if (string.IsNullOrEmpty(style))
        {
            continue;
        }

        var open = style.IndexOf('(');
        var close = open < 0 ? -1 : style.IndexOf(')', open + 1);
        if (close < 0)
        {
            // The style carries no "(...)" part, so there is no image to collect.
            continue;
        }

        // NOTE(review): swatches use "http:" while every other URL here uses "https:" — kept as-is; confirm intent.
        var url = "http:" + style.Substring(open + 1, close - open - 1);
        var suffixStart = url.LastIndexOf('_');
        if (suffixStart >= 0)
        {
            url = url.Substring(0, suffixStart);
        }

        // The running number keeps keys unique when two swatches share the same text.
        sp.SeTu.Add(anchor.InnerText.Trim() + swatchNo, url);
        Console.WriteLine("采集到颜色图:{0}", url);
        swatchNo += 1;
    }

    // Detail images: read the src of every <img> tag in the description document.
    // Regex.Matches yields only the captured URLs; Regex.Split would also return the text between
    // tags, which could be mistaken for an image when it happens to start with "http".
    var detailSource = getHtml(sp.DetailJsonUrl);
    var imgTags = Regex.Matches(
        detailSource,
        @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
        RegexOptions.IgnoreCase);
    var detailNo = 1;
    foreach (Match imgTag in imgTags)
    {
        var url = imgTag.Groups["imgUrl"].Value;
        if (!url.StartsWith("http", StringComparison.Ordinal)
            || url.EndsWith("spaceball.gif", StringComparison.Ordinal))
        {
            continue;
        }

        sp.NeiRongTu.Add("内容" + detailNo, url);
        Console.WriteLine("采集到内容图:{0}", url);
        detailNo += 1;
    }

    return sp;
}
/// <summary>
/// Downloads the item page for <paramref name="id"/> and scrapes it into a <see cref="ShangPin"/>:
/// the article number (<c>HuoHao</c>), the detail-description URL (<c>DetailJsonUrl</c>),
/// the thumbnail images (<c>TiTu</c>), the colour swatch images (<c>SeTu</c>) and the images
/// referenced by the detail description (<c>NeiRongTu</c>).
/// </summary>
/// <param name="id">Item id; it is appended to <c>baseItemUrl</c> to build the page URL.</param>
/// <returns>
/// The populated item, or <c>null</c> when the article number or the description URL cannot be
/// extracted from the page. In that case <paramref name="id"/> is appended to <c>err.txt</c>.
/// </returns>
static ShangPin getShangPin(string id)
{
    // Leftover debugging override from the original code: id = "522670612044";

    // The article number is only searched for within this many characters of its anchor.
    const int ValueWindow = 60;

    var sp = new ShangPin { Id = id };
    var html = getHtml(baseItemUrl + id);

    // The article number is announced by one of three labels; the first one present wins.
    // Ordinal search is used because the string overload of IndexOf is culture-sensitive by default.
    var labelIndex = html.IndexOf("货号", StringComparison.Ordinal);
    if (labelIndex < 0)
    {
        labelIndex = html.IndexOf("款号", StringComparison.Ordinal);
    }
    if (labelIndex < 0)
    {
        labelIndex = html.IndexOf("型号", StringComparison.Ordinal);
    }

    string huoHao = null;
    if (labelIndex >= 0)
    {
        // When the text "name" sits 7 characters before the label, the value starts 13 characters
        // after the label and ends at the next double quote.
        // NOTE(review): presumably a JSON fragment shaped like "name":"<label>","value":"..." — confirm.
        var quotedForm = labelIndex >= 7
            && string.CompareOrdinal(html, labelIndex - 7, "name", 0, 4) == 0;
        var windowStart = quotedForm ? labelIndex + 13 : labelIndex;

        if (windowStart <= html.Length)
        {
            // Clamp the window so a label close to the end of the page cannot throw.
            var window = html.Substring(windowStart, Math.Min(ValueWindow, html.Length - windowStart));
            if (quotedForm)
            {
                var closingQuote = window.IndexOf('"');
                if (closingQuote >= 0)
                {
                    huoHao = window.Substring(0, closingQuote);
                }
            }
            else
            {
                // Plain markup form: drop spaces, skip the label plus one separator character
                // (3 characters in total) and read up to the next tag.
                var compact = window.Replace(" ", "");
                var tagStart = compact.Length >= 3 ? compact.IndexOf('<', 3) : -1;
                if (tagStart >= 0)
                {
                    huoHao = compact.Substring(3, tagStart - 3);
                }
            }
        }
    }

    // The description URL starts 10 characters after the start of "descUrl" and ends at the next
    // double quote. It is stored with an "https:" prefix.
    string detailUrl = null;
    var descIndex = html.IndexOf("descUrl", StringComparison.Ordinal);
    if (descIndex >= 0 && descIndex + 10 <= html.Length)
    {
        var urlStart = descIndex + 10;
        var urlEnd = html.IndexOf('"', urlStart);
        if (urlEnd >= 0)
        {
            detailUrl = "https:" + html.Substring(urlStart, urlEnd - urlStart);
        }
    }

    if (huoHao == null || detailUrl == null)
    {
        // Record the id of every page that could not be parsed so it can be inspected later.
        File.AppendAllText("err.txt", id + Environment.NewLine);
        return null;
    }

    sp.HuoHao = huoHao;
    sp.DetailJsonUrl = detailUrl;

    CQ doc = html;

    // Thumbnails: everything from the last underscore on is cut off the URL.
    var thumbNo = 1;
    foreach (var img in doc["#J_UlThumb img"].ToList())
    {
        var rawSrc = img.GetAttribute("src");
        if (string.IsNullOrEmpty(rawSrc))
        {
            // Same policy as the swatch loop below: nothing to collect from this element.
            continue;
        }

        var src = "https:" + rawSrc;
        var suffixStart = src.LastIndexOf('_');
        if (suffixStart >= 0)
        {
            src = src.Substring(0, suffixStart);
        }

        sp.TiTu.Add("题图" + thumbNo, src);
        Console.WriteLine("采集到题图:{0}", src);
        thumbNo += 1;
    }

    // Colour swatches: the image URL is the text between the first "(" and the following ")"
    // of the anchor's inline style.
    var swatchNo = 1;
    foreach (var anchor in doc[".tb-sku .J_TSaleProp a"].ToList())
    {
        var style = anchor.GetAttribute("style");
        if (string.IsNullOrEmpty(style))
        {
            continue;
        }

        var open = style.IndexOf('(');
        var close = open < 0 ? -1 : style.IndexOf(')', open + 1);
        if (close < 0)
        {
            // The style carries no "(...)" part, so there is no image to collect.
            continue;
        }

        // NOTE(review): swatches use "http:" while every other URL here uses "https:" — kept as-is; confirm intent.
        var url = "http:" + style.Substring(open + 1, close - open - 1);
        var suffixStart = url.LastIndexOf('_');
        if (suffixStart >= 0)
        {
            url = url.Substring(0, suffixStart);
        }

        // The running number keeps keys unique when two swatches share the same text.
        sp.SeTu.Add(anchor.InnerText.Trim() + swatchNo, url);
        Console.WriteLine("采集到颜色图:{0}", url);
        swatchNo += 1;
    }

    // Detail images: read the src of every <img> tag in the description document.
    // Regex.Matches yields only the captured URLs; Regex.Split would also return the text between
    // tags, which could be mistaken for an image when it happens to start with "http".
    var detailSource = getHtml(sp.DetailJsonUrl);
    var imgTags = Regex.Matches(
        detailSource,
        @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>",
        RegexOptions.IgnoreCase);
    var detailNo = 1;
    foreach (Match imgTag in imgTags)
    {
        var url = imgTag.Groups["imgUrl"].Value;
        if (!url.StartsWith("http", StringComparison.Ordinal)
            || url.EndsWith("spaceball.gif", StringComparison.Ordinal))
        {
            continue;
        }

        sp.NeiRongTu.Add("内容" + detailNo, url);
        Console.WriteLine("采集到内容图:{0}", url);
        detailNo += 1;
    }

    return sp;
}