Example #1
0
        /// <summary>
        /// Downloads the item page for <paramref name="id"/> and collects the article number,
        /// the detail-description URL and three groups of image URLs into a <c>ShangPin</c>.
        /// </summary>
        /// <param name="id">Item id; appended to <c>baseItemUrl</c> to build the page URL.</param>
        /// <returns>
        /// The populated <c>ShangPin</c>, or <c>null</c> when the article number or the
        /// <c>descUrl</c> value cannot be located. In that case <paramref name="id"/> is
        /// appended to <c>err.txt</c>.
        /// </returns>
        static ShangPin getShangPin(string id)
        {
            var sp = new ShangPin();
            sp.Id = id;

            var html = getHtml(baseItemUrl + id);

            // Every parse failure goes through the same "log the id and give up" path
            // instead of surfacing as an ArgumentOutOfRangeException from Substring.
            var huoHao = extractHuoHao(html);
            var descUrl = huoHao == null ? null : extractDescUrl(html);
            if (descUrl == null)
            {
                File.AppendAllText("err.txt", id + Environment.NewLine);
                return null;
            }

            sp.HuoHao = huoHao;
            sp.DetailJsonUrl = "https:" + descUrl;

            CQ doc = html;

            // Title images: every <img> below #J_UlThumb.
            var i = 1;
            foreach (var st in doc["#J_UlThumb img"].ToList())
            {
                var rawSrc = st.GetAttribute("src");
                if (string.IsNullOrEmpty(rawSrc))
                {
                    // Nothing to record for an <img> without a src value.
                    continue;
                }
                var src = cutAtLastUnderscore("https:" + rawSrc);
                sp.TiTu.Add("题图" + i, src);
                Console.WriteLine("采集到题图:{0}", src);
                i += 1;
            }

            // Colour images: the URL is taken from between the first '(' and the next ')'
            // of each swatch anchor's inline style.
            i = 1;
            foreach (var st in doc[".tb-sku .J_TSaleProp a"].ToList())
            {
                var style = st.GetAttribute("style");
                if (string.IsNullOrEmpty(style))
                {
                    continue;
                }
                var open = style.IndexOf('(');
                var close = open < 0 ? -1 : style.IndexOf(')', open + 1);
                if (close < 0)
                {
                    // Style carries no parenthesised value, so there is no URL to extract.
                    continue;
                }
                // NOTE(review): this group is prefixed with "http:" while the other groups use
                // "https:"; kept as-is because consumers of SeTu are not visible here.
                var url = cutAtLastUnderscore("http:" + style.Substring(open + 1, close - open - 1));
                sp.SeTu.Add(st.InnerText.Trim() + i, url);
                Console.WriteLine("采集到颜色图:{0}", url);
                i += 1;
            }

            // Detail images: Regex.Split inserts the captured imgUrl group between the
            // surrounding text fragments, so the URLs are picked out by their "http" prefix.
            var neirongJsonStr = getHtml(sp.DetailJsonUrl);
            var neirongArr = Regex.Split(neirongJsonStr, @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

            i = 1;
            foreach (var nrt in neirongArr)
            {
                if (!nrt.StartsWith("http", StringComparison.Ordinal) || nrt.EndsWith("spaceball.gif", StringComparison.Ordinal))
                {
                    continue;
                }
                sp.NeiRongTu.Add("内容" + i, nrt);
                Console.WriteLine("采集到内容图:{0}", nrt);
                i += 1;
            }
            return sp;
        }

        /// <summary>
        /// Finds the article number that follows the first of the labels 货号 / 款号 / 型号
        /// in <paramref name="html"/>; returns <c>null</c> when it cannot be extracted.
        /// </summary>
        private static string extractHuoHao(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return null;
            }

            // As in the original lookup, a hit at index 0 counts as "not found".
            var indexHH = -1;
            foreach (var label in new[] { "货号", "款号", "型号" })
            {
                indexHH = html.IndexOf(label, StringComparison.Ordinal);
                if (indexHH >= 1)
                {
                    break;
                }
            }
            if (indexHH < 1)
            {
                return null;
            }

            string window;
            char terminator;
            if (indexHH >= 7 && string.CompareOrdinal(html, indexHH - 7, "name", 0, 4) == 0)
            {
                // "name" starts 7 chars before the label: the value begins 13 chars after the
                // label start and runs to the next double quote.
                // Presumably the markup is name":"<label>","value":"<value>" - confirm against a live page.
                var start = indexHH + 13;
                if (start >= html.Length)
                {
                    return null;
                }
                window = html.Substring(start, Math.Min(60, html.Length - start));
                terminator = '"';
            }
            else
            {
                // Plain-text form: drop "&nbsp;", skip the first 3 chars (the 2-char label plus
                // one following char) and read up to the next '<'.
                window = html.Substring(indexHH, Math.Min(60, html.Length - indexHH)).Replace("&nbsp;", "");
                if (window.Length < 3)
                {
                    return null;
                }
                window = window.Substring(3);
                terminator = '<';
            }

            var end = window.IndexOf(terminator);
            return end < 0 ? null : window.Substring(0, end);
        }

        /// <summary>
        /// Returns the text that starts 10 chars after the first "descUrl" in
        /// <paramref name="html"/> and ends before the next double quote, or <c>null</c>
        /// when either marker is missing.
        /// </summary>
        private static string extractDescUrl(string html)
        {
            var marker = html.IndexOf("descUrl", StringComparison.Ordinal);
            if (marker < 0)
            {
                return null;
            }
            var start = marker + 10;
            if (start > html.Length)
            {
                return null;
            }
            var end = html.IndexOf('"', start);
            return end < 0 ? null : html.Substring(start, end - start);
        }

        /// <summary>
        /// Removes everything from the last '_' onwards; returns <paramref name="url"/>
        /// unchanged when it contains no underscore.
        /// </summary>
        private static string cutAtLastUnderscore(string url)
        {
            var cut = url.LastIndexOf('_');
            return cut < 0 ? url : url.Substring(0, cut);
        }
Example #2
0
        /// <summary>
        /// Downloads the item page for <paramref name="id"/> and fills a <c>ShangPin</c> with
        /// the article number, the detail-description URL, and the title, colour and detail
        /// image URLs found on it.
        /// </summary>
        /// <param name="id">Item id; concatenated onto <c>baseItemUrl</c> to form the page URL.</param>
        /// <returns>
        /// The populated <c>ShangPin</c>. Returns <c>null</c>, after appending
        /// <paramref name="id"/> to <c>err.txt</c>, when the page is empty or when the article
        /// number or <c>descUrl</c> value cannot be extracted.
        /// </returns>
        static ShangPin getShangPin(string id)
        {
            var sp = new ShangPin();
            sp.Id = id;
            var html = getHtml(baseItemUrl + id);

            string huoHao = null;
            string descUrl = null;
            if (!string.IsNullOrEmpty(html))
            {
                // Try the three labels in order; a hit at index 0 still counts as a miss,
                // exactly as the original "< 1" checks did.
                var indexHH = html.IndexOf("货号", StringComparison.Ordinal);
                if (indexHH < 1)
                {
                    indexHH = html.IndexOf("款号", StringComparison.Ordinal);
                }
                if (indexHH < 1)
                {
                    indexHH = html.IndexOf("型号", StringComparison.Ordinal);
                }

                if (indexHH >= 1)
                {
                    // When "name" starts 7 chars before the label, the value begins 13 chars after
                    // the label start and ends at the next double quote. Otherwise the value
                    // follows the label as text and ends at the next '<'.
                    var quoted = indexHH >= 7 && string.CompareOrdinal(html, indexHH - 7, "name", 0, 4) == 0;
                    var start = quoted ? indexHH + 13 : indexHH;
                    if (start < html.Length)
                    {
                        // Clamp the 60-char look-ahead so a label near the end of the page
                        // cannot push Substring out of range.
                        var window = html.Substring(start, Math.Min(60, html.Length - start));
                        if (!quoted)
                        {
                            window = window.Replace("&nbsp;", "");
                            // Skip the 2-char label plus the one char after it.
                            window = window.Length >= 3 ? window.Substring(3) : string.Empty;
                        }
                        var end = window.IndexOf(quoted ? '"' : '<');
                        if (end >= 0)
                        {
                            huoHao = window.Substring(0, end);
                        }
                    }
                }

                // The URL starts 10 chars after "descUrl" and ends before the next double quote.
                // A missing marker used to go unnoticed, because -1 + 10 is a valid index.
                var descIndex = html.IndexOf("descUrl", StringComparison.Ordinal);
                if (descIndex >= 0 && descIndex + 10 <= html.Length)
                {
                    var urlStart = descIndex + 10;
                    var urlEnd = html.IndexOf('"', urlStart);
                    if (urlEnd >= 0)
                    {
                        descUrl = html.Substring(urlStart, urlEnd - urlStart);
                    }
                }
            }

            if (huoHao == null || descUrl == null)
            {
                File.AppendAllText("err.txt", id + Environment.NewLine);
                return null;
            }

            sp.HuoHao = huoHao;
            sp.DetailJsonUrl = "https:" + descUrl;

            CQ doc = html;
            var shoutulist = doc["#J_UlThumb img"].ToList();
            var i = 1;
            foreach (var st in shoutulist)
            {
                var rawSrc = st.GetAttribute("src");
                if (string.IsNullOrEmpty(rawSrc))
                {
                    // An <img> without a src value has no URL to record.
                    continue;
                }
                var src = "https:" + rawSrc;
                // Drop everything from the last '_' onwards; keep the URL whole if there is none.
                var srcCut = src.LastIndexOf('_');
                if (srcCut >= 0)
                {
                    src = src.Substring(0, srcCut);
                }
                sp.TiTu.Add("题图" + i, src);
                Console.WriteLine("采集到题图:{0}", src);
                i += 1;
            }

            var setuList = doc[".tb-sku .J_TSaleProp a"].ToList();
            i = 1;
            foreach (var st in setuList)
            {
                var style = st.GetAttribute("style");
                if (string.IsNullOrEmpty(style))
                {
                    continue;
                }
                // The image URL is whatever sits between the first '(' and the next ')'.
                var open = style.IndexOf('(');
                var close = open < 0 ? -1 : style.IndexOf(')', open + 1);
                if (close < 0)
                {
                    continue;
                }
                // NOTE(review): "http:" here versus "https:" for the other image groups; left
                // unchanged because the consumers of SeTu are not visible from this method.
                style = "http:" + style.Substring(open + 1, close - open - 1);
                var styleCut = style.LastIndexOf('_');
                if (styleCut >= 0)
                {
                    style = style.Substring(0, styleCut);
                }
                sp.SeTu.Add(st.InnerText.Trim() + i, style);
                Console.WriteLine("采集到颜色图:{0}", style);
                i += 1;
            }

            // Regex.Split places the captured imgUrl group between the text fragments it
            // returns, so the image URLs are recognised below by their "http" prefix.
            var neirongJsonStr = getHtml(sp.DetailJsonUrl);
            var neirongArr = Regex.Split(neirongJsonStr, @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

            i = 1;
            foreach (var nrt in neirongArr)
            {
                if (!nrt.StartsWith("http", StringComparison.Ordinal) || nrt.EndsWith("spaceball.gif", StringComparison.Ordinal))
                {
                    continue;
                }
                sp.NeiRongTu.Add("内容" + i, nrt);
                Console.WriteLine("采集到内容图:{0}", nrt);
                i += 1;
            }
            return sp;
        }