Example #1
0
        /// <summary>
        /// Downloads a listing page and collects the distinct link matches found in its image cells.
        /// </summary>
        /// <param name="url">Address of the listing page to download.</param>
        /// <returns>
        /// The distinct results of applying the anchor pattern to every image cell of the listing
        /// (presumably the href capture - confirm against RegexHelper.Match); an empty list when
        /// the page could not be downloaded or no listing container was found.
        /// </returns>
        private static IEnumerable <string> GetUrlsFromHtml(string url)
        {
            string docHtml = HtmlCls.GetHtmlByUrl(url, SiteEncoding);
            if (string.IsNullOrEmpty(docHtml))
            {
                return new List <string>();
            }

            docHtml = RegexHelper.ClearTrn(docHtml);

            // Default layout: container with id "plist" whose image cells use the "p-img" class.
            var cssName  = "p-img";
            var listHtml = HtmlCls.GetHtmlById(docHtml, "plist");
            if (listHtml.IsNullOrEmpty())
            {
                // Some pages (the original author mentions certain book listings) use a different
                // layout: a "list-h" container whose image cells use the "i-img" class.
                cssName  = "i-img";
                listHtml = HtmlCls.GetHtmlByCss(docHtml, "list-h").FirstOrDefault();
            }

            // FirstOrDefault() yields null when neither layout is present; do not hand that
            // to GetHtmlByCss, just report "no links".
            if (listHtml.IsNullOrEmpty())
            {
                return new List <string>();
            }

            return HtmlCls.GetHtmlByCss(listHtml, cssName)
                   .Select(t => RegexHelper.Match(t, "<a[^>]*href=[\"']?([^\"'>#]+)(#[^\"'>]*)?[\"']?[^>]*>"))
                   .Distinct()
                   .ToList();
        }
Example #2
0
        /// <summary>
        /// Downloads a Tmall listing page and extracts url, title and price for every product on it.
        /// </summary>
        /// <param name="url">Address of the listing page to download.</param>
        /// <param name="next">
        /// Receives the URL-decoded match for the "next page" anchor, or an empty string when the
        /// page could not be downloaded.
        /// </param>
        /// <returns>One entry per product block found; empty when the page could not be downloaded.</returns>
        private static IEnumerable <TamllBase> GetTUrlsFromHtml(string url, out string next)
        {
            next = "";
            var urls = new List <TamllBase>();

            // Tmall renders a different front end depending on the cookie that is sent.
            // NOTE(review): this is a captured session cookie hard-coded in source - consider moving it to configuration.
            const string cookie =
                "x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=qMo3B45XYmoCAct2enaIrZoT; t=9bfd6b376a1f1e450056f0e1b1c54240; tracknick=luoyong87610; mpp=t%3D0%26m%3D%26h%3D0%26l%3D0; uc1=x; cookie2=22291aea11e397a82512118642ac0abe; passtime=1341285069752; isFirstOpen=true";
            string docHtml = HtmlCls.GetHtmlByUrl(url, Encoding.Default, cookie);

            if (string.IsNullOrEmpty(docHtml))
            {
                return(urls);
            }

            docHtml = RegexHelper.ClearBr(docHtml);
            next    =
                Utils.UrlDecode(RegexHelper.Match(docHtml,
                                                  "<a[^>]*href=['\"]([^'\"\\s]+)['\"][^>]*class=['\"]ui-page-s-next['\"][^>]*>"));
            var listHtml = HtmlCls.GetHtmlById(docHtml, "J_itemList");
            var list     = HtmlCls.GetHtmlByCss(listHtml, "product");

            // Group 1: url, group 2: name.
            const string regStr =
                "<a[^>]*href=['\"]([^'\"\\s]+?)['\"][^>]*class=['\"]product-title['\"][^>]*title=['\"]([^'\"]+?)['\"][^>]*>";
            // Price.
            const string priceReg =
                "<span[^>]*class=['\"]product-normal['\"][^>]*title=['\"]([^'\"\\s]+)['\"][^>]*>";

            urls.AddRange(list.Select(item =>
            {
                // A product block without a parsable price must not abort the whole page
                // (Convert.ToDecimal throws FormatException on an empty match); fall back to 0.
                decimal price;
                if (!decimal.TryParse(RegexHelper.Match(item, priceReg), out price))
                {
                    price = 0m;
                }

                return new TamllBase
                {
                    Url   = Utils.UrlDecode(RegexHelper.Match(item, regStr, 1)),
                    Title = RegexHelper.Match(item, regStr, 2),
                    Price = price
                };
            }));
            return(urls);
        }
Example #3
0
        /// <summary>
        /// Builds the HTML for a product's large pictures.
        /// </summary>
        /// <param name="jdNum">Product id substituted into the big-image page address.</param>
        /// <returns>
        /// A table with one row per picture found, or an empty string when the page could not be
        /// downloaded or contained no pictures. Exceptions are logged through FileHelper and not rethrown.
        /// </returns>
        public static string GetProBigPics(string jdNum)
        {
            string picArea = "";

            try
            {
                const string bigUrl = BaseUrl + "/bigimage.aspx?id={0}";
                string picHtml = HtmlCls.GetHtmlByUrl(String.Format(bigUrl, jdNum));
                if (string.IsNullOrEmpty(picHtml))
                {
                    return picArea;
                }

                // Thumbnails live under ".../n5<suffix>"; the same suffix under ".../n0" is used for the large image.
                string rightPane = HtmlCls.GetHtmlByCss(picHtml, "right").FirstOrDefault();
                var suffixes = RegexHelper.Matches(rightPane, "http://img10.360buyimg.com/n5([^'\"]*)");
                if (suffixes.Any())
                {
                    picArea =
                        "<table width=\"750\" align=\"center\" border=\"0\" cellSpacing=\"0\" cellPadding=\"0\">";
                    foreach (var suffix in suffixes)
                    {
                        picArea += "<tr><td><img src=\"http://img10.360buyimg.com/n0" + suffix +
                                   "\" /></td></tr>";
                    }
                    picArea += "</table>";
                }
            }
            catch (Exception ex)
            {
                FileHelper.WriteException(ex);
            }
            return picArea;
        }
Example #4
0
        /// <summary>
        /// Extracts and cleans the product description from a product page.
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <param name="sanId">Product id used to request the product's thumbnail markup.</param>
        /// <returns>
        /// The concatenated "detailBox" fragments with the image markup inserted, anchors unwrapped,
        /// scripts and class/style attributes removed, root-relative image paths prefixed with
        /// SanfoUrl and the shop name replaced; an empty string when no "detailBox" fragment exists.
        /// </returns>
        public static string GetProDesc(string docHtml, string sanId)
        {
            var area = HtmlCls.GetHtmlByCss(docHtml, "detailBox");
            if (area == null || !area.Any())
            {
                return "";
            }

            var desc = string.Concat(area);

            // The images are served by a separate request.
            const string imgUrl = "http://www.sanfo.com/shop/product.info.asp?command=findthumb&vid={0}";
            var imgs = HtmlCls.GetHtmlByUrl(String.Format(imgUrl, sanId), Encoding.UTF8);

            // Plain string replacement on purpose: the downloaded markup must not be interpreted
            // as a regex replacement pattern (a "$1" or "$&" inside it would be expanded).
            desc = desc.Replace("<dt class=\"detailImg\" id=\"item_product_images\"></dt>",
                                "<dt class=\"detailImg\" id=\"item_product_images\">" + imgs + "</dt>");

            // Unwrap anchors, keeping their inner content.
            // (The original pattern read "<a[^]*href=", a typo that produced a malformed character class.)
            desc = Regex.Replace(desc, "<a[^>]*href=[\"'][^'\"]*[\"'][^>]*>(.*?)</a>", "$1");
            // Remove script elements, including bodies that contain '<' or line breaks.
            desc = Regex.Replace(desc, "<script[^>]*>[\\s\\S]*?</script>", "");
            // Strip class and style attributes.
            desc = Regex.Replace(desc, "(\\s*class=\"[^\"]+\")|(\\s*style=\"[^\"]+\")", "");

            // Turn root-relative src paths into absolute ones.
            desc = Regex.Replace(desc, "src=\"(/[^\"]+)\"", "src=\"" + SanfoUrl + "$1\"");

            // Replace the source shop's name with a generic one.
            desc = Regex.Replace(desc, "(三夫(户外?)?)", "本商城");

            return desc;
        }
Example #5
0
        /// <summary>
        /// Looks up a product's page link from its product number via the site search
        /// (the cooperation mode changed and category ids are no longer provided, so search is the only way).
        /// </summary>
        /// <param name="num">Product number used as the search keyword.</param>
        /// <returns>
        /// The result of Utils.GetAbsoluteUrl applied to RedBabyUrl and the first product link
        /// matched on the search result page.
        /// </returns>
        public static string GetProUrlFromNum(string num)
        {
            // Escape the keyword so characters such as '&', '#' or spaces cannot break the query string.
            string       searchUrl = RedBabyUrl + "/search?keyword=" + Uri.EscapeDataString(num ?? "");
            string       html      = HtmlCls.GetHtmlByUrl(searchUrl, Encoding.UTF8);
            const string proReg    =
                "<div[^>]*class=\"globalProductName\"[^>]*>\\s*<a[^>]*href=['|\"]([^'\"]*)['|\"][^>]*>";

            return(Utils.GetAbsoluteUrl(RedBabyUrl, RegexHelper.Match(html, proReg)));
        }
Example #6
0
        /// <summary>
        /// Downloads a listing page and returns the product links found in its item list.
        /// </summary>
        /// <param name="url">Address of the listing page to download.</param>
        /// <returns>
        /// The matched links, with SanfoUrl prepended to those starting with "/";
        /// an empty list when the page could not be downloaded.
        /// </returns>
        private static IEnumerable <string> GetUrlsFromHtml(string url)
        {
            string pageHtml = HtmlCls.GetHtmlByUrl(url);
            if (string.IsNullOrEmpty(pageHtml))
            {
                return new List <string>();
            }

            const string linkPattern = "<div[^>]*class=['\"]proPic['\"][^>]*><a[^>]*href=['\"]([^'\"]+)['\"][^>]*>";
            var itemList = HtmlCls.GetHtmlById(pageHtml, "Id_prodItemList");
            var rawLinks = RegexHelper.Matches(itemList, linkPattern);

            // Root-relative links are made absolute; everything else is passed through untouched.
            return (from link in rawLinks
                    select link.StartsWith("/") ? SanfoUrl + link : link).ToList();
        }
Example #7
0
        /// <summary>
        /// Gets the stock code for the Chengdu area.
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <returns>-1: unknown; 0: out of stock; 1: in stock.</returns>
        public static int GetCdStockCode(string docHtml)
        {
            int code = -1;

            try
            {
                docHtml = RegexHelper.ClearBr(docHtml);
                string stockUrl = RegexHelper.Match(docHtml,
                                                    "(http://price.360buy.com/ows/stock/pshow-[a-zA-Z0-9]*.html)");

                string requestUrl;
                string statePattern;
                if (!string.IsNullOrEmpty(stockUrl))
                {
                    // The page source carries a stock link, e.g.
                    // <script type="text/javascript" src="http://price.360buy.com/ows/stock/pshow-0F5D9F92C79383CED35C7903D3927987.html"></script>
                    // whose content looks like:
                    // var stockdata = [{"Wid":183192,"Rid":6,"Stock":34,"Days":0,"Purchase":0,"IsPop":false},{"Wid":183192,"Rid":3,"Stock":33,"Days":0,"Purchase":0,"IsPop":false},{"Wid":183192,"Rid":10,"Stock":34,"Days":0,"Purchase":0,"IsPop":false},{"Wid":183192,"Rid":4,"Stock":33,"Days":0,"Purchase":0,"IsPop":false}];
                    // The site's JS maps warehouses as: 6 Beijing, 3 Shanghai, 10 Guangzhou, 4 Chengdu, 5 Wuhan, 7 Nanjing, 8 Jinan, 9 Shenyang,
                    // and stock states as: 33 in stock, 34 out of stock, 36 pre-order, 39 in transit, 0 being counted.
                    requestUrl   = stockUrl;
                    statePattern = "\"Rid\":4,\"Stock\":([^,]+),"; // Chengdu warehouse only
                }
                else
                {
                    string skUid = RegexHelper.Match(docHtml, "wareinfo.*sid[^\"]*\"([0-9a-zA-Z]*)\"");

                    // Province-level stock query. (A city-level variant with type=pcstock and
                    // cityid=1930 existed in the original as commented-out code.)
                    requestUrl =
                        "http://price.360buy.com/stocksoa/StockHandler.ashx?callback=getProvinceStockCallback&type=provincestock&skuid=" +
                        skUid + "&provinceid=22";
                    statePattern = "\"StockState\":(\\w+),";
                }

                string stockHtml = HtmlCls.GetHtmlByUrl(requestUrl, SiteEncoding);
                if (!string.IsNullOrEmpty(stockHtml))
                {
                    // Only state "33" counts as available; anything else (including no match) is reported as 0.
                    code = RegexHelper.Match(stockHtml, statePattern) == "33" ? 1 : 0;
                }
            }
            catch (Exception ex)
            {
                FileHelper.WriteException(ex);
            }
            return code;
        }
Example #8
0
        /// <summary>
        /// Fetches a product's description, which the product page references through an
        /// "apiItemDesc" address instead of embedding it.
        /// </summary>
        /// <param name="link">Address of the product page.</param>
        /// <returns>
        /// The description payload with the leading "var desc='" wrapper and trailing single quotes
        /// removed; an empty string when either download fails or no description address is found.
        /// </returns>
        public static string GetProDesc(string link)
        {
            var docHtml = HtmlCls.GetHtmlByUrl(link);
            if (string.IsNullOrEmpty(docHtml))
            {
                return "";
            }

            var desurl = RegexHelper.Match(docHtml, "\"apiItemDesc\":\"([^\"]+?)\"");
            if (string.IsNullOrEmpty(desurl))
            {
                return "";
            }

            // The address is embedded in a script literal with escaped slashes; drop the backslashes.
            desurl = desurl.Replace("\\", "");

            var desc = HtmlCls.GetHtmlByUrl(desurl);
            if (string.IsNullOrEmpty(desc))
            {
                // A failed download must not blow up on desc.Replace below.
                return "";
            }

            return desc.Replace("var desc='", "").TrimEnd('\'');
        }
Example #9
0
        /// <summary>
        /// Collects product links from a listing page, following the "p&lt;number&gt;" page suffix
        /// until enough links have been gathered.
        /// </summary>
        /// <param name="listUrl">Address of the listing page to start from.</param>
        /// <param name="deepth">Scan depth: the maximum number of links to return.</param>
        /// <returns>
        /// At most <paramref name="deepth"/> links; empty when the address is empty or the first page
        /// cannot be downloaded. Exceptions are logged through FileHelper and not rethrown.
        /// </returns>
        public static IEnumerable <string> GetUrlListFromList(string listUrl, int deepth)
        {
            var listArea = new List <string>();

            if (string.IsNullOrEmpty(listUrl))
            {
                return(listArea);
            }

            try
            {
                listUrl = listUrl.TrimEnd('/');
                var docHtml = HtmlCls.GetHtmlByUrl(listUrl, Encoding.UTF8);
                // The downloaded page is what has to be checked here (the original tested listUrl by mistake).
                if (string.IsNullOrEmpty(docHtml))
                {
                    return(listArea);
                }
                docHtml = RegexHelper.ClearTrn(docHtml);
                const string prolink =
                    "<div[^>]*class=\"globalProductName\">\\s*<a[^>]*href=['|\"]([^'\"]*)['|\"][^>]*>";
                var page = 1;
                listArea = RegexHelper.Matches(docHtml, prolink);

                // Paging: keep requesting the next page until enough links are collected.
                while (listArea.Count < deepth)
                {
                    // Continue from the page number the current address ends with, if any.
                    var pstr = RegexHelper.Match(listUrl, @"p(\d+)?$");
                    if (!string.IsNullOrEmpty(pstr))
                    {
                        page = Convert.ToInt32(pstr);
                    }
                    page++;
                    listUrl = Regex.Replace(listUrl, @"p(\d+)$", "") + "p" + page;
                    docHtml = HtmlCls.GetHtmlByUrl(listUrl, Encoding.UTF8);
                    if (string.IsNullOrEmpty(docHtml))
                    {
                        break;
                    }

                    var pageLinks = RegexHelper.Matches(docHtml, prolink);
                    // A page without product links means we ran past the last page; without this
                    // check the count never grows and the loop would request pages forever.
                    if (pageLinks == null || pageLinks.Count == 0)
                    {
                        break;
                    }
                    listArea.AddRange(pageLinks);
                }

                listArea = listArea.Take(deepth).ToList();
            }
            catch (Exception ex)
            {
                FileHelper.WriteException(Err, ex);
            }
            return(listArea);
        }
Example #10
0
        /// <summary>
        /// 获取价格信息[market_price]、[price]
        /// </summary>
        /// <param name="rNum">002站点产品Id</param>
        /// <returns></returns>
        //public static JsonCls.JsonObject GetPriceInfo(string rNum)
        //{
        //    const string baseUrl = "http://www.redbaby.com.cn/catalog/category/getPriceInfo?ids={0}";
        //    string url = String.Format(baseUrl, rNum);
        //    string info = HtmlCls.GetHtmlByUrl(url, Encoding.UTF8);
        //    if (!string.IsNullOrEmpty(info) && info != "[]")
        //    {
        //        JsonCls.JsonObject json = JsonCls.JsonConvert.DeserializeObject(info);
        //        //初始化
        //        JsonCls.JsonConvert.SetJson(new JsonCls.JsonObject());
        //        return (JsonCls.JsonObject)json[rNum];
        //    }
        //    return null;
        //}

        /// <summary>
        /// Determines whether a product is in stock for Sichuan.
        /// </summary>
        /// <param name="proId">Product id on the "002" site.</param>
        /// <returns>
        /// true when the stock response mentions both "有货" (in stock) and "四川" (Sichuan);
        /// false otherwise, including when the response could not be downloaded.
        /// </returns>
        public static bool CheckScStock(string proId)
        {
            const string baseUrl = "http://www.redbaby.com.cn/catalog/product/getStockInfo?id={0}";
            string       url     = String.Format(baseUrl, proId);
            string       info    = HtmlCls.GetHtmlByUrl(url, Encoding.UTF8);

            // Check before re-encoding: Encoding.GetBytes throws on a null string.
            if (string.IsNullOrEmpty(info))
            {
                return(false);
            }

            // NOTE(review): kept from the original - the text is round-tripped through the system
            // default code page and re-read as UTF-8, presumably to undo a mis-decoded response; confirm
            // it is still needed, since it corrupts text that was already decoded correctly.
            info = Encoding.UTF8.GetString(Encoding.Default.GetBytes(info));

            return(info.IndexOf("有货", StringComparison.Ordinal) > -1 &&
                   info.IndexOf("四川", StringComparison.Ordinal) > -1);
        }
Example #11
0
        /// <summary>
        /// Downloads a Tmall listing page and returns the product links found in its item list.
        /// </summary>
        /// <param name="url">Address of the listing page to download.</param>
        /// <param name="next">
        /// Receives the URL-decoded match for the "next page" anchor, or an empty string when the
        /// page could not be downloaded.
        /// </param>
        /// <returns>
        /// The URL-decoded product links, with BaseUrl prepended to those starting with "/";
        /// an empty list when the page could not be downloaded.
        /// </returns>
        private static IEnumerable <string> GetUrlsFromHtml(string url, out string next)
        {
            next = "";

            // Tmall renders a different front end depending on the cookie that is sent.
            const string cookie =
                "x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=qMo3B45XYmoCAct2enaIrZoT; t=9bfd6b376a1f1e450056f0e1b1c54240; tracknick=luoyong87610; mpp=t%3D0%26m%3D%26h%3D0%26l%3D0; uc1=x; cookie2=8eb29ff22cbe3bddcad34d264d01806f; passtime=1341280687588; isFirstOpen=false; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0";
            string pageHtml = HtmlCls.GetHtmlByUrl(url, Encoding.Default, cookie);
            if (string.IsNullOrEmpty(pageHtml))
            {
                return new List <string>();
            }

            pageHtml = RegexHelper.ClearBr(pageHtml);

            const string nextPattern =
                "<a[^>]*href=['\"]([^'\"\\s]+)['\"][^>]*class=['\"]ui-page-s-next['\"][^>]*>";
            next = Utils.UrlDecode(RegexHelper.Match(pageHtml, nextPattern));

            // Links are taken from the product image anchors inside the item list container.
            const string regStr = "<a[^>]*href=['\"]([^'\"\\s]+?)['\"][^>]*class=['\"]product-Img['\"][^>]*>";
            var itemList = HtmlCls.GetHtmlById(pageHtml, "J_itemList");
            var rawLinks = RegexHelper.Matches(itemList, regStr);

            return rawLinks.Select(link =>
            {
                // Only root-relative links get the site prefix.
                string prefix = link.StartsWith("/") ? BaseUrl : "";
                return prefix + Utils.UrlDecode(link);
            }).ToList();
        }
Example #12
0
        /// <summary>
        /// Determines the stock code for the current product page (Chengdu warehouse / province 22).
        /// </summary>
        /// <returns>
        /// 1 when the product is reported as in stock, 0 when a stock response was received but does
        /// not report it as in stock, -1 when no stock response could be obtained or an error occurred.
        /// Exceptions are logged through FileHelper and not rethrown.
        /// </returns>
        public override int GetStockCode()
        {
            int code = -1;

            try
            {
                GetHtml(SiteEncoding);
                string stockUrl = RegexHelper.Match(DocHtml,
                                                    "(http://price.360buy.com/ows/stock/pshow-[a-zA-Z0-9]*.html)");

                if (!string.IsNullOrEmpty(stockUrl))
                {
                    // The page source carries a stock link, e.g.
                    // <script type="text/javascript" src="http://price.360buy.com/ows/stock/pshow-0F5D9F92C79383CED35C7903D3927987.html"></script>
                    // whose content looks like:
                    // var stockdata = [{"Wid":183192,"Rid":6,"Stock":34,"Days":0,"Purchase":0,"IsPop":false},{"Wid":183192,"Rid":3,"Stock":33,"Days":0,"Purchase":0,"IsPop":false},{"Wid":183192,"Rid":10,"Stock":34,"Days":0,"Purchase":0,"IsPop":false},{"Wid":183192,"Rid":4,"Stock":33,"Days":0,"Purchase":0,"IsPop":false}];
                    // The site's JS maps warehouses as: 6 Beijing, 3 Shanghai, 10 Guangzhou, 4 Chengdu, 5 Wuhan, 7 Nanjing, 8 Jinan, 9 Shenyang,
                    // and stock states as: 33 in stock, 34 out of stock, 36 pre-order, 39 in transit, 0 being counted.
                    string warehouseData = HtmlCls.GetHtmlByUrl(stockUrl, SiteEncoding);
                    if (!string.IsNullOrEmpty(warehouseData))
                    {
                        // Chengdu warehouse only.
                        code = RegexHelper.Match(warehouseData, "\"Rid\":4,\"Stock\":([^,]+),") == "33" ? 1 : 0;
                    }
                    return code;
                }

                // No stock link on the page: query the stock service with the sku id instead.
                string skUid = RegexHelper.Match(DocHtml, "[\"']?((skuidkey)|(sid))[\"']?:\\s*[\"']([0-9a-zA-Z]+)[\"']", 4);
                var    type  = RegexHelper.Match(DocHtml, "type:\\s*(\\d+)");
                bool   isTypeOne = type == "1";

                // Type "1" pages use the city-level endpoint; all others the province-level one.
                // (An older province-level StockHandler.ashx address existed in the original as commented-out code.)
                string serviceUrl = isTypeOne
                    ? "http://price.360buy.com/stocksoa/StockHandler.ashx?callback=getProvinceStockCallback&type=ststock&skuid=" +
                      skUid + "&provinceid=22&cityid=1930&areaid=1945"
                    : "http://st.3.cn/gsi.html?callback=gSC&type=provincestock&skuid=" + skUid + "&provinceid=22";

                string stockHtml = HtmlCls.GetHtmlByUrl(serviceUrl, SiteEncoding);
                if (string.IsNullOrEmpty(stockHtml))
                {
                    return code;
                }

                // An explicit "in stock" state name wins over the numeric state.
                if (RegexHelper.Match(stockHtml, "\"StockStateName\":\"([^\"]+)\"", 1) == "有货")
                {
                    return 1;
                }

                string statePattern = isTypeOne ? "\"S\":\"1-(\\d+)-1-0-0\"" : "\"StockState\":(\\w+),";
                code = RegexHelper.Match(stockHtml, statePattern) == "33" ? 1 : 0;
            }
            catch (Exception ex)
            {
                FileHelper.WriteException(ex);
            }
            return code;
        }