예제 #1
0
 /// <summary>
 /// Collects product page URLs reachable from a listing URL.
 /// </summary>
 /// <param name="listUrl">Address that is downloaded through HttpHelper.</param>
 /// <returns>
 /// A one-element list with the URL reported by GetRequestUrl when that URL already matches
 /// the product page pattern; otherwise the distinct, non-empty product links found in the
 /// element with id "itemGrid1", each passed through Utils.GetAbsoluteUrl.
 /// An empty list is returned when any exception is thrown.
 /// </returns>
 public override IEnumerable <string> GetUrlList(string listUrl)
 {
     try
     {
         using (var http = new HttpHelper(listUrl, SiteEncoding))
         {
             var html = http.GetHtml();
             var url  = http.GetRequestUrl();

             // The request itself may already point at a single product page.
             // NOTE(review): the dots in this pattern are unescaped, so each one matches any character.
             if (Regex.IsMatch(url, "^http://www.newegg.com.cn/Product/[0-9a-zA-Z\\-]+.htm$"))
             {
                 return new List <string> { url };
             }

             if (!html.IsNullOrEmpty())
             {
                 html = RegexHelper.ClearTrn(html);
             }

             var showList = HtmlCls.GetHtmlById(html, "itemGrid1");
             var list     =
                 RegexHelper.Matches(showList, "(http://www.newegg.com.cn/Product/[0-9a-zA-Z\\-]+.htm)")
                 .Distinct()
                 .ToList();

             return list.Where(t => !t.IsNullOrEmpty())
                        .Select(t => Utils.GetAbsoluteUrl(GetWebSiteInfo().BaseUrl, t))
                        .ToList();
         }
     }
     catch (Exception)
     {
         // Best effort: any failure while downloading or parsing yields an empty result.
         return new List <string>();
     }
 }
예제 #2
0
        /// <summary>
        /// Reads the product name from the heading inside the element with id "name".
        /// </summary>
        /// <returns>Whatever RegexHelper.Match yields for the heading pattern.</returns>
        public override string GetProName()
        {
            // Load the page first; DocHtml is read afterwards.
            GetHtml(SiteEncoding);

            const string headingPattern = "<h1>([^<]*)<";
            var          nameBlock      = HtmlCls.GetHtmlById(DocHtml, "name");
            return RegexHelper.Match(nameBlock, headingPattern);
        }
예제 #3
0
        /// <summary>
        /// Downloads a Tmall listing page and turns each "product" block into a TamllBase entry.
        /// </summary>
        /// <param name="url">Listing page address.</param>
        /// <param name="next">
        /// Receives the URL-decoded href of the "ui-page-s-next" link, or an empty string when
        /// the page could not be downloaded.
        /// </param>
        /// <returns>One entry (url, title, price) per product block; empty when the page is empty.</returns>
        private static IEnumerable <TamllBase> GetTUrlsFromHtml(string url, out string next)
        {
            next = "";

            // Tmall renders its front end differently depending on the cookie that is sent.
            const string cookie =
                "x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=qMo3B45XYmoCAct2enaIrZoT; t=9bfd6b376a1f1e450056f0e1b1c54240; tracknick=luoyong87610; mpp=t%3D0%26m%3D%26h%3D0%26l%3D0; uc1=x; cookie2=22291aea11e397a82512118642ac0abe; passtime=1341285069752; isFirstOpen=true";
            const string nextPagePattern =
                "<a[^>]*href=['\"]([^'\"\\s]+)['\"][^>]*class=['\"]ui-page-s-next['\"][^>]*>";
            // Group 1 is the link, group 2 is the title.
            const string linkAndTitlePattern =
                "<a[^>]*href=['\"]([^'\"\\s]+?)['\"][^>]*class=['\"]product-title['\"][^>]*title=['\"]([^'\"]+?)['\"][^>]*>";
            const string pricePattern =
                "<span[^>]*class=['\"]product-normal['\"][^>]*title=['\"]([^'\"\\s]+)['\"][^>]*>";

            string pageHtml = HtmlCls.GetHtmlByUrl(url, Encoding.Default, cookie);
            if (string.IsNullOrEmpty(pageHtml))
            {
                return new List <TamllBase>();
            }

            pageHtml = RegexHelper.ClearBr(pageHtml);
            next     = Utils.UrlDecode(RegexHelper.Match(pageHtml, nextPagePattern));

            var itemListHtml  = HtmlCls.GetHtmlById(pageHtml, "J_itemList");
            var productBlocks = HtmlCls.GetHtmlByCss(itemListHtml, "product");

            var products = new List <TamllBase>();
            products.AddRange(productBlocks.Select(block => new TamllBase
            {
                Url   = Utils.UrlDecode(RegexHelper.Match(block, linkAndTitlePattern, 1)),
                Title = RegexHelper.Match(block, linkAndTitlePattern, 2),
                Price = Convert.ToDecimal(RegexHelper.Match(block, pricePattern))
            }));
            return products;
        }
예제 #4
0
        /// <summary>
        /// Builds the product description HTML from the "detailBox" sections of a product page.
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <param name="sanId">Product id substituted into the thumbnail request URL.</param>
        /// <returns>
        /// The concatenated and cleaned "detailBox" markup, or an empty string when the page
        /// contains no such section.
        /// </returns>
        public static string GetProDesc(string docHtml, string sanId)
        {
            // Materialise once so the sequence is not enumerated twice.
            var sections = HtmlCls.GetHtmlByCss(docHtml, "detailBox").ToList();
            if (sections.Count == 0)
            {
                return "";
            }

            var desc = string.Concat(sections.ToArray());

            // The images are served by a separate request.
            const string imgUrl = "http://www.sanfo.com/shop/product.info.asp?command=findthumb&vid={0}";
            var          imgs   = HtmlCls.GetHtmlByUrl(String.Format(imgUrl, sanId), Encoding.UTF8);

            // Literal replacement: the downloaded markup must not be interpreted as a regex
            // substitution pattern (a "$" inside it would otherwise be expanded).
            desc = desc.Replace("<dt class=\"detailImg\" id=\"item_product_images\"></dt>",
                                "<dt class=\"detailImg\" id=\"item_product_images\">" + imgs + "</dt>");

            // Unwrap anchor tags, keeping their inner content.
            desc = Regex.Replace(desc, "<a[^>]*href=[\"|'][^'\"]*[\"'][^>]*>(.*?)</a>", "$1");
            // Remove script tags (only those whose body contains no "<").
            desc = Regex.Replace(desc, "<script[^>]*>[^<]*</script>", "");
            // Strip class and style attributes.
            desc = Regex.Replace(desc, "(\\s*class=\"[^\"]+\")|(\\s*style=\"[^\"]+\")", "");

            // Turn root-relative src attributes into absolute ones.
            desc = Regex.Replace(desc, "src=\"(/[^\"]+)\"", "src=\"" + SanfoUrl + "$1\"");

            // Replace the source shop's name with the generic shop wording.
            desc = Regex.Replace(desc, "(三夫(户外?)?)", "本商城");

            return desc;
        }
예제 #5
0
 /// <summary>
 /// Runs a keyword search on the site and returns the product links from the result page.
 /// </summary>
 /// <param name="word">Search keyword; it is URL-encoded with SiteEncoding before use.</param>
 /// <returns>
 /// Distinct href values taken from the "productImage" blocks of the "atfResults" and
 /// "btfResults" elements, without any "#fragment" part. An empty list when an exception
 /// is thrown (the exception is written through FileHelper).
 /// </returns>
 public override IEnumerable <string> SearchWord(string word)
 {
     try
     {
         string searchUrl = GetWebSiteInfo().BaseUrl +
                            "/s/ref=nb_sb_noss_1?__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99&url=search-alias%3Daps&field-keywords={0}";
         var url = String.Format(searchUrl, Utils.UrlEncode(word, SiteEncoding));
         using (var http = new HttpHelper(url, SiteEncoding))
         {
             var html = http.GetHtml();
             if (!html.IsNullOrEmpty())
             {
                 html = RegexHelper.ClearTrn(html);
             }

             var showList = HtmlCls.GetHtmlById(html, "atfResults") + HtmlCls.GetHtmlById(html, "btfResults");

             // "#" is excluded from the captured group so the optional fragment group that follows
             // can actually consume it; previously the greedy capture kept the fragment in the URL.
             const string linkPattern = "<a[^>]*href=[\"']?([^\"'>#]+)(#[^\"'>]*)?[\"']?[^>]*>";

             return HtmlCls.GetHtmlByCss(showList, "productImage")
                           .Select(t => RegexHelper.Match(t, linkPattern))
                           .Distinct()
                           .ToList();
         }
     }
     catch (Exception ex)
     {
         FileHelper.WriteException(ex);
         return new List <string>();
     }
 }
예제 #6
0
        /// <summary>
        /// Gets the main (first) picture of a product.
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <returns>Result of matching the img src pattern against the element with id "spec-n1".</returns>
        public static string GetBigPic(string docHtml)
        {
            const string imgSrcPattern = "<img[^>]*src=['\"]([^'\"]*)['\"][^>]*>";

            return RegexHelper.Match(HtmlCls.GetHtmlById(docHtml, "spec-n1"), imgSrcPattern);
        }
예제 #7
0
        /// <summary>
        /// Downloads a listing page and extracts the distinct product links from it.
        /// </summary>
        /// <param name="url">Listing page address.</param>
        /// <returns>
        /// Distinct href values (any "#fragment" excluded) found in the image blocks of the list
        /// container; an empty list when the page or the list container is missing.
        /// </returns>
        private static IEnumerable <string> GetUrlsFromHtml(string url)
        {
            string docHtml = HtmlCls.GetHtmlByUrl(url, SiteEncoding);
            if (string.IsNullOrEmpty(docHtml))
            {
                return new List <string>();
            }

            docHtml = RegexHelper.ClearTrn(docHtml);

            var cssName  = "p-img";
            var listHtml = HtmlCls.GetHtmlById(docHtml, "plist");
            if (listHtml.IsNullOrEmpty())
            {
                // Alternative markup: first "list-h" block with "i-img" image cells.
                // NOTE(review): presumably the layout used by some book listings - confirm.
                cssName  = "i-img";
                listHtml = HtmlCls.GetHtmlByCss(docHtml, "list-h").FirstOrDefault();
            }

            // FirstOrDefault may yield null; without a container there is nothing to extract.
            if (listHtml.IsNullOrEmpty())
            {
                return new List <string>();
            }

            return HtmlCls.GetHtmlByCss(listHtml, cssName)
                          .Select(t => RegexHelper.Match(t, "<a[^>]*href=[\"']?([^\"'>#]+)(#[^\"'>]*)?[\"']?[^>]*>"))
                          .Distinct()
                          .ToList();
        }
예제 #8
0
        /// <summary>
        /// Builds the HTML table that shows the large product pictures.
        /// </summary>
        /// <param name="jdNum">Product number substituted into the big-image page URL.</param>
        /// <returns>
        /// A table with one row per picture, or an empty string when the page is empty, no
        /// picture is found, or an exception is thrown (it is written through FileHelper).
        /// </returns>
        public static string GetProBigPics(string jdNum)
        {
            try
            {
                string pageUrl  = String.Format(BaseUrl + "/bigimage.aspx?id={0}", jdNum);
                string pageHtml = HtmlCls.GetHtmlByUrl(pageUrl);
                if (string.IsNullOrEmpty(pageHtml))
                {
                    return "";
                }

                string rightPane = HtmlCls.GetHtmlByCss(pageHtml, "right").FirstOrDefault();
                var    suffixes  = RegexHelper.Matches(rightPane, "http://img10.360buyimg.com/n5([^'\"]*)");
                if (!suffixes.Any())
                {
                    return "";
                }

                // Each captured suffix is re-attached to the "n0" image path.
                var rows = suffixes.Select(
                    s => "<tr><td><img src=\"http://img10.360buyimg.com/n0" + s + "\" /></td></tr>");

                return "<table width=\"750\" align=\"center\" border=\"0\" cellSpacing=\"0\" cellPadding=\"0\">" +
                       string.Join("", rows.ToArray()) +
                       "</table>";
            }
            catch (Exception ex)
            {
                FileHelper.WriteException(ex);
                return "";
            }
        }
예제 #9
0
        /// <summary>
        /// Reads the product picture address from the element with id "spec-n1".
        /// </summary>
        /// <returns>Result of matching the img src pattern against that element.</returns>
        public override string GetProPic()
        {
            // Load the page first; DocHtml is read afterwards.
            GetHtml(SiteEncoding);

            const string imgSrcPattern = "<img[^>]*src=['\"]([^'\"]*)['\"][^>]*>";
            var          pictureBlock  = HtmlCls.GetHtmlById(DocHtml, "spec-n1");
            return RegexHelper.Match(pictureBlock, imgSrcPattern);
        }
예제 #10
0
        /// <summary>
        /// Gets a product link from its product number (the cooperation model changed).
        /// Category numbers are not available, so the site's search is used instead.
        /// </summary>
        /// <param name="num">Product number, appended to the search URL as the keyword.</param>
        /// <returns>The first matched product href, passed through Utils.GetAbsoluteUrl with RedBabyUrl.</returns>
        public static string GetProUrlFromNum(string num)
        {
            const string productLinkPattern =
                "<div[^>]*class=\"globalProductName\"[^>]*>\\s*<a[^>]*href=['|\"]([^'\"]*)['|\"][^>]*>";

            var resultPage  = HtmlCls.GetHtmlByUrl(RedBabyUrl + "/search?keyword=" + num, Encoding.UTF8);
            var productLink = RegexHelper.Match(resultPage, productLinkPattern);

            return Utils.GetAbsoluteUrl(RedBabyUrl, productLink);
        }
예제 #11
0
        /// <summary>
        /// Extracts the packing list text from the element with id "bzqd".
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <returns>The trimmed text between two tags, or an empty string when the element is missing.</returns>
        public static string GetPackingList(string docHtml)
        {
            string packingBlock = HtmlCls.GetHtmlById(docHtml, "bzqd");

            return string.IsNullOrEmpty(packingBlock)
                       ? ""
                       : RegexHelper.Match(packingBlock, "<[^>]*>([^<]+)<[^>]*>").Trim();
        }
예제 #12
0
        /// <summary>
        /// Extracts the manufacturer/brand name from the element with id "i-detail".
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <returns>The trimmed link text of the brand entry, or an empty string when the element is missing.</returns>
        public static string GetBrandName(string docHtml)
        {
            string detailBlock = HtmlCls.GetHtmlById(docHtml, "i-detail");

            return string.IsNullOrEmpty(detailBlock)
                       ? ""
                       : RegexHelper.Match(detailBlock, "<li[^>]*>生产厂家:<a[^>]*brand[^>]*>([^<]+)</a>").Trim();
        }
예제 #13
0
 /// <summary>
 /// Reads the product picture address from the element with id "bgPics".
 /// </summary>
 /// <returns>The matched src value, or an empty string when any exception is thrown.</returns>
 public override string GetProPic()
 {
     const string srcPattern = "\\s+src=[\"']([^\"'>]+)[\"']";

     try
     {
         // Load the page first; DocHtml is read afterwards.
         GetHtml(SiteEncoding);
         return RegexHelper.Match(HtmlCls.GetHtmlById(DocHtml, "bgPics"), srcPattern);
     }
     catch (Exception)
     {
         return "";
     }
 }
예제 #14
0
 /// <summary>
 /// Reads the product name from the element with id "title-descript", with tags removed.
 /// </summary>
 /// <returns>The trimmed, tag-free text, or an empty string when any exception is thrown.</returns>
 public override string GetProName()
 {
     const string tagPattern = "</?[0-9a-zA-Z]+[^>]*>";

     try
     {
         // Load the page first; DocHtml is read afterwards.
         GetHtml(SiteEncoding);
         var titleBlock = HtmlCls.GetHtmlById(DocHtml, "title-descript");
         var plainText  = Regex.Replace(titleBlock, tagPattern, "");
         return plainText.Trim();
     }
     catch (Exception)
     {
         return "";
     }
 }
예제 #15
0
        /// <summary>
        /// Downloads a listing page and extracts the product links from its item list.
        /// </summary>
        /// <param name="url">Listing page address.</param>
        /// <returns>
        /// The links matched inside the element with id "Id_prodItemList"; links starting with "/"
        /// are prefixed with SanfoUrl. An empty list when the page is empty.
        /// </returns>
        private static IEnumerable <string> GetUrlsFromHtml(string url)
        {
            const string productLinkPattern =
                "<div[^>]*class=['\"]proPic['\"][^>]*><a[^>]*href=['\"]([^'\"]+)['\"][^>]*>";

            string pageHtml = HtmlCls.GetHtmlByUrl(url);
            if (string.IsNullOrEmpty(pageHtml))
            {
                return new List <string>();
            }

            var itemListHtml = HtmlCls.GetHtmlById(pageHtml, "Id_prodItemList");
            var rawLinks     = RegexHelper.Matches(itemListHtml, productLinkPattern);

            return rawLinks.Select(link => link.StartsWith("/") ? SanfoUrl + link : link).ToList();
        }
예제 #16
0
        /// <summary>
        /// Gets the stock code for the Chengdu area.
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <returns>-1 unknown; 0 out of stock; 1 in stock.</returns>
        public static int GetCdStockCode(string docHtml)
        {
            try
            {
                docHtml = RegexHelper.ClearBr(docHtml);

                string statePattern;
                string requestUrl = RegexHelper.Match(docHtml,
                                                      "(http://price.360buy.com/ows/stock/pshow-[a-zA-Z0-9]*.html)");
                if (!string.IsNullOrEmpty(requestUrl))
                {
                    // The page source carries a stock link such as
                    // <script type="text/javascript" src="http://price.360buy.com/ows/stock/pshow-0F5D9F92C79383CED35C7903D3927987.html"></script>
                    // whose content looks like:
                    // var stockdata = [{"Wid":183192,"Rid":6,"Stock":34,...},{"Wid":183192,"Rid":4,"Stock":33,...}];
                    // The site's script maps Rid to a warehouse (6 Beijing, 3 Shanghai, 10 Guangzhou,
                    // 4 Chengdu, 5 Wuhan, 7 Nanjing, 8 Jinan, 9 Shenyang) and Stock to a status
                    // (33 in stock, 34 out of stock, 36 pre-order, 39 in transit, 0 being counted).
                    // Only the Chengdu warehouse (Rid 4) is inspected.
                    statePattern = "\"Rid\":4,\"Stock\":([^,]+),";
                }
                else
                {
                    string skuId = RegexHelper.Match(docHtml, "wareinfo.*sid[^\"]*\"([0-9a-zA-Z]*)\"");

                    // Province-level stock query. A city-level variant exists on the same handler
                    // (type=pcstock with an extra cityid=1930 parameter) but is not used.
                    requestUrl =
                        "http://price.360buy.com/stocksoa/StockHandler.ashx?callback=getProvinceStockCallback&type=provincestock&skuid=" +
                        skuId + "&provinceid=22";
                    statePattern = "\"StockState\":(\\w+),";
                }

                string stockHtml = HtmlCls.GetHtmlByUrl(requestUrl, SiteEncoding);
                if (string.IsNullOrEmpty(stockHtml))
                {
                    return -1;
                }

                // "33" means in stock; every other value (including no match) is reported as 0.
                return RegexHelper.Match(stockHtml, statePattern) == "33" ? 1 : 0;
            }
            catch (Exception ex)
            {
                FileHelper.WriteException(ex);
                return -1;
            }
        }
예제 #17
0
 /// <summary>
 /// Reads the product picture address from the element with id "prodImageCell".
 /// </summary>
 /// <returns>
 /// The matched src value, or an empty string when an exception is thrown
 /// (the exception is written through FileHelper).
 /// </returns>
 public override string GetProPic()
 {
     const string srcPattern = "\\s+src=[\"']([^\"'>]+)[\"']";

     try
     {
         // Load the page first; DocHtml is read afterwards.
         GetHtml(SiteEncoding);
         var imageCell = HtmlCls.GetHtmlById(DocHtml, "prodImageCell");
         return RegexHelper.Match(imageCell, srcPattern);
     }
     catch (Exception ex)
     {
         FileHelper.WriteException(ex);
         return "";
     }
 }
예제 #18
0
        /// <summary>
        /// Downloads the product description referenced by the "apiItemDesc" value of a product page.
        /// </summary>
        /// <param name="link">Product page address.</param>
        /// <returns>
        /// The description payload with the leading "var desc='" text and trailing single quotes
        /// removed; an empty string when the page has no "apiItemDesc" value or the description
        /// download yields nothing.
        /// </returns>
        public static string GetProDesc(string link)
        {
            // NOTE(review): the original carried the hint "id=J_DivItemDesc" - presumably the
            // id of the page element that hosts the description; confirm.
            var docHtml = HtmlCls.GetHtmlByUrl(link);
            var descUrl = RegexHelper.Match(docHtml, "\"apiItemDesc\":\"([^\"]+?)\"");
            if (string.IsNullOrEmpty(descUrl))
            {
                return "";
            }

            // The value is embedded in script text, so its backslash escapes are dropped.
            descUrl = descUrl.Replace("\\", "");

            var desc = HtmlCls.GetHtmlByUrl(descUrl);
            if (string.IsNullOrEmpty(desc))
            {
                // A failed download must not surface as a NullReferenceException.
                return "";
            }

            return desc.Replace("var desc='", "").TrimEnd('\'');
        }
예제 #19
0
 /// <summary>
 /// Reads the product picture address from the "src340" attribute of the element with id "midImg".
 /// </summary>
 /// <returns>
 /// The attribute value with everything from the first "?" removed, or an empty string when an
 /// exception is thrown (the exception is written through FileHelper).
 /// </returns>
 public override string GetProPic()
 {
     try
     {
         // Load the page first; DocHtml is read afterwards.
         GetHtml(SiteEncoding);

         var imageTag   = HtmlCls.GetHtmlById(DocHtml, "midImg");
         var pictureUrl = HtmlCls.GetAttrValue(imageTag, "src340");

         // Drop the query string.
         return Regex.Replace(pictureUrl, "\\?.*$", "");
     }
     catch (Exception ex)
     {
         FileHelper.WriteException(ex);
         return "";
     }
 }
예제 #20
0
 /// <summary>
 /// Reads the product name from the element with id "btAsinTitle", with tags removed.
 /// </summary>
 /// <returns>
 /// The tag-free text (not trimmed), or an empty string when an exception is thrown
 /// (the exception is written through FileHelper).
 /// </returns>
 public override string GetProName()
 {
     const string tagPattern = "</?[0-9a-zA-Z]+[^>]*>";

     try
     {
         // Load the page first; DocHtml is read afterwards.
         GetHtml(SiteEncoding);
         var titleBlock = HtmlCls.GetHtmlById(DocHtml, "btAsinTitle");
         return Regex.Replace(titleBlock, tagPattern, "");
     }
     catch (Exception ex)
     {
         FileHelper.WriteException(ex);
         return "";
     }
 }
예제 #21
0
        /// <summary>
        /// Extracts and cleans the product description.
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <returns>
        /// The markup of the element with id "productDescription" after the clean-up steps below;
        /// the unmodified lookup result when it is null or empty.
        /// </returns>
        public static string GetDescFromHtml(string docHtml)
        {
            string desc = HtmlCls.GetHtmlById(docHtml, "productDescription");
            if (string.IsNullOrEmpty(desc))
            {
                return desc;
            }

            // Replace the source shop's name with the generic shop wording.
            desc = Regex.Replace(desc, @"红孩子母婴商城|红孩子", "本商场");
            // Drop the id attribute to avoid style conflicts on the target page.
            desc = desc.Replace("id=\"productDescription\"", "");
            // Remove anchor tags together with their content. The pattern previously read "<a[^]*href=";
            // in .NET that "]" is a literal member of the class, so the class ran on to the next "]".
            desc = Regex.Replace(desc, "<a[^>]*href=[\"|'][^'\"]*[\"'][^>]*>(.*?)</a>", "");
            // Remove script tags (only those whose body contains no "<").
            desc = Regex.Replace(desc, "<script[^>]*>[^<]*</script>", "");
            // Rename "src" + digit attributes to plain "src" so the images are displayed.
            desc = Regex.Replace(desc, "src\\d=", "src=");

            return desc;
        }
예제 #22
0
        /// <summary>
        /// Extracts the after-sales service text from the third "mc tabcon hide" block of the
        /// element with id "detail".
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <returns>
        /// The trimmed text between two tags of that block, or an empty string when the element
        /// is missing or has fewer than three such blocks.
        /// </returns>
        public static string GetAftersaleService(string docHtml)
        {
            string detailBlock = HtmlCls.GetHtmlById(docHtml, "detail");
            if (string.IsNullOrEmpty(detailBlock))
            {
                return "";
            }

            var tabs = HtmlCls.GetHtmlByCss(detailBlock, "mc tabcon hide").ToList();
            if (tabs.Count < 3)
            {
                return "";
            }

            return RegexHelper.Match(tabs[2], "<[^>]*>([^<]+)<[^>]*>").Trim();
        }
예제 #23
0
        /// <summary>
        /// 获取价格信息[market_price]、[price]
        /// </summary>
        /// <param name="rNum">002站点产品Id</param>
        /// <returns></returns>
        //public static JsonCls.JsonObject GetPriceInfo(string rNum)
        //{
        //    const string baseUrl = "http://www.redbaby.com.cn/catalog/category/getPriceInfo?ids={0}";
        //    string url = String.Format(baseUrl, rNum);
        //    string info = HtmlCls.GetHtmlByUrl(url, Encoding.UTF8);
        //    if (!string.IsNullOrEmpty(info) && info != "[]")
        //    {
        //        JsonCls.JsonObject json = JsonCls.JsonConvert.DeserializeObject(info);
        //        //初始化
        //        JsonCls.JsonConvert.SetJson(new JsonCls.JsonObject());
        //        return (JsonCls.JsonObject)json[rNum];
        //    }
        //    return null;
        //}

        /// <summary>
        /// Checks whether the product is in stock for Sichuan.
        /// </summary>
        /// <param name="proId">Product id on the 002 site, substituted into the stock-info URL.</param>
        /// <returns>
        /// True when the stock-info response contains both "有货" and "四川"; false otherwise,
        /// including when the response is null or empty.
        /// </returns>
        public static bool CheckScStock(string proId)
        {
            const string baseUrl = "http://www.redbaby.com.cn/catalog/product/getStockInfo?id={0}";
            string       url     = String.Format(baseUrl, proId);
            string       info    = HtmlCls.GetHtmlByUrl(url, Encoding.UTF8);

            // Check before re-encoding: Encoding.GetBytes throws on a null string.
            if (string.IsNullOrEmpty(info))
            {
                return false;
            }

            // NOTE(review): re-reads the text's default-code-page bytes as UTF-8 - presumably to
            // repair a response that was decoded with the wrong encoding; confirm it is still needed.
            info = Encoding.UTF8.GetString(Encoding.Default.GetBytes(info));
            if (string.IsNullOrEmpty(info))
            {
                return false;
            }

            return info.IndexOf("有货", StringComparison.Ordinal) > -1 &&
                   info.IndexOf("四川", StringComparison.Ordinal) > -1;
        }
예제 #24
0
        /// <summary>
        /// Collects product links from a listing page, following further pages when needed.
        /// </summary>
        /// <param name="listUrl">Listing page address; a trailing "/" is removed before use.</param>
        /// <param name="deepth">Scan depth: the maximum number of links to return.</param>
        /// <returns>
        /// Up to <paramref name="deepth"/> links. When an exception is thrown it is written through
        /// FileHelper and the links gathered so far are returned.
        /// </returns>
        public static IEnumerable <string> GetUrlListFromList(string listUrl, int deepth)
        {
            var listArea = new List <string>();

            try
            {
                listUrl = listUrl.TrimEnd('/');
                var docHtml = HtmlCls.GetHtmlByUrl(listUrl, Encoding.UTF8);

                // The downloaded page is what must be checked here; the URL was already used above.
                if (string.IsNullOrEmpty(docHtml))
                {
                    return listArea;
                }
                docHtml = RegexHelper.ClearTrn(docHtml);

                const string prolink =
                    "<div[^>]*class=\"globalProductName\">\\s*<a[^>]*href=['|\"]([^'\"]*)['|\"][^>]*>";
                var page = 1;
                listArea = RegexHelper.Matches(docHtml, prolink);

                // Paging: keep requesting "...p<N>" pages until enough links have been gathered.
                while (listArea.Count < deepth)
                {
                    // Continue from the page number at the end of the current URL, if it has one.
                    var pstr = RegexHelper.Match(listUrl, @"p(\d+)?$");
                    if (!string.IsNullOrEmpty(pstr))
                    {
                        page = Convert.ToInt32(pstr);
                    }
                    page++;
                    listUrl = Regex.Replace(listUrl, @"p(\d+)$", "") + "p" + page;

                    docHtml = HtmlCls.GetHtmlByUrl(listUrl, Encoding.UTF8);
                    if (string.IsNullOrEmpty(docHtml))
                    {
                        break;
                    }

                    var pageLinks = RegexHelper.Matches(docHtml, prolink);
                    if (pageLinks.Count == 0)
                    {
                        // A page without product links means the listing is exhausted; without
                        // this check the loop would keep requesting pages forever.
                        break;
                    }
                    listArea.AddRange(pageLinks);
                }

                listArea = listArea.Take(deepth).ToList();
            }
            catch (Exception ex)
            {
                FileHelper.WriteException(Err, ex);
            }
            return listArea;
        }
예제 #25
0
 /// <summary>
 /// Derives the stock code from the presence of an "availGreen" block on the page.
 /// </summary>
 /// <returns>
 /// 1 when the first "availGreen" block is non-empty, 0 when it is missing or empty,
 /// -1 when an exception is thrown (the exception is written through FileHelper).
 /// </returns>
 public override int GetStockCode()
 {
     try
     {
         // Load the page first; DocHtml is read afterwards.
         GetHtml(SiteEncoding);
         var availability = HtmlCls.GetHtmlByCss(DocHtml, "availGreen").FirstOrDefault();
         return availability.IsNullOrEmpty() ? 0 : 1;
     }
     catch (Exception ex)
     {
         FileHelper.WriteException(ex);
         return -1;
     }
 }
예제 #26
0
 /// <summary>
 /// Collects product page URLs from the "proShow" element of a listing page.
 /// </summary>
 /// <param name="listUrl">Address that is downloaded through HttpHelper.</param>
 /// <returns>
 /// The distinct links that match the site's "/emall/prd_..." product URL pattern; an empty
 /// list when an exception is thrown (the exception is written through FileHelper).
 /// </returns>
 public override IEnumerable <string> GetUrlList(string listUrl)
 {
     try
     {
         using (var http = new HttpHelper(listUrl, SiteEncoding))
         {
             var pageHtml    = RegexHelper.ClearTrn(http.GetHtml());
             var productArea = HtmlCls.GetHtmlById(pageHtml, "proShow");

             // The site's base URL is embedded in the pattern as-is.
             var productLinkPattern = "<a[^>]*href=[\"']?(" + GetWebSiteInfo().BaseUrl +
                                      "/emall/prd_\\d+_\\d+_-\\d+_\\d+_.html)[\"']?[^>]*>";

             return RegexHelper.Matches(productArea, productLinkPattern).Distinct().ToList();
         }
     }
     catch (Exception ex)
     {
         FileHelper.WriteException(ex);
         return new List <string>();
     }
 }
예제 #27
0
 /// <summary>
 /// Queries the site's stock endpoint for the product shown on the current page.
 /// </summary>
 /// <returns>
 /// 1 when the endpoint's "result" value is "Y", 0 for any other value,
 /// -1 when any exception is thrown.
 /// </returns>
 public override int GetStockCode()
 {
     try
     {
         // Load the page first; DocHtml is read afterwards.
         GetHtml(SiteEncoding);

         // The product number is taken from the second "prodNum" block, with tags and the
         // "商品编号:" label removed.
         var numberBlocks = HtmlCls.GetHtmlByCss(DocHtml, "prodNum").ToList();
         var productNo    = Regex.Replace(numberBlocks[1], "</?[0-9a-zA-Z]+[^>]*>", "");
         productNo = productNo.Replace("商品编号:", "").Trim();

         var stockUrl = String.Format(
             GetWebSiteInfo().BaseUrl + "/ec/homeus/browse/exactMethod.jsp?goodsNo={0}&city=71010000",
             productNo);

         using (var http = new HttpHelper(stockUrl, SiteEncoding))
         {
             var flag = RegexHelper.Match(http.GetHtml(), "\"result\":\"([a-zA-Z])\"");
             if (flag == "Y")
             {
                 return 1;
             }
             return 0;
         }
     }
     catch (Exception)
     {
         return -1;
     }
 }
예제 #28
0
        /// <summary>
        /// Reads the struck-through market price from the "summary" element, falling back to the
        /// "book-price" element.
        /// </summary>
        /// <param name="docHtml">HTML of the product page.</param>
        /// <returns>
        /// The parsed price, rounded to 0 decimals above 100 and to 1 decimal otherwise;
        /// 0 when the price cannot be found or parsed.
        /// </returns>
        public static decimal GetMarketerPrice(string docHtml)
        {
            const string delPattern = "<del>¥([^<]+)</del>";
            decimal      mprice;

            try
            {
                string priceText = null;
                foreach (var containerId in new[] { "summary", "book-price" })
                {
                    priceText = RegexHelper.Match(HtmlCls.GetHtmlById(docHtml, containerId), delPattern);
                    if (!string.IsNullOrEmpty(priceText))
                    {
                        break;
                    }
                }

                // Thousands separators are removed before parsing.
                mprice = decimal.Parse(priceText.Replace(",", ""));
            }
            catch (Exception)
            {
                mprice = 0;
            }

            if (mprice > 100)
            {
                return Math.Round(mprice, 0);
            }
            return Math.Round(mprice, 1);
        }
예제 #29
0
        /// <summary>
        /// Downloads a Tmall listing page and extracts the product links from its item list.
        /// </summary>
        /// <param name="url">Listing page address.</param>
        /// <param name="next">
        /// Receives the URL-decoded href of the "ui-page-s-next" link, or an empty string when
        /// the page could not be downloaded.
        /// </param>
        /// <returns>
        /// URL-decoded hrefs of the "product-Img" links inside the element with id "J_itemList";
        /// links starting with "/" are prefixed with BaseUrl. Empty when the page is empty.
        /// </returns>
        private static IEnumerable <string> GetUrlsFromHtml(string url, out string next)
        {
            next = "";

            // Tmall renders its front end differently depending on the cookie that is sent.
            const string cookie =
                "x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; cna=qMo3B45XYmoCAct2enaIrZoT; t=9bfd6b376a1f1e450056f0e1b1c54240; tracknick=luoyong87610; mpp=t%3D0%26m%3D%26h%3D0%26l%3D0; uc1=x; cookie2=8eb29ff22cbe3bddcad34d264d01806f; passtime=1341280687588; isFirstOpen=false; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0";
            const string nextPagePattern =
                "<a[^>]*href=['\"]([^'\"\\s]+)['\"][^>]*class=['\"]ui-page-s-next['\"][^>]*>";
            // Links are taken from the image anchors rather than the "product-title" anchors.
            const string imageLinkPattern =
                "<a[^>]*href=['\"]([^'\"\\s]+?)['\"][^>]*class=['\"]product-Img['\"][^>]*>";

            string pageHtml = HtmlCls.GetHtmlByUrl(url, Encoding.Default, cookie);
            if (string.IsNullOrEmpty(pageHtml))
            {
                return new List <string>();
            }

            pageHtml = RegexHelper.ClearBr(pageHtml);
            next     = Utils.UrlDecode(RegexHelper.Match(pageHtml, nextPagePattern));

            var itemListHtml = HtmlCls.GetHtmlById(pageHtml, "J_itemList");
            var rawLinks     = RegexHelper.Matches(itemListHtml, imageLinkPattern);

            return rawLinks.Select(link =>
            {
                var prefix = link.StartsWith("/") ? BaseUrl : "";
                return prefix + Utils.UrlDecode(link);
            }).ToList();
        }
예제 #30
0
 /// <summary>
 /// Collects product page URLs from the "pic" blocks of a listing page.
 /// </summary>
 /// <param name="listUrl">Address that is downloaded through HttpHelper.</param>
 /// <returns>
 /// The distinct, non-empty hrefs (anything from the first ";" dropped), each passed through
 /// Utils.GetAbsoluteUrl; an empty list when the page is empty or any exception is thrown.
 /// </returns>
 public override IEnumerable <string> GetUrlList(string listUrl)
 {
     const string linkPattern = "<a[^>]*href=[\"']?([^\"'>;]+)(;[^\"'>]*)?[\"']?[^>]*>";

     try
     {
         using (var http = new HttpHelper(listUrl, SiteEncoding))
         {
             var html = http.GetHtml();
             if (html.IsNullOrEmpty())
             {
                 // Second attempt; its result is used without going through ClearTrn.
                 html = http.GetHtml();
             }
             else
             {
                 html = RegexHelper.ClearTrn(html);
             }

             if (html.IsNullOrEmpty())
             {
                 return new List <string>();
             }

             var pictureBlocks = HtmlCls.GetHtmlByCss(html, "pic");
             var links         = pictureBlocks.Select(block => RegexHelper.Match(block, linkPattern))
                                              .Distinct()
                                              .ToList();

             var absoluteLinks = links.Where(link => !link.IsNullOrEmpty())
                                      .Select(link => Utils.GetAbsoluteUrl(GetWebSiteInfo().BaseUrl, link));
             return absoluteLinks.ToList();
         }
     }
     catch (Exception)
     {
         return new List <string>();
     }
 }