Example #1
0
        /// <summary>
        /// Reads the j1.com sitemap page and stores every category link that is not
        /// already present in <c>HasBindClasslist</c>.
        /// </summary>
        /// <remarks>
        /// <c>HasBindClasslist</c> is reloaded from the database first. Each new
        /// category is appended to both <c>HasBindClasslist</c> and
        /// <c>shopClasslist</c>; the pending list is written with a single
        /// <c>AddSiteClass</c> call after the scan and then cleared.
        /// </remarks>
        public void SaveAllSiteClass()
        {
            HasBindClasslist = new SiteClassInfoDB().getAllSiteCatInfo(Baseinfo.SiteId);
            string url = "http://www.j1.com/sitemap.html";
            string page = HtmlAnalysis.Gethtmlcode(url);
            string content = RegGroupsX<string>(page, "<div class=\"sitemap_sortwrap qbfl\">(?<x>.*?)</div>");
            // The sitemap container was not found (download failed or layout changed).
            if (content == null)
                return;

            var list = RegGroupCollection(content, "<a target='_blank' href='(?<y>.*?)'>(?<x>.*?)</a>");
            // No category links inside the container: nothing to store.
            if (list == null)
                return;

            for (int i = 0; i < list.Count; i++)
            {
                string catUrl = list[i].Groups["y"].Value;
                string catid = RegGroupsX<string>(catUrl, "http://www.j1.com/p-(?<x>\\d+)");
                string catName = list[i].Groups["x"].Value;
                if (!HasBindClasslist.Exists(p => p.ClassId == catid))
                {
                    SiteClassInfo cat = new SiteClassInfo
                    {
                        ParentUrl = "",
                        ParentClass = "",
                        ParentName = "",
                        TotalProduct = 0,
                        Urlinfo = catUrl,
                        ClassId = catid,
                        UpdateTime = DateTime.Now,
                        IsDel = false,
                        BindClassId = 0,
                        BindClassName = "",
                        HasChild = true,
                        IsBind = false,
                        IsHide = false,
                        ClassName = catName,
                        SiteId = Baseinfo.SiteId,
                        ClassCrumble = "",

                        CreateDate = DateTime.Now
                    };
                    HasBindClasslist.Add(cat);
                    shopClasslist.Add(cat);
                }
            }

            // Flush once after the scan so the insert is a real batch instead of
            // one database call per discovered category.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
Example #2
0
        /// <summary>
        /// Downloads a yhd.com category listing page and records the categories
        /// found in its <c>listCon</c> link list.
        /// </summary>
        /// <remarks>
        /// When the list holds exactly one link, the category is rebuilt from the
        /// page's script variables and stored only if its id matches the expected
        /// <c>c{digits}-{digits}</c> shape. Otherwise the links are treated as a
        /// top-down chain: each entry becomes the parent of the next one and the
        /// last entry receives the product total read from the page.
        /// New categories are written to the database one by one and added to
        /// <c>HasBindClasslist</c>.
        /// </remarks>
        /// <param name="url">
        /// Category URL; the fixed listing suffix is appended when it is missing.
        /// </param>
        private void GetYhdClassInfo(string url)
        {
            if (!url.Contains("b/a-s1-v4-p1-price-d0-f0d-m1-rt0-pid-mid0-k/"))
                url += "b/a-s1-v4-p1-price-d0-f0d-m1-rt0-pid-mid0-k/";
            string pageInfo = Gethtmlcode(url);

            string classInfo = RegGroupsX<string>(pageInfo, "<ul class=\"listCon clearfix\">(?<x>.*?)<a class=\"c_btn c_next iconSearch\"");

            var list = RegGroupCollection(classInfo, "href=\"(?<x>.*?)\".*?title=\"(?<y>.*?)\"");
            if (list == null)
                return;

            string pcatUrl = "";
            string pcatName = "";
            string pcatId = "";
            string classCrumble = "";
            int total = RegGroupsX<int>(pageInfo, "共(?<x>\\d+?)条");

            if (list.Count == 1)
            {
                string categoryname = RegGroupsX<string>(pageInfo, "var categoryName = '(?<x>.*?)'");
                string extid = RegGroupsX<string>(pageInfo, "var expectCategoryId = \"(?<x>\\d+)\"");
                string tempcurcatid = "c" + extid + "-" + categoryname;
                if (HasBindClasslist.Exists(p => p.ClassId == tempcurcatid))
                    return;

                string current = RegGroupsX<string>(pageInfo, "<title>(?<x>.*?)品种齐全|<div class=\"guide_title\"><span title=\"(?<x>.*?)\">");
                SiteClassInfo catInfo = new SiteClassInfo
                {
                    Urlinfo = $"http://list.yhd.com/{tempcurcatid}/b/a-s1-v4-p1-price-d0-f0d-m1-rt0-pid-mid0-k",
                    ClassId = tempcurcatid,
                    ClassName = current,
                    BindClassId = 0,
                    BindClassName = "",
                    CreateDate = DateTime.Now,
                    UpdateTime = DateTime.Now,
                    SiteId = Baseinfo.SiteId,
                    IsBind = false,
                    IsDel = false,
                    IsHide = false
                };

                catInfo.HasChild = HasBindClasslist.Exists(p => p.ParentClass == catInfo.ClassId);
                // The parent fields are still the empty defaults at this point.
                catInfo.ParentClass = pcatId;
                catInfo.ParentName = pcatName;
                catInfo.ParentUrl = pcatUrl;
                // Only ids made of numeric segments are accepted.
                if (regIsMatch(tempcurcatid, "^(?<x>c\\d+-\\d+(-\\d+)?)$"))
                {
                    // Re-checked right before the insert; the list may have changed
                    // since the first lookup (the log line records the thread id).
                    if (!HasBindClasslist.Exists(p => p.ClassId == catInfo.ClassId))
                    {
                        new SiteClassInfoDB().AddSiteClass(catInfo);
                        LogServer.WriteLog("线程id:" + Thread.CurrentThread.ManagedThreadId + "\t" + catInfo.ClassId + "\t" + catInfo.ClassName + "111111111111111", "addpro");
                        HasBindClasslist.Add(catInfo);
                    }
                }
                return;
            }

            for (int i = 0; i < list.Count; i++)
            {
                SiteClassInfo catInfo = new SiteClassInfo();
                catInfo.Urlinfo = list[i].Groups["x"].Value;
                catInfo.ClassId = RegGroupsX<string>(catInfo.Urlinfo, "http://list.yhd.com/(?<x>.*?)/");
                catInfo.ClassName = list[i].Groups["y"].Value;
                if (catInfo.Urlinfo == null || catInfo.ClassId == null || catInfo.ClassName == null)
                {
                    LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误1url\t" + url, "AddClassError");
                    continue;
                }

                // Extend the crumb with the previous level on every pass. Doing this
                // only for new categories dropped every already-known ancestor from
                // the crumb of its descendants.
                if (i != 0 && pcatId != "")
                    classCrumble += pcatId + ",";

                if (!HasBindClasslist.Exists(p => p.ClassId == catInfo.ClassId))
                {
                    catInfo.ParentClass = pcatId;
                    catInfo.ParentName = pcatName;
                    catInfo.ParentUrl = pcatUrl;
                    catInfo.ClassCrumble = classCrumble.TrimEnd(',');
                    catInfo.BindClassId = 0;
                    catInfo.BindClassName = "";
                    catInfo.CreateDate = DateTime.Now;
                    catInfo.UpdateTime = DateTime.Now;
                    catInfo.SiteId = Baseinfo.SiteId;
                    catInfo.IsBind = false;
                    catInfo.IsDel = false;
                    catInfo.IsHide = false;
                    catInfo.HasChild = true;
                    // The last link of the chain is the leaf and owns the page total.
                    if (list.Count - 1 == i)
                    {
                        catInfo.HasChild = false;
                        catInfo.TotalProduct = total;
                    }
                    HasBindClasslist.Add(catInfo);
                    LogServer.WriteLog("线程id:" + Thread.CurrentThread.ManagedThreadId + "\t" + catInfo.ClassId + "\t" + catInfo.ClassName + "111111111111111", "addpro");
                    new SiteClassInfoDB().AddSiteClass(catInfo);
                }

                // The current entry becomes the parent of the next one.
                pcatUrl = catInfo.Urlinfo;
                pcatName = catInfo.ClassName;
                pcatId = catInfo.ClassId;
            }
        }
Example #3
0
        /// <summary>
        /// Refreshes one j1.com category: resolves its parent from the breadcrumb,
        /// registers any child categories not yet known, and writes the updated
        /// parent fields, product total and timestamp back to the database.
        /// </summary>
        /// <param name="siteClassInfo">
        /// Category to refresh; its <c>Urlinfo</c> page is downloaded and the
        /// object is modified in place before being saved.
        /// </param>
        private void UpdateCat(SiteClassInfo siteClassInfo)
        {
            string pageinfo = HtmlAnalysis.Gethtmlcode(siteClassInfo.Urlinfo);
            string crumb = RegGroupsX<string>(pageinfo, "<div class=detailnav>(?<x>.*?)</div>");
            if (crumb == null)
                return;
            var deep = RegGroupCollection(crumb, "href=\"(?<y>.*?)\"( target=\"_blank\")?>(?<x>.*?)</a>");
            string parentUrl = "";
            string parentName = "";
            string parentId = "";
            // A breadcrumb without links yields no collection; the parent stays empty.
            if (deep != null)
            {
                for (int i = 0; i < deep.Count; i++)
                {
                    // Skip the home link and the category itself; the last remaining
                    // breadcrumb entry wins as the parent.
                    if (deep[i].Groups["x"].Value.Contains("首页") || deep[i].Groups["x"].Value == siteClassInfo.ClassName)
                        continue;
                    parentUrl = deep[i].Groups["y"].Value;
                    parentName = deep[i].Groups["x"].Value;
                    parentId = RegGroupsX<string>(parentUrl, "http://www.j1.com/p-(?<x>\\d+)");
                }
            }

            string children = RegGroupsX<string>(pageinfo, "<div class=\"listpageChooseBox\">(?<x>.*?)</div>");

            // NOTE(review): a page without the chooser box is treated as having no
            // children - confirm this matches the site's layout for leaf categories.
            siteClassInfo.HasChild = children != null && !children.Contains(siteClassInfo.ClassName);

            if (children != null)
            {
                var catlist = RegGroupCollection(children, "<a href=\"(?<y>.*?)\">(?<x>.*?)<span>");
                if (catlist != null)
                {
                    for (int i = 0; i < catlist.Count; i++)
                    {
                        string url = catlist[i].Groups["y"].Value;
                        string catid = RegGroupsX<string>(url, "http://www.j1.com/p-(?<x>\\d+)");
                        if (!HasBindClasslist.Exists(p => p.ClassId == catid))
                        {
                            string catName = catlist[i].Groups["x"].Value;
                            SiteClassInfo cat = new SiteClassInfo
                            {
                                ParentUrl = "",
                                ParentClass = "",
                                ParentName = "",
                                TotalProduct = 0,
                                Urlinfo = url,
                                ClassId = catid,
                                UpdateTime = DateTime.Now,
                                IsDel = false,
                                BindClassId = 0,
                                BindClassName = "",
                                HasChild = true,
                                IsBind = false,
                                IsHide = false,
                                ClassName = catName,
                                SiteId = Baseinfo.SiteId,
                                ClassCrumble = "",

                                CreateDate = DateTime.Now
                            };
                            HasBindClasslist.Add(cat);
                            shopClasslist.Add(cat);
                        }
                    }
                }
            }

            // Write all newly discovered categories in one batch.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }

            siteClassInfo.ParentUrl = parentUrl;
            siteClassInfo.ParentClass = parentId;
            siteClassInfo.ParentName = parentName;
            siteClassInfo.TotalProduct = RegGroupsX<int>(pageinfo, "共(?<x>\\d+)个商品");
            siteClassInfo.UpdateTime = DateTime.Now;
            new mmbSiteClassInfoDB().UpdateSiteClass(siteClassInfo);
        }
        /// <summary>
        /// Refreshes one category of the 360kxr site: registers the breadcrumb
        /// ancestors and the categories returned by the page's <c>getJSON</c>
        /// endpoints, then saves the category's parent fields, child flag, product
        /// total and timestamp.
        /// </summary>
        /// <param name="siteClassInfo">
        /// Category to refresh; its <c>Urlinfo</c> page is downloaded and the
        /// object is modified in place before being saved.
        /// </param>
        private void UpdateCat(SiteClassInfo siteClassInfo)
        {
            string page = HtmlAnalysis.Gethtmlcode(siteClassInfo.Urlinfo);
            string cromb = RegGroupsX<string>(page, "您现在的位置:</span>(?<x>.*?)</div>");
            if (cromb == null)
                return;
            var plist = RegGroupCollection(cromb, "<a class=\"\" href=\"(?<x>.*?)\">(?<y>.*?)</a>");
            if (plist == null)
                return;
            string parentUrl = "";
            string parentName = "";
            string parentId = "";
            foreach (Match item in plist)
            {

                parentUrl = item.Groups["x"].Value;
                parentName = item.Groups["y"].Value;
                // The home link is not a category.
                if (parentName == "首页")
                {
                    continue;
                }
                if (!string.IsNullOrEmpty(parentName))
                {
                    parentName = parentName.Trim();
                }
                if (parentName == "")
                { parentUrl = ""; continue; }
                parentId = RegGroupsX<string>(parentUrl, "category/(?<x>\\d+)-");
                if (!ValidCatId(parentId))
                {
                    // Fall back to the file name of the link when the numeric
                    // category pattern does not apply.
                    parentId = RegGroupsX<string>(parentUrl, "/(?<x>.*?).html");
                    if (string.IsNullOrEmpty(parentId))
                        continue;
                }

                parentUrl = string.Format(domain + "{0}", parentUrl);
                if (!HasBindClasslist.Exists(c => c.ClassId == parentId))
                {
                    SiteClassInfo iteminfo = new SiteClassInfo
                    {
                        ParentClass = "",
                        ParentName = "",
                        ClassName = parentName,
                        ClassId = parentId,
                        ParentUrl = "",
                        IsDel = false,
                        IsBind = false,
                        IsHide = false,
                        BindClassId = 0,
                        BindClassName = "",
                        HasChild = true,
                        ClassCrumble = "",
                        TotalProduct = 0,
                        SiteId = Baseinfo.SiteId,
                        Urlinfo = parentUrl,
                        UpdateTime = DateTime.Now,
                        CreateDate = DateTime.Now
                    };
                    HasBindClasslist.Add(iteminfo);
                    shopClasslist.Add(iteminfo);
                }

            }

            var templist = RegGroupCollection(page, "getJSON\\(\"(?<x>.*?)\"");
            // A page without getJSON calls yields no collection; skip the endpoint
            // scan instead of dereferencing null.
            if (templist != null)
            {
                for (int i = 0; i < templist.Count; i++)
                {
                    var caturl = templist[i].Groups["x"].Value;
                    string temppage = HtmlAnalysis.Gethtmlcode(domain + caturl);
                    var catlist = RegGroupCollection(temppage, "n_(?<x>.*?)\"EntityState");
                    if (catlist == null)
                        continue;
                    foreach (Match item in catlist)
                    {
                        string cat = item.Groups["x"].Value;

                        string catid = RegGroupsX<string>(cat, "id\":(?<x>\\d+),");
                        string catName = RegGroupsX<string>(cat, "\"n_name\":\"(?<x>.*?)\"");
                        string catpid = RegGroupsX<string>(cat, "\"parentid\":(?<x>.*?),");
                        string tempurl = string.Format("http://www.360kxr.com/category/{0}-0-2-1-15-1.html", catid);
                        if (!HasBindClasslist.Exists(c => c.ClassId == catid))
                        {
                            SiteClassInfo iteminfo = new SiteClassInfo
                            {
                                ParentClass = catpid,
                                ParentName = "",
                                ClassName = catName,
                                ClassId = catid,
                                ParentUrl = "",

                                IsDel = false,
                                IsBind = false,
                                IsHide = false,
                                BindClassId = 0,
                                BindClassName = "",
                                HasChild = true,
                                ClassCrumble = "",
                                TotalProduct = 0,
                                SiteId = Baseinfo.SiteId,
                                Urlinfo = tempurl,
                                UpdateTime = DateTime.Now,
                                CreateDate = DateTime.Now
                            };
                            HasBindClasslist.Add(iteminfo);
                            shopClasslist.Add(iteminfo);
                        }

                    }

                }
            }

            // Write all newly discovered categories in one batch.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }

            siteClassInfo.HasChild = HasBindClasslist.Exists(c => c.ParentClass == siteClassInfo.ClassId);
            siteClassInfo.ParentClass = parentId;
            siteClassInfo.ParentName = parentName;
            siteClassInfo.ParentUrl = parentUrl;
            siteClassInfo.UpdateTime = DateTime.Now;
            siteClassInfo.TotalProduct = RegGroupsX<int>(page, "<div class=\"goods-total\">共<b>(?<x>\\d+)</b>个商品</div>");
            new mmbSiteClassInfoDB().UpdateSiteClass(siteClassInfo);
        }
Example #5
0
        /// <summary>
        /// Updates the crawler-maintained columns of the row whose <c>Id</c>
        /// equals <paramref name="catinfo"/>'s <c>Id</c>.
        /// </summary>
        /// <param name="catinfo">Category carrying the new column values.</param>
        /// <returns>
        /// <c>true</c> when at least one row was updated; <c>false</c> when no row
        /// was affected or the update threw (the exception is logged under
        /// "DBError").
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="catinfo"/> is <c>null</c>.
        /// </exception>
        public bool updateSpiderOnly(SiteClassInfo catinfo)
        {
            if (catinfo == null)
            {
                throw new ArgumentNullException("catinfo");
            }

            using (var connection = _dbFactory.OpenDbConnection())
            {
                try
                {
                    // Only the listed columns are written; everything else on the
                    // row is left untouched.
                    int affected = connection.UpdateOnly(
                        catinfo,
                        row => new { row.ClassId, row.ClassName, row.Urlinfo, row.UpdateTime, row.ParentClass, row.ParentUrl, row.ParentName, row.ClassCrumble, row.TotalProduct, row.HasChild },
                        row => row.Id == catinfo.Id);
                    return affected > 0;
                }
                catch (Exception error)
                {
                    LogServer.WriteLog(error, "DBError");
                    return false;
                }
            }
        }
Example #6
0
        /// <summary>
        /// Sets the <c>IsDel</c> column to <c>true</c> on the row whose <c>Id</c>
        /// equals <paramref name="catinfo"/>'s <c>Id</c>.
        /// </summary>
        /// <param name="catinfo">Category identifying the row to flag.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="catinfo"/> is <c>null</c>.
        /// </exception>
        public void SetIsDel(SiteClassInfo catinfo)
        {
            if (catinfo == null)
            {
                throw new ArgumentNullException("catinfo");
            }

            using (var connection = _dbFactory.OpenDbConnection())
            {
                try
                {
                    // Template object supplying the value for the single updated column.
                    var deletedFlag = new SiteClassInfo { IsDel = true };
                    connection.UpdateOnly(deletedFlag, row => row.IsDel, row => row.Id == catinfo.Id);
                }
                catch (Exception error)
                {
                    // Failures are logged and not rethrown.
                    LogServer.WriteLog(error, "DBError");
                }
            }
        }
Example #7
0
        /// <summary>
        /// Extracts the ehaier.com category links from the "all categories" area
        /// of a page and stores those not yet present in <c>HasBindClasslist</c>.
        /// </summary>
        /// <param name="directoryHtml">HTML of the page containing the category box.</param>
        private void GetCatInfo(string directoryHtml)
        {
            string catArea = RegGroupsX<string>(directoryHtml,
                "<div class=\"all-category-box\">(?<x>.*?)<div class=\"category-contact\">");
            if (catArea == null)
                return;

            var list = RegGroupCollection(catArea, "href=\"(?<x>.*?)\".*?>(?<y>.*?)</a>");
            // No links inside the category area: nothing to enumerate.
            if (list == null)
                return;

            foreach (Match item in list)
            {
                string tempUrl = item.Groups["x"].Value;
                string tempName = item.Groups["y"].Value;
                // Skip the heading link of the box.
                if (tempName == "商品分类")
                    continue;
                // Ids have one, two or three dash-separated numeric segments.
                string tempid = RegGroupsX<string>(tempUrl, "http://www.ehaier.com/l/(?<x>\\d+).html|http://www.ehaier.com/l/(?<x>\\d+-\\d+).html|http://www.ehaier.com/l/(?<x>\\d+-\\d+-\\d+).html");
                if (!ValidCatId(tempid))
                    continue;
                if (!HasBindClasslist.Exists(c => c.ClassId == tempid))
                {
                    SiteClassInfo iteminfo = new SiteClassInfo
                    {
                        ParentClass = "",
                        ParentName = "",
                        ClassName = tempName,
                        ClassId = tempid,
                        ParentUrl = "",
                        IsDel = false,
                        IsBind = false,
                        IsHide = false,
                        BindClassId = 0,
                        BindClassName = "",
                        HasChild = tempid.Contains("-"),
                        ClassCrumble = "",
                        TotalProduct = 0,
                        SiteId = Baseinfo.SiteId,
                        Urlinfo = tempUrl,
                        UpdateTime = DateTime.Now,
                        CreateDate = DateTime.Now
                    };
                    HasBindClasslist.Add(iteminfo);
                    shopClasslist.Add(iteminfo);
                }
            }
            // Write all newly discovered categories in one batch.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
        /// <summary>
        /// Extracts the hangowa.com category links from the page's category box
        /// and stores those not yet present in <c>HasBindClasslist</c>.
        /// </summary>
        /// <param name="directoryHtml">HTML of the page containing the category box.</param>
        private void GetCatInfo(string directoryHtml)
        {
            string catArea = RegGroupsX<string>(directoryHtml, "<div class=\"category-box\" id=\"category_box\">(?<x>.*?)</ul>");
            if (catArea == null)
                return;

            var list = RegGroupCollection(catArea, "<a(?<x>.*?)</a>");
            // No anchors inside the category box: nothing to enumerate.
            if (list == null)
                return;

            // The result count is read from the whole page, so it is the same for
            // every link; look it up once instead of once per new category.
            int page = RegGroupsX<int>(directoryHtml, "共<b class=\"op-search-result\">(?<x>\\d+?)</b>件");

            foreach (Match item in list)
            {
                string anchor = item.ToString();
                string tempUrl = RegGroupsX<string>(anchor, "href=\"(?<x>.*?)\"");
                if (string.IsNullOrEmpty(tempUrl))
                    continue;
                // Links in the box are site-relative.
                tempUrl = string.Format("http://www.hangowa.com{0}", tempUrl);
                string tempName = RegGroupsX<string>(anchor, ">(?<x>.*?)</a>");
                string tempid = RegGroupsX<string>(tempUrl, "gallery-(?<x>\\d+?).html");
                if (!HasBindClasslist.Exists(c => c.ClassId == tempid))
                {
                    SiteClassInfo iteminfo = new SiteClassInfo
                    {
                        ParentClass = "",
                        ParentName = "",
                        ClassName = tempName,
                        ClassId = tempid,
                        ParentUrl = "",
                        IsDel = false,
                        IsBind = false,
                        IsHide = false,
                        BindClassId = 0,
                        BindClassName = "",
                        // Anchors marked level3 are flagged as having no children.
                        HasChild = !anchor.Contains("class=\"level3\""),
                        ClassCrumble = "",
                        TotalProduct = page,
                        SiteId = Baseinfo.SiteId,
                        Urlinfo = tempUrl,
                        UpdateTime = DateTime.Now,
                        CreateDate = DateTime.Now
                    };
                    HasBindClasslist.Add(iteminfo);
                    shopClasslist.Add(iteminfo);
                }
            }
            // Write all newly discovered categories in one batch.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
        /// <summary>
        /// Batch-adds categories: creates a local category named after the given
        /// site category under the parent chosen in the form field
        /// <c>parCatName</c>, then hands over to <c>addChildCat</c> for the site
        /// category's children.
        /// </summary>
        /// <param name="catid">
        /// Id of the site category, as text. When it is not an integer the
        /// current <c>SiteCatName</c> value is used for the new category's name.
        /// </param>
        private void bathAddCat(string catid)
        {
            SiteClassBll bll = new SiteClassBll();

            int siteClassId;
            int parentId;
            SiteClassInfo siteCat = new SiteClassInfo();
            if (int.TryParse(catid, out siteClassId))
            {
                siteCat = bll.GetCatById(siteClassId);
                // NOTE(review): assumes GetCatById yields null for an unknown id -
                // without a source category there is nothing to copy.
                if (siteCat == null)
                    return;
                SiteCatName = siteCat.ClassName.Replace(" ", "");
            }
            // The parent id arrives in the form field "parCatName".
            if (!int.TryParse(Request.Form["parCatName"], out parentId))
                return;
            #region Add the category
            ClassInfo cat = new ClassInfo();
            cat.CatName = SiteCatName;
            cat.SpellWord = WordCenter.GetShortPinyin(cat.CatName);
            cat.SEOWords = "";
            cat.Sort = 0;
            cat.CreateDate = DateTime.Now;
            cat.UpdateTime = DateTime.Now;
            cat.HasChild = false;
            ClassInfoBll catbll = new ClassInfoBll();
            if (parentId != 0)
            {
                ClassInfo parCat = catbll.getCat(parentId);
                // NOTE(review): assumes getCat yields null for an unknown id - a
                // category cannot be attached to a parent that does not exist.
                if (parCat == null)
                    return;
                cat.ParentId = parCat.Id;
                cat.Level = parCat.Level + 1;
                cat.ParentName = parCat.CatName;
                // The crumb of the new category is the parent's crumb plus the parent.
                if (!string.IsNullOrEmpty(parCat.CatCrumbleIds))
                {
                    cat.CatCrumbleIds = parCat.CatCrumbleIds + "," + parCat.Id;
                    cat.CatCrumbleNames = parCat.CatCrumbleNames + "," + parCat.CatName;
                }
                else
                {
                    cat.CatCrumbleIds = parCat.Id.ToString(CultureInfo.InvariantCulture);
                    cat.CatCrumbleNames = parCat.CatName;
                }
                // The parent gains its first child: persist the flag.
                if (!parCat.HasChild)
                {
                    parCat.HasChild = true;
                    catbll.UpdateCat(parCat);
                }
            }
            else
            {
                // Parent id 0 means a top-level category.
                cat.ParentId = 0;
                cat.Level = 1;
                cat.ParentName = "";
                cat.CatCrumbleIds = "";
                cat.CatCrumbleNames = "";
            }
            cat.Id = catbll.AddCat(cat);
            #endregion
            #region Add the child categories

            AllSiteCat = bll.GetClassInfo(siteCat.SiteId);

            addChildCat(siteCat.ClassId, cat);

            #endregion
        }
Example #10
0
        /// <summary>
        /// Refreshes a Tmall category that has no parent recorded yet: downloads
        /// its listing page, walks the category breadcrumb from the top level
        /// down, registers missing ancestors, and then inserts or updates the
        /// category described by the last breadcrumb entry.
        /// </summary>
        /// <remarks>
        /// The method sleeps 6 to 29 seconds after the request. When the response
        /// was redirected to the Tmall home page and the category has not been
        /// updated for 15 days, the category is deleted instead.
        /// </remarks>
        /// <param name="item">
        /// Category to refresh; ignored when its <c>ParentClass</c> is not the
        /// empty string.
        /// </param>
        private void UpdateTmallNode(SiteClassInfo item)
        {
            if (item.ParentClass != "")
                return;

            string url = string.Format("http://list.tmall.com/search_product.htm?cat={0}", item.ClassId);
            HtmlAnalysis reqest = new HtmlAnalysis();

            // NOTE(review): hard-coded logged-in session cookie. Credentials should
            // not live in source; move this to configuration.
            reqest.Headers.Add("Cookie", "_med=dw:1440&dh:900&pw:1440&ph:900&ist:0; pnm_cku822=126UW5TcyMNYQwiAiwQRHhBfEF8QXtHcklnMWc%3D%7CUm5Ockt%2BQXVPdUp%2BQH9Dfyk%3D%7CU2xMHDJxPk82UjVOI1h2VnhCbExiPl85VTJMNhhOGA%3D%3D%7CVGhXd1llXGlWYlhiXWlXaFRoX2JAekN3TndMdUB1S3RAeUx0TmA2%7CVWldfS0SMgwzCCgULg4gWz0ReEB2Aix6LA%3D%3D%7CVmhIGCcYJAQ%2FAyMXLRc3DTQNORklHCUYOAwxDCwQKRAtDTgDPmg%2B%7CV25Tbk5zU2xMcEl1VWtTaUlwJg%3D%3D; cq=ccp%3D1; tt=login.taobao.com; res=scroll%3A990*776-client%3A977*290-offset%3A977*290-screen%3A1440*900; hng=; uss=BqRyb7nd5KLIbC5D91VCamaiwt66iy8KP0cAS24EJNQWFeWsxGZv%2FwEo%2BAs%3D; cna=cFJaEEwJdRsCATyy24A1yMNe; l=AkZGKGJIZ/WDVSsY65u6dVSj1jLItYph; isg=Alpa8TTm1nmgf1rVi7OVW5M1rADEst5lFaLZTWTTFO241_oRTBsudSCt8xs0; OZ_1U_2061=vid=v801c15a894bb1.0&ctime=1478143053&ltime=1476512356; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; x=__ll%3D-1%26_ato%3D0; _tb_token_=eeed7bb353eb5; ck1=; uc1=cookie14=UoW%2FX9QwsnjAzg%3D%3D&lng=zh_CN&cookie16=W5iHLLyFPlMGbLDwA%2BdvAGZqLg%3D%3D&existShop=false&cookie21=V32FPkk%2FhSg%2F&tag=0&cookie15=UIHiLt3xD8xYTw%3D%3D&pas=0; uc3=sg2=AQI4ctClVx2ycnFp5kyAa%2F3VFKDYjzhZBJFC8KK2LVw%3D&nk2=D9ZNP7htc6w%3D&id2=UU8Lx7%2BmPirPbw%3D%3D&vt3=F8dARHfHI%2BnGtn3VuNA%3D&lg2=UtASsssmOIJ0bQ%3D%3D; lgc=lunce188; tracknick=lunce188; cookie2=10682dca3e46d779e26f299924785699; cookie1=AV0h8l61cg4iTp3AqqPZRlYP3nQGpHHQCAg%2FB5Sm3VI%3D; unb=2731635449; t=65336f3349d3648c68445898ef92bec2; skt=2c4d55251dbb75a9; _nk_=lunce188; _l_g_=Ug%3D%3D; cookie17=UU8Lx7%2BmPirPbw%3D%3D; login=true");
            reqest.RequestUserAgent = "Mozilla/5.0 (SymbianOS/9.3; U; Series60/3.2 NokiaE75-1 /110.48.125 Profile/MIDP-2.1 Configuration/CLDC-1.1 ) AppleWebKit/413 (KHTML, like Gecko) Safari/413";
            string catPage = reqest.HttpRequest(url);

            // Random pause between requests.
            Thread.Sleep(new Random().Next(6, 30) * 1000);

            // Nothing was downloaded: there is no page to analyse.
            if (catPage == null)
                return;

            // Redirected to the home page and stale for 15 days: drop the category.
            if (catPage.Contains("ResponseUri:http://www.tmall.com/") && item.UpdateTime.AddDays(15) < DateTime.Now)
            {
                new SiteClassBll().delClass(item);
                return;
            }
            var crumbsList = RegGroupCollection(catPage,
                "<li data-tag=\"cat\">(?<x>.*?)</li>");
            if (crumbsList == null)
                return;
            SiteClassInfo catinfo = new SiteClassInfo();

            // The para* variables always describe the previous breadcrumb entry,
            // i.e. the parent of the entry handled in the current pass.
            string paraInfo = "";
            string paraUrl = "";
            string paraCatId = "";
            string paraName = "";
            for (int i = 0; i < crumbsList.Count; i++)
            {
                bool hasParent = !string.IsNullOrEmpty(paraCatId);
                catinfo.ParentName = paraName;
                catinfo.ParentClass = paraCatId;
                catinfo.ParentUrl = hasParent ? "http://list.tmall.com/search_product.htm?cat=" + paraCatId : "";

                // Register the parent when it is not known yet.
                if (hasParent &&
                    !HasBindClasslist.Exists(c => c.ClassId == paraCatId))
                {

                    string tempparaInfo = "";
                    string tempparaUrl = "";
                    string tempparaCatId = "";
                    string tempparaName = "";
                    if (i > 1)
                    {
                        // The parent's own parent is two entries back. Its id has to
                        // come from that entry's link; reading it from paraUrl made
                        // the parent point at itself.
                        Match pnode = crumbsList[i - 2];
                        tempparaInfo = pnode.Groups["x"].Value;
                        string grandUrl = RegGroupsX<string>(tempparaInfo, "href=\"(?<x>.*?)\"");
                        if (grandUrl != null)
                            tempparaCatId = RegGroupsX<string>(grandUrl, "cat=(?<x>\\d+)") ?? "";
                        if (tempparaCatId != "")
                            tempparaUrl = "http://list.tmall.com/search_product.htm?cat=" + tempparaCatId;
                        tempparaName = RegGroupsX<string>(tempparaInfo, "title=\"(?<x>.*?)\"");
                    }

                    SiteClassInfo catPareInfo = new SiteClassInfo
                    {
                        ClassName = paraName,
                        ClassId = paraCatId,
                        SiteId = Baseinfo.SiteId,
                        CreateDate = DateTime.Now,
                        UpdateTime = DateTime.Now,
                        IsHide = false,
                        ParentUrl = tempparaUrl,
                        ParentName = tempparaName,
                        ClassCrumble = tempparaCatId + ",",

                        ParentClass = tempparaCatId,
                        Urlinfo = "http://list.tmall.com/search_product.htm?cat=" + paraCatId,
                        IsDel = false,
                        HasChild = true,
                        IsBind = false
                    };
                    catPareInfo.ClassCrumble = catPareInfo.ClassCrumble.TrimEnd(',');
                    HasBindClasslist.Add(catPareInfo);
                    shopClasslist.Add(catPareInfo);
                    // Siblings of the parent.
                    GetAllBrotherCats(catPareInfo);
                    // Children of the parent.
                    GetChildCats(catPareInfo, catPage);

                }

                Match node = crumbsList[i];
                paraInfo = node.Groups["x"].Value;
                paraUrl = RegGroupsX<string>(paraInfo, "href=\"(?<x>.*?)\"");
                paraCatId = RegGroupsX<string>(paraUrl, "cat=(?<x>\\d+)");
                paraName = RegGroupsX<string>(paraInfo, "title=\"(?<x>.*?)\"");

                catinfo.ClassName = paraName;
                catinfo.ClassId = paraCatId;
                catinfo.SiteId = Baseinfo.SiteId;
                catinfo.CreateDate = DateTime.Now;
                catinfo.UpdateTime = DateTime.Now;
                catinfo.Urlinfo = "http://list.tmall.com/search_product.htm?cat=" + paraCatId;
                catinfo.TotalProduct = RegGroupsX<int>(catPage, "共<span> (?<x>\\d+)</span>件相关商品");
                catinfo.IsHide = false;
                catinfo.IsBind = false;
                catinfo.IsDel = false;
                catinfo.ClassCrumble += paraCatId + ",";

                GetAllBrotherCats(catinfo);

            }
            if (string.IsNullOrEmpty(catinfo.ClassId))
                return;
            // A category-attribute section on the page indicates child categories.
            if (regIsMatch(catPage, "<div class=\"cateAttrs\" data-spm=\".*?\">(?<x>.*?)<div class=\"propAttrs\""))
            {
                GetChildCats(catinfo, catPage);
                catinfo.HasChild = true;
            }
            else
                catinfo.HasChild = false;
            catinfo.ClassCrumble = catinfo.ClassCrumble.TrimEnd(',');

            var oldCatInfo = HasBindClasslist.Find(c => c.ClassId == catinfo.ClassId);
            if (oldCatInfo == null)
            {
                catinfo.HasChild = true;
                HasBindClasslist.Add(catinfo);
                shopClasslist.Add(catinfo);
            }
            else
            {
                oldCatInfo.Urlinfo = catinfo.Urlinfo;
                oldCatInfo.ClassId = catinfo.ClassId;
                oldCatInfo.ClassName = catinfo.ClassName;
                oldCatInfo.TotalProduct = catinfo.TotalProduct;
                oldCatInfo.ParentUrl = catinfo.ParentUrl;
                oldCatInfo.ParentClass = catinfo.ParentClass;
                // ParentUrl used to be assigned twice here while the parent name
                // was never copied over.
                oldCatInfo.ParentName = catinfo.ParentName;
                oldCatInfo.UpdateTime = DateTime.Now;
                new SiteClassBll().UpdateSiteCat(oldCatInfo);
            }

            // Write all newly discovered categories in one batch.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
Example #11
0
        /// <summary>
        /// Downloads the Tmall listing page for <paramref name="catId"/>, walks its breadcrumb
        /// trail and queues the category in <c>shopClasslist</c>. Any breadcrumb ancestor that is
        /// not yet in <c>HasBindClasslist</c> is queued as well, together with that ancestor's
        /// siblings and children.
        /// </summary>
        /// <param name="catId">Category id used as the <c>cat=</c> query value; the method returns immediately when <c>ValidCatId</c> rejects it.</param>
        private void SaveCat(string catId)
        {
            if (!ValidCatId(catId)) return;
            // Pause 10-59 seconds before requesting the page.
            Thread.Sleep(new Random().Next(10, 60) * 1000);
            string catPage = HtmlAnalysis.Gethtmlcode("http://list.tmall.com/search_product.htm?cat=" + catId);

            var crumbsList = RegGroupCollection(catPage,
                "<li data-tag=\"cat\">(?<x>.*?)</li>");
            if (crumbsList == null)
                return;
            SiteClassInfo catinfo = new SiteClassInfo();

            // Values carried over from the previous breadcrumb: while crumb i is being processed
            // they still describe crumb i-1, i.e. the parent of the crumb about to be read.
            string paraInfo = "";
            string paraUrl = "";
            string paraCatId = "";
            string paraName = "";
            for (int i = 0; i < crumbsList.Count; i++)
            {
                catinfo.ParentName = paraName;
                catinfo.ParentClass = paraCatId;
                catinfo.ParentUrl  = paraCatId=="" ?"": "http://list.tmall.com/search_product.htm?cat=" + paraCatId;

                // Add the parent category when it is valid and not yet known.
                if (ValidCatId(paraCatId) &&!HasBindClasslist.Exists(c => c.ClassId == paraCatId))
                {

                    string tempparaInfo = "";
                    string tempparaUrl = "";
                    string tempparaCatId = "";
                    string tempparaName = "";
                    if (i > 1)
                    {
                        // Crumb i-2 is the parent of the parent (crumb i-1).
                        Match pnode = crumbsList[i - 2];
                        tempparaInfo = pnode.Groups["x"].Value;
                        tempparaUrl = RegGroupsX<string>(tempparaInfo, "href=\"(?<x>.*?)\"");
                        // Read the grandparent id from the grandparent's own link. Using paraUrl
                        // here would yield the parent's id and make the parent its own parent.
                        tempparaCatId = RegGroupsX<string>(tempparaUrl, "cat=(?<x>\\d+)");
                        tempparaUrl = "http://list.tmall.com/search_product.htm?cat=" + tempparaCatId;
                        tempparaName = RegGroupsX<string>(tempparaInfo, "title=\"(?<x>.*?)\"");
                    }

                    SiteClassInfo catPareInfo = new SiteClassInfo
                    {
                        ClassName = paraName,
                        ClassId = paraCatId,
                        SiteId = Baseinfo.SiteId,
                        CreateDate = DateTime.Now,
                        UpdateTime = DateTime.Now,
                        IsHide = false,
                        ParentUrl = tempparaUrl,
                        ParentName = tempparaName,
                        ClassCrumble = tempparaCatId + ",",
                        IsDel=false,

                        ParentClass = tempparaCatId,
                        Urlinfo = "http://list.tmall.com/search_product.htm?cat=" + paraCatId,

                        HasChild = true,
                        IsBind = false
                    };
                    catPareInfo.ClassCrumble = catPareInfo.ClassCrumble.TrimEnd(',');
                    HasBindClasslist.Add(catPareInfo);
                    shopClasslist.Add(catPareInfo);
                    // Siblings of the parent category.
                    GetAllBrotherCats(catPareInfo);
                    // Children of the parent category; "" makes GetChildCats download the page itself.
                    GetChildCats(catPareInfo, "");

                }

                // Read the current breadcrumb; after the last iteration catinfo describes the
                // deepest crumb on the page.
                Match node = crumbsList[i];
                paraInfo = node.Groups["x"].Value;
                paraUrl = RegGroupsX<string>(paraInfo, "href=\"(?<x>.*?)\"");
                paraCatId = RegGroupsX<string>(paraUrl, "cat=(?<x>\\d+)");
                paraName = RegGroupsX<string>(paraInfo, "title=\"(?<x>.*?)\"");

                catinfo.ClassName = paraName;
                catinfo.ClassId = paraCatId;
                catinfo.SiteId = Baseinfo.SiteId;
                catinfo.CreateDate = DateTime.Now;
                catinfo.UpdateTime = DateTime.Now;
                catinfo.Urlinfo = "http://list.tmall.com/search_product.htm?cat=" + paraCatId;
                catinfo.TotalProduct = RegGroupsX<int>(catPage, "共<span> (?<x>\\d+)</span>件相关商品");
                catinfo.IsHide = false;
                catinfo.IsBind = false;
                // The crumb accumulates every breadcrumb id, including the current one.
                catinfo.ClassCrumble += paraCatId + ",";
                catinfo.IsDel = false;
                GetAllBrotherCats(catinfo);

            }
            GetChildCats(catinfo, catPage);
            catinfo.ClassCrumble = catinfo.ClassCrumble.TrimEnd(',');
            if (catinfo.ClassId != "" && !HasBindClasslist.Exists(c => c.ClassId == catinfo.ClassId))
            {
                catinfo.HasChild = true;
                HasBindClasslist.Add(catinfo);
                shopClasslist.Add(catinfo);
            }
            // Otherwise fall back to a lookup by name: queue the category when no known
            // category carries the same name.
            else if (catinfo.ClassName != "" &&
                     !HasBindClasslist.Exists(c => c.ClassName == catinfo.ClassName))
            {
                catinfo.HasChild = true;
                HasBindClasslist.Add(catinfo);
                shopClasslist.Add(catinfo);
            }

            // Write to the database in batches: only once more than 100 entries are queued.
            if (shopClasslist.Count > 100)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
Example #12
0
        /// <summary>
        /// Requests the brand list Tmall publishes for a category and saves every brand that is
        /// not yet known, both as a category-level entry and as a site-level entry.
        /// </summary>
        /// <param name="siteClassInfo">Category whose <c>ClassId</c> is sent as the <c>cat</c> query value and stored on each category-level brand.</param>
        private void SaveBand(SiteClassInfo siteClassInfo)
        {
            string requestUrl = string.Format("http://list.tmall.com/ajax/allBrandShowForGaiBan.htm?cat={0}", siteClassInfo.ClassId);

            string body = HtmlAnalysis.Gethtmlcode(requestUrl);
            if (string.IsNullOrEmpty(body))
            {
                return;
            }

            // Strip line breaks so each {...} entry can be matched on a single line.
            body = body.Replace("\r", "").Replace("\n", "").Trim();
            if (body == "")
            {
                return;
            }

            var entries = RegGroupCollection(body, "\\{(?<x>.*?)\\}");
            if (entries == null)
            {
                return;
            }

            List<SiteClassBand> newCategoryBrands = new List<SiteClassBand>();
            List<SiteBandInfo> newSiteBrands = new List<SiteBandInfo>();

            foreach (Match entry in entries)
            {
                string raw = entry.ToString();

                string link = RegGroupsX<string>(raw, "\"href\":\"(?<x>.*?)\"");
                if (string.IsNullOrEmpty(link))
                {
                    continue;
                }

                link = link.Replace("&amp;", "&");
                bool isAbsolute = link.Contains("http://");
                if (!isAbsolute)
                {
                    link = "http://list.tmall.com/search_product.htm" + link;
                }

                string brandId = RegGroupsX<string>(raw, "brand=(?<x>\\d+)");
                string displayName = RegGroupsX<string>(raw, "\"title\":\"(?<x>.*?)\"");
                string image = RegGroupsX<string>(raw, "\"img\":\"(?<x>.*?)\"");

                // One key per (site, category, brand) and one per (site, brand).
                string categoryKey = Baseinfo.SiteId + "_" + siteClassInfo.ClassId + "_" + brandId;
                string siteKey = Baseinfo.SiteId + "_" + brandId;

                // The display name may hold several names separated by '/'. A part containing
                // a CJK character becomes the Chinese name, any other part the English name;
                // when several parts qualify the last one wins.
                string chineseName = "";
                string englishName = "";
                foreach (string part in displayName.Split('/'))
                {
                    bool hasChinese = regIsMatch(part, @"[\u4e00-\u9fa5]");
                    if (hasChinese)
                    {
                        chineseName = part;
                    }
                    else
                    {
                        englishName = part;
                    }
                }

                SiteClassBand categoryBrand = new SiteClassBand
                {
                    ImgUrl = image,
                    UniqueKey = categoryKey,
                    DisplayName = displayName,
                    CnName = chineseName,
                    EnName = englishName,
                    CommentCount = 0,
                    ProductCount = 0,
                    Urlinfo = link,
                    SiteBandId = brandId,
                    SiteClassId = siteClassInfo.ClassId,
                    SiteId = Baseinfo.SiteId,
                    IsHid = false,
                    UpdateDate = DateTime.Now,
                    CreateDate = DateTime.Now
                };

                bool categoryBrandKnown = HasBindBandlist.Exists(p => p.UniqueKey == categoryKey);
                if (!categoryBrandKnown)
                {
                    HasBindBandlist.Add(categoryBrand);
                    newCategoryBrands.Add(categoryBrand);
                }

                bool siteBrandKnown = HasSiteBandlist.Exists(p => p.UniqueKey == siteKey);
                if (siteBrandKnown)
                {
                    continue;
                }

                // The site-level record copies its descriptive fields from the category-level one.
                SiteBandInfo siteBrand = new SiteBandInfo
                {
                    CatArea = "",
                    EnName = categoryBrand.EnName,
                    ImgUrl = categoryBrand.ImgUrl,
                    Introduction = "",
                    IsHid = false,
                    Remark = "",
                    TotalComments = 0,
                    TotalProduts = 0,
                    UniqueKey = siteKey,
                    SiteId = categoryBrand.SiteId,
                    SiteBandId = categoryBrand.SiteBandId,
                    DisplayName = categoryBrand.DisplayName,
                    CnName = categoryBrand.CnName,
                    CreateDate = DateTime.Now,
                    UpdateDate = DateTime.Now
                };

                HasSiteBandlist.Add(siteBrand);
                newSiteBrands.Add(siteBrand);
            }

            // Persist both batches; a failure is logged and not rethrown.
            try
            {
                new SiteClassBandDb().Save(newCategoryBrands);
                new SiteBandDb().Save(newSiteBrands);
            }
            catch (Exception ex)
            {
                LogServer.WriteLog(ex);
            }
        }
Example #13
0
        /// <summary>
        /// Reads the child categories from the "cateAttrs" block of a category page and queues
        /// every child whose id is not yet in <c>HasBindClasslist</c>.
        /// </summary>
        /// <param name="catinfo">Parent category; its id, name, URL and crumb are copied onto each new child.</param>
        /// <param name="pageinfo">
        /// HTML of the parent's page. Pass an empty string to have the page downloaded from
        /// <c>catinfo.Urlinfo</c> after a random 10-29 second pause.
        /// </param>
        private void GetChildCats(SiteClassInfo catinfo,string pageinfo)
        {
            if (pageinfo == "")
            {
                Thread.Sleep(new Random().Next(10, 30)*1000);
                pageinfo = HtmlAnalysis.Gethtmlcode(catinfo.Urlinfo);
            }
            string catInfo = RegGroupsX<string>(pageinfo, "<div class=\"cateAttrs\" data-spm=\".*?\">(?<x>.*?)<div class=\"propAttrs\"");

            if (catInfo == null)
                return;
            var catList = RegGroupCollection(catInfo, "<a title=\"(?<x>.*?)\">\r\n <b>(?<y>.*?)</b><span>\\((?<z>\\d+)\\)</span>\r\n </a>");
            if (catList == null)
                return;

            // The children's crumb is the parent's crumb followed by the parent's id. It is the
            // same for every child, so build it once. Callers may hand over a crumb that is
            // empty, still ends with ',' or already ends with the parent's id (SaveCat does the
            // latter two); normalise so the result never has a leading comma, a double comma or
            // a repeated id.
            string parentCrumb = (catinfo.ClassCrumble ?? "").Trim(',');
            string parentId = catinfo.ClassId;
            string childCrumb;
            if (string.IsNullOrEmpty(parentId))
                childCrumb = parentCrumb;
            else if (parentCrumb == "")
                childCrumb = parentId;
            else if (parentCrumb == parentId || parentCrumb.EndsWith("," + parentId, StringComparison.Ordinal))
                childCrumb = parentCrumb;
            else
                childCrumb = parentCrumb + "," + parentId;

            foreach (Match item in catList)
            {
                // Group "x" is expected to carry a link containing "cat=<id>".
                string catUrl = item.Groups["x"].Value;
                string catId = RegGroupsX<string>(catUrl, "cat=(?<x>\\d+)");
                if (string.IsNullOrEmpty(catId))
                    continue;
                // Group "z" is the number shown in parentheses; total stays 0 when it does not parse.
                int total = 0;
                int.TryParse(item.Groups["z"].Value, out total);
                if (!HasBindClasslist.Exists(c => c.ClassId == catId))
                {
                    SiteClassInfo cat = new SiteClassInfo {
                        ClassId =catId,
                        ClassCrumble = childCrumb,
                        ParentClass = catinfo.ClassId,
                        ParentName = catinfo.ClassName,
                        ClassName=item.Groups["y"].Value,
                        IsHide=false,
                        ParentUrl=catinfo.Urlinfo,
                        UpdateTime=DateTime.Now,
                        IsBind=false,
                        IsDel=false,
                        SiteId=Baseinfo.SiteId,
                        Urlinfo = "http://list.tmall.com/search_product.htm?cat=" + catId,
                        TotalProduct=total,
                        CreateDate=DateTime.Now

                    };

                    HasBindClasslist.Add(cat);
                    shopClasslist.Add(cat);

                }
            }
        }
Example #14
0
        /// <summary>
        /// Requests the sibling categories of <paramref name="catinfo"/> and queues each sibling
        /// whose id is not yet in <c>HasBindClasslist</c>.
        /// </summary>
        /// <param name="catinfo">Category whose <c>ClassId</c> is sent to the sibling endpoint; its parent fields and crumb are copied onto each new sibling.</param>
        private void GetAllBrotherCats(SiteClassInfo catinfo)
        {
            // No delay is applied before this request (the sleep that used to be here is disabled).
            string response = HtmlAnalysis.Gethtmlcode("http://list.tmall.com/ajax/getAllBrotherCats.htm?cat=" + catinfo.ClassId);
            var siblings = RegGroupCollection(response, "\"href\":\"(?<x>.*?)\",\r\n\"title\":\"(?<y>.*?)\",\r\n\"atp\"");
            if (siblings == null)
            {
                return;
            }

            foreach (Match sibling in siblings)
            {
                string siblingUrl = sibling.Groups["x"].Value;
                string siblingId = RegGroupsX<string>(siblingUrl, "cat=(?<x>\\d+)");

                // Skip entries without an id.
                if (string.IsNullOrEmpty(siblingId))
                {
                    continue;
                }

                // Skip siblings that are already known.
                bool alreadyKnown = HasBindClasslist.Exists(c => c.ClassId == siblingId);
                if (alreadyKnown)
                {
                    continue;
                }

                // A sibling takes over the parent fields and the crumb of the category it was
                // discovered from.
                SiteClassInfo entry = new SiteClassInfo
                {
                    ClassId = siblingId,
                    ClassCrumble = catinfo.ClassCrumble,
                    ParentClass = catinfo.ParentClass,
                    ParentName = catinfo.ParentName,
                    ClassName = sibling.Groups["y"].Value,
                    IsHide = false,
                    ParentUrl = catinfo.ParentUrl,
                    UpdateTime = DateTime.Now,
                    IsBind = false,
                    IsDel = false,
                    SiteId = Baseinfo.SiteId,
                    Urlinfo = "http://list.tmall.com/search_product.htm?cat=" + siblingId,
                    CreateDate = DateTime.Now
                };

                HasBindClasslist.Add(entry);
                shopClasslist.Add(entry);
            }
        }
Example #15
0
        /// <summary>
        /// Re-crawls one YHD category page and refreshes <paramref name="siteClassInfo"/> from it.
        /// Unknown categories linked from the page (listing links, crumb entries, child links) are
        /// handed to <c>GetYhdClassInfo</c>; the parent is resolved from the crumb list; the class
        /// id is reconciled with the id the page reports; the result is saved through
        /// <c>SiteClassBll.UpdateSiteCat</c>.
        /// </summary>
        /// <remarks>
        /// The category is deleted instead of updated when the page shows a "no products" message
        /// and no category links were found, or when the crumb reports a different id that is
        /// already present in <c>HasBindClasslist</c>.
        /// </remarks>
        /// <param name="siteClassInfo">Category to refresh; its <c>Urlinfo</c> is rebuilt from <c>ClassId</c> before the page is requested.</param>
        private void UpdateNode(SiteClassInfo siteClassInfo)
        {
            siteClassInfo.Urlinfo = $"http://list.yhd.com/{siteClassInfo.ClassId}/b/a-s1-v4-p1-price-d0-f0d-m1-rt0-pid-mid0-k";
            string pageInfo = Gethtmlcode(siteClassInfo.Urlinfo);

            // The page exposes its own category id through two script variables.
            string categoryname = RegGroupsX<string>(pageInfo, "var categoryName = '(?<x>.*?)'");
            string extid = RegGroupsX<string>(pageInfo, "var expectCategoryId = \"(?<x>\\d+)\"");

            string tempcurcatid = "c" + extid + "-" + categoryname;
            if (!regIsMatch(tempcurcatid, "^(?<x>c\\d+-\\d+(-\\d+)?)$"))
            {
                return;
            }
            if (categoryname == "0-0")
                tempcurcatid = "c" + extid;

            if (extid=="0")
                return;

            // Collect the markup that contains links to related categories.
            string classInfo = RegGroupsX<string>(pageInfo, "<ul class=\"listCon clearfix\">(?<x>.*?)<a class=\"c_btn c_next iconSearch\"");
            string classinfo2 = RegGroupsX<string>(pageInfo, "<ul class=\"guide_con clearfix\">(?<x>.*?)</ul>");
            if (!string.IsNullOrEmpty(classinfo2))
            {
                classInfo += classinfo2;
            }
            string tempcatid = RegGroupsX<string>(siteClassInfo.Urlinfo, "c\\d+-0-(?<x>\\d+)/");
            if (!string.IsNullOrEmpty(tempcatid))
            {
                string classurl =
                    string.Format(
                        "http://list.yhd.com/lazyLoadBrotherCategory/nc{0}-a-f0d-mid0-k/?urlFilterSuffix=/b/a-s1-v4-p1-price-d0-f0d-m1-rt0-pid-mid0-k/",
                        tempcatid);

                string catpage = Gethtmlcode(classurl);
                // The response carries escaped quotes; unescape them so the href/title pattern matches.
                if (!string.IsNullOrEmpty(catpage))
                    classInfo += catpage.Replace("\\\"","\"");
            }

            var catList = RegGroupCollection(classInfo, "href=\"(?<x>.*?)\".*?title=\"(?<y>.*?)\"");
            if (catList == null)
            {
                // No category links at all: delete the category when the page says it has no products.
                if (pageInfo.Contains("没有找到符合条件的商品,建议您更改下搜索条件")||pageInfo.Contains("很抱歉!没有找到与<span class=\"color_red\">\"\"</span>相关的商品,要不你换个关键词我帮你再找找吧"))
                    new SiteClassBll().delClass(siteClassInfo);
                return;
            }
            for (int i = 0; i < catList.Count; i++)
            {
                string url = catList[i].Groups["x"].Value;
                string catname = catList[i].Groups["y"].Value;
                if (string.IsNullOrEmpty(url))
                    continue;
                string catId = RegGroupsX<string>(url, "http://list.yhd.com/(?<x>.*?)/");
                // Only categories unknown by id AND by name are passed on.
                if (!HasBindClasslist.Exists(p => p.ClassId == catId) && !HasBindClasslist.Exists(p => p.ClassName == catname))
                {
                    GetYhdClassInfo(url);
                }
            }

            var list = RegGroupCollection(classInfo, "<li class=\"crumb_list\">(?<x>.*?)</li>");
            if (list == null)
                return;
            // Name of the category the page itself is about; the crumb walk stops when it is reached.
            string current = RegGroupsX<string>(pageInfo, "<title>(?<x>.*?)品种齐全|<div class=\"guide_title\"><span title=\"(?<x>.*?)\">");
            string pcatUrl = "";
            string pcatName = "";
            string pcatId = "";
            string classCrumble = "";
            int total = RegGroupsX<int>(pageInfo, "共(?<x>\\d+?)条");

            // catInfo is reused for every crumb; after the loop it holds the last crumb processed.
            SiteClassInfo catInfo = new SiteClassInfo();

            for (int i = 0; i < list.Count; i++)
            {
                string div = list[i].Groups["x"].Value;
                catInfo.Urlinfo = RegGroupsX<string>(div, "href=\"(?<x>.*?)\"");
                catInfo.ClassId = RegGroupsX<string>(catInfo.Urlinfo, "http://list.yhd.com/(?<x>.*?)/");
                if(string.IsNullOrEmpty(catInfo.Urlinfo))
                    continue;

                if (!catInfo.Urlinfo.Contains("b/a-s1-v4-p1-price-d0-f0d-m1-rt0-pid-mid0-k/"))
                    catInfo.Urlinfo += "b/a-s1-v4-p1-price-d0-f0d-m1-rt0-pid-mid0-k/";

                catInfo.ClassName = RegGroupsX<string>(div, "\" >(?<x>.*?)</a>|title=\"(?<x>.*?)\"");
                if (catInfo.ClassName== "全部结果")
                    continue;

                if (catInfo.Urlinfo == null || catInfo.ClassId == null || catInfo.ClassName == null)
                {
                    LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误1", "AddClassError");
                    continue;
                }
                if (!HasBindClasslist.Exists(p => p.ClassId == catInfo.ClassId) && !HasBindClasslist.Exists(p => p.ClassName == catInfo.ClassName))
                {
                    GetYhdClassInfo(catInfo.Urlinfo);
                }

                // The previous crumb is the parent of this one; the crumb string lists ancestor ids only.
                catInfo.ParentClass = pcatId;
                catInfo.ParentName = pcatName;
                catInfo.ParentUrl = pcatUrl;
                if (i != 0 && pcatId != "")
                    classCrumble += pcatId + ",";
                catInfo.ClassCrumble = classCrumble.TrimEnd(',');

                if (list.Count - 1 == i)
                {
                    catInfo.HasChild = false;
                    catInfo.TotalProduct = total;
                }

                if (catInfo.ClassName == current)
                {
                    break;
                }
                pcatUrl = catInfo.Urlinfo;
                pcatName = catInfo.ClassName;
                pcatId = catInfo.ClassId;
            }
            if (!string.IsNullOrEmpty(pcatId))
            {
                // NOTE(review): every other lookup in this method calls GetYhdClassInfo when the
                // category is NOT yet known; this one fires when the parent IS known. Confirm
                // whether the condition is intentional.
                if (HasBindClasslist.Any(p => p.ClassId == pcatId))
                {
                    GetYhdClassInfo(pcatUrl);
                }
            }

            string childCat = RegGroupsX<string>(pageInfo,"<div class=\"classWrap\">(?<x>.*?)</ul>");
            if (childCat != null)
            {
                var childList = RegGroupCollection(childCat, "href=\"(?<x>.*?)\".*?<span title=\"(?<y>.*?)\">");
                // RegGroupCollection results are null-checked everywhere else in this method;
                // guard here too so an unmatched child block does not throw.
                if (childList != null)
                {
                    foreach (Match item in childList)
                    {
                        string tempUrl = item.Groups["x"].Value;
                        string tempName = item.Groups["y"].Value;
                        string tempid = RegGroupsX<string>(tempUrl, "http://list.yhd.com/(?<x>.*?)/");
                        if (!HasBindClasslist.Exists(c => c.ClassId == tempid) && !HasBindClasslist.Exists(p => p.ClassName == tempName))
                        {
                            GetYhdClassInfo(tempUrl);
                        }
                    }
                }
                siteClassInfo.HasChild = true;
            }
            else
            { siteClassInfo.HasChild = false; }

            // The crumb reports a different id than the stored one.
            if (siteClassInfo.ClassId != catInfo.ClassId)
            {

                LogServer.WriteLog("分类id 更改 old id:" + siteClassInfo.Id + "oldclass:" + siteClassInfo.ClassId + "newclass:" + catInfo.ClassId);
                // The new id already exists as another record: drop this one instead of duplicating it.
                if (HasBindClasslist.Exists(c => c.ClassId == catInfo.ClassId))
                {
                    new SiteClassBll().delClass(siteClassInfo);
                    return;
                }

                siteClassInfo.Urlinfo = catInfo.Urlinfo;
                siteClassInfo.ClassId = catInfo.ClassId;

            }

            // The id taken from the page's script variables takes precedence when it is well-formed.
            if (tempcurcatid != siteClassInfo.ClassId && regIsMatch(tempcurcatid, "^(?<x>c\\d+-\\d+(-\\d+)?)$"))
            {
                siteClassInfo.ClassId = tempcurcatid;
                siteClassInfo.Urlinfo =$"http://list.yhd.com/{tempcurcatid}/b/a-s1-v4-p1-price-d0-f0d-m1-rt0-pid-mid0-k";
            }

            siteClassInfo.ClassName = catInfo.ClassName;
            siteClassInfo.TotalProduct = catInfo.TotalProduct;
            siteClassInfo.ParentUrl = catInfo.ParentUrl;
            siteClassInfo.ParentClass = catInfo.ParentClass;
            // A parent id of the form "c<digits>-0..." is re-resolved from the parent's own page.
            if (regIsMatch(siteClassInfo.ParentClass, "c\\d+-0"))
            {
                string parentpage = Gethtmlcode(catInfo.ParentUrl);
                string pcategoryname = RegGroupsX<string>(parentpage, "var categoryName = '(?<x>.*?)'");
                string pextid = RegGroupsX<string>(parentpage, "var expectCategoryId = \"(?<x>\\d+)\"");
                string ptempcurcatid = "c" + pextid + "-" + pcategoryname;
                if (regIsMatch(ptempcurcatid, "^(?<x>c\\d+-\\d+(-\\d+)?)$"))
                {

                    siteClassInfo.ParentClass = ptempcurcatid;
                    siteClassInfo.ParentUrl =
                        $"http://list.yhd.com/{ptempcurcatid}/b/a-s1-v4-p1-price-d0-f0d-m1-rt0-pid-mid0-k";
                }
            }
            // Final word on HasChild: true only when some known category names this one as parent.
            // This overrides the value set from the child block above.
            siteClassInfo.HasChild = HasBindClasslist.Exists(p => p.ParentClass == siteClassInfo.ClassId);
            siteClassInfo.ParentName = catInfo.ParentName;

            siteClassInfo.UpdateTime = DateTime.Now;
            new SiteClassBll().UpdateSiteCat(siteClassInfo);
        }
Example #16
0
        /// <summary>
        /// Loads the categories already stored for this site, reads the "all categories" menu
        /// from the site's home page (<c>domain</c>) and inserts every menu entry whose id is
        /// valid and not yet known.
        /// </summary>
        public void SaveAllSiteClass()
        {
            HasBindClasslist = new SiteClassInfoDB().getAllSiteCatInfo(Baseinfo.SiteId);

            string page = HtmlAnalysis.Gethtmlcode(domain);

            string content = RegGroupsX<string>(page, "<h2>所有商品分类</h2><div class=\"submenu\">(?<x>.*?)<div class=\"nav\">");

            // Guard against a missing menu block or a menu without links: other callers in this
            // file treat a null result from these helpers as "no match", and indexing a null
            // collection would throw before the pending batch below is written.
            if (content != null)
            {
                var catList = RegGroupCollection(content, "<a href=\"(?<x>.*?)\">(?<y>.*?)</a>");
                if (catList != null)
                {
                    for (int i = 0; i < catList.Count; i++)
                    {
                        string tempurl = catList[i].Groups["x"].Value;
                        if (string.IsNullOrEmpty(tempurl))
                            continue;
                        // Menu links are appended to the site root to form the category URL.
                        tempurl = domain + tempurl;
                        string tempName = catList[i].Groups["y"].Value;
                        string catid = RegGroupsX<string>(tempurl, "/list-(?<x>\\d+)");
                        if (!ValidCatId(catid))
                            continue;
                        if (!HasBindClasslist.Exists(p => p.ClassId == catid))
                        {
                            // Menu entries are stored without parent information.
                            SiteClassInfo cat = new SiteClassInfo
                            {
                                ParentUrl = "",
                                ParentClass = "",
                                ParentName = "",
                                TotalProduct = 0,
                                Urlinfo = tempurl,
                                ClassId = catid,
                                UpdateTime = DateTime.Now,
                                IsDel = false,
                                BindClassId = 0,
                                BindClassName = "",
                                HasChild = true,
                                IsBind = false,
                                IsHide = false,
                                ClassName = tempName,
                                SiteId = Baseinfo.SiteId,
                                ClassCrumble = "",

                                CreateDate = DateTime.Now
                            };
                            HasBindClasslist.Add(cat);
                            shopClasslist.Add(cat);
                        }
                    }
                }
            }
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
Example #17
0
        /// <summary>
        /// Re-crawls one hangowa.com category page. Breadcrumb entries and the categories listed
        /// in the page's category filter are inserted when unknown, and
        /// <paramref name="siteClassInfo"/> is updated with its parent (the last usable
        /// breadcrumb entry) and product count.
        /// </summary>
        /// <param name="siteClassInfo">Category to refresh; its <c>Urlinfo</c> is the page that gets downloaded.</param>
        private void UpdateCat(SiteClassInfo siteClassInfo)
        {
            string page = HtmlAnalysis.Gethtmlcode(siteClassInfo.Urlinfo);
            string cromb = RegGroupsX<string>(page, "<span class=\"pos-front\">(?<x>.*?)</div>");
            if (cromb == null)
                return;
            var plist = RegGroupCollection(cromb, "<a href=\"(?<x>.*?)\" alt=\"\" title=\"\">(?<y>.*?)</a></span>");
            if (plist == null)
                return;
            // After the loop these describe the last usable breadcrumb entry.
            string parentUrl = "";
            string parentName="";
            string parentId = "";
            foreach (Match item in plist)
            {
                // Skip the "home page" crumb.
                if (item.ToString().Contains("首页"))
                    continue;
                // Read the href into a local first: assigning it straight to parentUrl would
                // blank the URL on an empty href while parentName/parentId still point at the
                // previous crumb, leaving a mismatched parent.
                string crumbUrl = item.Groups["x"].Value;
                if (string.IsNullOrEmpty(crumbUrl))
                    continue;
                parentUrl = string.Format("http://www.hangowa.com{0}", crumbUrl);
                parentName = item.Groups["y"].Value;
                parentId = RegGroupsX<string>(parentUrl, "gallery-(?<x>\\d+?).html");
                if (!HasBindClasslist.Exists(c => c.ClassId == parentId))
                {
                    // Breadcrumb categories are stored without parent information.
                    SiteClassInfo iteminfo = new SiteClassInfo
                    {
                        ParentClass = "",
                        ParentName = "",
                        ClassName = parentName,
                        ClassId = parentId,
                        ParentUrl = "",
                        IsDel = false,
                        IsBind = false,
                        IsHide = false,
                        BindClassId = 0,
                        BindClassName = "",
                        HasChild = true,
                        ClassCrumble = "",
                        TotalProduct = 0,
                        SiteId = Baseinfo.SiteId,
                        Urlinfo = parentUrl,
                        UpdateTime = DateTime.Now,
                        CreateDate = DateTime.Now
                    };
                    HasBindClasslist.Add(iteminfo);
                    shopClasslist.Add(iteminfo);
                }

            }

            // Categories offered in the page's category filter are stored as children of this category.
            string chlidCat = RegGroupsX<string>(page, "<dt class=\"filter-entries-label\">分类:</dt>\n            <dd class=\"filter-entries-values\">(?<x>.*?)</dd>");
            if (chlidCat != null)
            {
                var blist = RegGroupCollection(chlidCat, "<a href=\"(?<x>.*?)\" class=\"handle action-cat-filter\">(?<y>.*?)</a>");
                if (blist != null)
                {
                    foreach (Match item in blist)
                    {
                        string burl = item.Groups["x"].Value;
                        if (string.IsNullOrEmpty(burl))
                            continue;
                        burl = "http://www.hangowa.com" + burl.TrimEnd('?');
                        string bName = item.Groups["y"].Value;
                        string bId = RegGroupsX<string>(burl, "gallery-(?<x>\\d+?).html");
                        if (!HasBindClasslist.Exists(c => c.ClassId == bId))
                        {
                            SiteClassInfo iteminfo = new SiteClassInfo
                            {
                                ParentClass =siteClassInfo.ClassId,
                                ParentName =siteClassInfo.ClassName,
                                ClassName = bName,
                                ClassId = bId,
                                ParentUrl = siteClassInfo.Urlinfo,
                                IsDel = false,
                                IsBind = false,
                                IsHide = false,
                                BindClassId = 0,
                                BindClassName = "",
                                HasChild = true,
                                ClassCrumble = "",
                                TotalProduct = 0,
                                SiteId = Baseinfo.SiteId,
                                Urlinfo = burl,
                                UpdateTime = DateTime.Now,
                                CreateDate = DateTime.Now
                            };
                            HasBindClasslist.Add(iteminfo);
                            shopClasslist.Add(iteminfo);
                        }

                    }
                }

            }
            // Write everything queued above in one batch.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }

            siteClassInfo.ParentClass = parentId;
            siteClassInfo.ParentName = parentName;
            siteClassInfo.ParentUrl =parentUrl;
            siteClassInfo.UpdateTime = DateTime.Now;
            siteClassInfo.TotalProduct = RegGroupsX<int>(page, "共<b class=\"op-search-result\">(?<x>\\d+)</b>件商品");
            new mmbSiteClassInfoDB().UpdateSiteClass(siteClassInfo);
        }
Example #18
0
        /// <summary>
        /// Re-crawls a single category page and refreshes its stored record: resolves the
        /// parent from the breadcrumb, updates the product total and the has-child flag,
        /// saves the record, then registers menu categories that are not yet known.
        /// </summary>
        /// <param name="siteClassInfo">Category to refresh; the page at its <c>Urlinfo</c> is downloaded.</param>
        private void UpdateCat(SiteClassInfo siteClassInfo)
        {
            string pageinfo = HtmlAnalysis.Gethtmlcode(siteClassInfo.Urlinfo);
            string crumb = RegGroupsX<string>(pageinfo, "<div class=\"curr_position\">(?<x>.*?)</div>");

            if (crumb == null)
            {
                // No breadcrumb block: flag the category as deleted, persist and log.
                siteClassInfo.IsDel = true;
                new mmbSiteClassInfoDB().UpdateSiteClass(siteClassInfo);

                LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误\turl:" + siteClassInfo.Urlinfo, "AddClassError");
                return;
            }

            var list = RegGroupCollection(crumb, "<a href=\"(?<x>.*?)\">(?<y>.*?)</a>");
            if (list == null)
                return;

            string parentid = "";
            string parentName = "";
            string parentUrl = "";

            // Walk the breadcrumb; the last usable entry seen before the category's own id is kept as the parent.
            for (int i = 0; i < list.Count; i++)
            {
                string tempName = list[i].Groups["y"].Value;
                // Skip the home-page link.
                if (tempName.Contains("首页"))
                    continue;

                string tempUrl = list[i].Groups["x"].Value;
                if (!tempUrl.Contains(domain))
                    tempUrl = domain + tempUrl;

                // The first run of digits in the URL is used as the category id.
                string tempid = RegGroupsX<string>(tempUrl, "(?<x>\\d+)");
                if (string.IsNullOrEmpty(tempid))
                    continue;

                if (tempid == siteClassInfo.ClassId)
                    break;

                parentid = tempid;
                parentName = tempName;
                parentUrl = string.Format("http://www.jxdyf.com/category/{0}.html", tempid);
            }

            if (ValidCatId(parentid))
            {
                siteClassInfo.ParentName = parentName;
                siteClassInfo.ParentUrl = parentUrl;
                siteClassInfo.ParentClass = parentid;
            }
            siteClassInfo.TotalProduct = RegGroupsX<int>(pageinfo, "共有(?<x>\\d+)个商品");
            siteClassInfo.UpdateTime = DateTime.Now;

            // A category has children when any known category names it as its parent.
            siteClassInfo.HasChild = HasBindClasslist.Exists(c => c.ParentClass == siteClassInfo.ClassId);
            new mmbSiteClassInfoDB().UpdateSiteClass(siteClassInfo);

            string areaCat = RegGroupsX<string>(pageinfo, "<div class=\"menu outline_01\">(?<x>.*?)<div class=\"web_surfer outline_01\">");
            if (areaCat == null)
                return;

            // NOTE(review): this pattern ends with a lazy group, which matches the empty string,
            // so ClassName below is stored empty — confirm the intended terminator (e.g. "</a>").
            var catList = RegGroupCollection(areaCat, "<a href=\"/category/(?<x>\\d+).html\" >(?<y>.*?)");

            // Guard against a null collection, as is already done for the breadcrumb list above.
            if (catList != null)
            {
                for (int i = 0; i < catList.Count; i++)
                {
                    string tempid = catList[i].Groups["x"].Value;
                    if (!ValidCatId(tempid) || HasBindClasslist.Exists(c => c.ClassId == tempid))
                        continue;

                    string tempName = catList[i].Groups["y"].Value;
                    SiteClassInfo cat = new SiteClassInfo
                    {
                        ParentUrl = "",
                        ParentClass = "",
                        ParentName = "",
                        TotalProduct = 0,
                        Urlinfo = string.Format("http://www.jxdyf.com/category/{0}.html", tempid),
                        ClassId = tempid,
                        UpdateTime = DateTime.Now,
                        IsDel = false,
                        BindClassId = 0,
                        BindClassName = "",
                        HasChild = true,
                        IsBind = false,
                        IsHide = false,
                        ClassName = tempName,
                        SiteId = Baseinfo.SiteId,
                        ClassCrumble = "",

                        CreateDate = DateTime.Now
                    };
                    HasBindClasslist.Add(cat);
                    shopClasslist.Add(cat);
                }
            }

            // Persist everything queued so far and empty the queue.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
Example #19
0
        /// <summary>
        /// Refreshes one category from its page: registers unknown categories found in the
        /// breadcrumb and in the sub-navigation block, then stores the parent data, child
        /// flag and product total on the record and persists it.
        /// </summary>
        /// <param name="siteClassInfo">Category to refresh; the page at its <c>Urlinfo</c> is downloaded.</param>
        private void UpdateCat(SiteClassInfo siteClassInfo)
        {
            // Category id formats accepted in list URLs: "n", "n-n" and "n-n-n".
            const string catIdPattern = "http://www.ehaier.com/l/(?<x>\\d+).html|http://www.ehaier.com/l/(?<x>\\d+-\\d+).html|http://www.ehaier.com/l/(?<x>\\d+-\\d+-\\d+).html";

            string page = HtmlAnalysis.Gethtmlcode(siteClassInfo.Urlinfo);
            string cromb = RegGroupsX<string>(page, "您现在的位置: <a href=\"http://www.ehaier.com\">海尔商城</a>(?<x>.*?)</div>");
            if (cromb == null)
                return;
            var plist = RegGroupCollection(cromb, "<a href=\"(?<x>.*?)\">(?<y>.*?)</a>");
            if (plist == null)
                return;

            string parentUrl = "";
            string parentName = "";
            string parentId = "";

            // After the loop these three hold the values of the LAST breadcrumb link, valid or not.
            foreach (Match item in plist)
            {
                parentUrl = item.Groups["x"].Value;
                parentName = item.Groups["y"].Value;
                parentId = RegGroupsX<string>(parentUrl, catIdPattern);
                if (!ValidCatId(parentId))
                    continue;
                if (!HasBindClasslist.Exists(c => c.ClassId == parentId))
                {
                    SiteClassInfo iteminfo = new SiteClassInfo
                    {
                        ParentClass = "",
                        ParentName = "",
                        ClassName = parentName,
                        ClassId = parentId,
                        ParentUrl = "",
                        IsDel = false,
                        IsBind = false,
                        IsHide = false,
                        BindClassId = 0,
                        BindClassName = "",
                        HasChild = true,
                        ClassCrumble = "",
                        TotalProduct = 0,
                        SiteId = Baseinfo.SiteId,
                        Urlinfo = parentUrl,
                        UpdateTime = DateTime.Now,
                        CreateDate = DateTime.Now
                    };
                    HasBindClasslist.Add(iteminfo);
                    shopClasslist.Add(iteminfo);
                }
            }

            string brotherCat = RegGroupsX<string>(page, "<div class=\"list-subnav\">(?<x>.*?)<dl class=\"dl-subnav dl-subnav-now\">");
            if (brotherCat != null)
            {
                var blist = RegGroupCollection(brotherCat, "href=\"(?<x>.*?)\">(?<y>.*?)</a>");
                if (blist != null)
                {
                    foreach (Match item in blist)
                    {
                        string burl = item.Groups["x"].Value;
                        string bName = item.Groups["y"].Value;
                        string bId = RegGroupsX<string>(burl, catIdPattern);

                        // Links that are not category URLs yield no usable id; skip them
                        // instead of registering a category without a ClassId.
                        if (!ValidCatId(bId))
                            continue;

                        if (!HasBindClasslist.Exists(c => c.ClassId == bId))
                        {
                            SiteClassInfo iteminfo = new SiteClassInfo
                            {
                                ParentClass = "",
                                ParentName = "",
                                ClassName = bName,
                                ClassId = bId,
                                ParentUrl = "",
                                IsDel = false,
                                IsBind = false,
                                IsHide = false,
                                BindClassId = 0,
                                BindClassName = "",
                                HasChild = true,
                                ClassCrumble = "",
                                TotalProduct = 0,
                                SiteId = Baseinfo.SiteId,
                                Urlinfo = burl,
                                UpdateTime = DateTime.Now,
                                CreateDate = DateTime.Now
                            };
                            HasBindClasslist.Add(iteminfo);
                            shopClasslist.Add(iteminfo);
                        }
                    }
                }
            }

            // Persist everything queued so far and empty the queue.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }

            // A category has children when any known category names it as its parent.
            siteClassInfo.HasChild = HasBindClasslist.Exists(c => c.ParentClass == siteClassInfo.ClassId);

            siteClassInfo.ParentClass = parentId;
            siteClassInfo.ParentName = parentName;
            siteClassInfo.ParentUrl = parentUrl;
            siteClassInfo.UpdateTime = DateTime.Now;
            siteClassInfo.TotalProduct = RegGroupsX<int>(page, "共<strong class=\"haierred\">(?<x>\\d+)</strong> 件");
            new mmbSiteClassInfoDB().UpdateSiteClass(siteClassInfo);
        }
Example #20
0
        /// <summary>
        /// Downloads the site's category index page and stores every linked category
        /// that is not already present in <c>HasBindClasslist</c>.
        /// </summary>
        public void SaveAllSiteClass()
        {
            string url = "http://www.jxdyf.com/category";
            List<SiteClassInfo> shopClasslist = new List<SiteClassInfo>();
            HasBindClasslist = new SiteClassInfoDB().getAllSiteCatInfo(Baseinfo.SiteId);
            string popHtml = HtmlAnalysis.Gethtmlcode(url);
            string content = RegGroupsX<string>(popHtml, "<div class=\"fl\">(?<x>.*?)<div id=\"footer\"");
            var catlist = RegGroupCollection(content, "<a href='(?<y>.*?)'( class='.*?')?>(?<x>.*?)</a>");

            // Nothing could be extracted (download failed or markup changed): nothing to store.
            if (catlist == null)
                return;

            for (int i = 0; i < catlist.Count; i++)
            {
                string tempurl = catlist[i].Groups["y"].Value;
                // The first run of digits in the link is used as the category id.
                string catid = RegGroupsX<string>(tempurl, "(?<x>\\d+)");
                if (!ValidCatId(catid) || HasBindClasslist.Exists(c => c.ClassId == catid))
                {
                    continue;
                }

                string catName = catlist[i].Groups["x"].Value;
                SiteClassInfo cat = new SiteClassInfo
                {
                    ParentUrl = "",
                    ParentClass = "",
                    ParentName = "",
                    TotalProduct = 0,
                    Urlinfo = domain + tempurl,
                    ClassId = catid,
                    UpdateTime = DateTime.Now,
                    IsDel = false,
                    BindClassId = 0,
                    BindClassName = "",
                    HasChild = true,
                    IsBind = false,
                    IsHide = false,
                    ClassName = catName,
                    SiteId = Baseinfo.SiteId,
                    ClassCrumble = "",

                    CreateDate = DateTime.Now
                };
                HasBindClasslist.Add(cat);
                shopClasslist.Add(cat);
            }

            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
Example #21
0
 /// <summary>
 /// Writes only the binding columns (IsBind, BindClassId, BindClassName) of the row
 /// whose Id equals <c>cat.Id</c>.
 /// </summary>
 /// <param name="cat">Category carrying the Id to match and the binding values to store.</param>
 /// <returns><c>true</c> when the update call returns a value greater than zero.</returns>
 public bool BingCatInfo(SiteClassInfo cat)
 {
     using (var connection = _dbFactory.OpenDbConnection())
     {
         return connection.UpdateOnly(
             cat,
             row => new { row.IsBind, row.BindClassId, row.BindClassName },
             row => row.Id == cat.Id) > 0;
     }
 }
Example #22
0
        /// <summary>
        /// Refreshes a category from its page: registers unknown breadcrumb and sibling
        /// categories, then writes the parent taken from the breadcrumb and the child
        /// flag onto the record and persists it.
        /// </summary>
        /// <param name="siteClassInfo">Category whose <c>Urlinfo</c> page is downloaded and whose record is updated.</param>
        private void UpdateCat(SiteClassInfo siteClassInfo)
        {
            string html = HtmlAnalysis.Gethtmlcode(siteClassInfo.Urlinfo);

            string breadcrumb = RegGroupsX<string>(html, "<div class=\"breadcrumb-area fcn\">(?<x>.*?)</div>");
            if (breadcrumb == null)
            {
                return;
            }

            var crumbLinks = RegGroupCollection(breadcrumb, "<a href=\"(?<x>.*?)\" title=\"(?<y>.*?)\">");
            if (crumbLinks == null)
            {
                return;
            }

            // These track the most recently visited breadcrumb link (home-page link excluded).
            string lastCrumbUrl = "";
            string lastCrumbName = "";
            string lastCrumbId = "";
            foreach (Match crumbLink in crumbLinks)
            {
                if (crumbLink.Value.Contains("首页"))
                {
                    continue;
                }

                lastCrumbUrl = crumbLink.Groups["x"].Value;
                lastCrumbName = crumbLink.Groups["y"].Value;
                lastCrumbId = RegGroupsX<string>(lastCrumbUrl, "list-(?<x>\\d+)");
                if (!ValidCatId(lastCrumbId))
                {
                    // Only the id is cleared; url and name keep this link's values.
                    lastCrumbId = "";
                    continue;
                }

                if (HasBindClasslist.Exists(c => c.ClassId == lastCrumbId))
                {
                    continue;
                }

                SiteClassInfo crumbCat = new SiteClassInfo
                {
                    ParentClass = "",
                    ParentName = "",
                    ClassName = lastCrumbName,
                    ClassId = lastCrumbId,
                    ParentUrl = "",
                    IsDel = false,
                    IsBind = false,
                    IsHide = false,
                    BindClassId = 0,
                    BindClassName = "",
                    HasChild = true,
                    ClassCrumble = "",
                    TotalProduct = 0,
                    SiteId = Baseinfo.SiteId,
                    Urlinfo = string.Format("http://www.vmall.com/{0}", lastCrumbUrl),
                    UpdateTime = DateTime.Now,
                    CreateDate = DateTime.Now
                };
                HasBindClasslist.Add(crumbCat);
                shopClasslist.Add(crumbCat);
            }

            string siblingArea = RegGroupsX<string>(html, "<div class=\"p-title\">分类:</div>(?<x>.*?)<div class=\"pro-cate-sort clearfix\">");
            bool hasSiblingArea = siblingArea != null;
            if (hasSiblingArea)
            {
                var siblingLinks = RegGroupCollection(siblingArea, " <li ><a href=\"(?<x>.*?)\">(?<y>.*?)</a></li>");
                if (siblingLinks != null)
                {
                    foreach (Match siblingLink in siblingLinks)
                    {
                        string siblingUrl = siblingLink.Groups["x"].Value;
                        string siblingName = siblingLink.Groups["y"].Value;
                        string siblingId = RegGroupsX<string>(siblingUrl, "list-(?<x>\\d+)$");

                        if (!ValidCatId(siblingId) || HasBindClasslist.Exists(c => c.ClassId == siblingId))
                        {
                            continue;
                        }

                        SiteClassInfo siblingCat = new SiteClassInfo
                        {
                            ParentClass = "",
                            ParentName = "",
                            ClassName = siblingName,
                            ClassId = siblingId,
                            ParentUrl = "",
                            IsDel = false,
                            IsBind = false,
                            IsHide = false,
                            BindClassId = 0,
                            BindClassName = "",
                            HasChild = true,
                            ClassCrumble = "",
                            TotalProduct = 0,
                            SiteId = Baseinfo.SiteId,
                            Urlinfo = string.Format("http://www.vmall.com/{0}", siblingUrl),
                            UpdateTime = DateTime.Now,
                            CreateDate = DateTime.Now
                        };
                        HasBindClasslist.Add(siblingCat);
                        shopClasslist.Add(siblingCat);
                    }
                }
            }

            // Persist everything queued so far and empty the queue.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }

            // The child flag is set exactly when the page has no category-filter block.
            siteClassInfo.HasChild = !hasSiblingArea;

            siteClassInfo.ParentClass = lastCrumbId;
            siteClassInfo.ParentName = lastCrumbName;
            if (!string.IsNullOrEmpty(lastCrumbUrl))
            {
                siteClassInfo.ParentUrl = string.Format("http://www.vmall.com/{0}", lastCrumbUrl);
            }
            siteClassInfo.UpdateTime = DateTime.Now;
            new mmbSiteClassInfoDB().UpdateSiteClass(siteClassInfo);
        }
Example #23
0
        /// <summary>
        /// Updates the stored row for the given category. Database errors are logged
        /// under "DBError" and not rethrown.
        /// </summary>
        /// <param name="catinfo">Category record to write.</param>
        /// <exception cref="ArgumentNullException"><paramref name="catinfo"/> is null.</exception>
        public void UpdateSiteClass(SiteClassInfo catinfo)
        {
            // Validate first so a bad call does not replace the shared connection factory.
            if (catinfo == null) throw new ArgumentNullException("catinfo");

            _dbFactory = new OrmLiteConnectionFactory(MmbpriceDbConnectionString, SqlServerDialect.Provider);
            using (var db = _dbFactory.OpenDbConnection())
            {
                try
                {
                    db.Update(catinfo);
                }
                catch (Exception ex)
                {
                    // Best effort: the failure is recorded and the caller continues.
                    LogServer.WriteLog(ex, "DBError");
                }
            }
        }
Example #24
0
        /// <summary>
        /// Extracts the category links from the navigation list of the given page HTML
        /// and stores every category that is not already known.
        /// </summary>
        /// <param name="directoryHtml">HTML of the page containing the category list.</param>
        private void GetCatInfo(string directoryHtml)
        {
            string catArea = RegGroupsX<string>(directoryHtml,
                "<ol class=\"category-list\">(?<x>.*?)<a href=\"http://app.vmall.com\" target=\"_blank\"><span>应用市场");
            if (catArea == null)
                return;

            // Strip line breaks and tabs before matching the links.
            catArea = catArea.Replace("\r", "").Replace("\n", "").Replace("\t", "");

            var list = RegGroupCollection(catArea, "<a href=\"(?<x>.*?)\" (target=\"_blank\")?><span>(?<y>.*?)</span>");
            if (list == null)
                return;

            foreach (Match item in list)
            {
                string tempUrl = item.Groups["x"].Value;
                string tempName = item.Groups["y"].Value;
                string tempid = RegGroupsX<string>(tempUrl, "list-(?<x>\\d+)$");

                // Links that do not end in "list-<digits>" carry no category id; skip them
                // instead of storing a category without a ClassId.
                if (string.IsNullOrEmpty(tempid))
                    continue;

                if (!HasBindClasslist.Exists(c => c.ClassId == tempid))
                {
                    SiteClassInfo iteminfo = new SiteClassInfo
                    {
                        ParentClass = "",
                        ParentName = "",
                        ClassName = tempName,
                        ClassId = tempid,
                        ParentUrl = "",
                        IsDel = false,
                        IsBind = false,
                        IsHide = false,
                        BindClassId = 0,
                        BindClassName = "",
                        HasChild = false,
                        ClassCrumble = "",
                        TotalProduct = 0,
                        SiteId = Baseinfo.SiteId,
                        Urlinfo = string.Format("http://www.vmall.com{0}", tempUrl),
                        UpdateTime = DateTime.Now,
                        CreateDate = DateTime.Now
                    };
                    HasBindClasslist.Add(iteminfo);
                    shopClasslist.Add(iteminfo);
                }
            }

            // Persist everything queued so far and empty the queue.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
Example #25
0
        /// <summary>
        /// Inserts the category unless a row with the same SiteId and ClassId already exists.
        /// Database errors are logged under "DBError" and rethrown.
        /// </summary>
        /// <param name="siteclass">Category to insert.</param>
        /// <exception cref="ArgumentNullException"><paramref name="siteclass"/> is null.</exception>
        public void AddSiteClass(SiteClassInfo siteclass)
        {
            if (siteclass == null)
            {
                throw new ArgumentNullException("siteclass");
            }

            using (var connection = _dbFactory.OpenDbConnection())
            {
                try
                {
                    bool alreadyStored = connection.Exists<SiteClassInfo>(
                        row => row.SiteId == siteclass.SiteId && row.ClassId == siteclass.ClassId);
                    if (!alreadyStored)
                    {
                        connection.Insert(siteclass);
                    }
                }
                catch (Exception ex)
                {
                    LogServer.WriteLog(ex, "DBError");
                    throw;
                }
            }
        }
Example #26
0
        /// <summary>
        /// Crawls one category listing URL: if the category is not yet known, builds its
        /// record from the page (name, parent, crumb path, product total), queues it in
        /// <c>shopClasslist</c>, then recurses into unknown parent and sub-category URLs.
        /// </summary>
        /// <param name="url">Listing URL; the category id is the text between the host and the first '-'.</param>
        private void AddNode(string url)
        {
            string classid = RegGroupsX<string>(url, "^http://searchex.yixun.com/(?<x>.*?)\\-");

            if (string.IsNullOrEmpty(classid))
                return;

            // Ids containing 't' are split on it; the last segment is checked against known categories.
            if (classid.Contains("t"))
            {
                var tempids = classid.Split('t');
                string catid = tempids[tempids.Length - 1];
                if (HasBindClasslist.Exists(p => p.ClassId == catid))
                    return;
            }
            else
            {
                if (HasBindClasslist.Exists(p => p.ClassId == classid))
                    return;
            }

            string pageinfo = HtmlAnalysis.Gethtmlcode(url);
            // A missing page or a page without a goods block is not treated as a category.
            if (string.IsNullOrEmpty(pageinfo) || !pageinfo.Contains("<div class=\"goods\""))
                return;

            string cromp = RegGroupsX<string>(pageinfo, "<div id=\"crumbBox\" class=\"crumb \">(?<x>.*?)<div class=\"crumb_search \">");
            if (string.IsNullOrEmpty(cromp)) return;

            var caplist = RegGroupCollection(cromp, "<a class=\"crumb_lk\" href=\"(?<x>.*?)\" rg=\"[0-9_]+\" ytag=\"\\d+\">(?<y>.*?)</a>");
            if (caplist == null || caplist.Count < 1)
                return;

            // The last crumb supplies this category's name.
            string proName = caplist[caplist.Count - 1].Groups["y"].Value;

            string parentUrl = "";
            string parentName = "";
            string parentid = "";
            string classCrumble = "";
            List<string> lessCat = new List<string>();

            // Crumbs between the first and the last one are ancestors; the first crumb is skipped.
            for (int i = 1; i < caplist.Count - 1; i++)
            {
                parentUrl = caplist[i].Groups["x"].Value;
                parentName = caplist[i].Groups["y"].Value;
                parentid = RegGroupsX<string>(parentUrl, "path=(?<x>[A-Za-z0-9]+)|^http://searchex.yixun.com/(?<x>.*?)\\-");

                if (!string.IsNullOrEmpty(parentid))
                {
                    if (parentid.Contains(','))
                        parentid = parentid.Substring(parentid.LastIndexOf(',') + 1);
                    classCrumble += parentid + ",";
                    if (!HasBindClasslist.Exists(p => p.ClassId == parentid))
                    {
                        lessCat.Add(string.Format("http://searchex.yixun.com/{0}-1-/", parentid));
                    }
                }
            }

            // For 't'-style ids the first segment is taken as the parent and the second as the id.
            if (classid.Contains("t"))
            {
                var tempids = classid.Split('t');
                parentid = tempids[0];
                classid = tempids[1];
                if (classCrumble == "")
                    classCrumble = parentid;
            }
            classCrumble = classCrumble.TrimEnd(',');

            if (HasBindClasslist.Exists(p => p.ClassId == classid))
                return;

            int total = RegGroupsX<int>(pageinfo, "共<b>(?<x>\\d+)</b>件商品");
            SiteClassInfo cat = new SiteClassInfo
            {
                ParentUrl = parentUrl,
                ParentClass = parentid,
                ParentName = parentName,
                TotalProduct = total,
                Urlinfo = url,
                ClassId = classid,
                UpdateTime = DateTime.Now,
                IsDel = false,
                BindClassId = 0,
                BindClassName = "",
                HasChild = false,
                IsBind = false,
                IsHide = false,
                ClassName = proName,
                SiteId = Baseinfo.SiteId,
                ClassCrumble = classCrumble,

                CreateDate = DateTime.Now
            };
            HasBindClasslist.Add(cat);
            shopClasslist.Add(cat);

            string catList = RegGroupsX<string>(pageinfo, "<div class=\"cate_bd\">(?<x>.*?)<div id=\"viewedGoods\"");
            var temCats = RegGroupCollection(catList, "href=\"(?<x>.*?)\"");

            // A page without a sub-category block must not drop the parents queued above,
            // so only this loop is skipped rather than returning.
            if (temCats != null)
            {
                for (int i = 0; i < temCats.Count; i++)
                {
                    string tempCatUrl = temCats[i].Groups["x"].Value;
                    string tempcatid = RegGroupsX<string>(tempCatUrl, "path=(?<x>[A-Za-z0-9]+)|^http://searchex.yixun.com/(?<x>.*?)\\-");
                    if (string.IsNullOrEmpty(tempcatid))
                        continue;

                    // Test the sub-category's own id (the parent id was checked here before).
                    if (!HasBindClasslist.Exists(p => p.ClassId == tempcatid))
                    {
                        lessCat.Add(string.Format("http://searchex.yixun.com/{0}-1-/", tempcatid));
                    }
                }
            }

            // Recurse into every queued URL; each call returns early for categories already known.
            for (int i = 0; i < lessCat.Count; i++)
            {
                AddNode(lessCat[i]);
            }
        }
        /// <summary>
        /// Extracts the category links from the "all categories" navigation block of the
        /// given page HTML and stores every valid category that is not already known.
        /// </summary>
        /// <param name="directoryHtml">HTML of the page containing the navigation block.</param>
        private void GetCatInfo(string directoryHtml)
        {
            string catArea = RegGroupsX<string>(directoryHtml,
                "全部商品分类</h2>(?<x>.*?)<div class=\"nav-right\">");
            if (catArea == null)
                return;

            var list = RegGroupCollection(catArea, "href=('|\")(?<x>.*?)('|\")\\s*>(?<y>.*?)</a>");
            if (list == null)
                return;

            foreach (Match item in list)
            {
                string tempUrl = item.Groups["x"].Value;
                string tempName = item.Groups["y"].Value;
                if (!string.IsNullOrEmpty(tempName))
                {
                    tempName = tempName.Trim();
                }

                // The id is the number between "category/" and the following '-'.
                string tempid = RegGroupsX<string>(tempUrl, "category/(?<x>\\d+)-");
                if (ValidCatId(tempid) && !HasBindClasslist.Exists(c => c.ClassId == tempid))
                {
                    SiteClassInfo iteminfo = new SiteClassInfo
                    {
                        ParentClass = "",
                        ParentName = "",
                        ClassName = tempName,
                        ClassId = tempid,
                        ParentUrl = "",
                        IsDel = false,
                        IsBind = false,
                        IsHide = false,
                        BindClassId = 0,
                        BindClassName = "",
                        HasChild = false,
                        ClassCrumble = "",
                        TotalProduct = 0,
                        SiteId = Baseinfo.SiteId,
                        Urlinfo = tempUrl,
                        UpdateTime = DateTime.Now,
                        CreateDate = DateTime.Now
                    };
                    HasBindClasslist.Add(iteminfo);
                    shopClasslist.Add(iteminfo);
                }
            }

            // Persist everything queued so far and empty the queue.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
Example #28
0
        /// <summary>
        /// Refreshes one category from its listing page: updates the product total
        /// (deleting the category when it is zero), re-reads id, name, url and parent data
        /// from the breadcrumb, registers unknown categories from the category list, and
        /// persists the record.
        /// </summary>
        /// <param name="siteClassInfo">Category to refresh; the page at its <c>Urlinfo</c> is downloaded.</param>
        private void UpdateCat(SiteClassInfo siteClassInfo)
        {
            // Listing URLs look like "http://searchex.yixun.com/<id>-1-/".
            const string catUrlPattern = "http://searchex.yixun.com/(?<x>.*?)-1-/";

            string pageinfo = HtmlAnalysis.Gethtmlcode(siteClassInfo.Urlinfo);

            string crumble = RegGroupsX<string>(pageinfo,
                "<div class=\"crumb_wrap\">(?<x>.*?)<div class=\"crumb_search \">");
            if (crumble == null)
            {
                LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误1\turl:" + siteClassInfo.Urlinfo, "AddClassError");
                return;
            }
            var crumblelist = RegGroupCollection(crumble,
                "<a class=\"crumb_lk\" href=\"(?<x>.*?)\" rg=\"\\d+_?\\d+\" ytag=\"\\d+\">(?<y>.*?)</a>");
            siteClassInfo.TotalProduct = RegGroupsX<int>(pageinfo,
                "<div class=\"sort_page_txt\">共<b>(?<x>\\d+)</b>件商品</div>");

            // A category without products is flagged and deleted; processing still continues below.
            if (siteClassInfo.TotalProduct == 0)
            {
                siteClassInfo.IsDel = true;
                new SiteClassBll().delClass(siteClassInfo);
            }
            if (crumblelist == null || crumblelist.Count == 0)
            {
                LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误2\turl:" + siteClassInfo.Urlinfo, "AddClassError");
                return;
            }

            string pcatUrl = "";
            string pcatName = "";
            string pcatId = "";
            string classCrumble = "";

            foreach (Match item in crumblelist)
            {
                // Skip the home-page link.
                if (item.ToString().Contains("首页"))
                    continue;

                if (item.ToString().Contains(siteClassInfo.ClassName))
                {
                    // Crumb that mentions this category's name: refresh id, name and url from it.
                    string itemurl = item.Groups["x"].Value;

                    string tempid = RegGroupsX<string>(itemurl, catUrlPattern);
                    if (!ValidCatId(tempid))
                        continue;
                    if (tempid.Contains('t'))
                    {
                        // 't'-separated id: last segment is the id, the one before it the parent.
                        var tempids = tempid.Split('t');
                        string catid = tempids[tempids.Length - 1];
                        siteClassInfo.ClassId = catid;
                        if (tempids.Length > 1)
                            siteClassInfo.ParentClass = tempids[tempids.Length - 2];
                    }
                    else
                    {
                        siteClassInfo.ClassId = tempid;
                    }

                    siteClassInfo.ClassName = item.Groups["y"].Value;
                    siteClassInfo.Urlinfo = itemurl;
                }
                else
                {
                    // Any other crumb is an ancestor; its id is appended to the crumb path.
                    pcatUrl = item.Groups["x"].Value;
                    pcatName = item.Groups["y"].Value;
                    pcatId = RegGroupsX<string>(pcatUrl, catUrlPattern);
                    if (!string.IsNullOrEmpty(pcatId))
                        classCrumble += pcatId + ",";
                }
            }

            if (siteClassInfo.ClassId.Contains('t'))
            {
                siteClassInfo.ParentClass = siteClassInfo.ClassId.Substring(0, siteClassInfo.ClassId.IndexOf('t'));
            }

            string catArea = RegGroupsX<string>(pageinfo, "<div class=\"cate cate_2\" id=\"cateList\">(?<x>.*?)<div id=\"zdmArticle\" class=\"article_relative hide\">");
            var tempcatlist = RegGroupCollection(catArea, "<a class=\"cate_lk2 \" href=\"(?<x>.*?)\" title=(?<y>.*?) navvalue");

            // The category list may be absent; the record must still be saved below.
            if (tempcatlist != null)
            {
                foreach (Match item in tempcatlist)
                {
                    string tempurl = item.Groups["x"].Value;
                    string tempId = RegGroupsX<string>(tempurl, catUrlPattern);

                    // Links that are not listing URLs carry no id; skip them.
                    if (string.IsNullOrEmpty(tempId))
                        continue;

                    if (tempId.Contains("t"))
                    {
                        tempId = tempId.Substring(tempId.IndexOf('t') + 1);
                    }
                    string tempName = item.Groups["y"].Value;
                    if (!HasBindClasslist.Exists(c => c.ClassId == tempId))
                    {
                        SiteClassInfo iteminfo = new SiteClassInfo
                        {
                            ParentClass = "",
                            ParentName = "",
                            ClassName = tempName,
                            ClassId = tempId,
                            ParentUrl = "",
                            IsDel = false,
                            IsBind = false,
                            IsHide = false,
                            BindClassId = 0,
                            BindClassName = "",
                            HasChild = true,
                            ClassCrumble = "",
                            TotalProduct = 0,
                            SiteId = Baseinfo.SiteId,
                            Urlinfo = tempurl,
                            UpdateTime = DateTime.Now,
                            CreateDate = DateTime.Now
                        };
                        HasBindClasslist.Add(iteminfo);
                        shopClasslist.Add(iteminfo);
                    }
                }
            }

            // Persist everything queued so far and empty the queue.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }

            // ParentUrl and ParentClass are deliberately not taken from the ancestor crumbs here.
            if (pcatName != "")
                siteClassInfo.ParentName = pcatName;

            siteClassInfo.ClassCrumble = classCrumble;
            siteClassInfo.UpdateTime = DateTime.Now;
            new mmbSiteClassInfoDB().UpdateSiteClass(siteClassInfo);
        }
Example #29
0
        /// <summary>
        /// Refreshes one category from its page: takes the parent from the second-to-last
        /// breadcrumb link, updates the child flag and product total, registers unknown
        /// categories from the side list, and persists the record.
        /// </summary>
        /// <param name="siteClassInfo">Category to refresh; the page at its <c>Urlinfo</c> is downloaded.</param>
        private void UpdateCat(SiteClassInfo siteClassInfo)
        {
            string html = HtmlAnalysis.Gethtmlcode(siteClassInfo.Urlinfo);

            string breadcrumb = RegGroupsX<string>(html,"<div class=\"w1200 breadNav\">(?<x>.*?)<div class=\"w1200\">");
            var crumbLinks = RegGroupCollection(breadcrumb, "<a href=\"(?<x>.*?)\">(?<y>.*?)</a>");
            if (crumbLinks == null)
            {
                return;
            }

            int depth = crumbLinks.Count;
            if (depth > 1)
            {
                // The link before the last one is the parent.
                var parentCrumb = crumbLinks[depth - 2];
                siteClassInfo.ParentUrl = domain + parentCrumb.Groups["x"].Value;
                siteClassInfo.ParentName = parentCrumb.Groups["y"].Value;
                siteClassInfo.ParentClass = RegGroupsX<string>(siteClassInfo.ParentUrl, "/list-(?<x>\\d+)");
            }
            siteClassInfo.HasChild = depth <= 1;
            siteClassInfo.TotalProduct = RegGroupsX<int>(html, "搜索到 <span class=\"red\">(?<x>\\d+)</span> 件相关商品");

            string sideList = RegGroupsX<string>(html, "<div class=\"sortlist mb10\">(?<x>.*?)<!--左侧产品分类列表 E--");
            if (!string.IsNullOrEmpty(sideList))
            {
                var sideLinks = RegGroupCollection(sideList, "<a href=\"(?<x>.*?)\"\\s*>(?<y>.*?)</a>");
                if (sideLinks != null)
                {
                    foreach (Match link in sideLinks)
                    {
                        string linkUrl = domain + link.Groups["x"].Value;
                        string linkId = RegGroupsX<string>(linkUrl, "/list-(?<x>\\d+)");

                        // Skip links without a valid id and categories that are already known.
                        if (!ValidCatId(linkId) || HasBindClasslist.Exists(p => p.ClassId == linkId))
                        {
                            continue;
                        }

                        string linkName = link.Groups["y"].Value;
                        if (string.IsNullOrEmpty(linkName))
                        {
                            continue;
                        }

                        SiteClassInfo discovered = new SiteClassInfo
                        {
                            ParentUrl = "",
                            ParentClass = "",
                            ParentName = "",
                            TotalProduct = 0,
                            Urlinfo = linkUrl,
                            ClassId = linkId,
                            UpdateTime = DateTime.Now,
                            IsDel = false,
                            BindClassId = 0,
                            BindClassName = "",
                            HasChild = true,
                            IsBind = false,
                            IsHide = false,
                            ClassName = linkName,
                            SiteId = Baseinfo.SiteId,
                            ClassCrumble = "",
                            CreateDate = DateTime.Now
                        };
                        HasBindClasslist.Add(discovered);
                        shopClasslist.Add(discovered);
                    }

                    // Persist everything queued so far and empty the queue.
                    if (shopClasslist.Count > 0)
                    {
                        new SiteClassInfoDB().AddSiteClass(shopClasslist);
                        shopClasslist.Clear();
                    }
                }
            }

            siteClassInfo.UpdateTime = DateTime.Now;
            new mmbSiteClassInfoDB().UpdateSiteClass(siteClassInfo);
        }
Example #30
0
        /// <summary>
        /// Scans the category directory markup for category links and registers every
        /// category that is not already present in <c>HasBindClasslist</c>. Newly found
        /// categories are queued in <c>shopClasslist</c> and written to the database in
        /// one batch at the end.
        /// </summary>
        /// <param name="directoryHtml">HTML of the directory page to scan.</param>
        private void GetCatInfo(string directoryHtml)
        {
            string catArea = RegGroupsX<string>(directoryHtml,
                "<div class=\"brandContent\">(?<x>.*?)<div id=\"div2\"");
            if (catArea == null)
                return;

            // Strip tabs and line breaks so the link pattern below can match across
            // what were originally separate lines.
            catArea = catArea.Replace("\t", "").Replace("\r", "").Replace("\n", "");

            var list = RegGroupCollection(catArea, "<a href=\"(?<x>.*?)\" target=\"_blank\" title=\"(?<y>.*?)\">");

            // RegGroupCollection can hand back null (other callers in this file guard
            // against it); enumerating it unchecked would throw NullReferenceException.
            if (list != null)
            {
                foreach (Match item in list)
                {
                    string tempUrl = item.Groups["x"].Value;
                    string tempName = item.Groups["y"].Value;
                    if (!string.IsNullOrEmpty(tempName))
                    {
                        tempName = tempName.Trim();
                    }

                    string tempid = RegGroupsX<string>(tempUrl, "/Category/(?<x>\\d+)-");
                    if (string.IsNullOrEmpty(tempid))
                    {
                        // The link does not point at a category page (no numeric id in
                        // the URL); registering it would store a category without an id.
                        continue;
                    }

                    if (!HasBindClasslist.Exists(c => c.ClassId == tempid))
                    {
                        SiteClassInfo iteminfo = new SiteClassInfo
                        {
                            ParentClass = "",
                            ParentName = "",
                            ClassName = tempName,
                            ClassId = tempid,
                            ParentUrl = "",
                            IsDel = false,
                            IsBind = false,
                            IsHide = false,
                            BindClassId = 0,
                            BindClassName = "",
                            HasChild = false,
                            ClassCrumble = "",
                            TotalProduct = 0,
                            SiteId = Baseinfo.SiteId,
                            Urlinfo = string.Format("http://www.lbxcn.com/{0}", tempUrl),
                            UpdateTime = DateTime.Now,
                            CreateDate = DateTime.Now
                        };
                        HasBindClasslist.Add(iteminfo);
                        shopClasslist.Add(iteminfo);
                    }
                }
            }

            // Persist everything queued so far and empty the queue.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }