コード例 #1
0
        /// <summary>
        /// Registers a benlai.com account through the invitation campaign: obtains a phone
        /// number from the SMS API manager, asks the site to send a verification SMS, extracts
        /// the code from the received message, submits the receive-and-register request and
        /// stores a <c>SmsHistory</c> record for the attempt.
        /// </summary>
        /// <returns>
        /// "1" once the register request has been sent and the history row stored (the response
        /// of the register request is only logged, not inspected); an empty string when no phone
        /// number was obtained, the SMS request was not acknowledged, or no verification code
        /// could be extracted from the SMS.
        /// </returns>
        public string RegionUser()
        {
            var smsmanger = new SiteFactory().SmsApiManager;
            string catid = "1773";
            string phone = smsmanger.GetPhoneNum(catid);
            if (string.IsNullOrEmpty(phone))
            {
                // Without a phone number every following request would be meaningless.
                LogServer.WriteLog("RegionUser: no phone number returned for catid " + catid);
                return "";
            }

            HtmlAnalysis request1 = new HtmlAnalysis();
            request1.RequestContentType = "application/x-www-form-urlencoded";
            // NOTE(review): captured browser session cookie; presumably it expires — consider moving it to configuration.
            request1.Headers.Add("Cookie", "JSESSIONID=A96903EAC0B275902F08953A3C87C808; RecommendCityStatus=1; burl=https%3a%2f%2fm%2ebenlai%2ecom%2factivity%2fpullNewReceive%3fcallback%3d0%26showtype%3d4%26invitatorCode%3dqVL4Y%26referSysNo%3dDECAA91A6206EDD4; ASP.NET_SessionId=rizc5rofdjadxcytxo3dhizb; uuk=657efcf3-a63d-4e3c-9718-f4d809c92353; userGuid=aafe002b-730b-4c90-8aa3-cad731bb4db720160618014311; _jzqckmp_v2=1/; _jzqckmp=1/; AppCity=*e5*ae*81*e6*b3*a2; curRecommendation=%e5%ae%81%e6%b3%a2; _pk_id.7.2b60=b6589fc6ab0dc82c.1466228584.1.1466228589.1466228584.; _pk_ses.7.2b60=*; recentcNo=\"135, \"; DeliverySysNo=135; WebSiteSysNo=3; CityPY=nb; city=*e5*ae*81*e6*b3*a2; hsc=1; ProvinceSysNo=28; localcity=135; backUrl=https%253A%252F%252Fm.benlai.com%252Factivity%252FpullNewReceive%253Fcallback%253D0%2526showtype%253D4%2526invitatorCode%253DqVL4Y%2526referSysNo%253DDECAA91A6206EDD4; _pk_id.9.2b60=b6589fc6ab0dc82c.1466228593.1.1466228593.1466228593.; _pk_ses.9.2b60=*; _qzja=1.430535276.1466228583963.1466228583963.1466228583963.1466228593005.1466228593012.https%253A%252F%252Fm_benlai_com.0.0.5.1; _qzjb=1.1466228583963.5.0.0.0; _qzjc=1; _qzjto=5.1.0; Hm_lvt_9a7d729a11da2966935bcb2908a98794=1465949409,1465953691,1466042258,1466121250; Hm_lpvt_9a7d729a11da2966935bcb2908a98794=1466228593; Hm_lvt_7feabb06873cfd158820492f754cc70b=1465949409,1465953691,1466042258,1466121250; Hm_lpvt_7feabb06873cfd158820492f754cc70b=1466228593; CSESSIONID=A96903EAC0B275902F08953A3C87C808; source=2");
            request1.RequestUserAgent =
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36";
            request1.Headers.Add("X-Requested-With", "XMLHttpRequest");
            request1.Headers.Add("Origin", "https://m.benlai.com");
            request1.RequestReferer = "https://m.benlai.com/showReg?comeFromApp=0";
            request1.RequestMethod = "post";

            // Step 1: ask the site to send the verification SMS to the phone number.
            string url = "https://m.benlai.com/regPhoneVry?phoneNumber=" + phone;
            var page = request1.HttpRequest(url);
            if (string.IsNullOrEmpty(page) || !page.Contains("'短信验证码生成成功"))
            {
                LogServer.WriteLog(page);
                return "";
            }

            // Step 2: read the SMS and pull the numeric verification code out of it.
            var msm = smsmanger.GetValidateMsg(phone, catid);
            string code = string.IsNullOrEmpty(msm)
                ? ""
                : Regex.Match(msm, "注册验证码为 (?<x>\\d+) \\(本来生活绝不会索取此验证码", ro).Groups["x"].Value;
            if (string.IsNullOrEmpty(code))
            {
                // Log the SMS text itself: the code is empty here, so logging it would record nothing.
                LogServer.WriteLog("RegionUser: verification code not found in SMS: " + msm);
                return "";
            }

            // Step 3: submit the receive-and-register request with the code.
            url = "https://m.benlai.com/activity/receiveAndReg?referSysNo=DECAA91A6206EDD4&invitatorCode=qVL4Y&cellphone=" + phone + "&code=" + code + "&unionId=";
            page = request1.HttpRequest(url);
            LogServer.WriteLog(url + "\t" + page);
            LogServer.WriteLog(phone + "\t" + code, "benlaishenhuo");

            // Step 4: persist what was done for this phone number.
            var shy = new SmsHistory
            {
                SmsServer = smsmanger.smsManger.ServerName,
                SmsUserName = smsmanger.smsManger.UserName,
                Phone = phone,
                CaseName = "本来生活168活动",
                Summary = "创建帐号并获取满200减40的券",
                MessageInfo = string.Format("[\"phone\":{0},\"pwd\":{1}]", phone, code),
                CreateDate = DateTime.Now
            };
            new SmsHistoryDB().AddSmsHistory(shy);
            return "1";
        }
コード例 #2
0
 /// <summary>
 /// Downloads the first comment page for the product referenced by <paramref name="itemUrl"/>
 /// and converts every comment fragment found on it with <c>getCommentNode</c>.
 /// </summary>
 /// <param name="itemUrl">Product URL; the part after "/product/" is used as the product id and the URL itself as the referer.</param>
 /// <returns>
 /// The parsed comments (fragments for which <c>getCommentNode</c> returned null are dropped),
 /// or null when the page states there are no comments or no fragment collection was produced.
 /// </returns>
 public List<CommentInfo> GetCommentsFirstPage(string itemUrl)
 {
     string productId = RegGroupsX<string>(itemUrl, "/product/(?<x>.*?)$");
     string firstPageUrl = string.Format(UrlMode, productId, 0);

     HtmlAnalysis client = new HtmlAnalysis();
     client.RequestReferer = itemUrl;
     string html = client.HttpRequest(firstPageUrl);
     if (html.Contains("目前还没有用户评论"))
     {
         return null;
     }

     var fragments = RegGroupCollection(html, "<div style=\"margin-left:0.5em;\">(?<x>.*?)<span\nclass=\"crVotingButtons\">");
     if (fragments == null)
     {
         return null;
     }

     var comments = new List<CommentInfo>();
     foreach (Match fragment in fragments)
     {
         var comment = getCommentNode(fragment.Groups["x"].Value);
         if (comment != null)
         {
             comments.Add(comment);
         }
     }
     return comments;
 }
コード例 #3
0
ファイル: SfdaProList.cs プロジェクト: chennysnow/SimpleDemo
 /// <summary>
 /// Iterates over the result pages of the SFDA search for table 96 and passes the record
 /// ids found on each page to <c>addSigelSite</c>.
 /// </summary>
 /// <remarks>
 /// The total page count is read from the page text ("共N页") while it is still 1.
 /// Exceptions raised while handling one page are logged and the loop moves on to the next.
 /// </remarks>
 public void GetAllSite()
 {
     const string urlmode =
         "http://app1.sfda.gov.cn/datasearch/face3/search.jsp?tableId=96&State=1&bcId=139468294509280829793942689586&State=1&curstart={0}&State=1&tableName=TABLE96&State=1&viewtitleName=COLUMN1229&State=1&viewsubTitleName=COLUMN1227&State=1&tableView=%25E7%25BD%2591%25E4%25B8%258A%25E8%258D%25AF%25E5%25BA%2597&State=1";

     HtmlAnalysis request = new HtmlAnalysis
     {
         RequestMethod = "post",
         RequestAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
         RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8",
         RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"
     };

     int totalPages = 1;
     for (int pageIndex = 1; pageIndex <= totalPages; pageIndex++)
     {
         try
         {
             // From the second page on, the previous page is sent as the referer.
             if (pageIndex > 1)
             {
                 request.RequestReferer = string.Format(urlmode, pageIndex - 1);
             }

             string url = string.Format(urlmode, pageIndex);
             string html = request.HttpRequest(url);
             if (html == string.Empty)
             {
                 continue;
             }

             if (totalPages == 1)
             {
                 totalPages = RegGroupsX<int>(html, "共(?<x>\\d+)页");
             }

             string table = RegGroupsX<string>(html, "<td height=30><p align=left>(?<x>.*?)</table>");
             var idMatches = RegGroupCollection(table, "\\&Id=(?<x>\\d+)");
             if (idMatches != null)
             {
                 addSigelSite(idMatches.Cast<Match>().Select(m => m.Groups["x"].Value));
             }

             // The page is logged whether or not any ids were found on it.
             LogServer.WriteLog("第" + pageIndex + "页\t" + url, "sfda");
         }
         catch (Exception ex)
         {
             LogServer.WriteLog(ex);
         }
     }
 }
コード例 #4
0
ファイル: JdComments.cs プロジェクト: chennysnow/SimpleDemo
        /// <summary>
        /// Downloads the first page of comments for the product whose numeric id is the first
        /// run of digits in <paramref name="itemUrl"/> and converts each entry of the JSON
        /// "comments" array with <c>getCommentNode</c>.
        /// </summary>
        /// <param name="itemUrl">Product URL; also sent as the referer of the request.</param>
        /// <returns>
        /// The parsed comments (entries for which <c>getCommentNode</c> returned null are
        /// dropped), or null when <c>CommentType</c> is not recognised, the response is empty,
        /// or the response carries no non-empty "comments" array.
        /// </returns>
        public List<CommentInfo> GetCommentsFirstPage(string itemUrl)
        {
            string proid = RegGroupsX<string>(itemUrl, "(?<x>\\d+)");

            string url;
            if (string.IsNullOrEmpty(CommentType) || CommentType == "1")
                url = string.Format(UrlMode, proid, 0);
            else if (CommentType == "2")
                url = string.Format(UrlModeimg, proid, 0);
            else
                return null; // unknown comment type: there is no endpoint to query, so do not request an empty URL

            HtmlAnalysis requert = new HtmlAnalysis {RequestReferer = itemUrl};
            string json = requert.HttpRequest(url);
            if (string.IsNullOrEmpty(json) || json.Contains("\"comments\":[],") || !json.Contains("comments"))
                return null;

            JObject obj = JObject.Parse(json);
            // "as" instead of a hard cast: a JSON null or a non-array "comments" value yields null instead of throwing.
            JArray list = obj["comments"] as JArray;
            if (list == null)
                return null;

            return list.Select(getCommentNode).Where(c => c != null).ToList();
        }
コード例 #5
0
ファイル: SfdaProList.cs プロジェクト: chennysnow/SimpleDemo
 /// <summary>
 /// Iterates over the result pages of the SFDA search for table 36 and passes the detail
 /// references found on each page to <c>addproducts</c>.
 /// </summary>
 /// <remarks>
 /// The total page count is read from the page text ("共N页") while it is still 1.
 /// Exceptions raised while handling one page are logged and the loop moves on to the next.
 /// After the loop the page count is written to the "sfda" log.
 /// </remarks>
 public void GetAllProducts()
 {
     const string urlmode = "http://app1.sfda.gov.cn/datasearch/face3/search.jsp?tableId=36&State=1&bcId=124356651564146415214424405468&State=1&curstart={0}&State=1&tableName=TABLE36&State=1&viewtitleName=COLUMN361&State=1&viewsubTitleName=COLUMN354,COLUMN355,COLUMN356,COLUMN823&State=1&tableView=%25E8%25BF%259B%25E5%258F%25A3%25E8%258D%25AF%25E5%2593%2581&State=1";

     HtmlAnalysis request = new HtmlAnalysis
     {
         RequestMethod = "post",
         RequestAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
         RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8",
         RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"
     };

     int totalPages = 1;
     for (int pageIndex = 1; pageIndex <= totalPages; pageIndex++)
     {
         try
         {
             string url = string.Format(urlmode, pageIndex);
             string html = request.HttpRequest(url);
             if (html == string.Empty)
             {
                 continue;
             }

             if (totalPages == 1)
             {
                 totalPages = RegGroupsX<int>(html, "共(?<x>\\d+)页");
             }

             string table = RegGroupsX<string>(html, "<td height=30><p align=left>(?<x>.*?)</table>");
             var refMatches = RegGroupCollection(table, "callbackC,'(?<x>.*?)',");
             if (refMatches != null)
             {
                 addproducts(refMatches.Cast<Match>().Select(m => m.Groups["x"].Value));
             }

             // The page is logged whether or not any references were found on it.
             LogServer.WriteLog("第" + pageIndex + "页\t" + url, "sfda");
         }
         catch (Exception ex)
         {
             LogServer.WriteLog(ex);
         }
     }

     LogServer.WriteLog("抓取完毕共抓取到 " + totalPages, "sfda");
 }
コード例 #6
0
ファイル: SfdaProList.cs プロジェクト: chennysnow/SimpleDemo
        /// <summary>
        /// Downloads the detail page of every record id, reads the table rows of the page into a
        /// <c>MedicineSiteInfo</c> and stores all successfully parsed records in one call to
        /// <c>MedicineSiteDB.AddMedicineSite</c>.
        /// </summary>
        /// <param name="ids">Record ids to fetch; a null sequence is treated as empty.</param>
        /// <remarks>
        /// Pages that yield fewer rows than the parser reads are skipped. A parsing error on one
        /// page is logged and does not stop the remaining pages.
        /// </remarks>
        private void addSigelSite(IEnumerable<string> ids)
        {
            const string proUrlMode = "http://app1.sfda.gov.cn/datasearch/face3/content.jsp?tableId=96&tableName=TABLE96&tableView=%CD%F8%C9%CF%D2%A9%B5%EA&Id={0}";
            // Pattern shared by every value cell of the detail table.
            const string cellPattern = "width=83%>(?<x>.*?)</td>";
            // The highest row index read below is 13, so at least 14 rows must be present.
            const int requiredRows = 14;

            if (ids == null)
                return;

            HtmlAnalysis request = new HtmlAnalysis();
            request.RequestAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
            request.RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8";
            request.RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0";
            List<MedicineSiteInfo> list = new List<MedicineSiteInfo>();
            foreach (string page in ids.Select(proid => string.Format(proUrlMode, proid)).Select(request.HttpRequest))
            {
                var sintinfo = RegGroupCollection(page, "<tr>(?<x>.*?)</tr>");
                // Other callers of RegGroupCollection check for null, so do the same here.
                if (sintinfo == null || sintinfo.Count < requiredRows)
                    continue;
                try
                {
                    MedicineSiteInfo msite = new MedicineSiteInfo
                    {
                        CertificateNo = RegGroupsX<string>(sintinfo[0].Groups["x"].Value, cellPattern),
                        ServerArea = RegGroupsX<string>(sintinfo[1].Groups["x"].Value, cellPattern),
                        CompanyName = RegGroupsX<string>(sintinfo[2].Groups["x"].Value, cellPattern),
                        Gerent = RegGroupsX<string>(sintinfo[3].Groups["x"].Value, cellPattern),
                        CompanyAddress = RegGroupsX<string>(sintinfo[4].Groups["x"].Value, cellPattern),
                        Province = RegGroupsX<string>(sintinfo[5].Groups["x"].Value, cellPattern),
                        SiteName = RegGroupsX<string>(sintinfo[6].Groups["x"].Value, cellPattern),
                        SiteIp = RegGroupsX<string>(sintinfo[7].Groups["x"].Value, cellPattern),
                        Domian = RegGroupsX<string>(sintinfo[8].Groups["x"].Value, cellPattern),
                        ReleaseTime = RegGroupsX<DateTime>(sintinfo[9].Groups["x"].Value, cellPattern),
                        ValidityDate = RegGroupsX<DateTime>(sintinfo[10].Groups["x"].Value, cellPattern),
                        PostNo = RegGroupsX<string>(sintinfo[11].Groups["x"].Value, cellPattern),
                        // Row 12 is not read; the remark is taken from row 13 with its own pattern.
                        Remark = RegGroupsX<string>(sintinfo[13].Groups["x"].Value, "000066\">(?<x>.*?)</span>"),
                        CreateTime =DateTime.Now
                    };

                    // NOTE(review): as written this replaces ";" with ";" and changes nothing; presumably the
                    // first argument was a full-width semicolon that got lost in an encoding conversion — confirm.
                    msite.Domian = msite.Domian.Replace(";", ";");

                    list.Add(msite);
                }
                catch (Exception ex)
                {
                    LogServer.WriteLog(ex);
                }
            }
            if (list.Count > 0)
            {
                new MedicineSiteDB().AddMedicineSite(list);
                list.Clear();
            }
        }
コード例 #7
0
ファイル: SfdaProList.cs プロジェクト: chennysnow/SimpleDemo
        /// <summary>
        /// Downloads the detail page for each reference in <paramref name="ids"/>, converts it
        /// with <c>getProDetial</c> and writes the results to <c>MedicineDB</c> in batches of 50,
        /// followed by a final write for any remainder.
        /// </summary>
        /// <param name="ids">Relative detail-page references appended to the face3 base URL.</param>
        private void addproducts(IEnumerable<string> ids)
        {
            const string proUrlMode = "http://app1.sfda.gov.cn/datasearch/face3/{0}";
            const int batchSize = 50;

            HtmlAnalysis request = new HtmlAnalysis
            {
                RequestAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8",
                RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"
            };

            List<MedicineInfo> pending = new List<MedicineInfo>();
            foreach (string proid in ids)
            {
                string detailPage = request.HttpRequest(string.Format(proUrlMode, proid));
                pending.Add(getProDetial(detailPage));

                // Write out a full batch as soon as it is complete.
                if (pending.Count >= batchSize)
                {
                    new MedicineDB().AddMedicineInfo(pending);
                    pending.Clear();
                }
            }

            // Write out whatever is left after the last full batch.
            if (pending.Count > 0)
            {
                new MedicineDB().AddMedicineInfo(pending);
                pending.Clear();
            }
        }
コード例 #8
0
ファイル: BaseSiteInfo.cs プロジェクト: chennysnow/SimpleDemo
 /// <summary>
 /// Creates the object and assigns a freshly constructed <c>HtmlAnalysis</c> to the
 /// member named <c>HtmlAnalysis</c>.
 /// </summary>
 public BaseSiteInfo()
 {
     HtmlAnalysis = new HtmlAnalysis();
 }
コード例 #9
0
        /// <summary>
        /// Refreshes a top-level Tmall category: downloads its listing page, walks the category
        /// breadcrumb on that page, registers parent categories that are not yet known, collects
        /// sibling and child categories through <c>GetAllBrotherCats</c> / <c>GetChildCats</c>,
        /// and finally inserts or updates the category described by the last breadcrumb entry.
        /// </summary>
        /// <param name="item">
        /// Category to refresh. Only categories whose <c>ParentClass</c> is the empty string are
        /// processed; all others return immediately.
        /// </param>
        /// <remarks>
        /// The method sleeps for a random 6–29 seconds after the request. When the response
        /// contains the Tmall home-page marker and the category has not been updated for 15 days
        /// it is deleted via <c>SiteClassBll.delClass</c>.
        /// </remarks>
        private void UpdateTmallNode(SiteClassInfo item)
        {
            if(item.ParentClass!="")
                return;

            string url = string.Format("http://list.tmall.com/search_product.htm?cat={0}" , item.ClassId);
            HtmlAnalysis reqest = new HtmlAnalysis();

            // NOTE(review): captured login session cookie embedded in source; presumably it expires — consider configuration.
            reqest.Headers.Add("Cookie", "_med=dw:1440&dh:900&pw:1440&ph:900&ist:0; pnm_cku822=126UW5TcyMNYQwiAiwQRHhBfEF8QXtHcklnMWc%3D%7CUm5Ockt%2BQXVPdUp%2BQH9Dfyk%3D%7CU2xMHDJxPk82UjVOI1h2VnhCbExiPl85VTJMNhhOGA%3D%3D%7CVGhXd1llXGlWYlhiXWlXaFRoX2JAekN3TndMdUB1S3RAeUx0TmA2%7CVWldfS0SMgwzCCgULg4gWz0ReEB2Aix6LA%3D%3D%7CVmhIGCcYJAQ%2FAyMXLRc3DTQNORklHCUYOAwxDCwQKRAtDTgDPmg%2B%7CV25Tbk5zU2xMcEl1VWtTaUlwJg%3D%3D; cq=ccp%3D1; tt=login.taobao.com; res=scroll%3A990*776-client%3A977*290-offset%3A977*290-screen%3A1440*900; hng=; uss=BqRyb7nd5KLIbC5D91VCamaiwt66iy8KP0cAS24EJNQWFeWsxGZv%2FwEo%2BAs%3D; cna=cFJaEEwJdRsCATyy24A1yMNe; l=AkZGKGJIZ/WDVSsY65u6dVSj1jLItYph; isg=Alpa8TTm1nmgf1rVi7OVW5M1rADEst5lFaLZTWTTFO241_oRTBsudSCt8xs0; OZ_1U_2061=vid=v801c15a894bb1.0&ctime=1478143053&ltime=1476512356; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; x=__ll%3D-1%26_ato%3D0; _tb_token_=eeed7bb353eb5; ck1=; uc1=cookie14=UoW%2FX9QwsnjAzg%3D%3D&lng=zh_CN&cookie16=W5iHLLyFPlMGbLDwA%2BdvAGZqLg%3D%3D&existShop=false&cookie21=V32FPkk%2FhSg%2F&tag=0&cookie15=UIHiLt3xD8xYTw%3D%3D&pas=0; uc3=sg2=AQI4ctClVx2ycnFp5kyAa%2F3VFKDYjzhZBJFC8KK2LVw%3D&nk2=D9ZNP7htc6w%3D&id2=UU8Lx7%2BmPirPbw%3D%3D&vt3=F8dARHfHI%2BnGtn3VuNA%3D&lg2=UtASsssmOIJ0bQ%3D%3D; lgc=lunce188; tracknick=lunce188; cookie2=10682dca3e46d779e26f299924785699; cookie1=AV0h8l61cg4iTp3AqqPZRlYP3nQGpHHQCAg%2FB5Sm3VI%3D; unb=2731635449; t=65336f3349d3648c68445898ef92bec2; skt=2c4d55251dbb75a9; _nk_=lunce188; _l_g_=Ug%3D%3D; cookie17=UU8Lx7%2BmPirPbw%3D%3D; login=true");
            reqest.RequestUserAgent = "Mozilla/5.0 (SymbianOS/9.3; U; Series60/3.2 NokiaE75-1 /110.48.125 Profile/MIDP-2.1 Configuration/CLDC-1.1 ) AppleWebKit/413 (KHTML, like Gecko) Safari/413";
            string catPage=  reqest.HttpRequest(url);

            // Random pause between requests.
            Thread.Sleep(new Random().Next(6, 30)*1000);

            // Nothing can be parsed from an empty response; bail out before calling Contains on it.
            if (string.IsNullOrEmpty(catPage))
                return;

            if (catPage.Contains("ResponseUri:http://www.tmall.com/") && item.UpdateTime.AddDays(15)<DateTime.Now)
            {
                new SiteClassBll().delClass(item);
                return;
            }
            var crumbsList = RegGroupCollection(catPage,
                "<li data-tag=\"cat\">(?<x>.*?)</li>");
            if (crumbsList == null)
                return;
            SiteClassInfo catinfo = new SiteClassInfo();

            // State carried from one breadcrumb entry to the next: at the top of iteration i
            // these describe entry i-1, i.e. the parent of entry i.
            string paraInfo = "";
            string paraUrl = "";
            string paraCatId = "";
            string paraName = "";
            for (int i = 0; i < crumbsList.Count; i++)
            {
                catinfo.ParentName = paraName;
                catinfo.ParentClass = paraCatId;
                catinfo.ParentUrl = paraCatId == "" ? "" : "http://list.tmall.com/search_product.htm?cat=" + paraCatId;

                // Register the parent category if it is not known yet.
                if (paraCatId != "" &&
                    !HasBindClasslist.Exists(c => c.ClassId == paraCatId))
                {

                    string tempparaInfo = "";
                    string tempparaUrl = "";
                    string tempparaCatId = "";
                    string tempparaName = "";
                    if (i > 1)
                    {
                        // Entry i-2 is the parent of the parent.
                        Match pnode = crumbsList[i - 2];
                        tempparaInfo = pnode.Groups["x"].Value;
                        // NOTE(review): the id is extracted from paraUrl (entry i-1), not from entry i-2,
                        // so it equals paraCatId while the name comes from entry i-2 — confirm this is intended.
                        tempparaCatId = RegGroupsX<string>(paraUrl, "cat=(?<x>\\d+)");
                        tempparaUrl = "http://list.tmall.com/search_product.htm?cat=" + tempparaCatId;
                        tempparaName = RegGroupsX<string>(tempparaInfo, "title=\"(?<x>.*?)\"");
                    }

                    SiteClassInfo catPareInfo = new SiteClassInfo
                    {
                        ClassName = paraName,
                        ClassId = paraCatId,
                        SiteId = Baseinfo.SiteId,
                        CreateDate = DateTime.Now,
                        UpdateTime = DateTime.Now,
                        IsHide = false,
                        ParentUrl = tempparaUrl,
                        ParentName = tempparaName,
                        ClassCrumble = tempparaCatId + ",",

                        ParentClass = tempparaCatId,
                        Urlinfo = "http://list.tmall.com/search_product.htm?cat=" + paraCatId,
                        IsDel=false,
                        HasChild = true,
                        IsBind = false
                    };
                    catPareInfo.ClassCrumble = catPareInfo.ClassCrumble.TrimEnd(',');
                    HasBindClasslist.Add(catPareInfo);
                    shopClasslist.Add(catPareInfo);
                    // Sibling categories of the parent.
                    GetAllBrotherCats(catPareInfo);
                    // Child categories of the parent.
                    GetChildCats(catPareInfo, catPage);

                }

                // Read the current breadcrumb entry; it becomes the parent for the next iteration.
                Match node = crumbsList[i];
                paraInfo = node.Groups["x"].Value;
                paraUrl = RegGroupsX<string>(paraInfo, "href=\"(?<x>.*?)\"");
                paraCatId = RegGroupsX<string>(paraUrl, "cat=(?<x>\\d+)");
                paraName = RegGroupsX<string>(paraInfo, "title=\"(?<x>.*?)\"");

                catinfo.ClassName = paraName;
                catinfo.ClassId = paraCatId;
                catinfo.SiteId = Baseinfo.SiteId;
                catinfo.CreateDate = DateTime.Now;
                catinfo.UpdateTime = DateTime.Now;
                catinfo.Urlinfo = "http://list.tmall.com/search_product.htm?cat=" + paraCatId;
                catinfo.TotalProduct = RegGroupsX<int>(catPage, "共<span> (?<x>\\d+)</span>件相关商品");
                catinfo.IsHide = false;
                catinfo.IsBind = false;
                catinfo.IsDel = false;
                // The crumble accumulates every breadcrumb id, comma separated.
                catinfo.ClassCrumble += paraCatId + ",";

                GetAllBrotherCats(catinfo);

            }
            if (string.IsNullOrEmpty(catinfo.ClassId))
                return;
            if(regIsMatch(catPage, "<div class=\"cateAttrs\" data-spm=\".*?\">(?<x>.*?)<div class=\"propAttrs\""))
            {
                GetChildCats(catinfo, catPage);
                catinfo.HasChild = true;
            }
            else
                catinfo.HasChild = false;
            catinfo.ClassCrumble = catinfo.ClassCrumble.TrimEnd(',');

            var oldCatInfo = HasBindClasslist.Find(c => c.ClassId == catinfo.ClassId);
            if (oldCatInfo==null)
            {
                catinfo.HasChild = true;
                HasBindClasslist.Add(catinfo);
                shopClasslist.Add(catinfo);
            }
            else
            {
                oldCatInfo.Urlinfo = catinfo.Urlinfo;
                oldCatInfo.ClassId = catinfo.ClassId;
                oldCatInfo.ClassName = catinfo.ClassName;
                oldCatInfo.TotalProduct = catinfo.TotalProduct;
                oldCatInfo.ParentUrl = catinfo.ParentUrl;
                oldCatInfo.ParentClass = catinfo.ParentClass;
                // Previously ParentUrl was assigned a second time here and ParentName was never copied.
                oldCatInfo.ParentName = catinfo.ParentName;
                oldCatInfo.UpdateTime = DateTime.Now;
                new SiteClassBll().UpdateSiteCat(oldCatInfo);
            }

            // Persist every category collected during this run.
            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
コード例 #10
0
ファイル: GmClassInfo.cs プロジェクト: chennysnow/SimpleDemo
        /// <summary>
        /// Updates a category: reads its product count from the listing page, queries the
        /// asynchronous search endpoint (up to three attempts) for the category object, registers
        /// sibling ("brothers") and child ("children") categories that are not yet known, then
        /// refreshes <paramref name="catinfo"/> from the response and saves it.
        /// </summary>
        /// <param name="catinfo">Category to refresh; its fields are overwritten in place.</param>
        private void UpdateCat(SiteClassInfo catinfo)
        {
            string listingHtml = HtmlAnalysis.Gethtmlcode(catinfo.Urlinfo);
            catinfo.TotalProduct = RegGroupsX<int>(listingHtml, "共(?<x>\\d+)商品|共 <em id=\"searchTotalNumber\">(?<x>\\d+)</em> 个商品");
            if (catinfo.TotalProduct == 0)
            {
                return;
            }

            // Take the parent's display name from the already known categories, if present.
            var knownParent = HasBindClasslist.FirstOrDefault(c => c.ClassId == catinfo.ParentClass);
            if (knownParent != null)
            {
                catinfo.ParentName = knownParent.ClassName;
            }

            string tempCatUrl = string.Format(AsynSearchMoth, catinfo.ClassId);
            HtmlAnalysis request = new HtmlAnalysis
            {
                RequestMethod = "POST",
                RequestUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0",
                RequestAccept = "application/json, text/javascript, */*; q=0.01",
                RequestContentType = "application/x-www-form-urlencoded; charset=UTF-8"
            };

            // Up to three attempts; every failed attempt waits five seconds and is logged.
            string catTxt = "";
            for (int attempt = 0; attempt < 3; attempt++)
            {
                catTxt = request.HttpRequest(tempCatUrl);
                if (catTxt.Contains("success"))
                {
                    break;
                }
                Thread.Sleep(5000);
                LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误\r\n" + catTxt, "AddClassError");
            }
            if (string.IsNullOrEmpty(catTxt) || !catTxt.Contains("success"))
            {
                return;
            }

            // Walk response -> pageJson -> content -> catObj, stopping at the first missing level.
            JObject obj = JObject.Parse(catTxt);
            JToken response = obj == null ? null : obj["response"];
            JToken pageJson = response == null ? null : response["pageJson"];
            JToken content = pageJson == null ? null : pageJson["content"];
            JToken catObj = content == null ? null : content["catObj"];
            if (catObj == null)
            {
                LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误\r\n" + catTxt, "AddClassError");
                return;
            }

            // Siblings: they share the parent of the category being updated.
            JToken brothers = catObj["brothers"];
            if (brothers != null)
            {
                SiteClassInfo[] newBrothers = brothers
                    .Where(item => !HasBindClasslist.Exists(c => c.ClassId == (string) item["catId"]))
                    .Select(item => new SiteClassInfo
                    {
                        ClassId = (string) item["catId"],
                        ParentClass = catinfo.ParentClass ?? "",
                        ClassName = (string) item["catName"],
                        HasChild = true,
                        ParentName = catinfo.ParentName,
                        IsDel = false,
                        IsHide = false,
                        ParentUrl = catinfo.ParentUrl,
                        SiteId = catinfo.SiteId,
                        Urlinfo = string.Format("http://list.gome.com.cn/{0}.html", (string) item["catId"]),
                        CreateDate = DateTime.Now,
                        UpdateTime = DateTime.Now,
                        IsBind = false
                    })
                    .ToArray();
                if (newBrothers.Length > 0)
                {
                    shopClasslist.AddRange(newBrothers);
                    HasBindClasslist.AddRange(newBrothers);
                }
            }

            // Children: the category being updated is their parent.
            JToken children = catObj["children"];
            if (children != null)
            {
                SiteClassInfo[] newChildren = children
                    .Where(item => !HasBindClasslist.Exists(c => c.ClassId == (string) item["catId"]))
                    .Select(item => new SiteClassInfo
                    {
                        ClassId = (string) item["catId"],
                        ClassCrumble = "",
                        BindClassId = 0,
                        ParentClass = catinfo.ClassId,
                        ClassName = (string) item["catName"],
                        HasChild = true,
                        ParentName = catinfo.ClassName,
                        IsDel = false,
                        IsHide = false,
                        ParentUrl = catinfo.Urlinfo,
                        SiteId = catinfo.SiteId,
                        Urlinfo = string.Format("http://list.gome.com.cn/{0}.html", (string) item["catId"]),
                        CreateDate = DateTime.Now,
                        UpdateTime = DateTime.Now,
                        IsBind = false
                    })
                    .ToArray();
                if (newChildren.Length > 0)
                {
                    shopClasslist.AddRange(newChildren);
                    HasBindClasslist.AddRange(newChildren);
                }
            }

            // HasChild is evaluated against the class id held before it is overwritten below.
            catinfo.HasChild = HasBindClasslist.Any(c => c.ParentClass == catinfo.ClassId);
            catinfo.IsDel = false;
            catinfo.UpdateTime = DateTime.Now;
            catinfo.ClassName = catObj["catName"].ToString();
            catinfo.ClassId = catObj["catId"].ToString();
            catinfo.ParentClass = catObj["parentId"].ToString();
            catinfo.Urlinfo = string.Format("http://list.gome.com.cn/{0}.html", catinfo.ClassId);
            // This parent id value is stored as "no parent".
            if (catinfo.ParentClass == "homeStoreRootCategory")
            {
                catinfo.ParentClass = "";
            }
            new SiteClassBll().UpdateSiteCat(catinfo);

            // Persist the categories discovered during this call.
            if (shopClasslist.Any())
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
コード例 #11
0
        /// <summary>
        /// 更新分类,有问题需优化
        /// </summary>
        /// <param name="siteClassInfo"></param>
        private void UpdateCat( HtmlAnalysis html,SiteClassInfo siteClassInfo)
        {
            string url =string.Format(CatListMode,siteClassInfo.ClassId);

            //html.RequestAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
            //html.RequestHeaders.Add("x-amz-id-1", "12SNRGFVN7F3GN9TZQ6A");
            //html.RequestHeaders.Add("x-amz-id-2", "HHlJ6ZQcJqqihTf2uhlh7xPqV4kx2CyUKZOeVXInz/IdAUgpQDfMInz6tG5ia6VKvlmZXjpDTPz 2WwRGyFE9A==");
            //html.RequestHeaders.Add("X-Frame-Options", "SAMEORIGIN");
            //html.RequestHeaders.Add("x-ua-compatible", "IE=edge");
            //html.RequestHeaders.Add("Cookie", "x-wl-uid=1ABOedPNZYW+nIQbl1b30JM+0+4U4bA6yQoNrlasdwDpl1IqkNfHVhbzaL5/qqOVMsWtwsc2i3RM=; amznacsleftnav-99427612=1,2; amznacsleftnav-100213812=1; s_nr=1440467727734-Repeat; s_vnum=1869792751635%26vn%3D4; s_dslv=1440467727736; 5SnMamzvowels.time.4=1440664276731; 5SnMamzvowels.time.0=1441181856266; amznacsleftnav-99204312=1; amznacsleftnav-100100372=1; 5SnMamzvowels.time.1=1441508587108; 5SnMamzvowels.time.2=1441523642117; 5SnMamzvowels.pos=4; 5SnMamzvowels.time.3=1442287567488; session-token=\"1GYzzhtBqbos2Ftk+KCgTA9f5yRCkSZjj6lDEmlXHPNRua8TAmIztTkzk9RooM3be2CPBhr21h404qe4Gy3e+0zCarS/5xlXuC6Xy1C2GeGy0sFr8nGgWp1iGD+VjAuyOrdQH+mqy1Ie9/ELizvIqfF5PmoPqTy4vp92B/sNMX8xqOeHkYqdWAWHB6mJW0TAgqRgYhTgIGt8CBKxi/8Bhg==\"; x-amz-captcha-1=1444963265190387; x-amz-captcha-2=GcvGzFcFHl5jmwSO4uF/Zg==; UserPref=T/z6Om64hhzqQdB81xsMYv0BMLVePVAtO8NUr8jzM/2BocRpX2Zd+SKZAUfRAIURECn/REj5CtQscHUNwkr+LIJr0IOKTpqjXQwnd1ojeIeEJ8uB0tDMjvJhI2wjoBG5yoZuWZdEO7fGaBCDJwMi5qmump/VskQ6wtpK0Dcid8ozeNIhZvSdzwcFNYxsFxYYiS+aPd+0ZpXUn74/4YD4RAH+s8YPC06fOt1R3/XVpBPVhRq+7YhjPlQAg1RFaH02WYtSbZJV9d5sAmAJzCyK9T1yOKwuHCDsnBjPHo8FLlOptd49AkRa4xXApgrufn7Hhqecje92i3QU9X6cShPi1QIX93mkef+uArPXUMQYU7kgkvSjVkjfyYlhOQUSpDgg1j9G/PRUrY+Eb7hezUw1lvlzgb1p4nQnipeJK63vuJJCeRyfc/r4lLNdDBv3NnHV; session-id-time=2082729601l; session-id=479-8884718-9124366; ubid-acbcn=479-6055231-7810538; csm-hit=0MG3ENSCMXYNMQY80BH8+s-0MG3ENSCMXYNMQY80BH8|1446517791183");
            string pageinfo = html.HttpRequest(url);
            //string pageinfo = HtmlAnalysis.Gethtmlcode(url, "utf8", false);

            if (pageinfo.Contains("没有找到任何与") || pageinfo.Contains("<h2 id=\"s-result-count\" class=\"a-size-base a-text-normal\"><b>&nbsp;</b>\n                    </h2></div>\n        </div><div class=\"a-column a-span4 a-text-right a-spacing-none a-span-last\"><div class=\"a-row a-spacing-micro a-spacing-top-micro\"><div class=\"s-last-column\">\n                    </div>\n            </div></div></div></div><script type=\"text/javascript\">\n    \n    function viewCompleteImageLoaded(image, time, resultCount, shouldUseCSMScopes) {\n         if (image) {\n           image.onload = image.onerror = image.onabort = null;\n         }\n\n         amzn.sx.utils.jsDepMgr.when('clickToViewLogger', 'viewCompleteImageLoaded',\n           function(clickToViewLogger) {\n            if (typeof ctvcL == 'undefined') {ctvcL = new clickToViewLogger.ClickToViewCompleteLogger(\"false\");}\n            ctvcL.iL(image, time, resultCount, shouldUseCSMScopes);\n           }\n         );\n    };\n</script>\n<div class=\"img_header hdr noborder\" id=\"bottomBar\">\n    <div id=\"pagn\" class=\"pagnHy\" >\n            <br clear=\"all\" />"))
            {
                new SiteClassBll().delClass(siteClassInfo);
                return;
            }
            if (pageinfo.Contains("ResponseUri:http://www.amazon.cn/") || pageinfo.Contains("没有找到任何与") || pageinfo.Contains("请输入您在下方看到的字符"))
                return;

            string crumble = RegGroupsX<string>(pageinfo, "<div class=\"s-first-column\">(?<x>.*?)<span id=\"breadcrumbSearchSeperator\">|<h2 id=\"s-result-count\"(?<x>.*?)</h2>");
            if (crumble == null)
            {
                url = url.Replace("page=1", "page=2");
                pageinfo = HtmlAnalysis.Gethtmlcode(url);
                crumble = RegGroupsX<string>(pageinfo, "<div class=\"s-first-column\">(?<x>.*?)<span id=\"breadcrumbSearchSeperator\">|<h2 id=\"s-result-count\"(?<x>.*?)</h2>");
                if (crumble == null)
                {
                    LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误1\turl:" + url, "AddClassError");
                    return;
                }
            }

            string currentName = RegGroupsX<string>(crumble, "<span class=\"a-color-state a-text-bold\">(?<x>.*?)</span>|<strong>(?<x>.*?)$");

            if (!ValidCatName(currentName))
            {
                LogServer.WriteLog(Baseinfo.SiteName + "分类抓名称匹配错误\turl:" + url, "AddClassError");
                return;
            }
            var crumlist = RegGroupCollection(crumble, "<a class=\"a-link-normal a-color-base a-text-bold a-text-normal\" href=\"(?<x>.*?)\">(?<y>.*?)</a>|<a href=\"(?<x>.*?)\">(?<y>.*?)</a>");
            //if(crumlist==null)
            //{
            //    LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误2\turl:" + url, "AddClassError");
            //    return;
            //}
            string pcatUrl = "";
            string pcatName = "";
            string pcatId = "";
            string classCrumble = "";

            if (crumlist!=null)
            foreach (Match match in crumlist)
            {
                pcatUrl = match.Groups["x"].Value;
                if (string.IsNullOrEmpty(pcatUrl))
                    continue;
                if (!pcatUrl.Contains("http"))
                    pcatUrl = "http://www.amazon.cn" + pcatUrl;

                var pcatList = RegGroupCollection(pcatUrl, "%3A(?<x>\\d+)");
                if (pcatList != null)
                {
                    foreach (Match item in pcatList)
                    {
                        pcatId = item.Groups["x"].Value;
                        if (!string.IsNullOrEmpty(pcatId) && !HasBindClasslist.Exists(p => p.ClassId == pcatId))
                        {
                            GetAmazonNode(string.Format(CatListMode, pcatId));
                        }

                    }
                }
                else
                {
                    pcatId = RegGroupsX<string>(pcatUrl,
                        "3A(?<x>\\d+)\\&bbn|/s/ref=sr_hi_\\d+\\?rh=n%3A(?<x>\\d+)|rh=n%3A(?<x>\\d+)\\&amp");

                    if (pcatId == null)
                        pcatId = RegGroupsX<string>(pcatUrl, "node=(?<x>\\d+)|nodeId=(?<x>\\d+)|bbn=(?<x>\\d+)");
                }
                if (!string.IsNullOrEmpty(pcatId))
                {
                    classCrumble += pcatId + ",";
                }
                pcatName = WordCenter.FilterHtml(match.Groups["y"].Value);

            }

            SiteClassInfo catInfo = new SiteClassInfo();
            catInfo.ClassName = currentName;
            catInfo.ParentName = pcatName;
            catInfo.ParentClass = pcatId;
            catInfo.ParentUrl = pcatUrl;
            catInfo.ClassCrumble = classCrumble;
            string count =
              RegGroupsX<string>(pageinfo,
                  "<span>显示(?<x>.*?)个结果|<span>显示所有(?<x>.*?)个结果</span>|共(?<x>.*?)条|<div id=\"resultCount\" class=\"toTheEdge searchListHeader\">\n(?<x>.*?) 条结果</div>");
            if (count != null)
            {
                int procount;
                int.TryParse(count.Replace(" ", ""), out procount);
                catInfo.TotalProduct = procount;
            }
            catInfo.Urlinfo = url;
            catInfo.ClassId = RegGroupsX<string>(url, "node=(?<x>\\d+)|nodeId=(?<x>\\d+)|bbn=(?<x>\\d+)|rh=n%3A(?<x>\\d+)%2Cp_\\d|ref=lp_(?<x>\\d+)_pg");
            if (!ValidCatId(catInfo.ClassId))
            {
                LogServer.WriteLog("ClassId:" + catInfo.ClassId + "验证失败\turl:" + url, "AddClassError");
                return;
            }
            if (siteClassInfo.ClassId != catInfo.ClassId)
            {
                LogServer.WriteLog(Baseinfo.SiteName + "抓取分类id不一致 old:" + siteClassInfo.ClassId+"new:"+ catInfo.ClassId);
                return;
            }
            //更新当前分类
            siteClassInfo.Urlinfo = catInfo.Urlinfo;
            siteClassInfo.ClassId = catInfo.ClassId;
            siteClassInfo.ClassName = catInfo.ClassName;
            siteClassInfo.TotalProduct = catInfo.TotalProduct;
            siteClassInfo.ParentUrl = catInfo.ParentUrl;
            siteClassInfo.ParentClass = catInfo.ParentClass;
            siteClassInfo.ParentUrl = catInfo.ParentUrl;
            siteClassInfo.UpdateTime = DateTime.Now;
            new SiteClassBll().UpdateSiteCat(siteClassInfo);

            string classList;
            if (pageinfo.Contains("data-typeid=\"n\""))
            {
                classList = RegGroupsX<string>(pageinfo, "<ul id=\"ref_\\d+\" data-typeid=\"n\"(?<x>.*?)</ul>");
            }
            else
            {
                classList = RegGroupsX<string>(pageinfo, "<div class=\"sbDepartmentLabel\">(?<x>.*?)<form id=\"bottomSearchForm\"|<div class=\"categoryRefinementsSection\">(?<x>.*?)<div class=\"shoppingEngineSectionHeaders\">");
            }
            if (classList == null)
            {
                LogServer.WriteLog(Baseinfo.SiteName + "分类抓取错误1\turl:" + url, "AddClassError");
                return;
            }
            var classInfo = RegGroupCollection(classList, "<span class=\"deptLevel\\d+( deptCurrent)?\">(?<x>.*?)</span>|<li.*?>(?<x>.*?)</li>");
            if (classInfo == null || classInfo.Count == 0)
                return;
            for (int i = 0; i < classInfo.Count; i++)
            {

                var item = classInfo[i].ToString();
                string tempUrl = RegGroupsX<string>(item, "href=\"(?<x>.*?)\"");

                var pcatList = RegGroupCollection(tempUrl, "n%3A(?<x>\\d{7,16})");
                if (pcatList != null)
                {
                    foreach (Match obj in pcatList)
                    {
                        var tempcatId = obj.Groups["x"].Value;
                        if (!string.IsNullOrEmpty(pcatId) && !HasBindClasslist.Exists(p => p.ClassId == tempcatId))
                        {
                            GetAmazonNode(string.Format(CatListMode, tempcatId));
                        }

                    }
                }
                else
                {
                    string tempid = "";
                    if (tempUrl != null)
                    {
                        tempUrl = "http://www.amazon.cn" + tempUrl.Replace("&amp;", "&");
                        tempid = RegGroupsX<string>(tempUrl, "3A(?<x>\\d{7,16})\\&bbn");
                        if (tempid == null)
                            tempid = RegGroupsX<string>(item, "node=(?<x>\\d+)|nodeId=(?<x>\\d+)|bbn=(?<x>\\d+)");
                    }
                    if (!string.IsNullOrEmpty(tempid) && !HasBindClasslist.Exists(p => p.ClassId == tempid))
                    {
                        GetAmazonNode(tempUrl);
                    }
                }
            }

            if (shopClasslist.Count > 0)
            {
                new SiteClassInfoDB().AddSiteClass(shopClasslist);
                shopClasslist.Clear();
            }
        }
コード例 #12
0
 /// <summary>
 /// Reloads all stored categories for the current site (ordered by ascending
 /// UpdateTime) into HasBindClasslist and runs UpdateCat on each of them.
 /// An exception thrown while processing one category is logged and does not
 /// stop the remaining categories from being processed.
 /// </summary>
 public void UpdateSiteCat()
 {
     var categoryStore = new SiteClassInfoDB();
     HasBindClasslist = categoryStore
         .getAllSiteCatInfo(Baseinfo.SiteId)
         .OrderBy(cat => cat.UpdateTime)
         .ToList();

     // One HtmlAnalysis instance is shared by every UpdateCat call.
     var client = new HtmlAnalysis();

     // Index-based on purpose: Count is re-read on every pass, so the loop
     // bound follows the list's current size rather than a snapshot.
     int index = 0;
     while (index < HasBindClasslist.Count)
     {
         try
         {
             UpdateCat(client, HasBindClasslist[index]);
         }
         catch (Exception error)
         {
             LogServer.WriteLog(error);
         }

         index++;
     }
 }
コード例 #13
0
ファイル: TengXunBll.cs プロジェクト: chennysnow/SimpleDemo
        /// <summary>
        /// Issues four HTTP requests against open.t.qq.com using a parameter
        /// string built from <paramref name="contents"/> and Baseparam. All
        /// responses are discarded.
        /// </summary>
        /// <param name="contents">
        /// Text placed directly after "content=" in the parameter string.
        /// It is concatenated as-is.
        /// </param>
        /// <remarks>
        /// Request order:
        /// 1. HttpRequest on the friends/fanslist URL.
        /// 2. HttpRequest on the t/add URL (parameters in the query string).
        /// 3. HttpRequestFromPost on the t/add URL with the same parameters as body.
        /// 4. HttpRequestFromPost on the friends/fanslist URL with those parameters.
        /// NOTE(review): contents is not URL-encoded here; confirm whether callers
        /// are expected to encode it before calling.
        /// </remarks>
        public void write(string contents)
        {
            const string fansListUrl =
                "http://open.t.qq.com/api/friends/fanslist?format=json&reqnum=20&startindex=0&mode=0&install=0&sex=0&oauth_consumer_key=" +
                ClientId + "&access_token=" + AccessToken + "&openid=" + Openid +
                "&clientip=122.245.206.107&oauth_version=2.a";

            string query = string.Concat("content=", contents, Baseparam);
            string addUrl = "https://open.t.qq.com/api/t/add?" + query;

            // RequestMethod is assigned "POST" and then immediately "GET"; both
            // assignments are kept so the property-set sequence is unchanged.
            // The instance requests below run with RequestMethod == "GET".
            var client = new HtmlAnalysis { RequestMethod = "POST" };
            client.RequestMethod = "GET";

            client.HttpRequest(fansListUrl);
            client.HttpRequest(addUrl);

            HtmlAnalysis.HttpRequestFromPost(addUrl, query, "utf-8");
            HtmlAnalysis.HttpRequestFromPost(fansListUrl, query, "utf-8");
        }