예제 #1
0
 /// <summary>
 /// Endless dispatcher loop: while <c>_conQueue</c> has items and fewer than <c>taskScale</c>
 /// workers are in flight, dequeues one item, converts it and runs <c>SpiderProcess</c> for it
 /// on the thread pool. When there is nothing to do it sleeps for <c>procInterval</c>.
 /// </summary>
 /// <remarks>
 /// The in-flight counter <c>taskThreshold</c> is updated with <see cref="Interlocked"/> because it
 /// is written from several thread-pool threads at once, and the slot is reserved before the task
 /// is scheduled so the loop cannot overshoot <c>taskScale</c> while tasks are still queued.
 /// This method never returns.
 /// </remarks>
 private void ProcessTradeData()
 {
     while (true)
     {
         if (_conQueue.Count > 0 && taskThreshold < taskScale)
         {
             try
             {
                 IPMetaDataItem mdi = null;
                 IPItem         di  = null;
                 lock (objec)
                 {
                     if (_conQueue.TryDequeue(out di))
                     {
                         mdi = di.Convert2IPMetaDataItem();
                     }
                 }

                 // Another consumer may have emptied the queue between the Count check and the
                 // dequeue; only schedule work when an item was actually obtained.
                 if (mdi != null)
                 {
                     // Reserve the worker slot up front; the task releases it when it finishes.
                     Interlocked.Increment(ref taskThreshold);
                     bool scheduled = false;
                     try
                     {
                         Task.Run(
                             () =>
                         {
                             try
                             {
                                 SpiderProcess(mdi);
                             }
                             catch (Exception)
                             {
                                 // Best effort: a failing item must not take the dispatcher down.
                             }
                             finally
                             {
                                 Interlocked.Decrement(ref taskThreshold);
                             }
                         }
                             );
                         scheduled = true;
                     }
                     finally
                     {
                         // If scheduling itself failed, give the reserved slot back.
                         if (!scheduled)
                         {
                             Interlocked.Decrement(ref taskThreshold);
                         }
                     }
                 }

                 // Without proxy rate limiting, add a small random pause between dispatches.
                 if (ConfigUtls.proxy_rate_open == 0)
                 {
                     Thread.Sleep(random.Next(30));
                 }
             }
             catch (Exception)
             {
                 // Best effort: errors while dequeuing/converting are ignored and the loop continues.
             }

             // Once the configured total has been reached, block until a key is pressed.
             if (internalSingleton.RequestCount >= internalSingleton.RequestTotal)
             {
                 Console.ReadKey();
             }
         }
         else
         {
             Thread.Sleep(procInterval);
         }
     }
 }
예제 #2
0
        /// <summary>
        /// Creates an <see cref="HttpClient"/> that routes its requests through the proxy given by
        /// <c>mdi.ipwithport</c> and carries a set of browser-like default headers
        /// (Accept, Accept-Encoding, Accept-Language, Cookie, Host, Referer, User-Agent).
        /// </summary>
        /// <param name="mdi">
        /// Proxy descriptor. <c>ipwithport</c> is used as the authority of the proxy URI;
        /// <c>outip</c> and <c>fvlid</c> are embedded in the generated cookie.
        /// </param>
        /// <returns>A new client. The caller owns it and is responsible for disposing it.</returns>
        private HttpClient GetProxyHttpClientMulti(IPMetaDataItem mdi)
        {
            var proxy = new WebProxy(string.Format("http://{0}", mdi.ipwithport));
            HttpClientHandler httpClientHandler = new HttpClientHandler()
            {
                Proxy = proxy
            };
            var httpCient = new HttpClient(httpClientHandler);

            try
            {
                // Default headers
                httpCient.DefaultRequestHeaders.Add("Accept", "image/webp,image/apng,image/*,*/*;q=0.8");
                httpCient.DefaultRequestHeaders.Add("Accept-Encoding", "gzip, deflate, br");
                httpCient.DefaultRequestHeaders.Add("Accept-Language", "zh-CN,zh;q=0.9");

                string guidStr  = Guid.NewGuid().ToString().ToUpper();
                string guidStr1 = Guid.NewGuid().ToString().ToUpper();
                // A "first visit" timestamp up to nine days in the past.
                string dateTime = DateTime.Now.AddDays(-random.Next(10)).ToString();

                // Only the cookie VALUES are URL-encoded. The "; " that separates the sessionid pair
                // from the next pair must stay literal, otherwise it is emitted as "%3b+" and the
                // autoid pair is swallowed into the sessionid value.
                string refValue      = HttpUtility.UrlEncode(string.Format("0|0|0|0|{0}|{1}", DateTime.Now.ToString(), dateTime));
                string sessionSuffix = HttpUtility.UrlEncode(string.Format("||{0}||0", dateTime));

                string cookie = string.Format("fvlid={1}; " +
                                              "sessionid={2}{6}; " +
                                              "autoid={7}; " +
                                              "ahpau=1; __ah_uuid_ng=c_{3}; " +
                                              "sessionip={0}; " +
                                              "sessionvid={8}; " +
                                              "area=119999; v_no=0; visit_info_ad={4}||{9}||-1||-1||4; ref={5}"
                                              , mdi.outip, mdi.fvlid,
                                              guidStr, guidStr, guidStr,
                                              refValue,
                                              sessionSuffix,
                                              Z.GetMD5LowerString(guidStr),
                                              guidStr1, guidStr1
                                              );

                httpCient.DefaultRequestHeaders.Add("Cookie", cookie);
                httpCient.DefaultRequestHeaders.Add("Host", "al.autohome.com.cn");
                httpCient.DefaultRequestHeaders.Add("Referer", ConfigUtls.mda_pv_init_referer);

                // Bound the random build number: an unbounded Next() can overflow "3000 + n" and
                // otherwise yields build numbers with up to ten digits.
                httpCient.DefaultRequestHeaders.Add("User-Agent", string.Format("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.{0}.77 Safari/537.36", 3000 + random1.Next(1000)));
                return(httpCient);
            }
            catch
            {
                // Header validation (or cookie construction) failed: do not leak the client/handler.
                httpCient.Dispose();
                throw;
            }
        }
예제 #3
0
        /// <summary>
        /// Replaces the default <c>Cookie</c> and <c>Referer</c> headers of
        /// <paramref name="httpClient"/> with freshly generated values for the next request.
        /// </summary>
        /// <param name="httpClient">Client whose default request headers are rewritten.</param>
        /// <param name="requestUrlAndReferer">Supplies the <c>fvlid</c> cookie value and the <c>referer</c> header value.</param>
        /// <param name="mdi">Supplies <c>outip</c>, which is written into the <c>sessionip</c> cookie.</param>
        private void SetCookie(HttpClient httpClient, RequestUrlAndReferer requestUrlAndReferer, IPMetaDataItem mdi)
        {
            httpClient.DefaultRequestHeaders.Remove("Cookie");

            string guidStr  = Guid.NewGuid().ToString().ToUpper();
            string guidStr1 = Guid.NewGuid().ToString().ToUpper();
            // A "first visit" timestamp up to nine days in the past.
            string dateTime = DateTime.Now.AddDays(-random.Next(10)).ToString();

            // Only the cookie VALUES are URL-encoded. The "; " that separates the sessionid pair from
            // the next pair must stay literal, otherwise it is emitted as "%3b+" and the autoid pair
            // is swallowed into the sessionid value.
            string refValue      = HttpUtility.UrlEncode(string.Format("0|0|0|0|{0}|{1}", DateTime.Now.ToString(), dateTime));
            string sessionSuffix = HttpUtility.UrlEncode(string.Format("||{0}||0", dateTime));

            string cookie = string.Format("fvlid={1}; " +
                                          "sessionid={2}{6}; " +
                                          "autoid={7}; " +
                                          "ahpau=1; __ah_uuid_ng=c_{3}; " +
                                          "sessionip={0}; " +
                                          "sessionvid={8}; " +
                                          "area=119999; v_no=0; visit_info_ad={4}||{9}||-1||-1||4; ref={5}"
                                          , mdi.outip, requestUrlAndReferer.fvlid,
                                          guidStr, guidStr, guidStr,
                                          refValue,
                                          sessionSuffix,
                                          Z.GetMD5LowerString(guidStr),
                                          guidStr1, guidStr1
                                          );

            httpClient.DefaultRequestHeaders.Add("Cookie", cookie);
            httpClient.DefaultRequestHeaders.Remove("Referer");
            httpClient.DefaultRequestHeaders.Add("Referer", requestUrlAndReferer.referer);
        }
예제 #4
0
 /// <summary>
 /// Issues one GET per entry of <c>mdi.requestUrlAndReferers</c> through the proxy described by
 /// <paramref name="mdi"/>, then prints a progress line to the console.
 /// </summary>
 /// <remarks>
 /// Requests are started without waiting for their responses. When
 /// <c>ConfigUtls.proxy_rate_open</c> is positive the method pauses
 /// <c>ConfigUtls.time_space_multi</c> ms after each request and, if <c>ConfigUtls.probe_switch</c>
 /// is positive and the clock is at minute 29, second 29-30, performs a blocking probe request
 /// whose outcome is stored in <c>mdi.result</c> and enqueued on <c>_storeQueue</c>.
 /// Failures are swallowed (best effort). The client is disposed only after the started requests
 /// have finished, because disposing an <see cref="HttpClient"/> cancels its pending requests.
 /// </remarks>
 /// <param name="mdi">Work item; ignored when null, when <c>ipwithport</c> is blank or contains
 /// "{", or when it has no request entries.</param>
 private void SpiderProcessMulti(IPMetaDataItem mdi)
 {
     if (mdi == null || string.IsNullOrWhiteSpace(mdi.ipwithport) || mdi.ipwithport.Contains("{"))
     {
         return;
     }

     // Nothing to request: return before creating a client that would never be disposed.
     if (mdi.requestUrlAndReferers == null || mdi.requestUrlAndReferers.Count == 0)
     {
         return;
     }

     bool rateLimited = ConfigUtls.proxy_rate_open > 0;
     HttpClient httpClient = GetProxyHttpClientMulti(mdi);
     var pendingRequests = new System.Collections.Generic.List<System.Threading.Tasks.Task<HttpResponseMessage>>();

     try
     {
         foreach (var item in mdi.requestUrlAndReferers)
         {
             if (item == null || string.IsNullOrWhiteSpace(item.requesturl))
             {
                 continue;
             }

             string tempRequest = HttpUtility.UrlDecode(item.requesturl, System.Text.Encoding.UTF8);
             try
             {
                 SetCookie(httpClient, item, mdi);
                 // Fire the request and keep the task so the client can outlive it.
                 pendingRequests.Add(httpClient.GetAsync(tempRequest));
                 this.internalSingleton.RequestCount++;
             }
             catch (Exception)
             {
                 // Best effort: one bad URL/header must not stop the remaining requests.
             }

             if (rateLimited)
             {
                 // Pause between requests (reference interval).
                 Thread.Sleep(ConfigUtls.time_space_multi);
             }
         }

         if (rateLimited)
         {
             DateTime now = DateTime.Now;
             if (ConfigUtls.probe_switch > 0 && now.Minute == 29 && 28 < now.Second && now.Second < 31)
             {
                 string probeRequest = HttpUtility.UrlDecode("http://wetopic.api.autohome.com.cn/api/test", System.Text.Encoding.UTF8);
                 // The probe is deliberately synchronous: its status is recorded on the item.
                 using (var response = httpClient.GetAsync(probeRequest).Result)
                 {
                     mdi.result = string.Format("Ipport {3} -{4} Datetime {2} IsSuccessStatusCode {0} StatusCode {1}"
                                                , response.IsSuccessStatusCode, response.StatusCode, DateTime.Now, mdi.ipwithport, mdi.outip);
                 }
                 this._storeQueue.Enqueue(mdi);
             }
         }

         Console.WriteLine(string.Format("*执行总数totalCount:{0}/{1}当前执行数量 /获取资源数量{2}/消耗资源数量{5}/队列长度{6}/开始时间{3} 结束时间{4}",
                                         this.internalSingleton.RequestTotal,
                                         internalSingleton.RequestCount,
                                         internalSingleton.RequestIpCount,
                                         internalSingleton.BeginTime.ToString("yyyy-MM-dd hh:mm:ss fff"),
                                         DateTime.Now.ToString("yyyy-MM-dd hh:mm:ss fff"),
                                         this.internalSingleton.ConsumerIpCount, this._conQueue.Count));
     }
     catch (Exception)
     {
         // Best effort: probe or reporting failures are ignored.
     }
     finally
     {
         this.internalSingleton.ConsumerIpCount++;
         DisposeAfterRequests(httpClient, pendingRequests);
     }
 }

 /// <summary>
 /// Disposes <paramref name="httpClient"/> (and any responses received) once every task in
 /// <paramref name="pendingRequests"/> has completed, without blocking the caller.
 /// </summary>
 private static void DisposeAfterRequests(HttpClient httpClient, System.Collections.Generic.List<System.Threading.Tasks.Task<HttpResponseMessage>> pendingRequests)
 {
     if (pendingRequests.Count == 0)
     {
         httpClient.Dispose();
         return;
     }

     System.Threading.Tasks.Task.WhenAll(pendingRequests).ContinueWith(finished =>
     {
         // Reading Exception marks request failures as observed; they are intentionally ignored.
         var ignored = finished.Exception;
         foreach (var request in pendingRequests)
         {
             if (request.Status == System.Threading.Tasks.TaskStatus.RanToCompletion)
             {
                 request.Result.Dispose();
             }
         }
         httpClient.Dispose();
     });
 }
예제 #5
0
        /// <summary>
        /// Strips the default <c>Cookie</c> and <c>Referer</c> headers from
        /// <paramref name="httpClient"/>. Nothing is added back.
        /// </summary>
        /// <remarks>
        /// Earlier revisions rebuilt the <c>Cookie</c> header here (either from freshly generated
        /// GUIDs and timestamps or from fixed captured values) and re-added a <c>Referer</c> taken
        /// from <paramref name="requestUrlAndReferer"/>; all of that is currently disabled, so
        /// <paramref name="requestUrlAndReferer"/> and <paramref name="mdi"/> are not read.
        /// </remarks>
        /// <param name="httpClient">Client whose default request headers are cleared.</param>
        /// <param name="requestUrlAndReferer">Unused in the current implementation.</param>
        /// <param name="mdi">Unused in the current implementation.</param>
        private void SetCookie(HttpClient httpClient, RequestUrlAndReferer requestUrlAndReferer, IPMetaDataItem mdi)
        {
            var defaultHeaders = httpClient.DefaultRequestHeaders;

            // Same order as before: cookie first, then referer.
            defaultHeaders.Remove("Cookie");
            defaultHeaders.Remove("Referer");
        }