/// <summary>
/// Deletes the key named by <c>SetKey</c> from <c>Db</c>.
/// </summary>
/// <remarks>
/// The delete runs inside <c>RedialManagerUtils.Execute</c> under the label "rds-rs";
/// what that wrapper adds is defined outside this method.
/// </remarks>
public override void ResetDuplicateCheck()
{
    RedialManagerUtils.Execute(
        "rds-rs",
        () =>
        {
            Db.KeyDelete(SetKey);
        });
}
/// <summary>
/// Reads the length of the list stored under <c>QueueKey</c>.
/// </summary>
/// <returns>The value returned by <c>Db.ListLength(QueueKey)</c>.</returns>
public long GetLeftRequestsCount()
{
    long remaining = RedialManagerUtils.Execute("rds-lc", () => Db.ListLength(QueueKey));
    return remaining;
}
/// <summary>
/// Registers the spider in the <c>TaskList</c> sorted set.
/// </summary>
/// <param name="spider">Spider whose <c>Identity</c> is added as the member.</param>
/// <remarks>
/// The third argument passed to <c>SortedSetAdd</c> is the value of
/// <c>DateTimeUtils.GetCurrentTimeStamp()</c> at the time the callback runs.
/// </remarks>
public override void Init(ISpider spider)
{
    RedialManagerUtils.Execute(
        "rds-init",
        () =>
        {
            _db.SortedSetAdd(TaskList, spider.Identity, DateTimeUtils.GetCurrentTimeStamp());
        });
}
/// <summary>
/// Increments by one the <c>IdentityMd5</c> field of the hash stored under <c>ErrorCountKey</c>.
/// </summary>
public void IncreaseErrorCounter()
{
    RedialManagerUtils.Execute(
        "rds-iec",
        () =>
        {
            Db.HashIncrement(ErrorCountKey, IdentityMd5, 1);
        });
}
/// <summary>
/// Deletes the key that <c>GetSetKey</c> returns for the given spider's identity.
/// </summary>
/// <param name="spider">Spider whose <c>Identity</c> selects the key to delete.</param>
public override void ResetDuplicateCheck(ISpider spider)
{
    RedialManagerUtils.Execute(
        "rds-reset",
        () =>
        {
            // The key is resolved inside the callback, exactly where the delete happens.
            _db.KeyDelete(GetSetKey(spider.Identity));
        });
}
/// <summary>
/// Increments by one the <c>IdentityMd5</c> field of the hash stored under <c>SuccessCountKey</c>.
/// </summary>
public void IncreaseSuccessCounter()
{
    RedialManagerUtils.Execute(
        "rds-isc",
        () =>
        {
            Db.HashIncrement(SuccessCountKey, IdentityMd5, 1);
        });
}
/// <summary>
/// Reads the cardinality of the set stored under <c>SetKey</c>.
/// </summary>
/// <returns>The value returned by <c>Db.SetLength(SetKey)</c>.</returns>
public long GetTotalRequestsCount()
{
    long total = RedialManagerUtils.Execute("rds-tc", () => Db.SetLength(SetKey));
    return total;
}
//[MethodImpl(MethodImplOptions.Synchronized)]
/// <summary>
/// Takes the next request off the queue.
/// </summary>
/// <returns>
/// The deserialized <c>Request</c>, or <c>null</c> when the list under <c>QueueKey</c> yields
/// no value or the hash under <c>ItemKey</c> has no non-empty JSON for the popped field.
/// </returns>
/// <remarks>
/// The work runs inside <c>SafeExecutor.Execute(30, ...)</c>, itself wrapped by
/// <c>RedialManagerUtils.Execute</c> under the label "rds-pl".
/// </remarks>
public override Request Poll()
{
    return RedialManagerUtils.Execute("rds-pl", () => SafeExecutor.Execute(30, () =>
    {
        var popped = Db.ListRightPop(QueueKey);
        if (!popped.HasValue)
        {
            return null;
        }

        string field = popped.ToString();
        string payload = Db.HashGet(ItemKey, field);
        if (string.IsNullOrEmpty(payload))
        {
            return null;
        }

        // Deserialize first; the hash entry is removed only after that succeeds.
        var request = JsonConvert.DeserializeObject<Request>(payload);
        Db.HashDelete(ItemKey, field);
        return request;
    }));
}
/// <summary>
/// Reads the <c>IdentityMd5</c> field of the hash stored under <c>ErrorCountKey</c>.
/// </summary>
/// <returns>The stored value converted to <c>long</c>, or 0 when the field has no value.</returns>
public long GetErrorRequestsCount()
{
    return RedialManagerUtils.Execute("rds-erc", () =>
    {
        var stored = Db.HashGet(ErrorCountKey, IdentityMd5);
        if (stored.HasValue)
        {
            return (long)stored;
        }

        return 0L;
    });
}
/// <summary>
/// Reads the length of the list stored under the queue key of the given spider.
/// </summary>
/// <param name="spider">Spider whose <c>Identity</c> is passed to <c>GetQueueKey</c>.</param>
/// <returns>
/// The list length as an <c>int</c>. Lengths above <c>int.MaxValue</c> are reported as
/// <c>int.MaxValue</c> rather than wrapping around to an unrelated (possibly negative) number.
/// </returns>
public int GetLeftRequestsCount(ISpider spider)
{
    return RedialManagerUtils.Execute("rds-getleftcount", () =>
    {
        long size = _db.ListLength(GetQueueKey(spider.Identity));

        // A plain (int) cast is unchecked by default and would silently truncate large values.
        return size > int.MaxValue ? int.MaxValue : (int)size;
    });
}
/// <summary>
/// Downloads a page by navigating a pooled web driver to the request URL.
/// </summary>
/// <param name="request">Request whose <c>Url</c> is navigated to.</param>
/// <param name="spider">Spider supplying <c>Site.ContentType</c> and passed to <c>ValidatePage</c>.</param>
/// <returns>
/// A <c>Page</c> whose content comes from <c>_fiddlerClient.ResponseBodyString</c> and whose
/// target URL and title come from the web driver.
/// </returns>
/// <exception cref="SpiderExceptoin">Thrown when the <c>Login</c> callback returns false.</exception>
public override Page Download(Request request, ISpider spider)
{
    WebDriverItem driverService = null;
    try
    {
        driverService = Pool.Get();

        // Log in at most once; the lock keeps concurrent callers from running Login together.
        lock (this)
        {
            if (!_isLogined && Login != null)
            {
                _isLogined = Login.Invoke(driverService.WebDriver as RemoteWebDriver);
                if (!_isLogined)
                {
                    throw new SpiderExceptoin("Login failed. Please check your login codes.");
                }
            }
        }

        // Rebuild the URL with an encoded query string so that URLs containing
        // Chinese characters are not garbled.
        Uri uri = request.Url;
        string query = uri.Query;
        string encodedQuery;
        if (string.IsNullOrEmpty(query))
        {
            encodedQuery = "";
        }
        else
        {
            // Skip the leading '?' before encoding, then put it back.
            encodedQuery = "?" + HttpUtility.UrlPathEncode(query.Substring(1));
        }

        string realUrl = uri.Scheme + "://" + uri.DnsSafeHost + ":" + uri.Port + uri.AbsolutePath + encodedQuery;
        if (UrlFormat != null)
        {
            realUrl = UrlFormat(realUrl);
        }

        RedialManagerUtils.Execute("webdriverdownloader-download", () =>
        {
            driverService.WebDriver.Navigate().GoToUrl(realUrl);
        });

        Thread.Sleep(_webDriverWaitTime);

        AfterNavigate?.Invoke((RemoteWebDriver)driverService.WebDriver);

        var page = new Page(request, spider.Site.ContentType);
        page.Content = _fiddlerClient.ResponseBodyString;
        // Clear the captured response only after its body has been copied into the page.
        _fiddlerClient.Clear();
        page.Url = request.Url.ToString();
        page.TargetUrl = driverService.WebDriver.Url;
        page.Title = driverService.WebDriver.Title;

        ValidatePage(page, spider);

        // Must be reset once finished: if this value is stored in Redis,
        // a single task ends up being run in an endless loop.
        request.PutExtra(Request.CycleTriedTimes, null);

        return page;
    }
    finally
    {
        Pool.ReturnToPool(driverService);
    }
}
/// <summary>
/// Reads the cardinality of the set stored under the set key of the given spider.
/// </summary>
/// <param name="spider">Spider whose <c>Identity</c> is passed to <c>GetSetKey</c>.</param>
/// <returns>
/// The set length as an <c>int</c>. Lengths above <c>int.MaxValue</c> are reported as
/// <c>int.MaxValue</c> rather than wrapping around to an unrelated (possibly negative) number.
/// </returns>
public int GetTotalRequestsCount(ISpider spider)
{
    return RedialManagerUtils.Execute("rds-gettotalcount", () =>
    {
        long size = _db.SetLength(GetSetKey(spider.Identity));

        // A plain (int) cast is unchecked by default and would silently truncate large values.
        return size > int.MaxValue ? int.MaxValue : (int)size;
    });
}
/// <summary>
/// Pushes a request by delegating to <c>DoPush</c>.
/// </summary>
/// <param name="request">Request handed to <c>DoPush</c>.</param>
/// <remarks>
/// Callers are serialized on <c>lock (this)</c>; the call runs inside
/// <c>RedialManagerUtils.Execute</c> under the label "sp".
/// </remarks>
public void Push(Request request)
{
    lock (this)
    {
        RedialManagerUtils.Execute(
            "sp",
            () =>
            {
                DoPush(request);
            });
    }
}
/// <summary>
/// Pushes a request for the given spider by delegating to <c>DoPush</c>.
/// </summary>
/// <param name="request">Request handed to <c>DoPush</c>.</param>
/// <param name="spider">Spider handed to <c>DoPush</c>.</param>
/// <remarks>
/// Callers are serialized on <c>lock (this)</c>; the call runs inside
/// <c>RedialManagerUtils.Execute</c> under the label "scheduler-push".
/// </remarks>
public void Push(Request request, ISpider spider)
{
    lock (this)
    {
        RedialManagerUtils.Execute(
            "scheduler-push",
            () =>
            {
                DoPush(request, spider);
            });
    }
}
/// <summary>
/// Initializes the scheduler for a spider: derives the per-spider keys from the MD5 of
/// the spider identity and adds the identity to the <c>TaskList</c> sorted set.
/// </summary>
/// <param name="spider">Spider whose <c>Identity</c> is hashed and registered.</param>
public override void Init(ISpider spider)
{
    base.Init(spider);

    var identityHash = Encrypt.Md5Encrypt(spider.Identity);
    IdentityMd5 = identityHash;

    // The hash is appended to whatever these keys currently hold.
    ItemKey += identityHash;
    SetKey += identityHash;
    ErrorCountKey += identityHash;
    SuccessCountKey += identityHash;

    // NOTE(review): unlike the keys above, QueueKey is overwritten rather than appended to.
    // Confirm this is intended before changing it; the stored key names depend on it.
    QueueKey = identityHash;

    RedialManagerUtils.Execute("rds-in", () =>
    {
        Db.SortedSetAdd(TaskList, spider.Identity, (long)DateTimeUtils.GetCurrentTimeStamp());
    });
}
/// <summary>
/// Creates the schema and the table by executing the statements returned by
/// <c>GetCreateSchemaSql</c> and <c>GetCreateTableSql</c>, in that order.
/// </summary>
/// <remarks>
/// Does nothing when <c>Mode</c> is <c>PipelineMode.Update</c>. The connection and the command
/// are both disposed when the work finishes, including when a statement throws.
/// </remarks>
public virtual void Initialize()
{
    if (Mode == PipelineMode.Update)
    {
        return;
    }

    RedialManagerUtils.Execute("db-init", () =>
    {
        using (DbConnection conn = CreateConnection())
        {
            conn.Open();

            // The command is disposable too; release it deterministically.
            using (var command = conn.CreateCommand())
            {
                command.CommandText = GetCreateSchemaSql();
                command.CommandType = CommandType.Text;
                command.ExecuteNonQuery();

                command.CommandText = GetCreateTableSql();
                command.CommandType = CommandType.Text;
                command.ExecuteNonQuery();
            }

            // No explicit Close(): disposing the connection closes it.
        }
    });
}
/// <summary>
/// Writes each item to the database using the insert or update statement selected by <c>Mode</c>.
/// </summary>
/// <param name="datas">Items to persist; a null or empty list is a no-op.</param>
/// <param name="spider">Not used by this method.</param>
/// <remarks>
/// One command is reused per batch; its parameters are rebuilt for every item. In update mode
/// the parameters for <c>UpdateColumns</c> are added before those for <c>Primary</c>.
/// Values missing from an item are bound as <c>DBNull.Value</c>.
/// </remarks>
public void Process(List<JObject> datas, ISpider spider)
{
    // Nothing to write: skip opening a connection altogether.
    if (datas == null || datas.Count == 0)
    {
        return;
    }

    RedialManagerUtils.Execute("pipeline-", () =>
    {
        switch (Mode)
        {
            case PipelineMode.Insert:
            {
                using (var conn = CreateConnection())
                using (var cmd = conn.CreateCommand())
                {
                    cmd.CommandText = GetInsertSql();
                    cmd.CommandType = CommandType.Text;
                    conn.Open();

                    foreach (var data in datas)
                    {
                        cmd.Parameters.Clear();
                        foreach (var column in Columns)
                        {
                            cmd.Parameters.Add(CreateValueParameter(data, $"{column.Name}", Convert(column.DataType)));
                        }

                        cmd.ExecuteNonQuery();
                    }
                }

                break;
            }

            case PipelineMode.Update:
            {
                using (var conn = CreateConnection())
                using (var cmd = conn.CreateCommand())
                {
                    cmd.CommandText = GetUpdateSql();
                    cmd.CommandType = CommandType.Text;
                    conn.Open();

                    foreach (var data in datas)
                    {
                        cmd.Parameters.Clear();
                        foreach (var column in UpdateColumns)
                        {
                            cmd.Parameters.Add(CreateValueParameter(data, $"{column.Name}", Convert(column.DataType)));
                        }

                        foreach (var column in Primary)
                        {
                            cmd.Parameters.Add(CreateValueParameter(data, $"{column.Name}", Convert(column.DataType)));
                        }

                        cmd.ExecuteNonQuery();
                    }
                }

                break;
            }
        }
    });
}

/// <summary>
/// Builds the "@name" parameter for one column from the matching token of <paramref name="data"/>.
/// </summary>
private DbParameter CreateValueParameter(JObject data, string columnName, DbType dbType)
{
    var parameter = CreateDbParameter();
    parameter.ParameterName = "@" + columnName;

    // ADO.NET represents SQL NULL as DBNull.Value; a CLR null is not a portable way to bind NULL.
    object value = data.SelectToken(columnName)?.Value<string>();
    parameter.Value = value ?? System.DBNull.Value;

    parameter.DbType = dbType;
    return parameter;
}
/// <summary>
/// Downloads a page with <c>HttpWebRequest</c>.
/// </summary>
/// <param name="request">Request to download; its status-code extra is updated along the way.</param>
/// <param name="spider">Spider supplying the <c>Site</c> settings.</param>
/// <returns>
/// The validated <c>Page</c>, or <c>null</c> when <c>spider.Site</c> is null.
/// </returns>
/// <exception cref="SpiderExceptoin">Thrown when the status code is not accepted by <c>StatusAccept</c>.</exception>
/// <remarks>
/// On any exception other than <c>RedialException</c>, a page carrying the exception is passed
/// to <c>ValidatePage</c> before the original exception is rethrown.
/// </remarks>
public override Page Download(Request request, ISpider spider)
{
    if (spider.Site == null)
    {
        return null;
    }

    Site site = spider.Site;
    ICollection<int> acceptStatCode = site.AcceptStatCode;
    var charset = site.Encoding;

    int statusCode = 0;
    HttpWebResponse response = null;
    try
    {
        if (CustomizeRequestBeforeGenerate != null)
        {
            SingleExecutor.Execute(() =>
            {
                CustomizeRequestBeforeGenerate(request);
            });
        }

        var httpWebRequest = GetHttpWebRequest(request, site);

        response = RedialManagerUtils.Execute("downloader-download", h =>
        {
            HttpWebRequest webRequest = (HttpWebRequest)h;

            // Send the body only for POST requests that actually have one.
            if (HttpConstant.Method.Post.Equals(request.Method) && !string.IsNullOrEmpty(request.PostBody))
            {
                var bodyBytes = spider.Site.Encoding.GetBytes(request.PostBody);
#if !NET_CORE
                webRequest.ContentLength = bodyBytes.Length;
                using (Stream bodyStream = webRequest.GetRequestStream())
                {
                    bodyStream.Write(bodyBytes, 0, bodyBytes.Length);
                    bodyStream.Close();
                }
#else
                using (Stream bodyStream = webRequest.GetRequestStreamAsync().Result)
                {
                    bodyStream.Write(bodyBytes, 0, bodyBytes.Length);
                    bodyStream.Dispose();
                }
#endif
            }

#if !NET_CORE
            return (HttpWebResponse)webRequest?.GetResponse();
#else
            return (HttpWebResponse)webRequest?.GetResponseAsync().Result;
#endif
        }, httpWebRequest);

        statusCode = (int)response.StatusCode;
        request.PutExtra(Request.StatusCode, statusCode);

        // Anything that is not an accepted status is a failed download.
        if (!StatusAccept(acceptStatCode, statusCode))
        {
            throw new SpiderExceptoin("Download failed.");
        }

        Page page = HandleResponse(request, charset, response, statusCode, site);

        // Whenever a login page is hit here, after a successful redial every such request
        // throws and is re-added to the Scheduler by the Spider. So seeing several
        // "Custom Validate Failed" warnings under multi-threading is nothing to worry about;
        // a dedicated exception type could be used to tell them apart.
        ValidatePage(page);

        // Must be reset once finished: if this value is stored in Redis,
        // a single task ends up being run in an endless loop.
        request.PutExtra(Request.CycleTriedTimes, null);

#if !NET_CORE
        httpWebRequest.ServicePoint.ConnectionLimit = int.MaxValue;
#endif

        return page;
    }
    catch (RedialException)
    {
        // Redial exceptions bypass page validation and propagate untouched.
        throw;
    }
    catch (Exception e)
    {
        Page page = new Page(request, site.ContentType) { Exception = e };
        ValidatePage(page);
        throw;
    }
    finally
    {
        // Close the response first, so an exception in the statements above cannot leave it open.
        try
        {
#if !NET_CORE
            response?.Close();
#else
            response?.Dispose();
#endif
        }
        catch (Exception e)
        {
            Logger.Warn("Close response fail.", e);
        }

        request.PutExtra(Request.StatusCode, statusCode);
    }
}
/// <summary>
/// Downloads a page with <c>httpClient</c>.
/// </summary>
/// <param name="request">Request to download; the proxy and status code are stored in its extras.</param>
/// <param name="spider">Spider supplying the <c>Site</c> settings, the validator input and the logger.</param>
/// <returns>
/// The validated <c>Page</c>, or <c>null</c> when <c>spider.Site</c> is null.
/// </returns>
/// <exception cref="DownloadException">Thrown when the response status is not in <c>site.AcceptStatCode</c>.</exception>
/// <remarks>
/// On any exception other than <c>RedialException</c>, a page carrying the exception is passed
/// to <c>ValidatePage</c> before the original exception is rethrown.
/// </remarks>
public override Page Download(Request request, ISpider spider)
{
    if (spider.Site == null)
    {
        return null;
    }

    Site site = spider.Site;
    var acceptStatCodes = site.AcceptStatCode;

    HttpResponseMessage response = null;
    request.PutExtra(Request.Proxy, site.GetHttpProxyFromPool());

    int statusCode = 200;
    try
    {
        if (PostBodyGenerator != null)
        {
            SingleExecutor.Execute(() =>
            {
                PostBodyGenerator(spider.Site, request);
            });
        }

        var httpMessage = GenerateHttpRequestMessage(request, site);

        response = RedialManagerUtils.Execute(
            "downloader-download",
            m => httpClient.SendAsync((HttpRequestMessage)m).Result,
            httpMessage);

        AddRequestCount();

        // NOTE(review): EnsureSuccessStatusCode throws for non-success responses before the
        // AcceptStatCode check below is reached - confirm that ordering is intended.
        response.EnsureSuccessStatusCode();
        if (!site.AcceptStatCode.Contains(response.StatusCode))
        {
            throw new DownloadException($"下载 {request.Url} 失败. Code: {response.StatusCode}");
        }

        statusCode = (int)response.StatusCode;
        request.PutExtra(Request.StatusCode, statusCode);

        Page page = HandleResponse(request, response, statusCode, site);

        // need update
        page.TargetUrl = request.Url.ToString();

        // Whenever a login page is hit here, after a successful redial every such request
        // throws and is re-added to the Scheduler by the Spider. So seeing several
        // "Custom Validate Failed" warnings under multi-threading is nothing to worry about;
        // a dedicated exception type could be used to tell them apart.
        ValidatePage(page, spider);

        // Must be reset once finished: if this value is stored in Redis,
        // a single task ends up being run in an endless loop.
        request.PutExtra(Request.CycleTriedTimes, null);

        return page;
    }
    catch (Exception e)
    {
        // Redial exceptions propagate untouched; everything else is reported to the validator first.
        if (!(e is RedialException))
        {
            Page page = new Page(request, site.ContentType) { Exception = e };
            ValidatePage(page, spider);
        }

        throw;
    }
    finally
    {
        // Release the response first, so an exception in the statements above cannot leave it open.
        try
        {
            response?.Dispose();
        }
        catch (Exception e)
        {
            spider.Logger.Warn("Close response fail.", e);
        }
    }
}
//[MethodImpl(MethodImplOptions.Synchronized)]
/// <summary>
/// Takes the next request for the given spider by delegating to <c>DoPoll</c>.
/// </summary>
/// <param name="spider">Spider handed to <c>DoPoll</c>.</param>
/// <returns>Whatever <c>DoPoll</c> returns for the spider.</returns>
public override Request Poll(ISpider spider)
{
    return RedialManagerUtils.Execute("rds-poll", () =>
    {
        return DoPoll(spider);
    });
}