public async Task TestLimitsCancel()
{
    // Switching to a different app before the 100ms daily limit elapses
    // should cancel the pending limit, so the handler must never fire.
    var observer = CreateObserver();
    var limitedApp = CreateAppDailyLimit(100);
    var limitedAppInfo = AppInfo.Create(limitedApp.Name);
    var otherAppInfo = AppInfo.Create("some app");

    var appChanges = Observable.ToObservable(new List<AppChangedArgs>()
    {
        new AppChangedArgs(LogInfo.Create(limitedAppInfo, "")),
        new AppChangedArgs(LogInfo.Create(otherAppInfo, ""))
    });
    // Re-emit the events as one buffered batch (100ms window), mirroring
    // how the real notifier delivers grouped changes.
    appChanges = Observable.ToObservable(await appChanges.Buffer(TimeSpan.FromMilliseconds(100)));
    windowChangedNotifier.Setup(n => n.AppChangedObservable).Returns(appChanges);

    repository.Setup(d => d.GetFiltered<Aplication>(
            It.IsAny<Expression<Func<Aplication, bool>>>(),
            It.IsAny<Expression<Func<Aplication, object>>>()))
        .Returns(new List<Aplication> { limitedApp });
    repository.Setup(d => d.GetFilteredAsync<Aplication>(It.IsAny<Expression<Func<Aplication, bool>>>()))
        .ReturnsAsync(new List<Aplication>() { limitedApp });
    appDurationCalc.Setup(t => t.GetDuration(limitedApp.Name, LimitSpan.Day)).ReturnsAsync(0);

    observer.Initialize(new Setting() { TrackingEnabled = true });

    // Half the limit window: no limit may have been handled.
    await Task.Delay(50);
    limitHandler.Verify(h => h.Handle(It.IsAny<AppLimit>()), Times.Never);
}
public async Task TestDelayedLimitHandle()
{
    // A 100ms daily limit must be handled only after it elapses:
    // never immediately, exactly once after waiting past the limit.
    var observer = CreateObserver();
    var limitedApp = CreateAppDailyLimit(100);
    var limitedAppInfo = AppInfo.Create(limitedApp.Name);

    var appChanges = Observable.Return(new AppChangedArgs(LogInfo.Create(limitedAppInfo, "")));
    windowChangedNotifier.Setup(n => n.AppChangedObservable).Returns(appChanges);

    repository.Setup(d => d.GetFiltered<Aplication>(
            It.IsAny<Expression<Func<Aplication, bool>>>(),
            It.IsAny<Expression<Func<Aplication, object>>>()))
        .Returns(new List<Aplication> { limitedApp });
    repository.Setup(d => d.GetFilteredAsync<Aplication>(It.IsAny<Expression<Func<Aplication, bool>>>()))
        .ReturnsAsync(new List<Aplication>() { limitedApp });
    appDurationCalc.Setup(t => t.GetDuration(limitedApp.Name, LimitSpan.Day)).ReturnsAsync(0);

    observer.Initialize(new Setting() { TrackingEnabled = true });

    // Immediately after initialization the limit has not elapsed yet.
    limitHandler.Verify(h => h.Handle(limitedApp.Limits.First()), Times.Never);
    await Task.Delay(200);
    // Past the 100ms window the limit must have been handled exactly once.
    limitHandler.Verify(h => h.Handle(limitedApp.Limits.First()), Times.Once);
}
/// <summary>
/// Writes one extracted result to a ".fd" text file keyed by the MD5 of the
/// request URL: first a "url:" line, then each result entry (list values one
/// item per line under the key, scalars as "key:\tvalue").
/// </summary>
/// <param name="resultItems">The extraction results for a single request.</param>
public override void Process(ResultItems resultItems)
{
    try
    {
        string filePath = $"{BasePath}{PathSeperator}{Spider.Identity}{PathSeperator}{Encrypt.Md5Encrypt(resultItems.Request.Url.ToString())}.fd";
        FileInfo file = PrepareFile(filePath);
        // BUGFIX: FileMode.Create truncates any previous content; the original
        // OpenWrite() left stale trailing bytes when the new payload was
        // shorter than a previous write to the same file.
        using (StreamWriter printWriter = new StreamWriter(file.Open(FileMode.Create), Encoding.UTF8))
        {
            printWriter.WriteLine("url:\t" + resultItems.Request.Url);
            foreach (var entry in resultItems.Results)
            {
                var list = entry.Value as IList;
                if (list != null)
                {
                    // List values: the key on its own line, one item per line.
                    printWriter.WriteLine(entry.Key + ":");
                    foreach (var o in list)
                    {
                        printWriter.WriteLine(o);
                    }
                }
                else
                {
                    // Scalar values: single "key:\tvalue" line.
                    printWriter.WriteLine(entry.Key + ":\t" + entry.Value);
                }
            }
        }
    }
    catch (Exception e)
    {
        // Log and rethrow so the spider can count/handle the failure.
        Logger.SaveLog(LogInfo.Create("Write file error.", Logger.Name, Spider, LogLevel.Warn, e));
        throw;
    }
}
/// <summary>
/// Downloads the file at <paramref name="value"/> into the global "images"
/// directory (skipping files that already exist) and returns the original URL.
/// </summary>
/// <param name="value">The URL of the file to download.</param>
/// <returns>The unchanged input URL.</returns>
public override string Formate(string value)
{
    try
    {
        var name = Path.GetFileName(value);
        if (name != null)
        {
            // NOTE(review): .Result blocks on the async call; acceptable only
            // because this formatter's contract is synchronous.
            var fileData = Client.GetByteArrayAsync(value).Result;
            string file = Path.Combine(SpiderEnviroment.GlobalDirectory, "images", name);
            if (File.Exists(file))
            {
                return value;
            }
            // BUGFIX: the original stream was not disposed when a write threw;
            // 'using' guarantees closure. Writing the whole buffer at once also
            // replaces the original one-syscall-per-byte WriteByte loop.
            using (var stream = BasePipeline.PrepareFile(file).OpenWrite())
            {
                stream.Write(fileData, 0, fileData.Length);
                stream.Flush();
            }
        }
        return value;
    }
    catch (Exception e)
    {
        Logger.SaveLog(LogInfo.Create($"Download file: {value} failed.", Logger.Name, null, LogLevel.Error, e));
        throw;
    }
}
/// <summary>
/// Starts a fire-and-forget download of the file at <paramref name="value"/>
/// into the global "images" directory and immediately returns the file name.
/// Download failures are logged inside the continuation, not thrown.
/// </summary>
/// <param name="value">The URL of the file to download.</param>
/// <returns>The file-name portion of the URL (may be null).</returns>
protected override dynamic FormateValue(dynamic value)
{
    var name = Path.GetFileName(value);
    if (name != null)
    {
        Task<byte[]> task = Client.GetByteArrayAsync(value);
        task.ContinueWith(t =>
        {
            if (t.Exception != null)
            {
                // Runtime log string intentionally kept verbatim.
                Logger.SaveLog(LogInfo.Create($"下载文件: {value} 失败.", Logger.Name, null, LogLevel.Warn, t.Exception));
                return;
            }
            var fileData = t.Result;
            string file = Path.Combine(SpiderEnviroment.GlobalDirectory, "images", name);
            if (!File.Exists(file))
            {
                // BUGFIX: the original stream was not disposed when a write
                // threw; 'using' guarantees closure. Bulk Write replaces the
                // per-byte WriteByte loop.
                using (var stream = BasePipeline.PrepareFile(file).OpenWrite())
                {
                    stream.Write(fileData, 0, fileData.Length);
                    stream.Flush();
                }
            }
        });
    }
    return name;
}
/// <summary>
/// Initializes the database pipeline: resolves the connection string
/// (retrying via IUpdateConnectString when needed), then — unless running in
/// update mode — creates the target schema and table.
/// </summary>
/// <param name="spider">The spider this pipeline belongs to.</param>
/// <exception cref="SpiderException">
/// When no connection string can be found or obtained.
/// </exception>
public override void InitPipeline(ISpider spider)
{
    if (!IsEnabled)
    {
        return;
    }
    if (string.IsNullOrEmpty(ConnectString))
    {
        if (UpdateConnectString == null)
        {
            throw new SpiderException("Can't find ConnectString or IUpdateConnectString.");
        }
        // Retry up to 5 times with a 1s pause between failed attempts.
        for (int i = 0; i < 5; ++i)
        {
            try
            {
                ConnectString = UpdateConnectString.GetNew();
                break;
            }
            catch (Exception e)
            {
                Logger.SaveLog(LogInfo.Create("Update ConnectString failed.", Logger.Name, spider, LogLevel.Error, e));
                Thread.Sleep(1000);
            }
        }
        if (string.IsNullOrEmpty(ConnectString))
        {
            // BUGFIX: corrected typo "updadate" in the exception message.
            throw new SpiderException("Can't update ConnectString via IUpdateConnectString.");
        }
    }
    base.InitPipeline(spider);
    // Update mode works against an existing table; no DDL needed.
    if (Mode == PipelineMode.Update)
    {
        return;
    }
    NetworkCenter.Current.Execute("db-init", () =>
    {
        // BUGFIX: the command is now disposed alongside the connection.
        using (DbConnection conn = CreateConnection())
        using (var command = conn.CreateCommand())
        {
            command.CommandText = GetCreateSchemaSql();
            command.CommandType = CommandType.Text;
            command.ExecuteNonQuery();
            command.CommandText = GetCreateTableSql();
            command.CommandType = CommandType.Text;
            command.ExecuteNonQuery();
            conn.Close();
        }
    });
}
private void NotifyAppChanged()
{
    // Publish the currently active window (handle + title) as an
    // app-changed event to all subscribers.
    var logInfo = LogInfo.Create(AppInfo.Create(activeWindowHandle), activeWindowTitle);
    subject.OnNext(new AppChangedArgs(logInfo));
}
public override void InitiEntity(EntityMetadata metadata)
{
    // A schema is mandatory; without one this pipeline disables itself
    // instead of failing later during processing.
    if (metadata.Schema != null)
    {
        return;
    }
    Logger.SaveLog(LogInfo.Create("Miss pipeline because: Schema is necessary", Logger.Name, Spider, LogLevel.Warn));
    IsEnabled = false;
}
public override void InitiEntity(EntityMetadata metadata)
{
    // A schema is required to know which database/collection to write to;
    // without one this pipeline disables itself.
    if (metadata.Schema == null)
    {
        Logger.SaveLog(LogInfo.Create("Miss pipeline because: Schema is necessary", Logger.Name, Spider, LogLevel.Warn));
        IsEnabled = false;
        return;
    }
    Schema = BaseEntityDbPipeline.GenerateSchema(metadata.Schema);
    // Resolve the target collection up front so later writes go straight to it.
    var client = new MongoClient(ConnectString);
    _collection = client.GetDatabase(metadata.Schema.Database)
                        .GetCollection<BsonDocument>(metadata.Schema.TableName);
}
private void HandleVerifyCollectData()
{
    // Runs the user-supplied data-verification callback at most once per
    // spider identity; when Redis is configured, a distributed lock plus a
    // status hash coordinate this across multiple nodes.
    if (VerifyCollectedData == null)
    {
        return;
    }
    string key = "locker-validate-" + Identity;
    try
    {
        bool needInitStartRequest = true;
        if (Redis != null)
        {
            // Spin until the distributed lock is acquired (10-minute expiry
            // guards against a crashed holder never releasing it).
            while (!Db.LockTake(key, "0", TimeSpan.FromMinutes(10)))
            {
                Thread.Sleep(1000);
            }
            // Skip verification if another node already finished it.
            var lockerValue = Db.HashGet(ValidateStatusName, Identity);
            needInitStartRequest = lockerValue != "verify finished";
        }
        if (needInitStartRequest)
        {
            // Log: "starting data verification..." (runtime string kept verbatim).
            Logger.SaveLog(LogInfo.Create("开始执行数据验证...", Logger.Name, this, LogLevel.Info));
            VerifyCollectedData();
        }
        // NOTE(review): this "data verification finished" line is logged even
        // when verification was skipped above — confirm that is intended.
        Logger.SaveLog(LogInfo.Create("数据验证已完成.", Logger.Name, this, LogLevel.Info));
        if (needInitStartRequest && Redis != null)
        {
            // Mark completion so other nodes (and reruns) skip the callback.
            Db.HashSet(ValidateStatusName, Identity, "verify finished");
        }
    }
    catch (Exception e)
    {
        Logger.Error(e, e.Message);
        throw;
    }
    finally
    {
        if (Redis != null)
        {
            // NOTE(review): lock taken with string "0" but released with int 0;
            // RedisValue serializes both to the same bytes, so this matches.
            Db.LockRelease(key, 0);
        }
    }
}
/// <summary>
/// Serializes the extraction results of one request to a ".json" file named
/// by the MD5 of the request URL.
/// </summary>
/// <param name="resultItems">The extraction results for a single request.</param>
public override void Process(ResultItems resultItems)
{
    try
    {
        string path = $"{BasePath}{PathSeperator}{Spider.Identity}{PathSeperator}{Encrypt.Md5Encrypt(resultItems.Request.Url.ToString())}.json";
        FileInfo file = PrepareFile(path);
        // BUGFIX: FileMode.Create truncates any previous content; the original
        // OpenWrite() left stale trailing bytes when the new JSON was shorter
        // than an earlier write to the same file.
        using (StreamWriter printWriter = new StreamWriter(file.Open(FileMode.Create), Encoding.UTF8))
        {
            printWriter.WriteLine(JsonConvert.SerializeObject(resultItems.Results));
        }
    }
    catch (IOException e)
    {
        // NOTE(review): only IO failures are logged here; serialization
        // exceptions propagate unlogged — confirm that is intended.
        Logger.SaveLog(LogInfo.Create("Write data to json file failed.", Logger.Name, Spider, LogLevel.Warn, e));
        throw;
    }
}
protected override Page DowloadContent(Request request, ISpider spider)
{
    // Downloads the page for 'request' through the site proxy, wraps the HTTP
    // response in a Page, and converts unexpected failures into a Page whose
    // Exception is set so the caller can decide to retry/reschedule.
    Site site = spider.Site;
    HttpResponseMessage response = null;
    var proxy = site.GetHttpProxy();
    request.PutExtra(Request.Proxy, proxy);
    try
    {
        var httpMessage = GenerateHttpRequestMessage(request, site);
        response = NetworkCenter.Current.Execute("http", m =>
        {
            // NOTE(review): a new HttpClient is created per request; a shared
            // instance would avoid socket exhaustion — confirm before changing.
            HttpClient httpClient = new HttpClient(new GlobalRedirectHandler(new HttpClientHandler()
            {
                AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip,
                UseCookies = false,
                UseProxy = true,
                Proxy = proxy
            }));
            var message = (HttpRequestMessage)m;
            var requestTask = httpClient.SendAsync(message);
            // NOTE(review): Task.Wait takes MILLISECONDS; if site.Timeout is
            // already in ms this only waits Timeout/1000 ms — verify units.
            requestTask.Wait(site.Timeout / 1000);
            if (requestTask.Status == TaskStatus.RanToCompletion)
            {
                return (requestTask.Result);
            }
            else
            {
                // Any task state other than completed is treated as a timeout.
                return (new HttpResponseMessage(HttpStatusCode.RequestTimeout));
            }
        }, httpMessage);
        // NOTE(review): this throws for any non-success status, so the
        // AcceptStatCode check below can only ever see success codes.
        response.EnsureSuccessStatusCode();
        if (!site.AcceptStatCode.Contains(response.StatusCode))
        {
            // Runtime message ("download ... failed") kept verbatim.
            throw new DownloadException($"下载 {request.Url} 失败. Code: {response.StatusCode}");
        }
        var httpStatusCode = response.StatusCode;
        request.PutExtra(Request.StatusCode, httpStatusCode);
        Page page = HandleResponse(request, response, httpStatusCode, site);
        // need update
        page.TargetUrl = request.Url.ToString();
        // Original note (translated): after a successful redial, any request
        // that hits a login wall throws so the Spider re-queues it via the
        // Scheduler; multiple "Warning Custom Validate Failed" messages under
        // multi-threading are therefore expected, not alarming.
        // Reset the cycle-retry counter here — persisting it to Redis would
        // make a single task loop forever.
        request.PutExtra(Request.CycleTriedTimes, null);
        return (page);
    }
    catch (DownloadException)
    {
        // Already a classified download failure — propagate as-is.
        throw;
    }
    catch (Exception e)
    {
        // Attach any other failure to the Page so the scheduler can retry it.
        Page page = new Page(request, site.ContentType) { Exception = e };
        return (page);
    }
    finally
    {
        // Close the response here so an exception above cannot leak it.
        try
        {
            // Ensure the connection is released back to the pool.
            response?.Dispose();
        }
        catch (Exception e)
        {
            Logger.SaveLog(LogInfo.Create("Close response fail.", Logger.Name, spider, LogLevel.Warn, e));
        }
    }
}
/// <summary>
/// Binds this pipeline to one entity: derives the schema, column list,
/// primary key, update columns, indexes, and uniques from the metadata.
/// </summary>
/// <param name="metadata">The entity metadata to bind to.</param>
/// <exception cref="SpiderException">
/// When a referenced column is not a property of the entity, or update mode
/// is configured without a usable primary key / update column set.
/// </exception>
public override void InitiEntity(EntityMetadata metadata)
{
    if (metadata.Schema == null)
    {
        Logger.SaveLog(LogInfo.Create("Miss pipeline because: Schema is necessary", Logger.Name, Spider, LogLevel.Warn));
        IsEnabled = false;
        return;
    }
    Schema = GenerateSchema(metadata.Schema);
    // Only fields with an explicit DataType become table columns.
    foreach (var f in metadata.Entity.Fields)
    {
        if (!string.IsNullOrEmpty(((Field)f).DataType))
        {
            Columns.Add((Field)f);
        }
    }
    var primary = metadata.Primary;
    if (primary != null)
    {
        foreach (var p in primary)
        {
            var col = Columns.FirstOrDefault(c => c.Name == p);
            if (col == null)
            {
                throw new SpiderException("Columns set as primary is not a property of your entity.");
            }
            Primary.Add(col);
        }
    }
    if (Mode == PipelineMode.Update)
    {
        if (Primary == null || Primary.Count == 0)
        {
            throw new SpiderException("Set Primary in the Indexex attribute.");
        }
        if (metadata.Updates != null && metadata.Updates.Count > 0)
        {
            // Explicit update-column list: each name must map to a column.
            foreach (var column in metadata.Updates)
            {
                var col = Columns.FirstOrDefault(c => c.Name == column);
                if (col == null)
                {
                    throw new SpiderException("Columns set as update is not a property of your entity.");
                }
                UpdateColumns.Add(col);
            }
        }
        else
        {
            // BUGFIX: the original assigned 'Columns' by reference here, so the
            // RemoveAll below also stripped the primary-key columns out of
            // 'Columns' itself. Copy the list instead.
            UpdateColumns = new List<Field>(Columns);
        }
        // In either case the primary key itself must never be updated.
        UpdateColumns.RemoveAll(c => Primary.Contains(c));
        if (UpdateColumns.Count == 0)
        {
            throw new SpiderException("Can't update primary key.");
        }
    }
    AutoIncrement = metadata.AutoIncrement;
    if (metadata.Indexes != null)
    {
        // Each index is a list of column names; every name must resolve.
        foreach (var index in metadata.Indexes)
        {
            List<string> tmpIndex = new List<string>();
            foreach (var i in index)
            {
                var col = Columns.FirstOrDefault(c => c.Name == i);
                if (col == null)
                {
                    throw new SpiderException("Columns set as index is not a property of your entity.");
                }
                tmpIndex.Add(col.Name);
            }
            if (tmpIndex.Count != 0)
            {
                Indexs.Add(tmpIndex);
            }
        }
    }
    if (metadata.Uniques != null)
    {
        // Unique constraints follow the same resolution rule as indexes.
        foreach (var unique in metadata.Uniques)
        {
            List<string> tmpUnique = new List<string>();
            foreach (var i in unique)
            {
                var col = Columns.FirstOrDefault(c => c.Name == i);
                if (col == null)
                {
                    throw new SpiderException("Columns set as unique is not a property of your entity.");
                }
                tmpUnique.Add(col.Name);
            }
            if (tmpUnique.Count != 0)
            {
                Uniques.Add(tmpUnique);
            }
        }
    }
}
/// <summary>
/// Executes this task: restores a previously completed task if necessary,
/// then raises the invoke request in the PLC and records the action.
/// </summary>
/// <param name="parameter">Unused command parameter.</param>
public void Execute(object parameter)
{
    // Bail out unless the task is currently executable.
    if (!CanExecute(new object()))
    {
        return;
    }
    // A task that already finished must be restored before re-invocation;
    // the short sleep gives the PLC time to process the restore request.
    if (this._taskState.Synchron == (short)eTaskState.Done)
    {
        this._restoreRequest.Synchron = true;
        System.Threading.Thread.Sleep(50);
    }
    this._invokeRequest.Synchron = true;
    TcoAppDomain.Current.Logger.Information($"Task '{LogInfo.NameOrSymbol(this)}' invoked. {{@sender}}", LogInfo.Create(this));
    RecordTaskAction?.Invoke(this.CodeProvider);
}
private void InitCommands()
{
    // Re-validate executability whenever the enabled/serviceable state changes.
    this._enabled.Subscribe(ValidateCanExecute);
    this._isServiceable.Subscribe(ValidateCanExecute);
    CanExecuteChanged += TcoTask_CanExecuteChanged;

    // Abort/Restore commands log the action after it runs.
    Abort = new RelayCommand(
        AbortTask,
        x => CanAbortTask(),
        () => TcoAppDomain.Current.Logger.Information($"Task '{LogInfo.NameOrSymbol(this)}' aborted. {{@sender}}", LogInfo.Create(this)));
    Restore = new RelayCommand(
        RestoreTask,
        x => CanRestoreTask(),
        () => TcoAppDomain.Current.Logger.Information($"Task '{LogInfo.NameOrSymbol(this)}' restored. {{@sender}}", LogInfo.Create(this)));

    this._isServiceable.Subscribe(ValidateCanExecuteAbortRestore);
}
public override void Run(params string[] arguments)
{
    // Entry point: builds the processor/pipelines from the entity metadata,
    // coordinates start-request initialization across nodes via a Redis lock,
    // runs the crawl, then performs post-run data verification.
    InitEnvorimentAndVerify();
    try
    {
#if !NET_CORE
        if (CookieInterceptor != null)
        {
            // Log: "trying to fetch cookie..." (runtime strings kept verbatim).
            Logger.SaveLog(LogInfo.Create("尝试获取 Cookie...", Logger.Name, this, LogLevel.Info));
            var cookie = CookieInterceptor.GetCookie();
            if (cookie == null)
            {
                // Without a cookie the crawl cannot proceed.
                Logger.SaveLog(LogInfo.Create("获取 Cookie 失败, 爬虫无法继续.", Logger.Name, this, LogLevel.Error));
                return;
            }
            else
            {
                Site.CookiesStringPart = cookie.CookiesStringPart;
                Site.Cookies = cookie.CookiesDictionary;
            }
        }
#endif
        // Log: "creating spider...".
        Logger.SaveLog(LogInfo.Create("创建爬虫...", Logger.Name, this, LogLevel.Info));
        EntityProcessor processor = new EntityProcessor(this);
        foreach (var entity in Entities)
        {
            processor.AddEntity(entity);
        }
        PageProcessor = processor;
        // For every entity: clone each registered pipeline, bind it to the
        // entity, and keep only the pipelines that remain enabled afterwards.
        foreach (var entity in Entities)
        {
            string entiyName = entity.Entity.Name;
            var pipelines = new List <BaseEntityPipeline>();
            foreach (var pipeline in EntityPipelines)
            {
                var newPipeline = pipeline.Clone();
                newPipeline.InitiEntity(entity);
                if (newPipeline.IsEnabled)
                {
                    pipelines.Add(newPipeline);
                }
            }
            if (pipelines.Count > 0)
            {
                Pipelines.Add(new EntityPipeline(entiyName, pipelines));
            }
        }
        CheckIfSettingsCorrect();
        // Decide whether this node should seed the start requests; when Redis
        // is available, a distributed lock plus a status hash ensure only one
        // node performs initialization.
        bool needInitStartRequest = true;
        string key = "locker-" + Identity;
        if (Db != null)
        {
            while (!Db.LockTake(key, "0", TimeSpan.FromMinutes(10)))
            {
                Thread.Sleep(1000);
            }
            var lockerValue = Db.HashGet(InitStatusSetName, Identity);
            needInitStartRequest = lockerValue != "init finished";
        }
        if (arguments.Contains("rerun"))
        {
            // A rerun wipes the scheduler and the verify record, forcing a
            // fresh start-request initialization.
            Scheduler.Init(this);
            Scheduler.Clear();
            // Delete the verify record.
            Db?.HashDelete(ValidateStatusName, Identity);
            needInitStartRequest = true;
        }
        // Log: "building internal modules, preparing spider data...".
        Logger.SaveLog(LogInfo.Create("构建内部模块、准备爬虫数据...", Logger.Name, this, LogLevel.Info));
        InitComponent();
        if (needInitStartRequest)
        {
            if (PrepareStartUrls != null)
            {
                for (int i = 0; i < PrepareStartUrls.Length; ++i)
                {
                    var prepareStartUrl = PrepareStartUrls[i];
                    // Log: "[step N] adding links to the scheduler.".
                    Logger.SaveLog(LogInfo.Create($"[步骤 {i + 2}] 添加链接到调度中心.", Logger.Name, this, LogLevel.Info));
                    prepareStartUrl.Build(this, null);
                }
            }
        }
        SpiderMonitor.Register(this);
        Db?.LockRelease(key, 0);
        RegisterControl(this);
        // "running-test" skips the crawl itself but still marks the spider
        // exited so the finish/verify steps run.
        if (!arguments.Contains("running-test"))
        {
            base.Run();
        }
        else
        {
            IsExited = true;
        }
        TaskFinished();
        HandleVerifyCollectData();
    }
    finally
    {
        Dispose();
        SpiderMonitor.Dispose();
    }
}
/// <summary>
/// Validates that entities and pipelines are configured, initializes the
/// redial executor, and establishes the Redis connection when a host is set
/// (explicitly or via configuration).
/// </summary>
/// <exception cref="SpiderException">
/// When no entities/pipelines are configured or the Redis host cannot be resolved.
/// </exception>
private void InitEnvorimentAndVerify()
{
    if (Entities == null || Entities.Count == 0)
    {
        Logger.SaveLog(LogInfo.Create("Count of entity is 0.", Logger.Name, this, LogLevel.Error));
        throw new SpiderException("Count of entity is 0.");
    }
    if (EntityPipelines == null || EntityPipelines.Count == 0)
    {
        Logger.SaveLog(LogInfo.Create("Need at least one entity pipeline.", Logger.Name, this, LogLevel.Error));
        throw new SpiderException("Need at least one entity pipeline.");
    }
    if (RedialExecutor != null)
    {
        RedialExecutor.Init();
        NetworkCenter.Current.Executor = RedialExecutor;
    }
    // Fall back to configuration values when no Redis host was set explicitly;
    // 6379 is the standard Redis port default.
    if (string.IsNullOrEmpty(RedisHost))
    {
        RedisHost = Configuration.GetValue("redisHost");
        RedisPassword = Configuration.GetValue("redisPassword");
        int port;
        RedisPort = int.TryParse(Configuration.GetValue("redisPort"), out port) ? port : 6379;
    }
    if (!string.IsNullOrEmpty(RedisHost))
    {
        var configuration = new ConfigurationOptions()
        {
            ServiceName = "DotnetSpider",
            Password = RedisPassword,
            ConnectTimeout = 65530,
            KeepAlive = 8,
            ConnectRetry = 3,
            ResponseTimeout = 3000
        };
#if NET_CORE
        if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            // Workaround: .NET Core on non-Windows cannot create a Socket from
            // a DnsEndPoint, so resolve the host to an IP address first.
            var address = Dns.GetHostAddressesAsync(RedisHost).Result.FirstOrDefault();
            if (address == null)
            {
                // BUGFIX: corrected typo "resovle" in the log/exception message.
                Logger.SaveLog(LogInfo.Create($"Can't resolve host: {RedisHost}", Logger.Name, this, LogLevel.Error));
                throw new SpiderException($"Can't resolve host: {RedisHost}");
            }
            configuration.EndPoints.Add(new IPEndPoint(address, RedisPort));
        }
        else
        {
            configuration.EndPoints.Add(new DnsEndPoint(RedisHost, RedisPort));
        }
#else
        configuration.EndPoints.Add(new DnsEndPoint(RedisHost, RedisPort));
#endif
        Redis = ConnectionMultiplexer.Connect(configuration);
        // Database 1 holds the spider coordination state (locks/status hashes).
        Db = Redis.GetDatabase(1);
    }
}