/// <summary>
/// Inserts every result set held by the context into MongoDB.
/// The database and collection names are taken from the <c>TableMetadata</c>
/// stored in the context under the same key as the result set.
/// </summary>
/// <param name="context">Data flow context providing the items to store and their table metadata.</param>
/// <returns>
/// <c>DataFlowResult.Success</c> when there is nothing to store or after every
/// non-empty batch has been inserted. Exceptions raised by the driver propagate to the caller.
/// </returns>
protected override async Task<DataFlowResult> Store(DataFlowContext context)
{
    var items = context.GetItems();
    if (items == null || items.Count == 0)
    {
        return DataFlowResult.Success;
    }

    foreach (var item in items)
    {
        // A key without any data has nothing to insert.
        if (item.Value == null)
        {
            continue;
        }

        var tableMetadata = (TableMetadata)context[item.Key];
        var databaseName = tableMetadata.Schema.Database;

        // Single lookup instead of ContainsKey + TryAdd + indexer.
        if (!_cache.TryGetValue(databaseName, out var db))
        {
            db = _client.GetDatabase(databaseName);
            _cache.TryAdd(databaseName, db);
        }

        var collection = db.GetCollection<BsonDocument>(tableMetadata.Schema.Table);

        var bsonDocs = new List<BsonDocument>();
        foreach (var data in item.Value)
        {
            bsonDocs.Add(BsonDocument.Create(data));
        }

        // InsertManyAsync rejects an empty document list, so skip empty batches.
        if (bsonDocs.Count == 0)
        {
            continue;
        }

        await collection.InsertManyAsync(bsonDocs);
    }

    return DataFlowResult.Success;
}
/// <summary>
/// Serializes all items of the context to a single JSON string and prints it to the console.
/// </summary>
/// <param name="context">Data flow context whose items are printed.</param>
/// <returns>A completed task carrying <c>DataFlowResult.Success</c>.</returns>
protected override Task<DataFlowResult> Store(DataFlowContext context)
{
    var json = JsonConvert.SerializeObject(context.GetItems());
    Console.WriteLine(json);

    return Task.FromResult(DataFlowResult.Success);
}
/// <summary>
/// Writes the value of every item in the context, one per line, to
/// <c>{PageIndex}_{Hash}.html</c> inside the data folder resolved from the
/// context's "ProjectName" entry, falling back to the request's owner id.
/// </summary>
/// <param name="context">Data flow context providing the response/request information and the items to write.</param>
/// <returns><c>DataFlowResult.Success</c> once the file has been written.</returns>
protected override async Task<DataFlowResult> Store(DataFlowContext context)
{
    var request = context.Response.Request;
    var file = Path.Combine(
        GetDataFolder(context["ProjectName"] ?? context.Response.Request.OwnerId),
        $"{request.PageIndex}_{request.Hash}.html");

    // File.Create truncates an existing file. File.OpenWrite would keep the old
    // content past the newly written bytes when the new page is shorter.
    using (var writer = new StreamWriter(File.Create(file), Encoding.UTF8))
    {
        var items = context.GetItems();
        if (items != null)
        {
            foreach (var item in items)
            {
                await writer.WriteLineAsync(item.Value);
            }
        }
    }

    return DataFlowResult.Success;
}
/// <summary>
/// Serializes the items of the context to JSON and writes them as one line through <c>Writer</c>,
/// after calling <c>CreateFile</c> for <c>{Hash}.json</c> in the data folder of the request's owner.
/// </summary>
/// <param name="context">Data flow context providing the items and the originating request.</param>
/// <returns><c>DataFlowResult.Success</c> after the line has been written.</returns>
protected override async Task<DataFlowResult> Store(DataFlowContext context)
{
    var results = context.GetItems();

    var folder = GetDataFolder(context.Response.Request.OwnerId);
    var path = Path.Combine(folder, $"{context.Response.Request.Hash}.json");
    CreateFile(path);

    var json = JsonConvert.SerializeObject(results);
    await Writer.WriteLineAsync(json);

    return DataFlowResult.Success;
}
/// <summary>
/// Writes the value of every item in the context, one per line, to
/// <c>{PageIndex}_{Depth}_{Hash}.{extension}</c>.
/// </summary>
/// <remarks>
/// The extension is taken from the extension part of <c>_definition.FileFormat</c> and defaults to "html".
/// The target folder is <c>_definition.FileStorage</c> when set; otherwise it is the data folder
/// resolved from <c>_definition.ProjectName</c>, or from the request's owner id when that is blank too.
/// </remarks>
/// <param name="context">Data flow context providing the response/request information and the items to write.</param>
/// <returns><c>DataFlowResult.Success</c> once the file has been written.</returns>
protected override async Task<DataFlowResult> Store(DataFlowContext context)
{
    var extension = "html";
    if (!string.IsNullOrWhiteSpace(_definition.FileFormat))
    {
        extension = Path.GetExtension(_definition.FileFormat).Replace(".", "");
    }

    // FileFormat without an extension part yields an empty string; fall back to the default.
    if (string.IsNullOrWhiteSpace(extension))
    {
        extension = "html";
    }

    var folder = _definition.FileStorage;
    if (string.IsNullOrWhiteSpace(folder))
    {
        folder = _definition.ProjectName;
        if (string.IsNullOrWhiteSpace(folder))
        {
            folder = context.Response.Request.OwnerId;
        }

        folder = GetDataFolder(folder);
    }

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(folder);

    var request = context.Response.Request;
    var file = Path.Combine(folder, $"{request.PageIndex}_{request.Depth}_{request.Hash}.{extension}");

    // File.Create truncates an existing file. File.OpenWrite would keep the old
    // content past the newly written bytes when the new page is shorter.
    using (var writer = new StreamWriter(File.Create(file), Encoding.UTF8))
    {
        var items = context.GetItems();
        if (items != null)
        {
            foreach (var item in items)
            {
                await writer.WriteLineAsync(item.Value);
            }
        }
    }

    return DataFlowResult.Success;
}
/// <summary>
/// Prints every data object of every item in the context to the console,
/// one JSON document per line.
/// </summary>
/// <param name="context">Data flow context whose items are printed.</param>
/// <returns>A completed task carrying <c>DataFlowResult.Success</c>.</returns>
protected override Task<DataFlowResult> Store(DataFlowContext context)
{
    foreach (var entry in context.GetItems())
    {
        foreach (var row in entry.Value)
        {
            string json = JsonConvert.SerializeObject(row);
            Console.WriteLine(json);
        }
    }

    return Task.FromResult(DataFlowResult.Success);
}
/// <summary>
/// Handles one message containing a JSON list of download responses:
/// records statistics, runs the data flows for every successful response,
/// and re-enqueues failed requests that still have download retries left.
/// </summary>
/// <param name="message">JSON-serialized list of <c>Response</c> objects.</param>
/// <returns>A task that completes once all responses of the message have been processed.</returns>
private async Task HandleMessage(string message)
{
    if (string.IsNullOrWhiteSpace(message))
    {
        _logger.LogWarning($"任务 {Id} 接收到空消息");
        return;
    }

    _lastRequestedTime = DateTime.Now;

    var responses = JsonConvert.DeserializeObject<List<Response>>(message);
    // DeserializeObject returns null for a literal "null" payload.
    if (responses == null || responses.Count == 0)
    {
        _logger.LogWarning($"任务 {Id} 接收到空回复");
        return;
    }

    var agentId = responses.First().AgentId;

    var successResponses = responses.Where(x => x.Success).ToList();

    // Record download-success statistics.
    if (successResponses.Count > 0)
    {
        var elapsedMilliseconds = successResponses.Sum(x => x.ElapsedMilliseconds);
        await _statisticsService.IncrementDownloadSuccessAsync(agentId, successResponses.Count,
            elapsedMilliseconds);
    }

    // Process the successfully downloaded responses in parallel.
    // Parallel.ForEach with an async lambda creates async-void delegates that are
    // never awaited; run each response as a real task and await them all instead.
    await Task.WhenAll(successResponses.Select(
        response => Task.Run(() => HandleSuccessResponseAsync(response))));

    var retryResponses = responses
        .Where(x => !x.Success && x.Request.RetriedTimes < RetryDownloadTimes)
        .ToList();
    retryResponses.ForEach(x =>
    {
        x.Request.RetriedTimes++;
        _logger.LogInformation($"任务 {Id} 下载 {x.Request.Url} 失败: {x.Exception}");
    });

    var failedRequests = responses.Where(x => !x.Success).ToList();

    // Record download-failure statistics.
    if (failedRequests.Count > 0)
    {
        await _statisticsService.IncrementFailedAsync(Id);
        await _statisticsService.IncrementDownloadFailedAsync(agentId, failedRequests.Count);
    }

    var retryCount = _scheduler.Enqueue(retryResponses.Select(x => x.Request.Clone()));
    if (retryCount > 0)
    {
        await _statisticsService.IncrementTotalAsync(Id, retryCount);
    }
}

/// <summary>
/// Runs the configured data flows for one successfully downloaded response,
/// enqueues retry and follow-up requests, and records the processing result.
/// Exceptions are logged and not rethrown.
/// </summary>
/// <param name="response">The successfully downloaded response to process.</param>
private async Task HandleSuccessResponseAsync(Response response)
{
    try
    {
        _logger.LogInformation($"任务 {Id} 下载 {response.Request.Url} 成功");

        // Dispose the scope when processing is finished so scoped services are released.
        using (var scope = _services.CreateScope())
        {
            var context = new DataFlowContext(scope.ServiceProvider);
            context.AddResponse(response);

            var success = true;
            foreach (var dataFlow in _dataFlows)
            {
                var dataFlowResult = await dataFlow.HandleAsync(context);

                // Failed and Terminated stop the pipeline. A 'break' placed inside a
                // switch section would only leave the switch, not this loop.
                if (dataFlowResult == DataFlowResult.Failed)
                {
                    _logger.LogError($"任务 {Id} 数据流处理器 {dataFlow.GetType().Name} 失败");
                    success = false;
                    break;
                }

                if (dataFlowResult == DataFlowResult.Terminated)
                {
                    break;
                }
            }

            var resultItems = context.GetItems();

            // Retry the request when parsing produced no results.
            if ((resultItems == null || resultItems.Sum(x => x.Value == null ? 0 : x.Value.Count) == 0) &&
                RetryWhenResultIsEmpty)
            {
                response.Request.RetriedTimes++;
                response.Request.ComputeHash();
                // No need to increase the total count for a retry.
                _scheduler.Enqueue(new[] { response.Request.Clone() });
            }

            // Follow-up requests extracted while parsing.
            var followRequests = context.GetTargetRequests();
            if (followRequests != null && followRequests.Count > 0)
            {
                var requests = new List<Request>();
                foreach (var followRequest in followRequests)
                {
                    followRequest.Depth = response.Request.Depth + 1;
                    if (followRequest.Depth <= Depth)
                    {
                        requests.Add(followRequest);
                    }
                }

                var count = _scheduler.Enqueue(requests);
                if (count > 0)
                {
                    await _statisticsService.IncrementTotalAsync(Id, count);
                }
            }

            if (success)
            {
                await _statisticsService.IncrementSuccessAsync(Id);
            }
            else
            {
                await _statisticsService.IncrementFailedAsync(Id);
            }

            var result = success ? "成功" : $"失败: {context.Result}";
            _logger.LogInformation($"任务 {Id} 处理 {response.Request.Url} {result}");
        }
    }
    catch (Exception e)
    {
        // A processing exception is an error, not an informational event.
        _logger.LogError($"任务 {Id} 处理 {response.Request.Url} 失败: {e}");
    }
}
/// <summary>
/// Persists every result set held by the context into a relational database
/// using the SQL statement that matches the configured <c>StorageType</c>.
/// </summary>
/// <remarks>
/// Each result set is attempted at most <c>RetryTimes</c> times and the loop stops
/// after the first successful attempt. Failures are logged and, when a transaction
/// is in use, rolled back; they are not rethrown.
/// </remarks>
/// <param name="context">Data flow context providing the items to store and their table metadata.</param>
/// <returns><c>DataFlowResult.Success</c> in all non-throwing cases.</returns>
protected override async Task<DataFlowResult> Store(DataFlowContext context)
{
    var items = context.GetItems();
    if (items == null || items.Count == 0)
    {
        return DataFlowResult.Success;
    }

    using (IDbConnection conn = TryCreateDbConnection(context))
    {
        foreach (var item in items)
        {
            var tableMetadata = (TableMetadata)context[item.Key];
            SqlStatements sqlStatements = GetSqlStatements(tableMetadata);

            lock (this)
            {
                EnsureDatabaseAndTableCreated(conn, sqlStatements);
            }

            for (int i = 0; i < RetryTimes; ++i)
            {
                IDbTransaction transaction = null;
                try
                {
                    if (UseTransaction)
                    {
                        transaction = conn.BeginTransaction();
                    }

                    var list = (List<dynamic>)item.Value;

                    // The transaction is passed explicitly so the command is enlisted in it;
                    // it is null when UseTransaction is off, which matches the default.
                    switch (StorageType)
                    {
                        case StorageType.Insert:
                        {
                            await conn.ExecuteAsync(sqlStatements.InsertSql, list, transaction);
                            break;
                        }
                        case StorageType.InsertIgnoreDuplicate:
                        {
                            await conn.ExecuteAsync(sqlStatements.InsertIgnoreDuplicateSql, list, transaction);
                            break;
                        }
                        case StorageType.Update:
                        {
                            if (string.IsNullOrWhiteSpace(sqlStatements.UpdateSql))
                            {
                                throw new SpiderException("未能生成更新 SQL");
                            }

                            await conn.ExecuteAsync(sqlStatements.UpdateSql, list, transaction);
                            break;
                        }
                        case StorageType.InsertAndUpdate:
                        {
                            await conn.ExecuteAsync(sqlStatements.InsertAndUpdateSql, list, transaction);
                            break;
                        }
                    }

                    transaction?.Commit();

                    // Stored successfully: stop retrying, otherwise the same batch
                    // would be written RetryTimes times.
                    break;
                }
                catch (Exception ex)
                {
                    Logger?.LogError($"尝试插入数据失败: {ex}");

                    try
                    {
                        transaction?.Rollback();
                    }
                    catch (Exception e)
                    {
                        Logger?.LogError($"数据库回滚失败: {e}");
                    }
                }
                finally
                {
                    transaction?.Dispose();
                }
            }
        }
    }

    return DataFlowResult.Success;
}