예제 #1
0
        /// <summary>
        /// Inserts the items collected in <paramref name="context"/> into MongoDB.
        /// For every item group the table metadata stored in the context under the same key
        /// selects the target database (<c>Schema.Database</c>) and collection (<c>Schema.Table</c>).
        /// </summary>
        /// <param name="context">Data-flow context holding the parsed items and their table metadata.</param>
        /// <returns><see cref="DataFlowResult.Success"/> once every non-empty group has been inserted.</returns>
        protected override async Task<DataFlowResult> Store(DataFlowContext context)
        {
            var items = context.GetItems();

            if (items == null || items.Count == 0)
            {
                return DataFlowResult.Success;
            }

            foreach (var item in items)
            {
                // A group without any rows has nothing to store.
                if (item.Value == null)
                {
                    continue;
                }

                var tableMetadata = (TableMetadata)context[item.Key];
                var databaseName = tableMetadata.Schema.Database;

                // Single lookup instead of ContainsKey + TryAdd + indexer.
                if (!_cache.TryGetValue(databaseName, out var db))
                {
                    db = _client.GetDatabase(databaseName);
                    _cache.TryAdd(databaseName, db);
                }

                var collection = db.GetCollection<BsonDocument>(tableMetadata.Schema.Table);

                var bsonDocs = new List<BsonDocument>();
                foreach (var data in item.Value)
                {
                    bsonDocs.Add(BsonDocument.Create(data));
                }

                // The MongoDB driver rejects an empty batch ("Must contain at least 1 request"),
                // so skip the insert when the group produced no documents.
                if (bsonDocs.Count == 0)
                {
                    continue;
                }

                await collection.InsertManyAsync(bsonDocs);
            }

            return DataFlowResult.Success;
        }
예제 #2
0
        /// <summary>
        /// Serializes the whole item collection of <paramref name="context"/> to JSON
        /// and prints it to the console as a single line.
        /// </summary>
        /// <param name="context">Data-flow context whose items are printed.</param>
        /// <returns>A completed task carrying <see cref="DataFlowResult.Success"/>.</returns>
        protected override Task<DataFlowResult> Store(DataFlowContext context)
        {
            var serialized = JsonConvert.SerializeObject(context.GetItems());
            Console.WriteLine(serialized);

            return Task.FromResult(DataFlowResult.Success);
        }
예제 #3
0
        /// <summary>
        /// Writes the value of every item in <paramref name="context"/> as one line of a UTF-8 file
        /// named <c>{PageIndex}_{Hash}.html</c>. The file lives in the data folder resolved from the
        /// context's "ProjectName" entry, falling back to the request's owner id.
        /// </summary>
        /// <param name="context">Data-flow context providing the response and the items to write.</param>
        /// <returns><see cref="DataFlowResult.Success"/> after the file has been written.</returns>
        protected override async Task<DataFlowResult> Store(DataFlowContext context)
        {
            var file = Path.Combine(GetDataFolder(context["ProjectName"] ?? context.Response.Request.OwnerId), $"{context.Response.Request.PageIndex}_{context.Response.Request.Hash}.html");

            // FileMode.Create truncates an existing file. File.OpenWrite does not, so re-storing a
            // shorter page used to leave stale bytes from the previous content at the end of the file.
            using (var stream = new FileStream(file, FileMode.Create, FileAccess.Write, FileShare.None))
            using (var writer = new StreamWriter(stream, Encoding.UTF8))
            {
                var items = context.GetItems();

                // Other storages treat a null item collection as "nothing to store"; do the same here.
                if (items != null)
                {
                    foreach (var item in items)
                    {
                        await writer.WriteLineAsync(item.Value);
                    }
                }
            }

            return DataFlowResult.Success;
        }
        /// <summary>
        /// Serializes all items of <paramref name="context"/> to JSON and appends them as one line
        /// through <c>Writer</c>, after calling <c>CreateFile</c> with the path
        /// <c>{data folder of OwnerId}/{Hash}.json</c>.
        /// </summary>
        /// <param name="context">Data-flow context providing the response and the items to write.</param>
        /// <returns><see cref="DataFlowResult.Success"/> after the line has been written.</returns>
        protected override async Task<DataFlowResult> Store(DataFlowContext context)
        {
            var results = context.GetItems();

            var request = context.Response.Request;
            var folder = GetDataFolder(request.OwnerId);
            var path = Path.Combine(folder, $"{request.Hash}.json");

            // CreateFile and Writer are defined elsewhere; presumably CreateFile prepares Writer
            // for this path - confirm against the enclosing class.
            CreateFile(path);

            var json = JsonConvert.SerializeObject(results);
            await Writer.WriteLineAsync(json);

            return DataFlowResult.Success;
        }
예제 #5
0
        /// <summary>
        /// Writes the value of every item in <paramref name="context"/> as one line of a UTF-8 file
        /// named <c>{PageIndex}_{Depth}_{Hash}.{extension}</c>.
        /// The extension is taken from <c>_definition.FileFormat</c> (default "html"). The folder is
        /// <c>_definition.FileStorage</c> or, when that is blank, the data folder of the project name
        /// (falling back to the request's owner id). The folder is created when missing.
        /// </summary>
        /// <param name="context">Data-flow context providing the response and the items to write.</param>
        /// <returns><see cref="DataFlowResult.Success"/> after the file has been written.</returns>
        protected override async Task<DataFlowResult> Store(DataFlowContext context)
        {
            var extension = "html";

            if (!string.IsNullOrWhiteSpace(_definition.FileFormat))
            {
                // Path.GetExtension yields ".ext" (or "" when the format contains no dot).
                extension = Path.GetExtension(_definition.FileFormat).Replace(".", "");
            }
            if (string.IsNullOrWhiteSpace(extension))
            {
                extension = "html";
            }

            var folder = _definition.FileStorage;

            if (string.IsNullOrWhiteSpace(folder))
            {
                folder = _definition.ProjectName;
                if (string.IsNullOrWhiteSpace(folder))
                {
                    folder = context.Response.Request.OwnerId;
                }
                folder = GetDataFolder(folder);
            }

            // CreateDirectory is a no-op for an existing directory, so no Exists check is needed.
            Directory.CreateDirectory(folder);

            var file = Path.Combine(folder, $"{context.Response.Request.PageIndex}_{context.Response.Request.Depth}_{context.Response.Request.Hash}.{extension}");

            // FileMode.Create truncates an existing file. File.OpenWrite does not, so re-storing a
            // shorter page used to leave stale bytes from the previous content at the end of the file.
            using (var stream = new FileStream(file, FileMode.Create, FileAccess.Write, FileShare.None))
            using (var writer = new StreamWriter(stream, Encoding.UTF8))
            {
                var items = context.GetItems();

                // Other storages treat a null item collection as "nothing to store"; do the same here.
                if (items != null)
                {
                    foreach (var item in items)
                    {
                        await writer.WriteLineAsync(item.Value);
                    }
                }
            }

            return DataFlowResult.Success;
        }
예제 #6
0
        /// <summary>
        /// Prints every record of every item group in <paramref name="context"/> to the console,
        /// one JSON document per line.
        /// </summary>
        /// <param name="context">Data-flow context whose items are printed.</param>
        /// <returns>A completed task carrying <see cref="DataFlowResult.Success"/>.</returns>
        protected override Task<DataFlowResult> Store(DataFlowContext context)
        {
            foreach (var entry in context.GetItems())
            {
                foreach (var record in entry.Value)
                {
                    var json = JsonConvert.SerializeObject(record);
                    Console.WriteLine(json);
                }
            }

            return Task.FromResult(DataFlowResult.Success);
        }
예제 #7
0
        /// <summary>
        /// Handles one message containing a JSON list of download responses.
        /// Successful responses are counted and run through the data-flow pipeline. Failed responses
        /// are counted, and those below <c>RetryDownloadTimes</c> are re-enqueued on the scheduler.
        /// </summary>
        /// <param name="message">JSON-serialized <c>List&lt;Response&gt;</c>; blank messages are ignored with a warning.</param>
        private async Task HandleMessage(string message)
        {
            if (string.IsNullOrWhiteSpace(message))
            {
                _logger.LogWarning($"任务 {Id} 接收到空消息");
                return;
            }

            _lastRequestedTime = DateTime.Now;
            var responses = JsonConvert.DeserializeObject<List<Response>>(message);

            // A JSON "null" payload deserializes to null, so guard against that as well as an empty list.
            if (responses == null || responses.Count == 0)
            {
                _logger.LogWarning($"任务 {Id} 接收到空回复");
                return;
            }

            var agentId = responses.First().AgentId;

            var successResponses = responses.Where(x => x.Success).ToList();

            // Record download-success statistics.
            if (successResponses.Count > 0)
            {
                var elapsedMilliseconds = successResponses.Sum(x => x.ElapsedMilliseconds);
                await _statisticsService.IncrementDownloadSuccessAsync(agentId, successResponses.Count,
                                                                       elapsedMilliseconds);
            }

            // Process the successfully downloaded responses concurrently and wait for all of them.
            // Parallel.ForEach with an async lambda creates async-void delegates: it returned at the
            // first await of each delegate, so the work was never awaited by this method.
            await Task.WhenAll(successResponses.Select(
                                   response => Task.Run(() => HandleSuccessResponseAsync(response))));

            var retryResponses =
                responses.Where(x => !x.Success && x.Request.RetriedTimes < RetryDownloadTimes)
                .ToList();

            retryResponses.ForEach(x =>
            {
                x.Request.RetriedTimes++;
                _logger.LogInformation($"任务 {Id} 下载 {x.Request.Url} 失败: {x.Exception}");
            });

            var failedRequests =
                responses.Where(x => !x.Success)
                .ToList();

            // Record download-failure statistics.
            if (failedRequests.Count > 0)
            {
                await _statisticsService.IncrementFailedAsync(Id);

                await _statisticsService.IncrementDownloadFailedAsync(agentId, failedRequests.Count);
            }

            var retryCount = _scheduler.Enqueue(retryResponses.Select(x => x.Request.Clone()));

            if (retryCount > 0)
            {
                await _statisticsService.IncrementTotalAsync(Id, retryCount);
            }
        }

        /// <summary>
        /// Runs one successfully downloaded response through the data flows, re-enqueues it when the
        /// result is empty and retrying is enabled, enqueues follow-up requests within the depth
        /// limit, and updates the success/failure statistics. Processing exceptions are logged.
        /// </summary>
        private async Task HandleSuccessResponseAsync(Response response)
        {
            try
            {
                _logger.LogInformation($"任务 {Id} 下载 {response.Request.Url} 成功");

                // Dispose the DI scope once this response is done; it was previously never released.
                using (var scope = _services.CreateScope())
                {
                    var context = new DataFlowContext(scope.ServiceProvider);
                    context.AddResponse(response);

                    var success = await RunDataFlowsAsync(context);

                    var resultItems = context.GetItems();
                    // Retry the request when parsing produced no results.
                    // NOTE(review): no upper bound on RetriedTimes is checked on this path - confirm intended.
                    if ((resultItems == null || resultItems.Sum(x => x.Value == null ? 0 : x.Value.Count) == 0) &&
                        RetryWhenResultIsEmpty)
                    {
                        response.Request.RetriedTimes++;
                        response.Request.ComputeHash();
                        // No need to add this one to the total count.
                        _scheduler.Enqueue(new[] { response.Request.Clone() });
                    }

                    // Follow-up requests discovered while parsing.
                    var followRequests = context.GetTargetRequests();
                    if (followRequests != null && followRequests.Count > 0)
                    {
                        var requests = new List<Request>();
                        foreach (var followRequest in followRequests)
                        {
                            followRequest.Depth = response.Request.Depth + 1;
                            if (followRequest.Depth <= Depth)
                            {
                                requests.Add(followRequest);
                            }
                        }

                        var count = _scheduler.Enqueue(requests);
                        if (count > 0)
                        {
                            await _statisticsService.IncrementTotalAsync(Id, count);
                        }
                    }

                    if (success)
                    {
                        await _statisticsService.IncrementSuccessAsync(Id);
                    }
                    else
                    {
                        await _statisticsService.IncrementFailedAsync(Id);
                    }

                    var result = success ? "成功" : $"失败: {context.Result}";
                    _logger.LogInformation($"任务 {Id} 处理 {response.Request.Url} {result}");
                }
            }
            catch (Exception e)
            {
                // A processing failure is an error, not routine information.
                _logger.LogError($"任务 {Id} 处理 {response.Request.Url} 失败: {e}");
            }
        }

        /// <summary>
        /// Executes the data flows in order. Stops at the first flow that reports Failed (returns
        /// false) or Terminated (returns true); returns true when no flow reports Failed.
        /// </summary>
        private async Task<bool> RunDataFlowsAsync(DataFlowContext context)
        {
            foreach (var dataFlow in _dataFlows)
            {
                var dataFlowResult = await dataFlow.HandleAsync(context);

                // Return from the method to leave the loop: a `break` inside the switch only exits
                // the switch, which previously let later flows run after Failed/Terminated.
                switch (dataFlowResult)
                {
                case DataFlowResult.Success:
                    continue;

                case DataFlowResult.Failed:
                    _logger.LogError($"任务 {Id} 数据流处理器 {dataFlow.GetType().Name} 失败");
                    return false;

                case DataFlowResult.Terminated:
                    return true;
                }
            }

            return true;
        }
        /// <summary>
        /// Stores the items of <paramref name="context"/> in a relational database.
        /// For every item group the table metadata stored in the context under the same key is turned
        /// into SQL statements, the database/table is ensured to exist, and the statement selected by
        /// <c>StorageType</c> is executed for the group's rows, optionally inside a transaction.
        /// A failed attempt is logged, rolled back and retried, up to <c>RetryTimes</c> attempts.
        /// </summary>
        /// <param name="context">Data-flow context holding the parsed items and their table metadata.</param>
        /// <returns>
        /// <see cref="DataFlowResult.Success"/>. NOTE(review): this is returned even when every
        /// attempt for a group failed (failures are only logged) - confirm that is intended.
        /// </returns>
        protected override async Task<DataFlowResult> Store(DataFlowContext context)
        {
            var items = context.GetItems();

            if (items == null || items.Count == 0)
            {
                return DataFlowResult.Success;
            }

            using (IDbConnection conn = TryCreateDbConnection(context))
            {
                foreach (var item in items)
                {
                    var tableMetadata = (TableMetadata)context[item.Key];

                    SqlStatements sqlStatements = GetSqlStatements(tableMetadata);

                    // NOTE(review): locking on `this` exposes the lock to outside code; a private
                    // lock object would be safer but needs a new field in the enclosing class.
                    lock (this)
                    {
                        EnsureDatabaseAndTableCreated(conn, sqlStatements);
                    }

                    for (int i = 0; i < RetryTimes; ++i)
                    {
                        IDbTransaction transaction = null;
                        try
                        {
                            if (UseTransaction)
                            {
                                transaction = conn.BeginTransaction();
                            }

                            var list = (List<dynamic>)item.Value;

                            // The transaction (null when UseTransaction is off) is passed to every
                            // command so the statement actually runs inside it; some providers refuse
                            // commands without it while a local transaction is pending.
                            switch (StorageType)
                            {
                            case StorageType.Insert:
                            {
                                await conn.ExecuteAsync(sqlStatements.InsertSql, list, transaction: transaction);
                                break;
                            }

                            case StorageType.InsertIgnoreDuplicate:
                            {
                                await conn.ExecuteAsync(sqlStatements.InsertIgnoreDuplicateSql, list, transaction: transaction);
                                break;
                            }

                            case StorageType.Update:
                            {
                                if (string.IsNullOrWhiteSpace(sqlStatements.UpdateSql))
                                {
                                    throw new SpiderException("未能生成更新 SQL");
                                }

                                await conn.ExecuteAsync(sqlStatements.UpdateSql, list, transaction: transaction);
                                break;
                            }

                            case StorageType.InsertAndUpdate:
                            {
                                await conn.ExecuteAsync(sqlStatements.InsertAndUpdateSql, list, transaction: transaction);
                                break;
                            }
                            }

                            transaction?.Commit();

                            // Stored successfully: stop retrying. Without this the loop re-ran the
                            // statement RetryTimes times and duplicated plain inserts.
                            break;
                        }
                        catch (Exception ex)
                        {
                            Logger?.LogError($"尝试插入数据失败: {ex}");
                            try
                            {
                                transaction?.Rollback();
                            }
                            catch (Exception e)
                            {
                                Logger?.LogError($"数据库回滚失败: {e}");
                            }
                        }
                        finally
                        {
                            transaction?.Dispose();
                        }
                    }
                }
            }

            return DataFlowResult.Success;
        }