/// <summary>
/// Handles a download message: starts one background download per request and
/// publishes every non-null response to the response topic of the request's owner.
/// </summary>
/// <param name="message">
/// A <c>Request</c> or a <c>Request[]</c>. Any other value is treated as "no requests"
/// and only produces a warning log entry.
/// </param>
/// <returns>
/// An already-completed task. The downloads keep running in the background and are
/// not awaited by this method.
/// </returns>
private Task DynamicDownloadAsync(dynamic message)
{
    var requests = message as Request[];
    if (requests == null)
    {
        var single = message as Request;
        if (single != null)
        {
            requests = new[] { single };
        }
    }

    if (requests != null && requests.Length > 0)
    {
        // Requests created 60 or more seconds ago are no longer downloaded.
        // The download center sends requests in batches, so they are grouped by owner id.
        // For the same task the downloads should be sequential.
        // TODO: every request runs on its own background task, so ordering inside a group is not guaranteed.
        var groupings = requests
            .Where(x => (DateTime.Now - x.CreationTime).TotalSeconds < 60)
            .GroupBy(x => x.OwnerId)
            .ToDictionary(x => x.Key, y => y.ToList());

        foreach (var grouping in groupings)
        {
            var topic = $"{Framework.ResponseHandlerTopic}{grouping.Key}";
            foreach (var request in grouping.Value)
            {
                // Deliberately not awaited: the handler must return immediately.
                // Failures are caught and logged inside the helper instead of being lost.
                Task.Run(() => DownloadAndPublishAsync(topic, request));
            }
        }
    }
    else
    {
        Logger.LogWarning("下载请求数: 0");
    }

#if NETFRAMEWORK
    return DotnetSpider.Core.Framework.CompletedTask;
#else
    return Task.CompletedTask;
#endif
}

/// <summary>
/// Downloads one request and, when a response is produced, publishes it to <paramref name="topic"/>.
/// Exceptions are logged rather than rethrown because the caller does not await this task.
/// </summary>
/// <param name="topic">Topic the response is published to.</param>
/// <param name="request">Request to download.</param>
private async Task DownloadAndPublishAsync(string topic, Request request)
{
    try
    {
        var response = await DownloadAsync(request);
        if (response != null)
        {
            await _dmq.PublishAsync(topic, Framework.DownloadCommand, new Response[] { response });
        }
    }
    catch (Exception ex)
    {
        Logger.LogError($"Download or response publishing failed for topic {topic}: {ex}");
    }
}
/// <summary>
/// Push download requests to the queues of the downloader agents allocated to a task.
/// </summary>
/// <param name="ownerId">Task identifier.</param>
/// <param name="requests">Requests to dispatch. <c>null</c> or an empty array is a no-op.</param>
/// <returns>
/// A task that completes once every request has been published, or once the method
/// has given up because no allocated agent is online.
/// </returns>
protected virtual async Task EnqueueRequests(string ownerId, Request[] requests)
{
    if (requests == null || requests.Length == 0)
    {
        return;
    }

    // If the allocation is not in the cache it is taken from the database.
    // This scenario only occurs when the download center switches.
    // The store is queried only on a cache miss; an entry added concurrently wins.
    Tuple<AllocateDownloaderMessage, string[]> allocation;
    if (!AllocatedAgents.TryGetValue(ownerId, out allocation))
    {
        var allocateMessage = JsonConvert.DeserializeObject<AllocateDownloaderMessage>(
            await DownloaderAgentStore.GetAllocateDownloaderMessageAsync(ownerId));
        var allocatedAgentIds = (await DownloaderAgentStore.GetAllocatedListAsync(ownerId))
            .Select(x => x.AgentId)
            .ToArray();
        allocation = AllocatedAgents.AddOrUpdate(
            ownerId,
            new Tuple<AllocateDownloaderMessage, string[]>(allocateMessage, allocatedAgentIds),
            (key, existing) => existing);
    }

    var allocateDownloaderMessage = allocation.Item1;
    var agentIds = allocation.Item2;
    if (agentIds.Length <= 0)
    {
        Logger.LogError($"任务 {ownerId} 未分配到下载器代理");
        // Nothing is allocated, so waiting for an agent to come online cannot succeed.
        return;
    }

    // Determine whether the allocated downloader agents are active. When the download
    // center switches, agent data is synchronized from the database (every 15 seconds),
    // so keep checking long enough to cover two synchronizations before giving up.
    const int MaxOnlineChecks = 35;
    const int CheckIntervalMilliseconds = 1000;
    const int OnlineThresholdSeconds = 12;

    var onlineAgents = new List<DownloaderAgent>();
    for (var attempt = 0; attempt < MaxOnlineChecks; ++attempt)
    {
        foreach (var agentId in agentIds)
        {
            DownloaderAgent candidate;
            // Single lookup: avoids a failure if the agent is removed between a
            // ContainsKey check and a later indexer read.
            if (Agents.TryGetValue(agentId, out candidate)
                && (DateTime.Now - candidate.LastModificationTime).TotalSeconds < OnlineThresholdSeconds)
            {
                onlineAgents.Add(candidate);
            }
        }

        if (onlineAgents.Count > 0)
        {
            break;
        }

        // Wait without blocking the calling thread.
        await Task.Delay(CheckIntervalMilliseconds);
    }

    if (onlineAgents.Count == 0)
    {
        // Just exit. Nothing gets dispatched, so the spider triggers its no-response exit event.
        Logger.LogError($"任务 {ownerId} 分配的下载器代理都已下线");
        return;
    }

    switch (allocateDownloaderMessage.DownloadPolicy)
    {
        case DownloadPolicy.Random:
        {
            // Every request goes to a randomly picked online agent.
            foreach (var request in requests)
            {
                await PublishDownloadRequestAsync(onlineAgents.Random().Id, request);
            }

            break;
        }
        case DownloadPolicy.Chained:
        {
            foreach (var request in requests)
            {
                if (string.IsNullOrWhiteSpace(request.AgentId))
                {
                    // Not bound to an agent yet: pick a random online one.
                    await PublishDownloadRequestAsync(onlineAgents.Random().Id, request);
                    continue;
                }

                // Bound to a specific agent: it must still be online, otherwise the request is dropped.
                var boundAgent = onlineAgents.FirstOrDefault(x => x.Id == request.AgentId);
                if (boundAgent == null)
                {
                    Logger.LogError($"任务 {ownerId} 分配的下载器代理 {request.AgentId} 已下线");
                    continue;
                }

                await PublishDownloadRequestAsync(boundAgent.Id, request);
            }

            break;
        }
    }
}

/// <summary>
/// Publishes a single download request to an agent's queue, using <c>Dmq</c> when it is
/// set and otherwise sending a "|command|json" text message through <c>Mq</c>.
/// </summary>
/// <param name="agentId">Identifier of the target agent, used as the topic.</param>
/// <param name="request">Request to publish; it is sent as a one-element array.</param>
private async Task PublishDownloadRequestAsync(string agentId, Request request)
{
    if (Dmq == null)
    {
        var json = JsonConvert.SerializeObject(new[] { request });
        var message = $"|{Framework.DownloadCommand}|{json}";
        await Mq.PublishAsync(agentId, message);
    }
    else
    {
        await Dmq.PublishAsync(agentId, Framework.DownloadCommand, new[] { request });
    }
}