示例#1
0
        /// <summary>
        /// Handles a download message by starting one background download per request and
        /// publishing each non-null response to the response topic of the request's owner.
        /// </summary>
        /// <param name="message">
        /// Either a <c>Request[]</c> or a single <c>Request</c>; any other payload is treated
        /// as an empty batch and only produces a warning.
        /// </param>
        /// <returns>
        /// An already-completed task. The downloads are started in the background and are NOT
        /// awaited, so completion of the returned task does not mean the downloads finished.
        /// </returns>
        private Task DynamicDownloadAsync(dynamic message)
        {
            // Accept both the batch form and the single-request form of the message.
            var requests = message as Request[];
            if (requests == null)
            {
                var single = message as Request;
                if (single != null)
                {
                    requests = new Request[] { single };
                }
            }

            if (requests != null && requests.Length > 0)
            {
                // Requests created 60 seconds ago or earlier are dropped instead of downloaded.
                // The download center sends requests in batches, so they are grouped by owner id
                // to route each response to that owner's response topic.
                // NOTE(review): every request runs as its own background task, so requests of the
                // same owner are not guaranteed to complete in order (see the original TODO).
                var groupings = requests
                    .Where(x => (DateTime.Now - x.CreationTime).TotalSeconds < 60)
                    .GroupBy(x => x.OwnerId)
                    .ToDictionary(x => x.Key, y => y.ToList());

                foreach (var grouping in groupings)
                {
                    var ownerId = grouping.Key;
                    foreach (var request in grouping.Value)
                    {
                        // Fire-and-forget: Task.Run unwraps the async lambda. Failures are logged
                        // inside the task because nothing ever awaits or observes it.
                        Task.Run(async () =>
                        {
                            try
                            {
                                var response = await DownloadAsync(request).ConfigureAwait(false);
                                if (response != null)
                                {
                                    await _dmq.PublishAsync($"{Framework.ResponseHandlerTopic}{ownerId}",
                                                            Framework.DownloadCommand,
                                                            new Response[] { response }).ConfigureAwait(false);
                                }
                            }
                            catch (Exception ex)
                            {
                                Logger.LogError($"任务 {ownerId} 的下载请求处理失败: {ex}");
                            }
                        });
                    }
                }
            }
            else
            {
                Logger.LogWarning("下载请求数: 0");
            }

#if NETFRAMEWORK
            return(DotnetSpider.Core.Framework.CompletedTask);
#else
            return(Task.CompletedTask);
#endif
        }
示例#2
0
        /// <summary>
        /// Pushes download requests to the queues of the downloader agents allocated to a task.
        /// </summary>
        /// <remarks>
        /// The allocation for <paramref name="ownerId"/> is read from the in-memory cache and only
        /// loaded from the store on a cache miss. The method then waits up to 35 one-second polls
        /// for at least one allocated agent whose last modification time is less than 12 seconds
        /// old, and dispatches each request according to the allocation's download policy:
        /// <c>Random</c> picks a random online agent per request; <c>Chained</c> does the same for
        /// requests without an agent id and otherwise sends the request to the agent it names,
        /// logging an error if that agent is not online.
        /// </remarks>
        /// <param name="ownerId">Task identifier.</param>
        /// <param name="requests">Requests to dispatch; a null or empty array is a no-op.</param>
        /// <returns>A task that completes once every request was published or dispatch was abandoned.</returns>
        protected virtual async Task EnqueueRequests(string ownerId, Request[] requests)
        {
            if (requests == null || requests.Length == 0)
            {
                return;
            }

            // Only hit the store when the allocation is not cached. This scenario only occurs
            // when the download center switches.
            Tuple<AllocateDownloaderMessage, string[]> allocation;
            if (!AllocatedAgents.TryGetValue(ownerId, out allocation))
            {
                var allocateMessage = JsonConvert.DeserializeObject<AllocateDownloaderMessage>(
                    await DownloaderAgentStore.GetAllocateDownloaderMessageAsync(ownerId));
                var allocatedAgentIds = (await DownloaderAgentStore.GetAllocatedListAsync(ownerId))
                    .Select(x => x.AgentId)
                    .ToArray();

                // GetOrAdd keeps an entry that another caller may have cached in the meantime.
                allocation = AllocatedAgents.GetOrAdd(
                    ownerId,
                    new Tuple<AllocateDownloaderMessage, string[]>(allocateMessage, allocatedAgentIds));
            }

            var allocateDownloaderMessage = allocation.Item1;
            var agentIds = allocation.Item2;

            if (agentIds.Length <= 0)
            {
                Logger.LogError($"任务 {ownerId} 未分配到下载器代理");
                // With no allocated agent ids the polling below can never find an online agent.
                return;
            }

            var onlineAgents = new List<DownloaderAgent>();

            // Determine whether the allocated downloader agents are active. After a download
            // center switch the agent data is synchronized through the database (every 15
            // seconds), so give up only if two synchronizations still yield no usable agent.
            for (var attempt = 0; attempt < 35; ++attempt)
            {
                foreach (var agentId in agentIds)
                {
                    DownloaderAgent candidate;
                    if (Agents.TryGetValue(agentId, out candidate) &&
                        (DateTime.Now - candidate.LastModificationTime).TotalSeconds < 12)
                    {
                        onlineAgents.Add(candidate);
                    }
                }

                if (onlineAgents.Count > 0)
                {
                    break;
                }

                // Wait without blocking a thread-pool thread.
                await Task.Delay(1000);
            }

            if (onlineAgents.Count == 0)
            {
                // Simply return: the spider gets no response and triggers its own
                // no-response exit event.
                Logger.LogError($"任务 {ownerId} 分配的下载器代理都已下线");
                return;
            }

            switch (allocateDownloaderMessage.DownloadPolicy)
            {
            case DownloadPolicy.Random:
            {
                foreach (var request in requests)
                {
                    await PublishDownloadRequestAsync(onlineAgents.Random().Id, request);
                }

                break;
            }

            case DownloadPolicy.Chained:
            {
                foreach (var request in requests)
                {
                    if (string.IsNullOrWhiteSpace(request.AgentId))
                    {
                        // No agent bound to the request yet: pick any online agent.
                        await PublishDownloadRequestAsync(onlineAgents.Random().Id, request);
                        continue;
                    }

                    // The request names an agent: it may only go to that agent.
                    var agent = onlineAgents.FirstOrDefault(x => x.Id == request.AgentId);
                    if (agent == null)
                    {
                        Logger.LogError($"任务 {ownerId} 分配的下载器代理 {request.AgentId} 已下线");
                    }
                    else
                    {
                        await PublishDownloadRequestAsync(agent.Id, request);
                    }
                }

                break;
            }
            }
        }

        /// <summary>
        /// Publishes a single download request to the queue of the given agent, using
        /// <c>Dmq</c> when it is set and the plain-text <c>Mq</c> message format otherwise.
        /// </summary>
        /// <param name="agentId">Identifier of the agent, used as the publish target.</param>
        /// <param name="request">Request to send; it is wrapped in a one-element array.</param>
        private async Task PublishDownloadRequestAsync(string agentId, Request request)
        {
            var batch = new[] { request };
            if (Dmq == null)
            {
                var json    = JsonConvert.SerializeObject(batch);
                var message = $"|{Framework.DownloadCommand}|{json}";
                await Mq.PublishAsync(agentId, message);
            }
            else
            {
                await Dmq.PublishAsync(agentId, Framework.DownloadCommand, batch);
            }
        }