Exemplo n.º 1
0
        /// <summary>
        /// Takes the next request from the scheduler, downloads it, runs the spider over the
        /// response and dispatches the spider's output: follow-up requests are pushed back to the
        /// scheduler and the extracted item is passed through each configured pipeline in order.
        /// </summary>
        /// <returns>
        /// A tuple of the processed request, its response and the extracted item (the item may be
        /// <c>null</c>). Returns <c>null</c> when the scheduler had no request, when the downloader
        /// returned no response, or when any step threw; in the last case the failure is logged
        /// and the request is added to <c>_uncompleted</c>.
        /// </returns>
        private async Task<Tuple<Request, Response, Item>> NextRequestFromScheduler()
        {
            var request = _scheduler.Pop();
            if (request == null)
            {
                return null;
            }

            try
            {
                // Registered inside the try so the finally block below always balances it.
                _actives.Add(request);

                var response = await _downloader.AsyncFetch(request, _spider);
                if (response == null)
                {
                    return null;
                }

                var spiderResponse = _spider.Extract(response);
                if (spiderResponse == null)
                {
                    // Previously this surfaced as a bare NullReferenceException. Throwing here
                    // keeps the same outcome (logged with the URL, request marked uncompleted)
                    // but with a message that says what actually went wrong.
                    throw new InvalidOperationException("Spider returned no result for the response.");
                }

                var tasks = new List<Task>();

                if (spiderResponse.NewRequests != null)
                {
                    // Re-queue discovered links one level deeper than the page they came from.
                    tasks.Add(Task.Run(() =>
                    {
                        foreach (var newRequest in spiderResponse.NewRequests)
                        {
                            newRequest.Deep = request.Deep + 1;
                            _scheduler.Push(newRequest);
                        }
                    }));
                }

                if (_piplelines != null && _piplelines.Length > 0 && spiderResponse.Item != null)
                {
                    // Pipelines run one after another, concurrently with the re-queueing above.
                    tasks.Add(Task.Run(async () =>
                    {
                        foreach (var pipeline in _piplelines)
                        {
                            await pipeline.Process(spiderResponse.Item, _spider);
                        }
                    }));
                }

                if (tasks.Count > 0)
                {
                    await Task.WhenAll(tasks);
                }

                return Tuple.Create(request, response, spiderResponse.Item);
            }
            catch (Exception ex)
            {
                // Best effort: a failing request must not take the engine down. Record it and move on.
                _logger.Error(new EngineException(ex.Message + "\n URL:" + request.Url, ex));
                _uncompleted.Add(request);
                return null;
            }
            finally
            {
                // Take into a throwaway local instead of overwriting `request`, which is captured
                // by the lambda above and used for the return value.
                // NOTE(review): TryTake is given no way to say which element to remove, so the
                // element taken is not necessarily this request. That is fine if _actives only
                // serves as an in-flight counter - confirm against its declaration.
                Request finished;
                _actives.TryTake(out finished);
            }
        }