Beispiel #1
0
        /// <summary>
        /// Processes a single dequeued item: builds its response object, raises the
        /// item's OnDequeue callback (when one is set) and hands the item to the
        /// processing routine that matches its <c>ScraperType</c>.
        /// Any exception raised along the way is forwarded to
        /// <c>ExceptionHandlerOnDownloadData</c> together with the current retry count.
        /// </summary>
        /// <param name="runningId">Identifier passed through to the processing and error-handling routines.</param>
        /// <param name="item">The item to process.</param>
        /// <param name="retryCount">Retry counter passed through to the processing and error-handling routines.</param>
        private void GetDataFromWebServer(string runningId, ScraperDataWrapper item, int retryCount = 0)
        {
            try
            {
                // Build the response that accompanies every callback for this item.
                var dequeuedResponse = ScrapperMapper.ToResponse(item);

                // Tell the subscriber (if any) that the item has left the queue.
                item.OnDequeue?.Invoke(dequeuedResponse);

                var requestedType = item.ScraperType;

                if (requestedType == ScraperType.String)
                {
                    ProcessAsHtml(runningId, item, dequeuedResponse, retryCount);
                }
                else if (requestedType == ScraperType.Binary)
                {
                    ProcessAsBinary(runningId, item, dequeuedResponse, retryCount);
                }
                else
                {
                    // Unknown type: thrown here and routed through the catch block below.
                    throw new Exception("ScraperType " + requestedType + " not valid");
                }
            }
            catch (Exception failure)
            {
                ExceptionHandlerOnDownloadData(failure, item, runningId, retryCount);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Handles an exception raised while processing <paramref name="item"/>.
        /// A <see cref="WebException"/> is retried after a fixed pause for as long as
        /// <paramref name="retryCount"/> is below <c>MaxRetryCount</c>. Every other
        /// failure, or a <see cref="WebException"/> whose retries are exhausted, is
        /// reported through the item's OnThrownException callback (when one is set)
        /// and the item is then removed from the running collection.
        /// </summary>
        /// <param name="ex">The exception that was caught.</param>
        /// <param name="item">The item whose processing failed.</param>
        /// <param name="runningId">Key of the item in the running collection.</param>
        /// <param name="retryCount">Number of retries already performed for this item.</param>
        private void ExceptionHandlerOnDownloadData(Exception ex, ScraperDataWrapper item, string runningId, int retryCount)
        {
            // Pause between two attempts after a web failure.
            const int retryDelayMilliseconds = 2000;

            var response = ScrapperMapper.ToResponse(item);

            response.Exception = ex;

            if (ex is WebException && retryCount < MaxRetryCount)
            {
                Thread.Sleep(retryDelayMilliseconds);
                GetDataFromWebServer(runningId, item, retryCount + 1);
                return;
            }

            // Terminal failure: notify the subscriber if there is one...
            item.OnThrownException?.Invoke(response);

            // ...and always release the running slot. Previously this only happened
            // when a callback was registered, so items without an error callback were
            // never removed from the running collection.
            RemoveItemFromRunningCollection(item, runningId);
        }
Beispiel #3
0
        /// <summary>
        /// Enqueues an item to be scraped. Items are grouped into one queue per domain
        /// (the lower-cased authority of the URL). The first item for a domain creates
        /// that domain's queue and starts a task that consumes it.
        /// </summary>
        /// <exception cref="ArgumentNullException">If <paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// If Url or OnDataArrived is not provided, or Url is not a valid absolute URL.
        /// </exception>
        /// <param name="data">Item to scrape.</param>
        public void Enqueue(ScraperData data)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            if (string.IsNullOrWhiteSpace(data.Url))
            {
                throw new ArgumentException("URL is required.");
            }

            if (data.OnDataArrived == null)
            {
                throw new ArgumentException("OnDataArrived is required.");
            }

            Uri uri;

            // Only absolute URLs have an authority; a relative URL would make
            // uri.Authority throw InvalidOperationException further down, so it is
            // rejected here as an invalid argument instead.
            if (!Uri.TryCreate(data.Url, UriKind.Absolute, out uri))
            {
                // The message is built explicitly: the two-argument ArgumentException
                // constructor takes a parameter name, not a format argument.
                throw new ArgumentException("URL '" + data.Url + "' is invalid", nameof(data));
            }

            // The domain is the queue key; lower-case it culture-invariantly so the
            // key does not depend on the current thread culture.
            var domain = uri.Authority.ToLowerInvariant();

            // If Enqueue were called in parallel without a lock, several consumer
            // tasks could be started for the same domain. The lock prevents that.
            lock (LockerObj)
            {
                ConcurrentQueue<ScraperDataWrapper> existingQueue;

                // A queue for this domain already exists: just append to it.
                if (Queues.TryGetValue(domain, out existingQueue))
                {
                    existingQueue.Enqueue(ScrapperMapper.ToWrapper(data, domain, uri));
                    return;
                }

                var newQueue = new ConcurrentQueue<ScraperDataWrapper>();
                newQueue.Enqueue(ScrapperMapper.ToWrapper(data, domain, uri));

                if (!Queues.TryAdd(domain, newQueue) && !Queues.ContainsKey(domain))
                {
                    throw new Exception("Unexpected error when try to create a new Queue for domain " + domain);
                }

                // Start a consumer task for the new queue.
                var consumer = Task.Factory.StartNew(() => ConsumeFromQueue(domain, newQueue));

                if (!_queueThreads.TryAdd(domain, consumer) && !_queueThreads.ContainsKey(domain))
                {
                    throw new Exception("Unexpected error when try to add a task of queue on QueueThreads for domain " + domain);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Removes the entry stored under <paramref name="key"/> from the running
        /// collection. A failed removal is retried immediately until
        /// <paramref name="retryCount"/> reaches <c>MaxRetryCount</c>; after that the
        /// failure is reported through the item's OnThrownException callback
        /// (when one is set).
        /// </summary>
        /// <param name="item">Item the running entry belongs to; used to build the error response.</param>
        /// <param name="key">Key of the entry in the running collection.</param>
        /// <param name="retryCount">Number of removal attempts already made.</param>
        private void RemoveItemFromRunningCollection(ScraperData item, string key, int retryCount = 0)
        {
            string removedValue;
            var failureResponse = ScrapperMapper.ToResponse(item);

            // Nothing more to do once the entry is gone.
            if (Running.TryRemove(key, out removedValue))
            {
                return;
            }

            // Removal failed: try again while attempts remain.
            if (retryCount < MaxRetryCount)
            {
                RemoveItemFromRunningCollection(item, key, retryCount + 1);
                return;
            }

            // Out of attempts: report the failure to the subscriber.
            failureResponse.Exception = new Exception("The scraper data response cannot be deleted from running collection.");
            item.OnThrownException?.Invoke(failureResponse);
        }