Beispiel #1
0
        /// <summary>
        /// Marks every currently waiting website as crawled, then processes each one
        /// (download, persist, flush) on a background task. <c>SemaphoreDownload</c>
        /// gates how many processing tasks may be in flight at the same time.
        /// </summary>
        /// <remarks>
        /// Exceptions thrown while processing a single website are logged and swallowed
        /// so the remaining websites are still handled. An exception thrown while saving
        /// the "crawled" flag propagates to the caller; the semaphore slot taken for that
        /// website is released first so it is not lost.
        /// </remarks>
        /// <returns>A task that completes once all started processing tasks have finished.</returns>
        private async Task ProcessAllWaitingWebsites()
        {
            var allTasks = new List<Task>();

            // Iterate over a copy of the waiting list rather than the live collection.
            var websites = _waitingWebsites.ToList();

            foreach (var waitingWebsite in websites)
            {
                _emptyRunsCounter = 0;
                await SemaphoreDownload.WaitAsync();

                try
                {
                    waitingWebsite.Crawled          = true;
                    waitingWebsite.CrawledTimestamp = DateTime.Now;
                    await _context.SaveChangesAsync();
                }
                catch
                {
                    // The worker task that normally releases the slot has not been
                    // started yet, so hand the slot back here before propagating.
                    SemaphoreDownload.Release();
                    throw;
                }

                allTasks.Add(
                    Task.Run(async () =>
                    {
                        try
                        {
                            _logger.LogInformation($"Processing webpage: {waitingWebsite.Url}");

                            _downloadStrategy.Download(waitingWebsite);

                            await _entityManager.Persist(waitingWebsite);
                            await _entityManager.Flush();
                        }
                        catch (Exception e)
                        {
                            // Pass the exception itself so type and stack trace are logged too.
                            _logger.LogError(e, e.Message);
                        }
                        finally
                        {
                            // Always free the slot acquired before this task was started.
                            SemaphoreDownload.Release();
                        }
                    }));
            }

            try
            {
                await Task.WhenAll(allTasks);
            }
            catch (Exception e)
            {
                _logger.LogError(e, e.Message);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Parses <paramref name="sources"/> into individual URLs, downloads each URL in
        /// parallel into the configured local directory and records one
        /// <c>DownloadedFile</c> entry per successful download.
        /// </summary>
        /// <param name="sources">
        /// Delimited list of source URLs. The delimiter comes from the "Delimiter" app
        /// setting and falls back to "," when that setting is blank.
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="sources"/> is null, empty or whitespace.
        /// </exception>
        /// <exception cref="AggregateException">
        /// One or more URLs failed; contains every exception collected during the run.
        /// </exception>
        /// <remarks>
        /// <c>repoDownloadedFile</c> is disposed when this method leaves its using block.
        /// </remarks>
        public void Process(string sources)
        {
            //TODO : Move this logic to separate windows service and make the service listen on Message Queue like Rabbitmq

            logger.AddInformationLog($"sources value: {sources}");

            // Reject blank input before touching configuration or the parser.
            if (string.IsNullOrWhiteSpace(sources))
            {
                throw new ArgumentException("The sources Is null or empty string.");
            }

            string configuredDelimiter = ConfigurationManager.AppSettings["Delimiter"];
            logger.AddInformationLog($"Delimiter config value: {configuredDelimiter}");

            // A blank setting means "use a comma".
            string effectiveDelimiter = configuredDelimiter;
            if (string.IsNullOrWhiteSpace(effectiveDelimiter))
            {
                effectiveDelimiter = ",";
            }
            logger.AddInformationLog($"Sources parsing delimiter: {effectiveDelimiter}");

            string targetDirectory = ConfigurationManager.AppSettings["LocalDirectory"];
            logger.AddInformationLog($"LocalDirectory config value: {targetDirectory}");

            List<string> parsedUrls = parser.Parse(sources, effectiveDelimiter).ToList();
            logger.AddInformationLog($"sourses Urls counts after parsing: {parsedUrls.Count}");

            // Failures are collected per URL so one bad source does not abort the others.
            var failures = new ConcurrentQueue<Exception>();

            using (repoDownloadedFile)
            {
                Parallel.ForEach(parsedUrls, sourceUrl =>
                {
                    try
                    {
                        IDownloadStrategy strategy = downloadStrategyFactory.Build(sourceUrl);
                        string downloadedTo = strategy.Download(sourceUrl, targetDirectory);

                        // Stored path: the local directory with forward slashes plus the downloaded file's name.
                        string normalizedDirectory = targetDirectory.Replace('\\', '/');
                        string downloadedFileName  = Path.GetFileName(downloadedTo);
                        string storedPath          = $"{normalizedDirectory}/{downloadedFileName}";
                        logger.AddInformationLog($"Local path is: {targetDirectory} for URL: {sourceUrl} ");

                        var record = new DomainModels.DownloadedFile()
                        {
                            FileRemotePath   = sourceUrl,
                            LocalPath        = storedPath,
                            ProcessingStatus = (int)DownloadStatutes.ReadyForProcessing,
                        };
                        repoDownloadedFile.Add(record);
                        repoDownloadedFile.SaveChanges();

                        logger.AddInformationLog($"Source :{sourceUrl} saved to the database with status ready for processing");
                    }
                    catch (Exception failure)
                    {
                        failures.Enqueue(failure);
                        logger.AddErrorLog(failure);
                    }
                });

                if (!failures.IsEmpty)
                {
                    throw new AggregateException(failures);
                }
            }
        }
Beispiel #3
0
 /// <summary>
 /// Downloads <paramref name="toDownload"/> by delegating to the supplied strategy.
 /// </summary>
 /// <param name="strategy">Strategy whose <c>Download</c> method performs the download.</param>
 /// <param name="toDownload">The URL handed to the strategy.</param>
 /// <returns>The status-code/string pair produced by the strategy, passed through unchanged.</returns>
 public (IStatusCode, string) Download(IDownloadStrategy strategy, IUrl toDownload)
 {
     return strategy.Download(toDownload);
 }