Esempio n. 1
0
        /// <summary>
        /// Serializes <paramref name="config"/> with the JSON processor and hands the
        /// serialized message to the message-based <c>SendAsync</c> overload.
        /// </summary>
        /// <param name="config">Crawler configuration to serialize and send.</param>
        /// <param name="cancellationToken">Token forwarded to the underlying send.</param>
        /// <returns>A task that completes when the underlying send has completed.</returns>
        public async Task SendAsync(CrawlerConfig config, CancellationToken cancellationToken = default)
        {
            await SendAsync(_jsonProcessor.Serialize(config), cancellationToken);
        }
Esempio n. 2
0
        FetchUrlsAsync(IEnumerable<string> urls, string urlFilter, bool screenShots, int maxRetries = 10,
                       CancellationToken cancellationToken = default)
        {
            // Sends a crawler configuration containing exactly the given URLs (internal links are
            // not followed) over a websocket created by _websocketClientFactory and collects the
            // edges and nodes reported through the socket's OnEdges / OnNode events.
            //
            // Every attempt uses a fresh socket and starts from empty result lists. A failed attempt
            // is retried until maxRetries is used up, at which point the last exception is rethrown.
            // The loop also stops once cancellation is requested; whatever was collected is returned.
            var config = new CrawlerConfig {
                TakeScreenShots     = screenShots,
                FollowInternalLinks = false,
                UrlFilter           = urlFilter
            };

            foreach (var url in urls)
            {
                config.RequestQueue.Add(url);
            }

            // Request budget: queue size plus 10%, rounded up. Multiplying by 110 (as before)
            // produced 110x the queue size rather than 110% of it.
            var queuedCount = config.RequestQueue.Count;
            config.MaxRequestsPerCrawl = queuedCount + (queuedCount + 9) / 10;

            var edges = new List<CrawlerResponseEdge>();
            var nodes = new List<CrawlerResponseNode>();
            var eot   = false;

            // Marks the end of the transmission so the receive loop below terminates.
            void OnEotAction()
            {
                eot = true;
            }

            void OnEdgesAction(CrawlerResponseEdges newEdges)
            {
                // Append the edges carried by the incoming message; appending the list to itself
                // ignored the message and left the result permanently empty.
                // NOTE(review): assumes CrawlerResponseEdges exposes its items through an
                // `Edges` collection — confirm against that type's definition.
                if (newEdges?.Edges != null)
                {
                    edges.AddRange(newEdges.Edges);
                }
            }

            void OnNodeAction(CrawlerResponseNode newNode)
            {
                nodes.Add(newNode);
            }

            // The status payload itself is not used; the event only triggers a progress log line.
            void OnStatusAction(CrawlerResponseStatus crawlerStatus = null)
            {
                var statusMsg = $"Crawler3Client Status: {nodes.Count}/{config.RequestQueue.Count} Nodes";

                _logger?.LogInfo(statusMsg);
            }

            while (!cancellationToken.IsCancellationRequested && !eot)
            {
                try {
                    using var socket = _websocketClientFactory();
                    try {
                        // Drop partial results from a previous failed attempt.
                        edges.Clear();
                        nodes.Clear();
                        socket.OnNode   += OnNodeAction;
                        socket.OnEot    += OnEotAction;
                        socket.OnEdges  += OnEdgesAction;
                        socket.OnStatus += OnStatusAction;
                        await socket.SendAsync(config, cancellationToken);

                        var exception = await socket.ReceiveAllAsync(cancellationToken : cancellationToken);

                        // An error reported after the end-of-transmission marker is ignored:
                        // the result set is already complete at that point.
                        if (exception != null && !eot)
                        {
                            throw exception;
                        }
                    }
                    finally {
                        // Always detach the handlers so the disposed socket holds no references
                        // to the local functions.
                        socket.OnNode   -= OnNodeAction;
                        socket.OnEot    -= OnEotAction;
                        socket.OnEdges  -= OnEdgesAction;
                        socket.OnStatus -= OnStatusAction;
                    }
                }
                catch (Exception ex) {
                    maxRetries--;
                    if (maxRetries <= 0)
                    {
                        throw;
                    }

                    // The logger is optional (see OnStatusAction), so guard against null here too.
                    _logger?.LogInfo("Retrying", ex);
                }
            }

            return (edges, nodes);
        }
Esempio n. 3
0
 /// <summary>
 /// Synchronous counterpart of <c>SendAsync</c>: starts the asynchronous send and
 /// blocks the calling thread until it has finished.
 /// </summary>
 /// <param name="config">Crawler configuration passed on to <c>SendAsync</c>.</param>
 /// <param name="cancellationToken">Token passed on to <c>SendAsync</c>.</param>
 public void Send(CrawlerConfig config, CancellationToken cancellationToken = default)
 {
     var pendingSend = SendAsync(config, cancellationToken);

     // GetAwaiter().GetResult() blocks until the task completes.
     pendingSend.GetAwaiter().GetResult();
 }