Beispiel #1
0
 /// <summary>
 /// Connects the underlying socket to <c>_socketUrl</c>. Does nothing when the socket
 /// already reports <see cref="WebSocketState.Open"/>.
 /// </summary>
 /// <param name="cancellationToken">Token forwarded to the socket's connect call.</param>
 public async Task ConnectAsync(CancellationToken cancellationToken = default)
 {
     if (_socket.State == WebSocketState.Open)
     {
         return;
     }

     await _socket.ConnectAsync(_socketUrl, cancellationToken);

     // Log only once the connect call has completed, so a failed or cancelled
     // attempt is never reported as a successful connection.
     _logger?.LogInfo("Websocket Connected");
 }
Beispiel #2
0
        /// <summary>
        /// Handles one text message. Messages that are null, shorter than three characters or
        /// not prefixed with '!' are treated as plain log output and routed to the logger by
        /// their "Error"/"Warn" prefix. All other messages have the leading '!' characters
        /// stripped, are deserialized, and are dispatched to the matching event by their type.
        /// </summary>
        /// <param name="message">The raw message text; may be null.</param>
        public void ProcessMessage(string message)
        {
            if (message is null || message.Length < 3 || message[0] != '!')
            {
                message ??= "<null>";
                if (message.StartsWith("Error", StringComparison.InvariantCultureIgnoreCase))
                {
                    _logger?.LogError(message);
                }
                else if (message.StartsWith("Warn", StringComparison.InvariantCultureIgnoreCase))
                {
                    _logger?.LogWarn(message);
                }
                else
                {
                    _logger?.LogInfo(message);
                }
                return;
            }

            message = message.TrimStart('!');
            try {
                var responseBase = Deserialize <CrawlerResponseBase>(message);

                // Guard against Deserialize handing back null (e.g. for a JSON `null` payload):
                // a null result falls through to the "unknown type" branch instead of raising a
                // NullReferenceException that the JsonException handler below would not catch.
                switch (responseBase?.Type)
                {
                case "eot": OnEot?.Invoke(); break;

                case "status": OnStatus?.Invoke(Deserialize <CrawlerResponseStatus>(message)); break;

                case "edges": OnEdges?.Invoke(Deserialize <CrawlerResponseEdges>(message)); break;

                case "node": OnNode?.Invoke(Deserialize <CrawlerResponseNode>(message)); break;

                default: _logger?.LogWarn($"Unknown Message Type `{responseBase?.Type ?? "<null>"}`"); break;
                }
            }
            catch (JsonException ex) { _logger?.LogWarn($"invalid json: {message}", ex); }
        }
Beispiel #3
0
        FetchUrlsAsync(IEnumerable <string> urls, string urlFilter, bool screenShots, int maxRetries = 10,
                       CancellationToken cancellationToken = default)
        {
            // Queues the given URLs (internal links are not followed), sends the config over a
            // fresh socket and collects the edges and nodes reported back until an "eot" arrives.
            // A failed attempt is retried with a new socket; once the retry budget (maxRetries)
            // is used up the exception of the last attempt is rethrown.
            var config = new CrawlerConfig {
                TakeScreenShots     = screenShots,
                FollowInternalLinks = false,
                UrlFilter           = urlFilter
            };

            foreach (var url in urls)
            {
                config.RequestQueue.Add(url);
            }

            // 110% of the queued URLs, rounded up. The previous `Count * 110` yielded 110 times
            // the count rather than the intended 10% headroom.
            config.MaxRequestsPerCrawl = (config.RequestQueue.Count * 110 + 99) / 100; // count + 10%

            var edges = new List <CrawlerResponseEdge>();
            var nodes = new List <CrawlerResponseNode>();
            var eot   = false;

            void OnEotAction()
            {
                eot = true;
            }

            void OnEdgesAction(CrawlerResponseEdges newEdges)
            {
                // Collect the edges carried by the incoming message; the previous code appended
                // the result list to itself and silently dropped every received edge.
                // NOTE(review): assumes CrawlerResponseEdges exposes its items as `Edges` - confirm.
                edges.AddRange(newEdges.Edges);
            }

            void OnNodeAction(CrawlerResponseNode newNode)
            {
                nodes.Add(newNode);
            }

            void OnStatusAction(CrawlerResponseStatus crawlerStatus = null)
            {
                var statusMsg = $"Crawler3Client Status: {nodes.Count}/{config.RequestQueue.Count} Nodes";

                _logger?.LogInfo(statusMsg);
            }

            while (!cancellationToken.IsCancellationRequested && !eot)
            {
                try {
                    using var socket = _websocketClientFactory();
                    try {
                        // Every attempt starts from scratch, so drop partial results of a failed run.
                        edges.Clear();
                        nodes.Clear();
                        socket.OnNode   += OnNodeAction;
                        socket.OnEot    += OnEotAction;
                        socket.OnEdges  += OnEdgesAction;
                        socket.OnStatus += OnStatusAction;
                        await socket.SendAsync(config, cancellationToken);

                        var exception = await socket.ReceiveAllAsync(cancellationToken : cancellationToken);

                        // An error reported after "eot" was already received does not invalidate the result.
                        if (exception != null && !eot)
                        {
                            throw exception;
                        }
                    }
                    finally {
                        socket.OnNode   -= OnNodeAction;
                        socket.OnEot    -= OnEotAction;
                        socket.OnEdges  -= OnEdgesAction;
                        socket.OnStatus -= OnStatusAction;
                    }
                }
                catch (Exception ex) {
                    maxRetries--;
                    if (maxRetries <= 0)
                    {
                        throw;
                    }
                    // The logger is optional everywhere else in this method; keep it optional here
                    // so a missing logger cannot turn a retry into a NullReferenceException.
                    _logger?.LogInfo("Retrying", ex);
                }
            }

            return(edges, nodes);
        }