コード例 #1
0
ファイル: UriParser.cs プロジェクト: ChrisCarrAu/webcrawler
        /// <summary>
        /// Downloads the resource at the anchor's URI and, when the response is textual,
        /// publishes every link found in it to the registered observers.
        /// </summary>
        /// <remarks>
        /// The response headers are stored on <c>anchor.Headers</c>. A <see cref="WebException"/>
        /// or <see cref="ArgumentException"/> is logged, forwarded to every observer through
        /// <c>OnError</c> and stored on <c>anchor.Exception</c> rather than rethrown. Links that
        /// cannot be resolved against the anchor's URI are logged and skipped. <c>OnCompleted</c>
        /// is sent to every observer when the crawl ends, whether or not it succeeded.
        /// </remarks>
        /// <param name="anchor">The link to crawl; every discovered link gets it as <c>Parent</c>.</param>
        /// <returns>A task that completes once this anchor has been crawled.</returns>
        public async Task Crawl(Anchor anchor)
        {
            try
            {
                _logger.LogInformation($">>> Start Crawling {anchor.Uri.ToString()}");

                // This first request is only needed for its response headers; the payload is discarded.
                // NOTE(review): text resources are fetched a second time below. Decoding this
                // payload instead would halve the traffic, but needs charset handling - confirm
                // against the concrete type of _webClient before changing.
                await _webClient.DownloadDataTaskAsync(anchor.Uri);

                anchor.Headers = _webClient.ResponseHeaders;

                // A response without a Content-Type header yields null here; treat it as non-text
                // instead of failing with a NullReferenceException.
                var contentType = anchor.Headers?["content-type"];
                var isText = contentType != null
                    && contentType.StartsWith(@"text/", StringComparison.OrdinalIgnoreCase);

                if (isText)
                {
                    var contents = await _webClient.DownloadStringTaskAsync(anchor.Uri);

                    var htmlDocument = new HtmlDocument();
                    htmlDocument.LoadHtml(contents);

                    foreach (var href in htmlDocument.AnchorReferences)
                    {
                        // A single malformed href must not abort the remaining links of the page,
                        // so resolve it without throwing.
                        Uri resolved;
                        if (!Uri.TryCreate(anchor.Uri, href, out resolved))
                        {
                            _logger.LogInformation($">>> Skipping malformed link '{href}' found on {anchor.Uri.ToString()}");
                            continue;
                        }

                        var node = new Anchor
                        {
                            Uri    = resolved,
                            Parent = anchor
                        };

                        _observers.ForEach(observer => observer.OnNext(node));
                    }
                }
            }
            catch (Exception e) when (e is WebException || e is ArgumentException)
            {
                // Download and argument failures are reported, not rethrown, so the caller can
                // carry on with other anchors.
                _logger.LogError($"{anchor.Uri.ToString()}");
                _logger.LogError($"{e.ToString()}");
                _observers.ForEach(observer => observer.OnError(e));
                anchor.Exception = e;
            }
            finally
            {
                // NOTE(review): OnCompleted is also sent after OnError; confirm the observers
                // tolerate both notifications for the same crawl.
                _logger.LogInformation($">>> Crawl Complete {anchor.Uri.ToString()}");
                _observers.ForEach(observer => observer.OnCompleted());
            }
        }