示例#1
0
        /// <summary>
        /// Copies the downloaded content onto <paramref name="page"/> and, when that content
        /// is HTML, loads it into an <c>HtmlDocument</c> and collects the links found in it.
        /// </summary>
        /// <param name="downloaderResult">Download result whose <c>Content</c> (bytes, HTML flag, HTML text) is read.</param>
        /// <param name="page">Page that receives <c>Data</c>, <c>IsHtml</c>, <c>Html</c> and, for HTML, <c>HtmlDoc</c> and <c>Links</c>.</param>
        /// <param name="crawlerTask">Task whose <c>BaseUri</c> is handed to the link manager.</param>
        /// <returns>
        /// <c>true</c> when the content is HTML and was parsed without an exception;
        /// <c>false</c> when the content is not HTML or parsing/link extraction threw.
        /// </returns>
        private bool ParseDownloadedPage(DownloaderResult downloaderResult, CrawlerPage page, CrawlerTask crawlerTask)
        {
            var content = downloaderResult.Content;

            // The raw payload is always stored on the page, HTML or not.
            page.Data   = content.Bytes;
            page.IsHtml = content.IsHtmlContent;
            page.Html   = content.HtmlText;

            if (page.IsHtml)
            {
                try
                {
                    // Assign the document before loading so the page keeps the (possibly
                    // partially loaded) instance even if LoadHtml throws.
                    page.HtmlDoc = new HtmlDocument();
                    page.HtmlDoc.LoadHtml(content.HtmlText);

                    page.Links = _webPageLinkManager.GetAllLinks(crawlerTask.BaseUri, page.HtmlDoc);

                    return true;
                }
                catch (Exception parseError)
                {
                    // Parsing problems are logged and reported via the return value, not rethrown.
                    Log.Warn()
                       .Message("Error while process [{0}]", downloaderResult.Uri.AbsoluteUri)
                       .Exception(parseError)
                       .Write();
                }
            }

            return false;
        }
示例#2
0
        /// <summary>
        /// Performs a synchronous HTTP request for <paramref name="uri"/> and packs the outcome
        /// (response parameters, extracted content and any exception) into a <c>DownloaderResult</c>.
        /// </summary>
        /// <param name="uri">Address to request.</param>
        /// <returns>
        /// A result created for <paramref name="uri"/>. Exceptions raised while requesting or while
        /// reading the response are recorded on the result via <c>SetException</c> and logged
        /// instead of being thrown.
        /// </returns>
        private DownloaderResult MakeRequest(Uri uri)
        {
            var             result   = new DownloaderResult(uri);
            HttpWebResponse response = null;

            try
            {
                HttpWebRequest request = BuildRequestObject(uri);
                response = (HttpWebResponse)request.GetResponse();
                ProcessResponseObject(response);
            }
            catch (WebException e)
            {
                result.SetException(e);

                // A WebException may still carry the server's response (e.g. an error status);
                // keep it so its parameters and body are captured below.
                if (e.Response != null)
                {
                    response = (HttpWebResponse)e.Response;
                }

                Log.Debug()
                .Message("Error occurred requesting url [{0}]", uri.AbsoluteUri)
                .Exception(e)
                .Write();
            }
            catch (Exception e)
            {
                result.SetException(e);

                Log.Debug()
                .Message("Error occurred requesting url [{0}]", uri.AbsoluteUri)
                .Exception(e)
                .Write();
            }
            finally
            {
                try
                {
                    try
                    {
                        // NOTE(review): response is null here when no response was obtained;
                        // it is passed through unchanged — confirm both callees tolerate null.
                        result.SetResponseParams(response);
                        result.SetResponseData(_contentExtractor.GetContent(response));
                    }
                    finally
                    {
                        // Always release the response, even when reading its parameters or
                        // content throws; otherwise the underlying connection would leak.
                        response?.Close();
                    }
                }
                catch (Exception e)
                {
                    result.SetException(e);

                    Log.Info()
                    .Message("Error occurred finalizing requesting url [{0}]", uri.AbsoluteUri)
                    .Exception(e)
                    .Write();
                }
            }

            return result;
        }
示例#3
0
        /// <summary>
        /// Copies the outcome of another result for the same URI into this instance and links
        /// this instance's completion to the completion of <paramref name="result"/>.
        /// </summary>
        /// <param name="result">
        /// Source result. Debug builds assert that its <c>Uri</c> equals this instance's
        /// <c>Uri</c> and that this instance has not completed yet.
        /// </param>
        internal void CopyFrom(DownloaderResult result)
        {
            Debug.Assert(Uri.Equals(result.Uri));
            Debug.Assert(!_waitCompleteTsc.Task.IsCompleted);

            WebException = result.WebException;
            HasError     = result.HasError;

            Content         = result.Content;
            DownloadTimeout = result.DownloadTimeout;

            // Mirror the source task's final state. Reading t.Result on a faulted or cancelled
            // task throws inside the continuation, which would leave _waitCompleteTsc
            // incomplete forever, so those two states are forwarded explicitly.
            result.WaitCompliteTask.ContinueWith(t =>
            {
                if (t.IsFaulted)
                {
                    _waitCompleteTsc.TrySetException(t.Exception.InnerExceptions);
                }
                else if (t.IsCanceled)
                {
                    _waitCompleteTsc.TrySetCanceled();
                }
                else
                {
                    _waitCompleteTsc.TrySetResult(t.Result);
                }
            });
        }
示例#4
0
        /// <summary>
        /// Creates a result for <paramref name="uri"/>. If the link storage already holds content
        /// for that URI the result is filled with it; otherwise the result is added to the queue.
        /// </summary>
        /// <param name="uri">Address to download.</param>
        /// <returns>The newly created result, either filled from storage or queued.</returns>
        public DownloaderResult AddToDownloadQueue(string uri)
        {
            var downloaderResult = new DownloaderResult(uri);

            bool alreadyStored = _linkDataStorage.TryGetLinkContent(uri, out var storedContent);
            if (alreadyStored)
            {
                // Content is already stored: fill the result directly and skip the queue.
                downloaderResult.SetResponseData(storedContent);
            }
            else
            {
                _queue.Add(downloaderResult);
            }

            return downloaderResult;
        }