Esempio n. 1
0
        /// <summary>
        /// Completes this block: signals completion to the base block, cancels the shared
        /// cancellation token source, waits for the base block to finish, and then completes
        /// the <c>HtmlRenderers</c> and <c>FailedProcessingResults</c> outputs.
        /// </summary>
        /// <remarks>
        /// The output blocks are completed in a <c>finally</c> clause so that they are completed
        /// even when waiting on the base completion throws (e.g. because of the cancellation
        /// requested just above). Previously an exception from the wait skipped both calls,
        /// which could leave consumers of those outputs waiting for a completion that never came.
        /// Exceptions acknowledged as cancellation of this block's own token are swallowed;
        /// any other exception is logged and not rethrown.
        /// </remarks>
        public override void Complete()
        {
            try
            {
                try
                {
                    base.Complete();
                    _cancellationTokenSource.Cancel();

                    // Blocks the calling thread until the base block has finished.
                    base.Completion.Wait();
                }
                finally
                {
                    // Always release downstream consumers, whether or not the wait succeeded.
                    HtmlRenderers.Complete();
                    FailedProcessingResults.Complete();
                }
            }
            catch (Exception exception)
            {
                if (exception.IsAcknowledgingOperationCancelledException(_cancellationTokenSource.Token))
                {
                    return;
                }
                _log.Error($"One or more errors occurred while completing {nameof(HtmlRendererBlock)}.", exception);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Verifies <paramref name="resource"/> and decides whether it continues through the pipeline.
        /// </summary>
        /// <param name="resource">The resource to verify.</param>
        /// <returns>
        /// The same <paramref name="resource"/> when verification succeeds and its status is neither
        /// an orphaned URI nor an unsupported URI scheme; otherwise <c>null</c>. Every <c>null</c>
        /// outcome is accompanied by a failed processing result posted to <c>FailedProcessingResults</c>.
        /// </returns>
        protected override Resource Transform(Resource resource)
        {
            try
            {
                // Thrown inside the try on purpose: a null input is reported through the catch below.
                if (resource == null)
                {
                    throw new ArgumentNullException(nameof(resource));
                }

                var verified = _resourceVerifier.TryVerify(resource, _cancellationToken, out var verificationResult);
                if (!verified)
                {
                    ReportFailure();
                    _log.Info($"Failed to be verified {nameof(Resource)} was discarded: {JsonConvert.SerializeObject(resource)}.");
                    return null;
                }

                if (verificationResult.StatusCode == StatusCode.OrphanedUri)
                {
                    ReportFailure();
                    _log.Info($"{nameof(Resource)} with orphaned URL was discarded: {JsonConvert.SerializeObject(resource)}.");
                    return null;
                }

                if (verificationResult.StatusCode == StatusCode.UriSchemeNotSupported)
                {
                    ReportFailure();
                    _log.Info($"{nameof(Resource)} with unsupported scheme was discarded: {JsonConvert.SerializeObject(resource)}.");
                    return null;
                }

                // Statistics: count the URL as broken or valid based on the resource's status code.
                var isBroken = resource.StatusCode.IsWithinBrokenRange();
                if (isBroken)
                {
                    _statistics.IncrementBrokenUrlCount();
                }
                else
                {
                    _statistics.IncrementValidUrlCount();
                }

                // Publish the verification result.
                var resultPosted = VerificationResults.Post(verificationResult);
                if (!resultPosted)
                {
                    _log.Error($"Failed to post data to buffer block named [{nameof(VerificationResults)}].");
                }

                // Publish the "resource verified" event.
                var resourceVerifiedEvent = new Event
                {
                    EventType = EventType.ResourceVerified,
                    Message = $"{verificationResult.StatusCode:D} - {verificationResult.VerifiedUrl}"
                };
                var eventPosted = Events.Post(resourceVerifiedEvent);
                if (!eventPosted)
                {
                    _log.Error($"Failed to post data to buffer block named [{nameof(Events)}].");
                }

                return resource;
            }
            catch (Exception exception)
            {
                ReportFailure();
                _log.Error($"One or more errors occurred while verifying: {JsonConvert.SerializeObject(resource)}.", exception);
                return null;
            }

            // Posts a failed processing result for the current resource; logs when the post is declined.
            void ReportFailure()
            {
                var failure = new FailedProcessingResult { ProcessedResource = resource };
                if (FailedProcessingResults.Post(failure))
                {
                    return;
                }
                _log.Error($"Failed to post data to buffer block named [{nameof(FailedProcessingResults)}].");
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Renders the resource of the given pair with the paired HTML renderer and hands the
        /// renderer back to <c>HtmlRenderers</c> when done.
        /// </summary>
        /// <param name="htmlRendererAndResource">The renderer to use and the resource to render.</param>
        /// <returns>
        /// A <see cref="RenderingResult"/> on success; <c>null</c> when the resource is skipped,
        /// fails to render, or has a status code within the broken range after rendering. Each
        /// <c>null</c> outcome also posts a failed processing result.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="htmlRendererAndResource"/> is null.</exception>
        protected override RenderingResult Transform(Tuple <IHtmlRenderer, Resource> htmlRendererAndResource)
        {
            if (htmlRendererAndResource == null)
            {
                throw new ArgumentNullException(nameof(htmlRendererAndResource));
            }

            var htmlRenderer = htmlRendererAndResource.Item1;
            var resource = htmlRendererAndResource.Item2;

            try
            {
                // Size/type filtering only applies to resources that are not already broken.
                var isHealthy = !resource.StatusCode.IsWithinBrokenRange();
                if (isHealthy)
                {
                    var resourceSizeInMb = resource.Size / 1024f / 1024f;
                    var isTooBig = resourceSizeInMb > Configurations.RenderableResourceSizeInMb;
                    if (isTooBig)
                    {
                        return ReportFailure(
                            $"Resource was not queued for rendering because it was too big ({resourceSizeInMb} MB): {resource.ToJson()}",
                            LogLevel.Debug
                        );
                    }

                    var renderableTypes = ResourceType.Html | ResourceType.Unknown;
                    var typeIsRenderable = renderableTypes.HasFlag(resource.ResourceType);
                    var shouldRender = resource.IsInternal && resource.IsExtractedFromHtmlDocument && typeIsRenderable;
                    if (!shouldRender)
                    {
                        return ReportFailure(null, LogLevel.None);
                    }
                }

                var rendered = htmlRenderer.TryRender(
                    resource,
                    out var htmlText,
                    out var millisecondsPageLoadTime,
                    out var capturedResources,
                    _cancellationTokenSource.Token
                );

                if (!rendered)
                {
                    return ReportFailure(
                        $"Failed to render {nameof(Resource)}: {resource.ToJson()}",
                        LogLevel.Information
                    );
                }

                // The status code is read again after rendering rather than reusing the earlier check.
                if (resource.StatusCode.IsWithinBrokenRange())
                {
                    return ReportFailure(null, LogLevel.None);
                }

                return new RenderingResult
                {
                    RenderedResource = resource,
                    CapturedResources = capturedResources,
                    MillisecondsPageLoadTime = millisecondsPageLoadTime,
                    HtmlDocument = new HtmlDocument { Uri = resource.Uri, HtmlText = htmlText }
                };
            }
            catch (Exception exception) when(!exception.IsAcknowledgingOperationCancelledException(_cancellationTokenSource.Token))
            {
                return ReportFailure(
                    $"One or more errors occurred while rendering: {resource.ToJson()}.",
                    LogLevel.Error,
                    exception
                );
            }
            finally
            {
                // Hand the renderer back; a declined post is only an error if the block is not completed.
                var rendererReturned = HtmlRenderers.Post(htmlRenderer);
                if (!rendererReturned && !HtmlRenderers.Completion.IsCompleted)
                {
                    _log.Error($"Failed to post data to buffer block named [{nameof(HtmlRenderers)}].");
                }
            }

            // Posts a failed processing result for the current resource, writes the optional log
            // entry at the requested level, and always yields null so callers can return it directly.
            RenderingResult ReportFailure(string logMessage, LogLevel logLevel, Exception exception = null)
            {
                var failure = new FailedProcessingResult { ProcessedResource = resource };
                var failurePosted = FailedProcessingResults.Post(failure);
                if (!failurePosted && !FailedProcessingResults.Completion.IsCompleted)
                {
                    _log.Error($"Failed to post data to buffer block named [{nameof(FailedProcessingResults)}].");
                }

                switch (logLevel)
                {
                    case LogLevel.None:
                        break;

                    case LogLevel.Trace:
                    case LogLevel.Debug:
                        _log.Debug(logMessage, exception);
                        break;

                    case LogLevel.Information:
                        _log.Info(logMessage, exception);
                        break;

                    case LogLevel.Warning:
                        _log.Warn(logMessage, exception);
                        break;

                    case LogLevel.Error:
                        _log.Error(logMessage, exception);
                        break;

                    case LogLevel.Critical:
                        _log.Fatal(logMessage, exception);
                        break;

                    default:
                        throw new ArgumentOutOfRangeException(nameof(logLevel), logLevel, null);
                }

                return null;
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Takes an HTML renderer from the renderer pool, renders <paramref name="resource"/> with it,
        /// and returns the renderer to the pool afterwards.
        /// </summary>
        /// <param name="resource">The resource to render.</param>
        /// <returns>
        /// A <see cref="RenderingResult"/> containing the rendered resource, the resources captured
        /// while rendering and the HTML document, when rendering succeeds and the resource's status
        /// code is not within the broken range; otherwise <c>null</c>. Every <c>null</c> outcome
        /// also posts a failed processing result to <c>FailedProcessingResults</c>.
        /// </returns>
        protected override RenderingResult Transform(Resource resource)
        {
            IHtmlRenderer htmlRenderer = null;
            var capturedResources = new List<Resource>();

            try
            {
                // Thrown inside the try on purpose: a null input is reported through the catch below.
                if (resource == null)
                {
                    throw new ArgumentNullException(nameof(resource));
                }

                htmlRenderer = _htmlRenderers.Take(_cancellationToken);
                htmlRenderer.OnResourceCaptured += CaptureResource;

                // Remember the status code before rendering so a change can be detected afterwards.
                var oldStatusCode = resource.StatusCode;
                var renderingFailed = !htmlRenderer.TryRender(
                    resource,
                    out var htmlText,
                    out var millisecondsPageLoadTime,
                    _cancellationToken
                );

                if (renderingFailed)
                {
                    SendOutFailedProcessingResult();
                    _log.Info($"Failed to render {nameof(Resource)} was discarded: {JsonConvert.SerializeObject(resource)}");
                    return null;
                }

                UpdateStatusCodeIfChanged();
                DoStatisticsIfHasPageLoadTime();

                if (!resource.StatusCode.IsWithinBrokenRange())
                {
                    // The return statement is terminated inside the if-block (the original text had
                    // the semicolon misplaced after the closing brace, which does not compile).
                    return new RenderingResult
                    {
                        RenderedResource = resource,
                        CapturedResources = capturedResources,
                        HtmlDocument = new HtmlDocument
                        {
                            Uri = resource.Uri, HtmlText = htmlText
                        }
                    };
                }

                SendOutFailedProcessingResult();
                _log.Info($"Broken {nameof(Resource)} was discarded: {JsonConvert.SerializeObject(resource)}");
                return null;

                // Re-publishes the verification result when the status code differs from the one
                // observed before rendering.
                void UpdateStatusCodeIfChanged()
                {
                    var newStatusCode = resource.StatusCode;

                    if (oldStatusCode == newStatusCode)
                    {
                        return;
                    }
                    if (!VerificationResults.Post(resource.ToVerificationResult()))
                    {
                        _log.Error($"Failed to post data to buffer block named [{nameof(VerificationResults)}].");
                    }
                }

                // Updates the rendering statistics only when a page load time was reported.
                void DoStatisticsIfHasPageLoadTime()
                {
                    if (!millisecondsPageLoadTime.HasValue)
                    {
                        return;
                    }
                    _statistics.IncrementSuccessfullyRenderedPageCount();
                    _statistics.IncrementTotalPageLoadTimeBy(millisecondsPageLoadTime.Value);
                }
            }
            catch (Exception exception) when(!exception.IsAcknowledgingOperationCancelledException(_cancellationToken))
            {
                SendOutFailedProcessingResult();
                _log.Error($"One or more errors occurred while rendering: {JsonConvert.SerializeObject(resource)}.", exception);
                return null;
            }
            finally
            {
                if (htmlRenderer != null)
                {
                    // Unsubscribe before handing the renderer back so it no longer feeds this call's list.
                    htmlRenderer.OnResourceCaptured -= CaptureResource;
                    // CancellationToken.None: the renderer is returned even if _cancellationToken was cancelled.
                    _htmlRenderers.Add(htmlRenderer, CancellationToken.None);
                }
            }

            // Event handler: collects every resource the renderer reports while rendering.
            void CaptureResource(Resource capturedResource)
            {
                capturedResources.Add(capturedResource);
            }

            // Posts a failed processing result for the current resource; logs when the post is declined.
            void SendOutFailedProcessingResult()
            {
                if (!FailedProcessingResults.Post(new FailedProcessingResult {
                    ProcessedResource = resource
                }))
                {
                    _log.Error($"Failed to post data to buffer block named [{nameof(FailedProcessingResults)}].");
                }
            }
        }