/// <summary>
/// Completes this block: signals completion to the base block, cancels <c>_cancellationTokenSource</c>,
/// waits for the base block's completion task, and then completes the <c>HtmlRenderers</c> and
/// <c>FailedProcessingResults</c> output blocks.
/// </summary>
/// <remarks>
/// The output blocks are completed in a <c>finally</c> clause so that they are still completed when
/// waiting on the base completion throws (the wait happens right after the token has been cancelled).
/// Exceptions for which <c>IsAcknowledgingOperationCancelledException</c> returns true for this token are
/// ignored; any other exception is logged as an error and not rethrown.
/// </remarks>
public override void Complete()
{
    try
    {
        try
        {
            base.Complete();
            _cancellationTokenSource.Cancel();
            base.Completion.Wait();
        }
        finally
        {
            // Always close the outputs, even when the wait above throws; otherwise consumers
            // waiting on these blocks' completion would never observe it.
            HtmlRenderers.Complete();
            FailedProcessingResults.Complete();
        }
    }
    catch (Exception exception)
    {
        if (exception.IsAcknowledgingOperationCancelledException(_cancellationTokenSource.Token))
        {
            return;
        }

        _log.Error($"One or more errors occurred while completing {nameof(HtmlRendererBlock)}.", exception);
    }
}
/// <summary>
/// Verifies <paramref name="resource"/> through <c>_resourceVerifier</c> and decides whether it is passed on.
/// </summary>
/// <param name="resource">
/// The resource to verify. A null value raises an <see cref="ArgumentNullException"/> that is handled
/// by this method's own catch clause (reported as a failed processing result and logged).
/// </param>
/// <returns>
/// <paramref name="resource"/> itself when verification succeeds and the verification status is neither
/// <c>StatusCode.OrphanedUri</c> nor <c>StatusCode.UriSchemeNotSupported</c>; otherwise <c>null</c>.
/// </returns>
/// <remarks>
/// Every discarded resource is posted to <c>FailedProcessingResults</c>. On success the statistics are
/// updated, the verification result is posted to <c>VerificationResults</c> and a
/// <c>EventType.ResourceVerified</c> event is posted to <c>Events</c>. Exceptions thrown inside the
/// try body are caught, logged and turned into a <c>null</c> result.
/// </remarks>
protected override Resource Transform(Resource resource)
{
    try
    {
        if (resource == null)
        {
            throw new ArgumentNullException(nameof(resource));
        }

        var verified = _resourceVerifier.TryVerify(resource, _cancellationToken, out var verificationResult);
        if (!verified)
        {
            ReportFailedProcessing();
            _log.Info($"Failed to be verified {nameof(Resource)} was discarded: {JsonConvert.SerializeObject(resource)}.");
            return null;
        }

        if (verificationResult.StatusCode == StatusCode.OrphanedUri)
        {
            ReportFailedProcessing();
            _log.Info($"{nameof(Resource)} with orphaned URL was discarded: {JsonConvert.SerializeObject(resource)}.");
            return null;
        }

        if (verificationResult.StatusCode == StatusCode.UriSchemeNotSupported)
        {
            ReportFailedProcessing();
            _log.Info($"{nameof(Resource)} with unsupported scheme was discarded: {JsonConvert.SerializeObject(resource)}.");
            return null;
        }

        // Statistics are based on the resource's own status code, not the verification result's.
        if (resource.StatusCode.IsWithinBrokenRange())
        {
            _statistics.IncrementBrokenUrlCount();
        }
        else
        {
            _statistics.IncrementValidUrlCount();
        }

        // Publish the verification result.
        if (!VerificationResults.Post(verificationResult))
        {
            _log.Error($"Failed to post data to buffer block named [{nameof(VerificationResults)}].");
        }

        // Publish the "resource verified" event.
        var verifiedEvent = new Event
        {
            EventType = EventType.ResourceVerified,
            Message = $"{verificationResult.StatusCode:D} - {verificationResult.VerifiedUrl}"
        };
        if (!Events.Post(verifiedEvent))
        {
            _log.Error($"Failed to post data to buffer block named [{nameof(Events)}].");
        }

        return resource;
    }
    catch (Exception exception)
    {
        ReportFailedProcessing();
        _log.Error($"One or more errors occurred while verifying: {JsonConvert.SerializeObject(resource)}.", exception);
        return null;
    }

    // Posts a failed processing result for the current resource and logs when the post is rejected.
    void ReportFailedProcessing()
    {
        var failure = new FailedProcessingResult { ProcessedResource = resource };
        if (!FailedProcessingResults.Post(failure))
        {
            _log.Error($"Failed to post data to buffer block named [{nameof(FailedProcessingResults)}].");
        }
    }
}
/// <summary>
/// Renders the resource of the given (renderer, resource) pair and hands the renderer back to
/// <c>HtmlRenderers</c> afterwards.
/// </summary>
/// <param name="htmlRendererAndResource">The renderer to use and the resource to render.</param>
/// <returns>
/// A <c>RenderingResult</c> when rendering succeeds and the resource's status code is not within the
/// broken range after rendering; otherwise <c>null</c> (a failed processing result is posted in that case).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlRendererAndResource"/> is null.</exception>
/// <remarks>
/// Resources whose status code is not within the broken range are rendered only when they are not larger
/// than <c>Configurations.RenderableResourceSizeInMb</c>, are internal, were extracted from an HTML
/// document and have a renderable resource type. Exceptions are converted to a failed result unless
/// <c>IsAcknowledgingOperationCancelledException</c> returns true for them, in which case they propagate.
/// </remarks>
protected override RenderingResult Transform(Tuple<IHtmlRenderer, Resource> htmlRendererAndResource)
{
    if (htmlRendererAndResource == null)
    {
        throw new ArgumentNullException(nameof(htmlRendererAndResource));
    }

    var (htmlRenderer, resource) = htmlRendererAndResource;
    try
    {
        if (!resource.StatusCode.IsWithinBrokenRange())
        {
            var sizeInMegabytes = resource.Size / 1024f / 1024f;
            if (sizeInMegabytes > Configurations.RenderableResourceSizeInMb)
            {
                return ReportFailure(
                    $"Resource was not queued for rendering because it was too big ({sizeInMegabytes} MB): {resource.ToJson()}",
                    LogLevel.Debug
                );
            }

            var isRenderableType = (ResourceType.Html | ResourceType.Unknown).HasFlag(resource.ResourceType);
            if (!(resource.IsInternal && resource.IsExtractedFromHtmlDocument && isRenderableType))
            {
                return ReportFailure(null, LogLevel.None);
            }
        }

        var rendered = htmlRenderer.TryRender(
            resource,
            out var htmlText,
            out var millisecondsPageLoadTime,
            out var capturedResources,
            _cancellationTokenSource.Token
        );
        if (!rendered)
        {
            return ReportFailure(
                $"Failed to render {nameof(Resource)}: {resource.ToJson()}",
                LogLevel.Information
            );
        }

        // The status code is checked again after rendering.
        if (resource.StatusCode.IsWithinBrokenRange())
        {
            return ReportFailure(null, LogLevel.None);
        }

        var htmlDocument = new HtmlDocument { Uri = resource.Uri, HtmlText = htmlText };
        return new RenderingResult
        {
            RenderedResource = resource,
            CapturedResources = capturedResources,
            MillisecondsPageLoadTime = millisecondsPageLoadTime,
            HtmlDocument = htmlDocument
        };
    }
    catch (Exception error) when (!error.IsAcknowledgingOperationCancelledException(_cancellationTokenSource.Token))
    {
        return ReportFailure(
            $"One or more errors occurred while rendering: {resource.ToJson()}.",
            LogLevel.Error,
            error
        );
    }
    finally
    {
        // Give the renderer back; a rejected post is only an error while the buffer is not completed.
        var handedBack = HtmlRenderers.Post(htmlRenderer);
        if (!handedBack && !HtmlRenderers.Completion.IsCompleted)
        {
            _log.Error($"Failed to post data to buffer block named [{nameof(HtmlRenderers)}].");
        }
    }

    #region Local Functions

    // Posts a failed processing result for the resource, logs the message at the requested level
    // (nothing for LogLevel.None) and returns null so callers can "return ReportFailure(...)".
    RenderingResult ReportFailure(string logMessage, LogLevel logLevel, Exception exception = null)
    {
        var failure = new FailedProcessingResult { ProcessedResource = resource };
        var posted = FailedProcessingResults.Post(failure);
        if (!posted && !FailedProcessingResults.Completion.IsCompleted)
        {
            _log.Error($"Failed to post data to buffer block named [{nameof(FailedProcessingResults)}].");
        }

        switch (logLevel)
        {
            case LogLevel.None:
                break;

            case LogLevel.Trace:
            case LogLevel.Debug:
                _log.Debug(logMessage, exception);
                break;

            case LogLevel.Information:
                _log.Info(logMessage, exception);
                break;

            case LogLevel.Warning:
                _log.Warn(logMessage, exception);
                break;

            case LogLevel.Error:
                _log.Error(logMessage, exception);
                break;

            case LogLevel.Critical:
                _log.Fatal(logMessage, exception);
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(logLevel), logLevel, null);
        }

        return null;
    }

    #endregion
}
/// <summary>
/// Takes a renderer from <c>_htmlRenderers</c>, renders <paramref name="resource"/> with it and returns
/// the renderer to the collection afterwards.
/// </summary>
/// <param name="resource">
/// The resource to render. A null value raises an <see cref="ArgumentNullException"/> that is handled by
/// this method's own catch clause (reported as a failed processing result and logged).
/// </param>
/// <returns>
/// A <c>RenderingResult</c> holding the resource, the resources captured through the renderer's
/// <c>OnResourceCaptured</c> event and the rendered HTML, when rendering succeeds and the resource's
/// status code is not within the broken range afterwards; otherwise <c>null</c>.
/// </returns>
/// <remarks>
/// Exceptions are caught, logged and turned into a <c>null</c> result unless
/// <c>IsAcknowledgingOperationCancelledException</c> returns true for them, in which case they propagate.
/// </remarks>
protected override RenderingResult Transform(Resource resource)
{
    IHtmlRenderer htmlRenderer = null;
    var capturedResources = new List<Resource>();
    try
    {
        if (resource == null)
        {
            throw new ArgumentNullException(nameof(resource));
        }

        htmlRenderer = _htmlRenderers.Take(_cancellationToken);
        htmlRenderer.OnResourceCaptured += CaptureResource;

        // Remember the status code before rendering so a change can be detected afterwards.
        var oldStatusCode = resource.StatusCode;
        var renderingFailed = !htmlRenderer.TryRender(
            resource,
            out var htmlText,
            out var millisecondsPageLoadTime,
            _cancellationToken
        );

        if (renderingFailed)
        {
            SendOutFailedProcessingResult();
            _log.Info($"Failed to render {nameof(Resource)} was discarded: {JsonConvert.SerializeObject(resource)}");
            return null;
        }

        UpdateStatusCodeIfChanged();
        DoStatisticsIfHasPageLoadTime();

        if (!resource.StatusCode.IsWithinBrokenRange())
        {
            // The return statement is terminated inside the block; the original placed the
            // semicolon after the closing brace, leaving the statement unterminated.
            return new RenderingResult
            {
                RenderedResource = resource,
                CapturedResources = capturedResources,
                HtmlDocument = new HtmlDocument { Uri = resource.Uri, HtmlText = htmlText }
            };
        }

        SendOutFailedProcessingResult();
        _log.Info($"Broken {nameof(Resource)} was discarded: {JsonConvert.SerializeObject(resource)}");
        return null;

        // Posts an updated verification result when the status code differs from the one seen before rendering.
        void UpdateStatusCodeIfChanged()
        {
            var newStatusCode = resource.StatusCode;
            if (oldStatusCode == newStatusCode)
            {
                return;
            }

            if (!VerificationResults.Post(resource.ToVerificationResult()))
            {
                _log.Error($"Failed to post data to buffer block named [{nameof(VerificationResults)}].");
            }
        }

        // Updates the rendering statistics, but only when a page load time was reported.
        void DoStatisticsIfHasPageLoadTime()
        {
            if (!millisecondsPageLoadTime.HasValue)
            {
                return;
            }

            _statistics.IncrementSuccessfullyRenderedPageCount();
            _statistics.IncrementTotalPageLoadTimeBy(millisecondsPageLoadTime.Value);
        }
    }
    catch (Exception exception) when (!exception.IsAcknowledgingOperationCancelledException(_cancellationToken))
    {
        SendOutFailedProcessingResult();
        _log.Error($"One or more errors occurred while rendering: {JsonConvert.SerializeObject(resource)}.", exception);
        return null;
    }
    finally
    {
        // htmlRenderer stays null when taking one from the collection failed; nothing to give back then.
        if (htmlRenderer != null)
        {
            htmlRenderer.OnResourceCaptured -= CaptureResource;
            _htmlRenderers.Add(htmlRenderer, CancellationToken.None);
        }
    }

    // Event handler collecting every resource the renderer reports while rendering.
    void CaptureResource(Resource capturedResource)
    {
        capturedResources.Add(capturedResource);
    }

    // Posts a failed processing result for the current resource and logs when the post is rejected.
    void SendOutFailedProcessingResult()
    {
        if (!FailedProcessingResults.Post(new FailedProcessingResult { ProcessedResource = resource }))
        {
            _log.Error($"Failed to post data to buffer block named [{nameof(FailedProcessingResults)}].");
        }
    }
}