Exemple #1
0
        /// <summary>
        /// Assigns a fresh id to <paramref name="resource"/> and, when its status code is still the default value,
        /// resolves <c>OriginalUrl</c> into an absolute <c>Uri</c> or records the reason it could not be resolved.
        /// </summary>
        /// <param name="resource">The resource to enrich. It is mutated in place.</param>
        /// <returns>The same <paramref name="resource"/> instance that was passed in.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="resource"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidConstraintException">
        /// <paramref name="resource"/> has a non-default status code but no <c>Uri</c>.
        /// </exception>
        public Resource Enrich(Resource resource)
        {
            if (resource == null)
            {
                throw new ArgumentNullException(nameof(resource));
            }
            if (resource.StatusCode != default && resource.Uri == null)
            {
                throw new InvalidConstraintException(
                    $"A resource with a non-default {nameof(resource.StatusCode)} must already have a {nameof(resource.Uri)}."
                );
            }

            resource.Id = _incrementalIdGenerator.GetNext();
            if (resource.StatusCode == default)
            {
                // The checks are ordered: each later check reads resource.Uri, which only exists once
                // TryCreateAbsoluteUri has succeeded.
                if (!TryCreateAbsoluteUri())
                {
                    resource.StatusCode = StatusCode.MalformedUri;
                }
                else if (UriSchemeIsNotSupported())
                {
                    resource.StatusCode = StatusCode.UriSchemeNotSupported;
                }
                else if (IsOrphanedUri())
                {
                    resource.StatusCode = StatusCode.OrphanedUri;
                }
                else
                {
                    StripFragment();
                }
            }

            // Uri stays null when OriginalUrl could not be turned into an absolute uri.
            if (resource.Uri != null)
            {
                resource.IsInternal = _resourceScope.IsInternalResource(resource);
            }
            return(resource);

            // Sets resource.Uri from OriginalUrl, resolving a relative url against ParentUri.
            // Returns false (leaving resource.Uri untouched) when no absolute uri can be built.
            bool TryCreateAbsoluteUri()
            {
                if (!Uri.TryCreate(resource.OriginalUrl, UriKind.RelativeOrAbsolute, out var relativeOrAbsoluteUri))
                {
                    return(false);
                }
                if (relativeOrAbsoluteUri.IsAbsoluteUri)
                {
                    resource.Uri = relativeOrAbsoluteUri;
                    return(true);
                }

                if (!Uri.TryCreate(resource.ParentUri, resource.OriginalUrl, out var absoluteUri))
                {
                    return(false);
                }
                resource.Uri = absoluteUri;
                return(true);
            }

            // True for every scheme other than http and https.
            bool UriSchemeIsNotSupported()
            {
                return(resource.Uri.Scheme != "http" && resource.Uri.Scheme != "https");
            }

            // A uri without a parent is only accepted when the resource scope reports it as the start uri.
            bool IsOrphanedUri()
            {
                // TODO: Investigate where those orphaned Uri-s came from.
                return(resource.ParentUri == null && !_resourceScope.IsStartUri(resource.Uri));
            }

            // Replaces resource.Uri with a copy that has an empty fragment; no-op when there is no fragment.
            void StripFragment()
            {
                if (string.IsNullOrWhiteSpace(resource.Uri.Fragment))
                {
                    return;
                }
                resource.Uri = new UriBuilder(resource.Uri)
                {
                    Fragment = string.Empty
                }.Uri;
            }
        }
Exemple #2
0
        public HtmlRenderer(Configurations configurations, IResourceScope resourceScope, IIncrementalIdGenerator incrementalIdGenerator,
                            IHttpContentTypeToResourceTypeDictionary httpContentTypeToResourceTypeDictionary, ILog log, IWebBrowser webBrowser)
        {
            _log                        = log;
            _webBrowser                 = webBrowser;
            _configurations             = configurations;
            _webBrowser.BeforeRequest  += EnsureInternal;
            _webBrowser.BeforeResponse += CaptureNetworkTraffic;

            _objectDisposed = false;
            _takeScreenshot = false;

            #region Local Functions

            // BeforeRequest handler: on a background task, replaces the request uri with whatever
            // resourceScope.Localize returns for it and then copies that uri's host into the request's Host.
            Task EnsureInternal(object _, SessionEventArgs networkTraffic)
            {
                return Task.Run(() =>
                {
                    // The counter is raised only for the duration of the rewrite and always lowered again.
                    Interlocked.Increment(ref _activeHttpTrafficCount);
                    try
                    {
                        var outgoingRequest = networkTraffic.HttpClient.Request;
                        outgoingRequest.RequestUri = resourceScope.Localize(outgoingRequest.RequestUri);

                        // Read the uri back from the request rather than reusing the localized value.
                        var effectiveUri = outgoingRequest.RequestUri;
                        outgoingRequest.Host = effectiveUri.Host;
                    }
                    finally
                    {
                        Interlocked.Decrement(ref _activeHttpTrafficCount);
                    }
                }, _networkTrafficCts.Token);
            }

            // BeforeResponse handler. Only GET traffic is considered, and the work runs on a background task.
            //
            // Until the response for the uri being rendered has been seen, each response is tested against
            // _resourceBeingRendered.Uri. A redirect with a usable "Location" header moves that uri to the redirect
            // target and the search continues; otherwise the status code of the resource being rendered is updated,
            // the screenshot flag is evaluated and the search is marked as finished.
            //
            // After that, every further GET response is recorded in _capturedResources, unless the resource being
            // rendered is broken or the response is a redirect that the configuration excludes from the report.
            Task CaptureNetworkTraffic(object _, SessionEventArgs networkTraffic)
            {
                var request  = networkTraffic.HttpClient.Request;
                var response = networkTraffic.HttpClient.Response;

                // Snapshot of the status code: response.StatusCode itself may be overwritten further down.
                var originalResponseStatusCode = response.StatusCode;

                return(Task.Run(() =>
                {
                    Interlocked.Increment(ref _activeHttpTrafficCount);
                    try
                    {
                        // Case-insensitive comparison without allocating an upper-cased copy of the method.
                        if (!request.Method.Equals("GET", StringComparison.OrdinalIgnoreCase))
                        {
                            return;
                        }

                        var resourceSize = response.ContentLength;
                        var resourceType = httpContentTypeToResourceTypeDictionary[response.ContentType];
                        if (!_uriBeingRenderedWasFoundInCapturedNetworkTraffic)
                        {
                            if (!TryFindUriBeingRendered())
                            {
                                return;
                            }
                            if (TryFollowRedirects())
                            {
                                // _resourceBeingRendered.Uri now points at the redirect target; keep searching.
                                return;
                            }

                            UpdateStatusCodeIfChanged();
                            TakeScreenshotIfConfigured();
                            _uriBeingRenderedWasFoundInCapturedNetworkTraffic = true;

                            var resourceSizeIsTooBig = resourceSize / 1024f / 1024f > Configurations.RenderableResourceSizeInMb;
                            var resourceTypeIsNotRenderable = !(ResourceType.Html | ResourceType.Unknown).HasFlag(resourceType);
                            var responseStatusCodeIsOk = originalResponseStatusCode == (int)StatusCode.Ok;
                            if (responseStatusCodeIsOk && (resourceSizeIsTooBig || resourceTypeIsNotRenderable))
                            {
                                // NOTE(review): presumably this keeps the browser from rendering oversized or
                                // non-html content - confirm against the browser integration.
                                response.StatusCode = (int)StatusCode.NoContent;
                            }

                            return;
                        }

                        if (_resourceBeingRendered.StatusCode.IsWithinBrokenRange())
                        {
                            return;
                        }
                        if (!configurations.IncludeRedirectUrlsInReport && IsRedirectResponse())
                        {
                            return;
                        }
                        _capturedResources.Add(
                            new Resource
                            (
                                incrementalIdGenerator.GetNext(),
                                request.Url,
                                _resourceBeingRendered.Uri,
                                false
                            )
                            {
                                Size = resourceSize,
                                Uri = request.RequestUri,
                                ResourceType = resourceType,
                                StatusCode = (StatusCode)originalResponseStatusCode
                            }
                        );
                    }
                    finally { Interlocked.Decrement(ref _activeHttpTrafficCount); }
                }, _networkTrafficCts.Token));

                #region Local Functions

                // Returns true when the response is a redirect whose "Location" header could be parsed; in that case
                // _resourceBeingRendered.Uri has been replaced by the (absolute) redirect target.
                bool TryFollowRedirects()
                {
                    if (!IsRedirectResponse())
                    {
                        return(false);
                    }
                    if (!response.Headers.Headers.TryGetValue("Location", out var locationHeader))
                    {
                        _log.Info(
                            "Http redirect response without \"Location\" header detected in captured resources while rendering: " +
                            $"{_resourceBeingRendered.ToJson()}"
                            );
                        return(false);
                    }

                    if (!Uri.TryCreate(locationHeader.Value, UriKind.RelativeOrAbsolute, out var redirectUri))
                    {
                        return(false);
                    }
                    _resourceBeingRendered.Uri = redirectUri.IsAbsoluteUri ? redirectUri : new Uri(_resourceBeingRendered.Uri, redirectUri);

                    return(true);
                }

                // Returns true when the captured request targets the uri being rendered. The schemes must be equal,
                // except that a request captured over https may match a uri being rendered over http.
                bool TryFindUriBeingRendered()
                {
                    var capturedUri             = request.RequestUri;
                    var renderedUri             = _resourceBeingRendered.Uri;
                    var bothSchemesAreNotEqual  = !capturedUri.Scheme.Equals(renderedUri.Scheme);
                    var strictTransportSecurity = renderedUri.Scheme == "http" && capturedUri.Scheme == "https";

                    if (bothSchemesAreNotEqual && !strictTransportSecurity)
                    {
                        return(false);
                    }
                    if (RemoveScheme(capturedUri).Equals(RemoveScheme(renderedUri)))
                    {
                        return(true);
                    }

                    // Uri.Port yields the scheme's default port when the uri does not name one (80 for http, 443 for
                    // https), so an http -> https upgrade between default ports can never match the comparison above.
                    // For exactly that case the port is left out of the comparison.
                    return(strictTransportSecurity
                           && capturedUri.IsDefaultPort
                           && renderedUri.IsDefaultPort
                           && RemoveSchemeAndPort(capturedUri).Equals(RemoveSchemeAndPort(renderedUri)));
                }

                // Stores the captured status code on the resource being rendered when it differs from the current one.
                void UpdateStatusCodeIfChanged()
                {
                    var newStatusCode = originalResponseStatusCode;
                    var oldStatusCode = (int)_resourceBeingRendered.StatusCode;

                    if (newStatusCode == oldStatusCode)
                    {
                        return;
                    }

                    _resourceBeingRendered.StatusCode = (StatusCode)newStatusCode;
                    _log.Debug($"StatusCode changed from [{oldStatusCode}] to [{newStatusCode}] at [{_resourceBeingRendered.Uri}]");
                }

                // Requests a screenshot when the resource being rendered is broken and screenshots are enabled.
                void TakeScreenshotIfConfigured()
                {
                    if (_resourceBeingRendered.StatusCode.IsWithinBrokenRange() && configurations.TakeScreenshotEvidence)
                    {
                        _takeScreenshot = true;
                    }
                }

                // True for status codes in the range [300, 400).
                bool IsRedirectResponse()
                {
                    return(300 <= originalResponseStatusCode && originalResponseStatusCode < 400);
                }

                // Url-decoded "host:port/path?query" of the given uri.
                static string RemoveScheme(Uri uri)
                {
                    return(WebUtility.UrlDecode($"{uri.Host}:{uri.Port}{uri.PathAndQuery}"));
                }

                // Url-decoded "host/path?query" of the given uri.
                static string RemoveSchemeAndPort(Uri uri)
                {
                    return(WebUtility.UrlDecode($"{uri.Host}{uri.PathAndQuery}"));
                }

                #endregion
            }