/// <summary>
/// Runs the use-tag dependency pass (<c>EnsureUseTagSupport</c>) over <paramref name="content"/> and then
/// rewrites the content by substituting every original URL in <paramref name="replaceResourcePairs"/>
/// that has a non-null replacement.
/// </summary>
/// <param name="referencingUrl">URL of the document the content belongs to (not read by this method).</param>
/// <param name="content">Markup to process; returned as-is when the configuration is missing or disabled.</param>
/// <param name="staticWebService">Service handed to the use-tag pass.</param>
/// <param name="configuration">Site configuration; processing only happens when it is non-null and enabled.</param>
/// <param name="useTemporaryAttribute">Forwarded unchanged to the use-tag pass.</param>
/// <param name="ignoreHtmlDependencies">Forwarded unchanged to the use-tag pass.</param>
/// <param name="currentPageResourcePairs">Resource pairs for the current page; a new dictionary is created when null.</param>
/// <param name="replaceResourcePairs">Original URL to replacement URL map; a new dictionary is created when null.</param>
/// <param name="callDepth">Forwarded unchanged to the use-tag pass.</param>
/// <returns>The content with all non-null replacement pairs applied.</returns>
public string EnsureDependencies(string referencingUrl, string content, IStaticWebService staticWebService, SiteConfigurationElement configuration, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, Dictionary<string, string> currentPageResourcePairs = null, ConcurrentDictionary<string, string> replaceResourcePairs = null, int callDepth = 0)
        {
            var isActive = configuration != null && configuration.Enabled;
            if (!isActive)
            {
                return content;
            }

            currentPageResourcePairs = currentPageResourcePairs ?? new Dictionary<string, string>();
            replaceResourcePairs = replaceResourcePairs ?? new ConcurrentDictionary<string, string>();

            // Collect resources referenced from use tags, pattern per the original note:
            // <(use).*(xlink:href)="(?<resource>[^"]+)
            EnsureUseTagSupport(staticWebService, configuration, ref content, ref currentPageResourcePairs, ref replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

            var rewritten = new StringBuilder(content);
            foreach (var replacement in replaceResourcePairs)
            {
                // A null value means there is no new URL to swap in for this resource.
                if (replacement.Value == null)
                {
                    continue;
                }

                rewritten.Replace(replacement.Key, replacement.Value);
            }

            return rewritten.ToString();
        }
Beispiel #2
0
        /// <summary>
        /// Scans <paramref name="html"/> with <c>REGEX_FIND_SOURCE_REFERENCE</c>, extracts every resource URL
        /// from each "imageCandidates" group and makes sure the resource is registered in both
        /// <paramref name="currentPageResourcePairs"/> and <paramref name="replaceResourcePairs"/>,
        /// calling <c>staticWebService.EnsureResource</c> for resources not seen before.
        /// </summary>
        /// <param name="staticWebService">Service used to ensure a resource that is not yet known.</param>
        /// <param name="configuration">Site configuration; nothing happens when it is null or disabled.</param>
        /// <param name="html">Markup to scan. It is only read by this method.</param>
        /// <param name="currentPageResourcePairs">Resource pairs known for the current page; extended in place.</param>
        /// <param name="replaceResourcePairs">Shared original URL to replacement URL map; extended in place.</param>
        /// <param name="useTemporaryAttribute">Forwarded unchanged to <c>EnsureResource</c>.</param>
        /// <param name="ignoreHtmlDependencies">Forwarded unchanged to <c>EnsureResource</c>.</param>
        /// <param name="callDepth">Forwarded unchanged to <c>EnsureResource</c>.</param>
        protected void EnsureSourceTagSupport(IStaticWebService staticWebService, SiteConfigurationElement configuration, ref string html, ref Dictionary<string, string> currentPageResourcePairs, ref ConcurrentDictionary<string, string> replaceResourcePairs, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, int callDepth = 0)
        {
            if (configuration == null || !configuration.Enabled)
            {
                return;
            }

            var sourceSetMatches = REGEX_FIND_SOURCE_REFERENCE.Matches(html);

            foreach (Match sourceSetMatch in sourceSetMatches)
            {
                var imageCandidatesGroup = sourceSetMatch.Groups["imageCandidates"];
                if (!imageCandidatesGroup.Success)
                {
                    continue;
                }

                // Take into account that we can have many image candidates, for example: logo-768.png 768w, logo-768-1.5x.png 1.5x
                var resourceMatches = REGEX_FIND_SOURCE_RESOUCE_REFERENCE.Matches(imageCandidatesGroup.Value);
                foreach (Match match in resourceMatches)
                {
                    var group = match.Groups["resource"];
                    if (!group.Success)
                    {
                        continue;
                    }

                    var resourceUrl = group.Value;

                    if (currentPageResourcePairs.ContainsValue(resourceUrl))
                    {
                        // The URL is already the *result* of an earlier replacement on this page.
                        // Website is probably using a 404 page that is not returning HTTP StatusCode 404, ignore this...
                        continue;
                    }

                    // Single atomic lookup instead of ContainsKey + indexer: avoids a double lookup and
                    // cannot throw if the shared dictionary changes between the check and the read.
                    string knownResourceUrl;
                    if (replaceResourcePairs.TryGetValue(resourceUrl, out knownResourceUrl))
                    {
                        // Already downloaded (by this page or by another page in the same run), so
                        // don't download it again; only make sure the current page knows about it.
                        if (!currentPageResourcePairs.ContainsKey(resourceUrl))
                        {
                            currentPageResourcePairs.Add(resourceUrl, knownResourceUrl);
                        }
                        continue;
                    }

                    var newResourceUrl = staticWebService.EnsureResource(configuration, resourceUrl, currentPageResourcePairs, replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

                    // TryAdd is a no-op when the key already exists, so no preceding ContainsKey is needed.
                    replaceResourcePairs.TryAdd(resourceUrl, newResourceUrl);

                    // EnsureResource receives the page dictionary and may already have registered the URL.
                    if (!currentPageResourcePairs.ContainsKey(resourceUrl))
                    {
                        currentPageResourcePairs.Add(resourceUrl, newResourceUrl);
                    }
                }
            }
        }
        /// <summary>
        /// Creates the scheduled job, marks it as stoppable and resolves its service
        /// dependencies through the service locator.
        /// </summary>
        public StaticWebScheduledJob()
        {
            IsStoppable = true;

            // Resolve all collaborators from the same locator instance.
            var locator = ServiceLocator.Current;

            _staticWebService = locator.GetInstance<IStaticWebService>();
            _contentRepository = locator.GetInstance<IContentRepository>();
            _urlResolver = locator.GetInstance<UrlResolver>();
        }
Beispiel #4
0
        /// <summary>
        /// Scans <paramref name="html"/> with <c>REGEX_FIND_SCRIPT_OR_LINK_OR_IMG_OR_A_URL_REFERENCE</c> and makes
        /// sure every matched "resource" URL is registered in both <paramref name="currentPageResourcePairs"/>
        /// and <paramref name="replaceResourcePairs"/>, calling <c>staticWebService.EnsureResource</c> for
        /// resources not seen before.
        /// </summary>
        /// <param name="staticWebService">Service used to ensure a resource that is not yet known.</param>
        /// <param name="configuration">Site configuration; nothing happens when it is null or disabled.</param>
        /// <param name="html">Markup to scan. It is only read by this method.</param>
        /// <param name="currentPageResourcePairs">Resource pairs known for the current page; extended in place.</param>
        /// <param name="replaceResourcePairs">Shared original URL to replacement URL map; extended in place.</param>
        /// <param name="useTemporaryAttribute">Forwarded unchanged to <c>EnsureResource</c>.</param>
        /// <param name="ignoreHtmlDependencies">Forwarded unchanged to <c>EnsureResource</c>.</param>
        /// <param name="callDepth">Forwarded unchanged to <c>EnsureResource</c>.</param>
        protected void EnsureScriptAndLinkAndImgAndATagSupport(IStaticWebService staticWebService, SiteConfigurationElement configuration, ref string html, ref Dictionary<string, string> currentPageResourcePairs, ref ConcurrentDictionary<string, string> replaceResourcePairs, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, int callDepth = 0)
        {
            if (configuration == null || !configuration.Enabled)
            {
                return;
            }

            var matches = REGEX_FIND_SCRIPT_OR_LINK_OR_IMG_OR_A_URL_REFERENCE.Matches(html);

            foreach (Match match in matches)
            {
                var group = match.Groups["resource"];
                if (!group.Success)
                {
                    continue;
                }

                var resourceUrl = group.Value;

                if (currentPageResourcePairs.ContainsValue(resourceUrl))
                {
                    // The URL is already the *result* of an earlier replacement on this page.
                    // Website is probably using a 404 page that is not returning HTTP StatusCode 404, ignore this...
                    continue;
                }

                // Single atomic lookup instead of ContainsKey + indexer: avoids a double lookup and
                // cannot throw if the shared dictionary changes between the check and the read.
                string knownResourceUrl;
                if (replaceResourcePairs.TryGetValue(resourceUrl, out knownResourceUrl))
                {
                    // Already downloaded (by this page or by another page in the same run), so
                    // don't download it again; only make sure the current page knows about it.
                    if (!currentPageResourcePairs.ContainsKey(resourceUrl))
                    {
                        currentPageResourcePairs.Add(resourceUrl, knownResourceUrl);
                    }
                    continue;
                }

                var newResourceUrl = staticWebService.EnsureResource(configuration, resourceUrl, currentPageResourcePairs, replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

                // TryAdd is a no-op when the key already exists, so no preceding ContainsKey is needed.
                replaceResourcePairs.TryAdd(resourceUrl, newResourceUrl);

                // EnsureResource receives the page dictionary and may already have registered the URL.
                if (!currentPageResourcePairs.ContainsKey(resourceUrl))
                {
                    currentPageResourcePairs.Add(resourceUrl, newResourceUrl);
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Runs the script/link/img/a pass and the source-tag pass over <paramref name="content"/> and then
        /// rewrites the content by substituting every original URL in <paramref name="replaceResourcePairs"/>
        /// that has a non-null replacement.
        /// </summary>
        /// <param name="referencingUrl">URL of the document the content belongs to (not read by this method).</param>
        /// <param name="content">Markup to process; returned as-is when the configuration is missing or disabled.</param>
        /// <param name="staticWebService">Service handed to both dependency passes.</param>
        /// <param name="configuration">Site configuration; processing only happens when it is non-null and enabled.</param>
        /// <param name="useTemporaryAttribute">Forwarded unchanged to both passes.</param>
        /// <param name="ignoreHtmlDependencies">Forwarded unchanged to both passes.</param>
        /// <param name="currentPageResourcePairs">Resource pairs for the current page; a new dictionary is created when null.</param>
        /// <param name="replaceResourcePairs">Original URL to replacement URL map; a new dictionary is created when null.</param>
        /// <param name="callDepth">Forwarded unchanged to both passes.</param>
        /// <returns>The content with all non-null replacement pairs applied.</returns>
        public string EnsureDependencies(string referencingUrl, string content, IStaticWebService staticWebService, SiteConfigurationElement configuration, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, Dictionary<string, string> currentPageResourcePairs = null, ConcurrentDictionary<string, string> replaceResourcePairs = null, int callDepth = 0)
        {
            var isActive = configuration != null && configuration.Enabled;
            if (!isActive)
            {
                return content;
            }

            currentPageResourcePairs = currentPageResourcePairs ?? new Dictionary<string, string>();
            replaceResourcePairs = replaceResourcePairs ?? new ConcurrentDictionary<string, string>();

            // Pass 1: resources from script, link, img and a tags, pattern per the original note:
            // <(script|link|img).*(href|src)="(?<resource>[^"]+)
            EnsureScriptAndLinkAndImgAndATagSupport(staticWebService, configuration, ref content, ref currentPageResourcePairs, ref replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

            // Pass 2: resources listed in source tags, pattern per the original note:
            // <(source).*(srcset)="(?<resource>[^"]+)"
            EnsureSourceTagSupport(staticWebService, configuration, ref content, ref currentPageResourcePairs, ref replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

            // TODO: make sure we have all meta resources for current page
            // Below matches ALL meta content that is a URL
            // <(meta).*(content)="(?<resource>(http:\/\/|https:\/\/|\/)[^"]+)"
            // Below matches ONLY known properties
            // <(meta).*(property|name)="(twitter:image|og:image)".*(content)="(?<resource>[http:\/\/|https:\/\/|\/][^"]+)"

            var rewritten = new StringBuilder(content);
            foreach (var replacement in replaceResourcePairs)
            {
                // A null value means there is no new URL to swap in for this resource.
                if (replacement.Value == null)
                {
                    continue;
                }

                rewritten.Replace(replacement.Key, replacement.Value);
            }

            return rewritten.ToString();
        }
        /// <summary>
        /// Resolves the URL references found in <paramref name="content"/> by delegating to
        /// <c>EnsureUrlReferenceSupport</c> and returns the rewritten content.
        /// </summary>
        /// <param name="referencingUrl">URL of the document the content belongs to; used to resolve "../" references.</param>
        /// <param name="content">Text to process; returned as-is when the configuration is missing or disabled.</param>
        /// <param name="staticWebService">Service used to ensure referenced resources.</param>
        /// <param name="configuration">Site configuration; processing only happens when it is non-null and enabled.</param>
        /// <param name="useTemporaryAttribute">Forwarded unchanged to the URL-reference pass.</param>
        /// <param name="ignoreHtmlDependencies">Forwarded unchanged to the URL-reference pass.</param>
        /// <param name="currentPageResourcePairs">Resource pairs for the current page; a new dictionary is created when null.</param>
        /// <param name="replaceResourcePairs">Original URL to replacement URL map; a new dictionary is created when null.</param>
        /// <param name="callDepth">Current call depth; forwarded to the URL-reference pass.</param>
        /// <returns>The content with its URL references rewritten.</returns>
        public string EnsureDependencies(string referencingUrl, string content, IStaticWebService staticWebService, SiteConfigurationElement configuration, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, Dictionary<string, string> currentPageResourcePairs = null, ConcurrentDictionary<string, string> replaceResourcePairs = null, int callDepth = 0)
        {
            if (configuration == null || !configuration.Enabled)
            {
                return content;
            }

            if (currentPageResourcePairs == null)
            {
                currentPageResourcePairs = new Dictionary<string, string>();
            }

            if (replaceResourcePairs == null)
            {
                replaceResourcePairs = new ConcurrentDictionary<string, string>();
            }

            // Forward callDepth: previously it was dropped here, so the helper always ran with its
            // default of 0 and EnsureResource never saw the real depth.
            return EnsureUrlReferenceSupport(referencingUrl, content, staticWebService, configuration, useTemporaryAttribute, ignoreHtmlDependencies, currentPageResourcePairs, replaceResourcePairs, callDepth);
        }
        /// <summary>
        /// Finds URL references in <paramref name="content"/> via <c>REGEX_FIND_URL_REFERENCE</c>, resolves
        /// leading "../" segments against the directory of <paramref name="referencingUrl"/>, ensures each
        /// resource through <c>staticWebService.EnsureResource</c> and replaces the original reference in
        /// the content with the resulting URL (or with a path below the configured resource folder when
        /// no new URL is returned).
        /// </summary>
        /// <param name="referencingUrl">URL of the document containing the references; null or a value without '/' is treated as having no directory.</param>
        /// <param name="content">Text whose URL references are rewritten.</param>
        /// <param name="staticWebService">Service used to ensure a resource that is not yet known.</param>
        /// <param name="configuration">Site configuration; its <c>ResourceFolder</c> is used for the fallback path.</param>
        /// <param name="useTemporaryAttribute">Forwarded unchanged to <c>EnsureResource</c>.</param>
        /// <param name="ignoreHtmlDependencies">Forwarded unchanged to <c>EnsureResource</c>.</param>
        /// <param name="currentPageResourcePairs">Resource pairs known for the current page; extended in place.</param>
        /// <param name="replaceResourcePairs">Shared original URL to replacement URL map; extended in place.</param>
        /// <param name="callDepth">Forwarded unchanged to <c>EnsureResource</c>.</param>
        /// <returns>The content with the processed references replaced.</returns>
        private static string EnsureUrlReferenceSupport(string referencingUrl, string content, IStaticWebService staticWebService, SiteConfigurationElement configuration, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, Dictionary<string, string> currentPageResourcePairs, ConcurrentDictionary<string, string> replaceResourcePairs, int callDepth = 0)
        {
            // Download and ensure files referenced are downloaded also
            var matches = REGEX_FIND_URL_REFERENCE.Matches(content);

            // Directory of the referencing document. It is the same for every match, so compute it once.
            // A missing '/' (or a null URL) yields an empty directory instead of throwing from Substring.
            var referencingPath = referencingUrl ?? string.Empty;
            var lastSlashIndex = referencingPath.LastIndexOf('/');
            var baseDirectory = lastSlashIndex >= 0 ? referencingPath.Substring(0, lastSlashIndex) : string.Empty;

            foreach (Match match in matches)
            {
                var group = match.Groups["resource"];
                if (!group.Success)
                {
                    continue;
                }

                var orginalUrl = group.Value;
                var resourceUrl = orginalUrl;
                var changedDir = false;
                var directory = baseDirectory;

                // Walk up one directory per leading "../". Ordinal comparison: this is a path token, not
                // linguistic text.
                while (resourceUrl.StartsWith("../", System.StringComparison.Ordinal))
                {
                    changedDir = true;
                    resourceUrl = resourceUrl.Remove(0, 3);

                    // Stay at the root when there are more "../" segments than directories to climb.
                    var parentSlashIndex = directory.LastIndexOf('/');
                    directory = parentSlashIndex >= 0 ? directory.Substring(0, parentSlashIndex) : string.Empty;
                }

                if (changedDir)
                {
                    resourceUrl = directory.Replace(@"\", "/") + "/" + resourceUrl;
                }

                if (currentPageResourcePairs.ContainsValue(resourceUrl))
                {
                    // The URL is already the *result* of an earlier replacement on this page.
                    // Website is probably using a 404 page that is not returning HTTP StatusCode 404, ignore this...
                    continue;
                }

                // Single atomic lookup instead of ContainsKey + indexer on the shared dictionary.
                string knownResourceUrl;
                if (replaceResourcePairs.TryGetValue(resourceUrl, out knownResourceUrl))
                {
                    // Already downloaded (by this page or by another page in the same run), so
                    // don't download it again; only make sure the current page knows about it.
                    if (!currentPageResourcePairs.ContainsKey(resourceUrl))
                    {
                        currentPageResourcePairs.Add(resourceUrl, knownResourceUrl);
                    }
                    continue;
                }

                string newResourceUrl = staticWebService.EnsureResource(configuration, resourceUrl, currentPageResourcePairs, replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);
                if (!string.IsNullOrEmpty(newResourceUrl))
                {
                    content = content.Replace(orginalUrl, newResourceUrl);

                    // TryAdd is a no-op when the key already exists.
                    replaceResourcePairs.TryAdd(resourceUrl, newResourceUrl);
                    if (!currentPageResourcePairs.ContainsKey(resourceUrl))
                    {
                        currentPageResourcePairs.Add(resourceUrl, newResourceUrl);
                    }
                }
                else
                {
                    // No new URL was produced: point the reference at the resource folder and record
                    // the resource with a null replacement so it is not processed again.
                    content = content.Replace(orginalUrl, "/" + configuration.ResourceFolder.Replace(@"\", "/") + resourceUrl);
                    replaceResourcePairs.TryAdd(resourceUrl, null);
                    if (!currentPageResourcePairs.ContainsKey(resourceUrl))
                    {
                        currentPageResourcePairs.Add(resourceUrl, null);
                    }
                }
            }

            return content;
        }