/// <summary>
/// Runs the use-tag dependency pass over <paramref name="content"/> and returns the content with every
/// original URL that has a non-null replacement swapped for that replacement.
/// </summary>
/// <param name="referencingUrl">Not read by this method.</param>
/// <param name="content">Markup to process; returned untouched when the configuration is missing or disabled.</param>
/// <param name="staticWebService">Forwarded to <c>EnsureUseTagSupport</c>.</param>
/// <param name="configuration">Site configuration; a null or disabled configuration short-circuits the method.</param>
/// <param name="useTemporaryAttribute">Forwarded unchanged to <c>EnsureUseTagSupport</c>.</param>
/// <param name="ignoreHtmlDependencies">Forwarded unchanged to <c>EnsureUseTagSupport</c>.</param>
/// <param name="currentPageResourcePairs">Per-page resource map; a new empty map is created when null.</param>
/// <param name="replaceResourcePairs">Original-URL to replacement-URL map; a new empty map is created when null.</param>
/// <param name="callDepth">Forwarded unchanged to <c>EnsureUseTagSupport</c>.</param>
/// <returns>The content after all non-null replacements have been applied.</returns>
public string EnsureDependencies(string referencingUrl, string content, IStaticWebService staticWebService, SiteConfigurationElement configuration, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, Dictionary<string, string> currentPageResourcePairs = null, ConcurrentDictionary<string, string> replaceResourcePairs = null, int callDepth = 0)
{
    var isEnabled = configuration != null && configuration.Enabled;
    if (!isEnabled)
    {
        return content;
    }

    currentPageResourcePairs = currentPageResourcePairs ?? new Dictionary<string, string>();
    replaceResourcePairs = replaceResourcePairs ?? new ConcurrentDictionary<string, string>();

    // Pattern sketch kept from the original author: <(use).*(xlink:href)="(?<resource>[^"]+)
    EnsureUseTagSupport(staticWebService, configuration, ref content, ref currentPageResourcePairs, ref replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

    var rewritten = new StringBuilder(content);
    foreach (var mapping in replaceResourcePairs)
    {
        // A null value means the original URL is known but must not be rewritten.
        if (mapping.Value == null)
        {
            continue;
        }

        rewritten.Replace(mapping.Key, mapping.Value);
    }

    return rewritten.ToString();
}
/// <summary>
/// Walks every match of <c>REGEX_FIND_SOURCE_REFERENCE</c> in <paramref name="html"/>, extracts each image
/// candidate URL from its <c>imageCandidates</c> group and makes sure the resource is registered in both
/// resource maps, calling <c>EnsureResource</c> for URLs that are not yet known.
/// </summary>
/// <param name="staticWebService">Service used to ensure resources that are not yet in <paramref name="replaceResourcePairs"/>.</param>
/// <param name="configuration">Site configuration; nothing happens when it is null or disabled.</param>
/// <param name="html">Markup that is scanned; this method does not assign to it.</param>
/// <param name="currentPageResourcePairs">Per-page resource map that receives newly seen URLs.</param>
/// <param name="replaceResourcePairs">Shared original-URL to replacement-URL map.</param>
/// <param name="useTemporaryAttribute">Forwarded unchanged to <c>EnsureResource</c>.</param>
/// <param name="ignoreHtmlDependencies">Forwarded unchanged to <c>EnsureResource</c>.</param>
/// <param name="callDepth">Forwarded unchanged to <c>EnsureResource</c>.</param>
protected void EnsureSourceTagSupport(IStaticWebService staticWebService, SiteConfigurationElement configuration, ref string html, ref Dictionary<string, string> currentPageResourcePairs, ref ConcurrentDictionary<string, string> replaceResourcePairs, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, int callDepth = 0)
{
    if (configuration == null || !configuration.Enabled)
    {
        return;
    }

    foreach (Match sourceTag in REGEX_FIND_SOURCE_REFERENCE.Matches(html))
    {
        var candidateList = sourceTag.Groups["imageCandidates"];
        if (!candidateList.Success)
        {
            continue;
        }

        // One attribute value may list several candidates, e.g.
        // "logo-768.png 768w, logo-768-1.5x.png 1.5x", so each one is matched separately.
        foreach (Match candidate in REGEX_FIND_SOURCE_RESOUCE_REFERENCE.Matches(candidateList.Value))
        {
            var resourceGroup = candidate.Groups["resource"];
            if (!resourceGroup.Success)
            {
                continue;
            }

            var url = resourceGroup.Value;

            // The URL is already recorded as a replacement on this page. The original author attributes
            // this to a 404 page that does not answer with HTTP status 404, and skips it.
            if (currentPageResourcePairs.ContainsValue(url))
            {
                continue;
            }

            // Already ensured earlier (same page or a previous page in a batch run): reuse the stored
            // replacement instead of ensuring the resource again.
            string knownReplacement;
            if (replaceResourcePairs.TryGetValue(url, out knownReplacement))
            {
                if (!currentPageResourcePairs.ContainsKey(url))
                {
                    currentPageResourcePairs.Add(url, knownReplacement);
                }

                continue;
            }

            var ensuredUrl = staticWebService.EnsureResource(configuration, url, currentPageResourcePairs, replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

            // EnsureResource receives both maps and may have filled them already, hence the non-overwriting adds.
            replaceResourcePairs.TryAdd(url, ensuredUrl);
            if (!currentPageResourcePairs.ContainsKey(url))
            {
                currentPageResourcePairs.Add(url, ensuredUrl);
            }
        }
    }
}
/// <summary>
/// Creates the job, flags it as stoppable and resolves the services it keeps in fields
/// from the current service locator.
/// </summary>
public StaticWebScheduledJob()
{
    IsStoppable = true;

    var locator = ServiceLocator.Current;
    _staticWebService = locator.GetInstance<IStaticWebService>();
    _contentRepository = locator.GetInstance<IContentRepository>();
    _urlResolver = locator.GetInstance<UrlResolver>();
}
/// <summary>
/// Walks every match of <c>REGEX_FIND_SCRIPT_OR_LINK_OR_IMG_OR_A_URL_REFERENCE</c> in <paramref name="html"/>
/// and makes sure the URL captured in the <c>resource</c> group is registered in both resource maps,
/// calling <c>EnsureResource</c> for URLs that are not yet known.
/// </summary>
/// <param name="staticWebService">Service used to ensure resources that are not yet in <paramref name="replaceResourcePairs"/>.</param>
/// <param name="configuration">Site configuration; nothing happens when it is null or disabled.</param>
/// <param name="html">Markup that is scanned; this method does not assign to it.</param>
/// <param name="currentPageResourcePairs">Per-page resource map that receives newly seen URLs.</param>
/// <param name="replaceResourcePairs">Shared original-URL to replacement-URL map.</param>
/// <param name="useTemporaryAttribute">Forwarded unchanged to <c>EnsureResource</c>.</param>
/// <param name="ignoreHtmlDependencies">Forwarded unchanged to <c>EnsureResource</c>.</param>
/// <param name="callDepth">Forwarded unchanged to <c>EnsureResource</c>.</param>
protected void EnsureScriptAndLinkAndImgAndATagSupport(IStaticWebService staticWebService, SiteConfigurationElement configuration, ref string html, ref Dictionary<string, string> currentPageResourcePairs, ref ConcurrentDictionary<string, string> replaceResourcePairs, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, int callDepth = 0)
{
    if (configuration == null || !configuration.Enabled)
    {
        return;
    }

    foreach (Match reference in REGEX_FIND_SCRIPT_OR_LINK_OR_IMG_OR_A_URL_REFERENCE.Matches(html))
    {
        var resourceGroup = reference.Groups["resource"];
        if (!resourceGroup.Success)
        {
            continue;
        }

        var url = resourceGroup.Value;

        // The URL is already recorded as a replacement on this page. The original author attributes
        // this to a 404 page that does not answer with HTTP status 404, and skips it.
        if (currentPageResourcePairs.ContainsValue(url))
        {
            continue;
        }

        // Already ensured earlier (same page or a previous page in a batch run): reuse the stored
        // replacement instead of ensuring the resource again.
        string knownReplacement;
        if (replaceResourcePairs.TryGetValue(url, out knownReplacement))
        {
            if (!currentPageResourcePairs.ContainsKey(url))
            {
                currentPageResourcePairs.Add(url, knownReplacement);
            }

            continue;
        }

        var ensuredUrl = staticWebService.EnsureResource(configuration, url, currentPageResourcePairs, replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

        // EnsureResource receives both maps and may have filled them already, hence the non-overwriting adds.
        replaceResourcePairs.TryAdd(url, ensuredUrl);
        if (!currentPageResourcePairs.ContainsKey(url))
        {
            currentPageResourcePairs.Add(url, ensuredUrl);
        }
    }
}
/// <summary>
/// Runs the script/link/img/a pass and the source-tag pass over <paramref name="content"/>, then returns the
/// content with every original URL that has a non-null replacement swapped for that replacement.
/// </summary>
/// <param name="referencingUrl">Not read by this method.</param>
/// <param name="content">Markup to process; returned untouched when the configuration is missing or disabled.</param>
/// <param name="staticWebService">Forwarded to both tag passes.</param>
/// <param name="configuration">Site configuration; a null or disabled configuration short-circuits the method.</param>
/// <param name="useTemporaryAttribute">Forwarded unchanged to both tag passes.</param>
/// <param name="ignoreHtmlDependencies">Forwarded unchanged to both tag passes.</param>
/// <param name="currentPageResourcePairs">Per-page resource map; a new empty map is created when null.</param>
/// <param name="replaceResourcePairs">Original-URL to replacement-URL map; a new empty map is created when null.</param>
/// <param name="callDepth">Forwarded unchanged to both tag passes.</param>
/// <returns>The content after all non-null replacements have been applied.</returns>
public string EnsureDependencies(string referencingUrl, string content, IStaticWebService staticWebService, SiteConfigurationElement configuration, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, Dictionary<string, string> currentPageResourcePairs = null, ConcurrentDictionary<string, string> replaceResourcePairs = null, int callDepth = 0)
{
    var isEnabled = configuration != null && configuration.Enabled;
    if (!isEnabled)
    {
        return content;
    }

    currentPageResourcePairs = currentPageResourcePairs ?? new Dictionary<string, string>();
    replaceResourcePairs = replaceResourcePairs ?? new ConcurrentDictionary<string, string>();

    // Pass 1 - script, link, img and a tags.
    // Pattern sketch kept from the original author: <(script|link|img).*(href|src)="(?<resource>[^"]+)
    EnsureScriptAndLinkAndImgAndATagSupport(staticWebService, configuration, ref content, ref currentPageResourcePairs, ref replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

    // Pass 2 - source tags with srcset.
    // Pattern sketch kept from the original author: <(source).*(srcset)="(?<resource>[^"]+)"
    EnsureSourceTagSupport(staticWebService, configuration, ref content, ref currentPageResourcePairs, ref replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

    // TODO: make sure we have all meta resources for current page
    // Below matches ALL meta content that is a URL
    // <(meta).*(content)="(?<resource>(http:\/\/|https:\/\/|\/)[^"]+)"
    // Below matches ONLY known properties
    // <(meta).*(property|name)="(twitter:image|og:image)".*(content)="(?<resource>[http:\/\/|https:\/\/|\/][^"]+)"

    var rewritten = new StringBuilder(content);
    foreach (var mapping in replaceResourcePairs)
    {
        // A null value means the original URL is known but must not be rewritten.
        if (mapping.Value == null)
        {
            continue;
        }

        rewritten.Replace(mapping.Key, mapping.Value);
    }

    return rewritten.ToString();
}
/// <summary>
/// Ensures every URL reference found in <paramref name="content"/> by delegating to
/// <c>EnsureUrlReferenceSupport</c> and returns the rewritten content.
/// </summary>
/// <param name="referencingUrl">URL of the document that contains <paramref name="content"/>; forwarded to the helper.</param>
/// <param name="content">Text to process; returned untouched when the configuration is missing or disabled.</param>
/// <param name="staticWebService">Forwarded to the helper.</param>
/// <param name="configuration">Site configuration; a null or disabled configuration short-circuits the method.</param>
/// <param name="useTemporaryAttribute">Forwarded unchanged to the helper.</param>
/// <param name="ignoreHtmlDependencies">Forwarded unchanged to the helper.</param>
/// <param name="currentPageResourcePairs">Per-page resource map; a new empty map is created when null.</param>
/// <param name="replaceResourcePairs">Original-URL to replacement-URL map; a new empty map is created when null.</param>
/// <param name="callDepth">Depth value supplied by the caller; forwarded to the helper.</param>
/// <returns>The content returned by <c>EnsureUrlReferenceSupport</c>.</returns>
public string EnsureDependencies(string referencingUrl, string content, IStaticWebService staticWebService, SiteConfigurationElement configuration, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, Dictionary<string, string> currentPageResourcePairs = null, ConcurrentDictionary<string, string> replaceResourcePairs = null, int callDepth = 0)
{
    if (configuration == null || !configuration.Enabled)
    {
        return content;
    }

    if (currentPageResourcePairs == null)
    {
        currentPageResourcePairs = new Dictionary<string, string>();
    }

    if (replaceResourcePairs == null)
    {
        replaceResourcePairs = new ConcurrentDictionary<string, string>();
    }

    // callDepth must be passed explicitly: the helper declares it as an optional parameter, so leaving it
    // out silently resets the depth to 0 for every resource referenced from this content.
    return EnsureUrlReferenceSupport(referencingUrl, content, staticWebService, configuration, useTemporaryAttribute, ignoreHtmlDependencies, currentPageResourcePairs, replaceResourcePairs, callDepth);
}
/// <summary>
/// Finds every match of <c>REGEX_FIND_URL_REFERENCE</c> in <paramref name="content"/>, resolves leading
/// "../" segments against <paramref name="referencingUrl"/>, ensures the resource through
/// <paramref name="staticWebService"/> and rewrites the reference in the content.
/// </summary>
/// <param name="referencingUrl">URL of the document that contains <paramref name="content"/>; only used to resolve "../" references.</param>
/// <param name="content">Text whose URL references are processed.</param>
/// <param name="staticWebService">Service used to ensure resources that are not yet known.</param>
/// <param name="configuration">Site configuration; its <c>ResourceFolder</c> is used when no new URL is returned.</param>
/// <param name="useTemporaryAttribute">Forwarded unchanged to <c>EnsureResource</c>.</param>
/// <param name="ignoreHtmlDependencies">Forwarded unchanged to <c>EnsureResource</c>.</param>
/// <param name="currentPageResourcePairs">Per-page resource map that receives newly seen URLs.</param>
/// <param name="replaceResourcePairs">Shared original-URL to replacement-URL map.</param>
/// <param name="callDepth">Forwarded unchanged to <c>EnsureResource</c>.</param>
/// <returns>The content with the processed references rewritten.</returns>
private static string EnsureUrlReferenceSupport(string referencingUrl, string content, IStaticWebService staticWebService, SiteConfigurationElement configuration, bool? useTemporaryAttribute, bool ignoreHtmlDependencies, Dictionary<string, string> currentPageResourcePairs, ConcurrentDictionary<string, string> replaceResourcePairs, int callDepth = 0)
{
    // The match collection is bound to the string passed in here, so reassigning `content`
    // inside the loop does not disturb the iteration.
    var matches = REGEX_FIND_URL_REFERENCE.Matches(content);
    foreach (Match match in matches)
    {
        var group = match.Groups["resource"];
        if (!group.Success)
        {
            continue;
        }

        var originalUrl = group.Value;
        var resourceUrl = ResolveParentRelativeUrl(referencingUrl, originalUrl);

        // The URL is already recorded as a replacement on this page. The original author attributes
        // this to a 404 page that does not answer with HTTP status 404, and skips it.
        if (currentPageResourcePairs.ContainsValue(resourceUrl))
        {
            continue;
        }

        // Already ensured earlier (same page or a previous page in a batch run): reuse the stored
        // replacement instead of ensuring the resource again.
        string knownReplacement;
        if (replaceResourcePairs.TryGetValue(resourceUrl, out knownReplacement))
        {
            if (!currentPageResourcePairs.ContainsKey(resourceUrl))
            {
                currentPageResourcePairs.Add(resourceUrl, knownReplacement);
            }

            continue;
        }

        string newResourceUrl = staticWebService.EnsureResource(configuration, resourceUrl, currentPageResourcePairs, replaceResourcePairs, useTemporaryAttribute, ignoreHtmlDependencies, callDepth);

        string recordedReplacement;
        if (!string.IsNullOrEmpty(newResourceUrl))
        {
            content = content.Replace(originalUrl, newResourceUrl);
            recordedReplacement = newResourceUrl;
        }
        else
        {
            // No new URL was returned: point the reference into the resource folder and record a null
            // replacement for the URL.
            content = content.Replace(originalUrl, "/" + configuration.ResourceFolder.Replace(@"\", "/") + resourceUrl);
            recordedReplacement = null;
        }

        // EnsureResource receives both maps and may have filled them already, hence the non-overwriting adds.
        replaceResourcePairs.TryAdd(resourceUrl, recordedReplacement);
        if (!currentPageResourcePairs.ContainsKey(resourceUrl))
        {
            currentPageResourcePairs.Add(resourceUrl, recordedReplacement);
        }
    }

    return content;
}

// Resolves leading "../" segments of resourceUrl against the directory of referencingUrl; other URLs are returned as-is.
private static string ResolveParentRelativeUrl(string referencingUrl, string resourceUrl)
{
    const string parentSegment = "../";

    // Ordinal comparison: this is a URL prefix, not linguistic text.
    if (!resourceUrl.StartsWith(parentSegment, System.StringComparison.Ordinal))
    {
        return resourceUrl;
    }

    // The directory is only needed for "../" references, so it is computed here rather than for every
    // match; a referencing URL without any '/' no longer throws for references that do not need it.
    var directory = StripLastUrlSegment(referencingUrl ?? string.Empty);
    do
    {
        resourceUrl = resourceUrl.Remove(0, parentSegment.Length);
        directory = StripLastUrlSegment(directory);
    }
    while (resourceUrl.StartsWith(parentSegment, System.StringComparison.Ordinal));

    return directory.Replace(@"\", "/") + "/" + resourceUrl;
}

// Returns everything before the last '/', or an empty string when there is no '/' left (clamps at the root).
private static string StripLastUrlSegment(string path)
{
    var lastSlash = path.LastIndexOf('/');
    return lastSlash < 0 ? string.Empty : path.Substring(0, lastSlash);
}