/// <summary>
/// Downloads the style sheet at <paramref name="styleUrl"/>, stores every resource it references through
/// <c>url(...)</c> in an <c>images</c> folder next to the style sheet, and rewrites those references to
/// <c>images/&lt;file name&gt;</c>.
/// </summary>
/// <param name="pageDownloadContext">Context of the page being downloaded; supplies the target site.</param>
/// <param name="styleUrl">URL of the style sheet; also the base for resolving relative references.</param>
/// <param name="styleFilePath">Path under which the style sheet itself is stored; its directory hosts the <c>images</c> folder.</param>
/// <returns>The rewritten CSS text, or <c>null</c> when the download returned no content.</returns>
private string DownloadStyleWithImages(PageDownloadContext pageDownloadContext, string styleUrl, string styleFilePath)
{
    var styleContent = _httpClient.DownloadString(styleUrl);
    if (string.IsNullOrEmpty(styleContent))
    {
        return null;
    }

    MatchEvaluator urlDelegate = delegate(Match m)
    {
        // The match value is the raw text between "url(" and ")" with the optional quotes excluded.
        string url = m.Value;

        // Inline data: URIs carry their payload in the reference itself; there is nothing to download
        // and rewriting them would destroy the embedded resource.
        if (url.TrimStart().StartsWith("data:", StringComparison.OrdinalIgnoreCase))
        {
            return url;
        }

        // Derive the stored file name from the path part only, so "sprite.png?v=3" or "font.svg#id"
        // is saved as "sprite.png" / "font.svg" instead of a name containing '?' or '#'.
        int suffixStart = url.IndexOfAny(new[] { '?', '#' });
        string pathPart = suffixStart >= 0 ? url.Substring(0, suffixStart) : url;
        string fileName = Path.GetFileName(pathPart);

        // References without a file name (e.g. "#filter" or a trailing "/") cannot be stored; keep them as they are.
        if (string.IsNullOrEmpty(fileName))
        {
            return url;
        }

        // Resolve the reference against the style sheet URL (the full reference, including any query string,
        // is still used for the download).
        var rawAbsoluteUrl = new Uri(new Uri(styleUrl), url).ToString();

        // The rewritten reference is relative to the stored style sheet.
        var imagePath = "images/" + fileName;
        var styleImagePath = Path.Combine(Path.GetDirectoryName(styleFilePath), imagePath);

        if (!_siteFileProvider.IsFileExists(pageDownloadContext.SiteDownloadContext.Site, styleImagePath))
        {
            var data = _httpClient.DownloadData(rawAbsoluteUrl);
            if (data != null)
            {
                _siteFileProvider.AddFile(pageDownloadContext.SiteDownloadContext.Site, styleImagePath, data);
            }
        }

        return imagePath;
    };

    return Regex.Replace(styleContent, @"(?<=url\(\s*([""']?))(?<url>[^'""]+?)(?=\1\s*\))", urlDelegate, RegexOptions.IgnoreCase);
}
/// <summary>
/// Downloads the page described by <paramref name="pageLevel"/>, runs every analyzer over it and stores
/// the resulting HTML as a page of the target site. Nothing happens when <c>_pageProvider.Get</c> already
/// returns a page, or when the download yields no text.
/// </summary>
/// <param name="pageLevel">URL and depth level of the page to download.</param>
/// <param name="siteDownloadContext">Download state of the site; receives the page in <c>DownloadedPages</c> on success.</param>
public void Download(PageLevel pageLevel, SiteDownloadContext siteDownloadContext)
{
    string routePath = new Uri(pageLevel.Url).AbsolutePath;

    // The page name is the URL path with every "/" replaced by "-".
    string pageName = routePath.Replace("/", "-");

    var page = new Page(siteDownloadContext.Site, pageName)
    {
        Routes = new[] { new PageRoute() { Identifier = routePath } },
        // The level-0 page is flagged as the default page.
        IsDefault = pageLevel.Level == 0
    };

    if (_pageProvider.Get(page) != null)
    {
        return;
    }

    var html = _httpClient.DownloadString(pageLevel.Url);
    if (string.IsNullOrEmpty(html))
    {
        return;
    }

    var downloadContext = new PageDownloadContext(siteDownloadContext, pageLevel, html);
    foreach (var pageAnalyzer in _analyzers)
    {
        pageAnalyzer.Analyze(downloadContext);
    }

    // Analyzers work on the parsed document, so the stored HTML is taken from it rather than from the raw text.
    page.Html = downloadContext.HtmlDocument.DocumentNode.InnerHtml;
    _pageProvider.Add(page);
    siteDownloadContext.DownloadedPages.Add(pageLevel);
}
/// <summary>
/// Ensures the style sheet at <paramref name="styleUrl"/> is stored under the site's <c>Styles</c> folder
/// and returns the preview URL under which the stored copy is addressed.
/// </summary>
/// <param name="pageDownloadContext">Context of the page being downloaded; supplies the target site.</param>
/// <param name="styleUrl">Absolute URL of the style sheet.</param>
/// <returns>The preview URL built from the preview prefix, the site's absolute name and the stored file path.</returns>
private string DownloadStyleSheet(PageDownloadContext pageDownloadContext, string styleUrl)
{
    // Mirror the URL path below "Styles".
    string urlPath = new Uri(styleUrl).AbsolutePath;
    string filePath = Path.Combine("Styles", urlPath.Trim('/'));

    bool alreadyStored = _siteFileProvider.IsFileExists(pageDownloadContext.SiteDownloadContext.Site, filePath);
    if (!alreadyStored)
    {
        string css = DownloadStyleWithImages(pageDownloadContext, styleUrl, filePath);
        if (!string.IsNullOrEmpty(css))
        {
            _siteFileProvider.AddFile(pageDownloadContext.SiteDownloadContext.Site, filePath, css);
        }
    }

    // The URL is returned even when nothing could be stored.
    string previewRoot = SiteExtensions.PREFIX_FRONT_PREVIEW_URL + pageDownloadContext.SiteDownloadContext.Site.AbsoluteName;
    return UrlUtility.Combine("/", previewRoot, filePath);
}
/// <summary>
/// Finds every <c>script</c> element that has a <c>src</c> attribute, downloads the referenced script and
/// points the attribute at the value returned by <c>DownloadScript</c>.
/// </summary>
/// <param name="context">Context holding the parsed HTML document and the URL of the page being analyzed.</param>
public void Analyze(PageDownloadContext context)
{
    var scriptNodes = from node in context.HtmlDocument.DocumentNode.Descendants()
                      where node.Name == "script" && node.Attributes["src"] != null
                      select node;

    foreach (var scriptNode in scriptNodes)
    {
        var insideUrl = UriHelper.GetInsideAbsoluteUrl(context.PageLevel.Url, scriptNode.Attributes["src"].Value);

        // Sources for which the helper yields no URL are left untouched.
        if (string.IsNullOrEmpty(insideUrl))
        {
            continue;
        }

        scriptNode.Attributes["src"].Value = DownloadScript(context, insideUrl);
    }
}
/// <summary>
/// Ensures the image at <paramref name="imageUrl"/> is stored under the site's <c>Images</c> folder and
/// returns the preview URL under which the stored copy is addressed.
/// </summary>
/// <param name="pageDownloadContext">Context of the page being downloaded; supplies the target site.</param>
/// <param name="imageUrl">Absolute URL of the image.</param>
/// <returns>The preview URL built from the preview prefix, the site's absolute name and the stored file path.</returns>
private string DownloadImage(PageDownloadContext pageDownloadContext, string imageUrl)
{
    // Mirror the URL path below "Images".
    string urlPath = new Uri(imageUrl).AbsolutePath;
    string filePath = Path.Combine("Images", urlPath.Trim('/'));

    bool alreadyStored = _siteFileProvider.IsFileExists(pageDownloadContext.SiteDownloadContext.Site, filePath);
    if (!alreadyStored)
    {
        var imageData = _httpClient.DownloadData(imageUrl);
        if (imageData != null)
        {
            _siteFileProvider.AddFile(pageDownloadContext.SiteDownloadContext.Site, filePath, imageData);
        }
    }

    // The URL is returned even when the download produced no data.
    string previewRoot = SiteExtensions.PREFIX_FRONT_PREVIEW_URL + pageDownloadContext.SiteDownloadContext.Site.AbsoluteName;
    return UrlUtility.Combine("/", previewRoot, filePath);
}
/// <summary>
/// Finds every <c>img</c> element with a non-empty <c>src</c> attribute, downloads the referenced image and
/// points the attribute at the value returned by <c>DownloadImage</c>.
/// </summary>
/// <param name="context">Context holding the parsed HTML document and the URL of the page being analyzed.</param>
public void Analyze(PageDownloadContext context)
{
    var imageNodes = context.HtmlDocument.DocumentNode.Descendants()
        .Where(node => node.Name == "img")
        .Where(node => node.Attributes["src"] != null)
        .Where(node => !string.IsNullOrEmpty(node.Attributes["src"].Value));

    foreach (var imageNode in imageNodes)
    {
        var insideUrl = UriHelper.GetInsideAbsoluteUrl(context.PageLevel.Url, imageNode.Attributes["src"].Value);

        // Sources for which the helper yields no URL are left untouched.
        if (string.IsNullOrEmpty(insideUrl))
        {
            continue;
        }

        imageNode.Attributes["src"].Value = DownloadImage(context, insideUrl);
    }
}
/// <summary>
/// Finds every <c>link</c> element whose <c>rel</c> attribute equals <c>stylesheet</c> (ignoring case) and
/// that has an <c>href</c> attribute, downloads the referenced style sheet and points <c>href</c> at the
/// value returned by <c>DownloadStyleSheet</c>.
/// </summary>
/// <param name="context">Context holding the parsed HTML document and the URL of the page being analyzed.</param>
public void Analyze(PageDownloadContext context)
{
    var links = context.HtmlDocument.DocumentNode.Descendants()
        .Where(lnks => lnks.Name == "link"
            && lnks.Attributes["href"] != null
            && lnks.Attributes["rel"] != null
            // Ordinal, case-insensitive comparison: "stylesheet" is a protocol token, not linguistic text,
            // and string.Equals is null-safe so no separate null check or lower-cased copy is needed.
            && string.Equals(lnks.Attributes["rel"].Value, "stylesheet", System.StringComparison.OrdinalIgnoreCase));

    foreach (var link in links)
    {
        var url = UriHelper.GetInsideAbsoluteUrl(context.PageLevel.Url, link.Attributes["href"].Value);

        // References for which the helper yields no URL are left untouched.
        if (string.IsNullOrEmpty(url))
        {
            continue;
        }

        link.Attributes["href"].Value = DownloadStyleSheet(context, url);
    }
}
/// <summary>
/// Rewrites the anchors of the analyzed page and queues the pages they point to for download.
/// </summary>
/// <remarks>
/// Anchors are processed only while the current page level is below <c>Options.Deep</c>. For each anchor
/// with an <c>href</c> and non-blank inner text for which <c>UriHelper.GetInsideAbsoluteUrl</c> yields a URL:
/// while fewer than <c>Options.Pages</c> pages have been queued from this page, the <c>href</c> is rewritten
/// to the preview path and the target is queued unless it equals the current page or is already downloaded
/// or queued; afterwards the <c>href</c> is set to the absolute URL instead.
/// </remarks>
/// <param name="context">Context holding the parsed HTML document, the current page level and the site download state.</param>
public void Analyze(PageDownloadContext context)
{
    var links = context.HtmlDocument.DocumentNode.Descendants()
        .Where(lnks => lnks.Name == "a" && lnks.Attributes["href"] != null && lnks.InnerText.Trim().Length > 0);

    if (context.PageLevel.Level < context.SiteDownloadContext.Options.Deep)
    {
        // One comparer serves every equality check below instead of allocating three per anchor.
        var pageLevelComparer = new PageLevelComparer();
        int count = 0;

        foreach (var link in links)
        {
            var insideUrl = UriHelper.GetInsideAbsoluteUrl(context.PageLevel.Url, link.Attributes["href"].Value);
            if (string.IsNullOrEmpty(insideUrl))
            {
                continue;
            }

            if (count >= context.SiteDownloadContext.Options.Pages)
            {
                // Page budget for this page is used up: keep the link pointing at the absolute URL.
                link.Attributes["href"].Value = insideUrl;
                continue;
            }

            var absolutePath = new Uri(insideUrl).AbsolutePath;
            link.Attributes["href"].Value = "/" + SiteExtensions.PREFIX_FRONT_PREVIEW_URL + context.SiteDownloadContext.Site.AbsoluteName + absolutePath;

            var nextPageLevel = new PageLevel(insideUrl, context.PageLevel.Level + 1);

            // Only targets that are not the current page, not yet downloaded and not yet queued count against the budget.
            if (!pageLevelComparer.Equals(context.PageLevel, nextPageLevel)
                && !context.SiteDownloadContext.DownloadedPages.Contains(nextPageLevel, pageLevelComparer)
                && !context.SiteDownloadContext.DownloadQueue.Contains(nextPageLevel, pageLevelComparer))
            {
                context.SiteDownloadContext.DownloadQueue.Enqueue(nextPageLevel);
                count++;
            }
        }
    }
}