Пример #1
0
        /// <summary>
        /// Downloads the style sheet at <paramref name="styleUrl"/>, stores every resource referenced
        /// by a CSS <c>url(...)</c> token in an <c>images/</c> folder next to
        /// <paramref name="styleFilePath"/>, and rewrites those tokens to point at the stored copies.
        /// </summary>
        /// <param name="pageDownloadContext">Context whose <c>SiteDownloadContext.Site</c> is the site the files are stored for.</param>
        /// <param name="styleUrl">Absolute URL of the style sheet; also the base for resolving relative references.</param>
        /// <param name="styleFilePath">Path of the style sheet file; the <c>images/</c> folder is placed in its directory.</param>
        /// <returns>The rewritten style sheet text, or <c>null</c> when the download produced no content.</returns>
        private string DownloadStyleWithImages(PageDownloadContext pageDownloadContext, string styleUrl, string styleFilePath)
        {
            var styleContent = _httpClient.DownloadString(styleUrl);
            if (string.IsNullOrEmpty(styleContent))
            {
                return null;
            }

            var site            = pageDownloadContext.SiteDownloadContext.Site;
            var styleBaseUri    = new Uri(styleUrl);
            var styleDirectory  = Path.GetDirectoryName(styleFilePath) ?? string.Empty;
            var suffixDelimiter = new[] { '?', '#' };

            MatchEvaluator urlDelegate = m =>
            {
                // The look-arounds in the pattern leave only the reference itself in the match,
                // but an unquoted reference can still carry padding whitespace.
                string url = m.Value.Trim();

                // Inline data: URIs carry their own payload; there is nothing to download and
                // rewriting them would destroy the embedded resource.
                if (url.Length == 0 || url.StartsWith("data:", StringComparison.OrdinalIgnoreCase))
                {
                    return m.Value;
                }

                // Leave a malformed reference untouched instead of failing the whole style sheet.
                Uri absoluteUri;
                if (!Uri.TryCreate(styleBaseUri, url, out absoluteUri))
                {
                    return m.Value;
                }

                // Keep query string / fragment (cache busters, "?#iefix") out of the stored file
                // name, but re-attach them to the rewritten reference.
                int    suffixStart = url.IndexOfAny(suffixDelimiter);
                string pathPart    = suffixStart >= 0 ? url.Substring(0, suffixStart) : url;
                string suffix      = suffixStart >= 0 ? url.Substring(suffixStart) : string.Empty;

                // Fragment-only references (e.g. "#gradient") and directory URLs have no file name.
                string fileName = Path.GetFileName(pathPart);
                if (string.IsNullOrEmpty(fileName))
                {
                    return m.Value;
                }

                // The reference written into the CSS is relative to the style sheet itself,
                // so the file is stored relative to the style sheet's directory.
                var imagePath      = "images/" + fileName;
                var styleImagePath = Path.Combine(styleDirectory, imagePath);

                if (!_siteFileProvider.IsFileExists(site, styleImagePath))
                {
                    var data = _httpClient.DownloadData(absoluteUri.ToString());
                    if (data != null)
                    {
                        _siteFileProvider.AddFile(site, styleImagePath, data);
                    }
                }

                return imagePath + suffix;
            };

            return Regex.Replace(styleContent, @"(?<=url\(\s*([""']?))(?<url>[^'""]+?)(?=\1\s*\))", urlDelegate, RegexOptions.IgnoreCase);
        }
Пример #2
0
        /// <summary>
        /// Downloads the page at <c>pageLevel.Url</c>, runs every analyzer over it and stores the
        /// resulting HTML, unless the page provider already returns a page for it.
        /// </summary>
        /// <param name="pageLevel">URL and level of the page to download; level 0 marks the default page.</param>
        /// <param name="siteDownloadContext">Download state of the site; the page is appended to its <c>DownloadedPages</c> on success.</param>
        public void Download(PageLevel pageLevel, SiteDownloadContext siteDownloadContext)
        {
            // The URL path doubles as the route identifier and, with slashes replaced, as the page name.
            var urlPath = new Uri(pageLevel.Url).AbsolutePath;
            var name    = urlPath.Replace("/", "-");

            var page  = new Page(siteDownloadContext.Site, name);
            var route = new PageRoute();
            route.Identifier = urlPath;
            page.Routes      = new[] { route };
            page.IsDefault   = pageLevel.Level == 0;

            // Already stored: nothing to do.
            if (_pageProvider.Get(page) != null)
            {
                return;
            }

            var html = _httpClient.DownloadString(pageLevel.Url);
            if (string.IsNullOrEmpty(html))
            {
                return;
            }

            // The analyzers work on the context's HTML document; its final markup is what gets stored.
            var pageContext = new PageDownloadContext(siteDownloadContext, pageLevel, html);
            foreach (var pageAnalyzer in _analyzers)
            {
                pageAnalyzer.Analyze(pageContext);
            }

            page.Html = pageContext.HtmlDocument.DocumentNode.InnerHtml;
            _pageProvider.Add(page);
            siteDownloadContext.DownloadedPages.Add(pageLevel);
        }
Пример #3
0
        /// <summary>
        /// Ensures the style sheet at <paramref name="styleUrl"/> is stored for the current site
        /// under <c>Styles/&lt;url path&gt;</c> and returns the preview URL of the stored file.
        /// </summary>
        /// <param name="pageDownloadContext">Context whose <c>SiteDownloadContext.Site</c> is the site being downloaded.</param>
        /// <param name="styleUrl">Absolute URL of the style sheet.</param>
        /// <returns>The preview URL built from the preview prefix, the site's absolute name and the stored file path.</returns>
        private string DownloadStyleSheet(PageDownloadContext pageDownloadContext, string styleUrl)
        {
            var urlPath    = new Uri(styleUrl).AbsolutePath;
            var storedPath = Path.Combine("Styles", urlPath.Trim('/'));
            var site       = pageDownloadContext.SiteDownloadContext.Site;

            var alreadyStored = _siteFileProvider.IsFileExists(site, storedPath);
            if (!alreadyStored)
            {
                // Download once; an empty result is not stored.
                var css = DownloadStyleWithImages(pageDownloadContext, styleUrl, storedPath);
                if (!string.IsNullOrEmpty(css))
                {
                    _siteFileProvider.AddFile(site, storedPath, css);
                }
            }

            var previewRoot = SiteExtensions.PREFIX_FRONT_PREVIEW_URL + site.AbsoluteName;
            return UrlUtility.Combine("/", previewRoot, storedPath);
        }
Пример #4
0
        /// <summary>
        /// Downloads every external script referenced by a <c>script</c> element with a
        /// <c>src</c> attribute and rewrites the attribute to the URL returned by <c>DownloadScript</c>.
        /// </summary>
        /// <param name="context">Page being processed; its HTML document is modified in place.</param>
        public void Analyze(PageDownloadContext context)
        {
            var scriptNodes = from node in context.HtmlDocument.DocumentNode.Descendants()
                              where node.Name == "script" && node.Attributes["src"] != null
                              select node;

            foreach (var scriptNode in scriptNodes)
            {
                var source      = scriptNode.Attributes["src"];
                var absoluteUrl = UriHelper.GetInsideAbsoluteUrl(context.PageLevel.Url, source.Value);

                // An empty result means the helper produced no URL to download; leave the element as is.
                if (string.IsNullOrEmpty(absoluteUrl))
                {
                    continue;
                }

                source.Value = DownloadScript(context, absoluteUrl);
            }
        }
Пример #5
0
        /// <summary>
        /// Ensures the image at <paramref name="imageUrl"/> is stored for the current site under
        /// <c>Images/&lt;url path&gt;</c> and returns the preview URL of the stored file.
        /// </summary>
        /// <param name="pageDownloadContext">Context whose <c>SiteDownloadContext.Site</c> is the site being downloaded.</param>
        /// <param name="imageUrl">Absolute URL of the image.</param>
        /// <returns>The preview URL built from the preview prefix, the site's absolute name and the stored file path.</returns>
        private string DownloadImage(PageDownloadContext pageDownloadContext, string imageUrl)
        {
            var urlPath    = new Uri(imageUrl).AbsolutePath;
            var storedPath = Path.Combine("Images", urlPath.Trim('/'));
            var site       = pageDownloadContext.SiteDownloadContext.Site;

            var alreadyStored = _siteFileProvider.IsFileExists(site, storedPath);
            if (!alreadyStored)
            {
                // Download once; a null payload is not stored.
                var bytes = _httpClient.DownloadData(imageUrl);
                if (bytes != null)
                {
                    _siteFileProvider.AddFile(site, storedPath, bytes);
                }
            }

            var previewRoot = SiteExtensions.PREFIX_FRONT_PREVIEW_URL + site.AbsoluteName;
            return UrlUtility.Combine("/", previewRoot, storedPath);
        }
Пример #6
0
        /// <summary>
        /// Downloads every image referenced by an <c>img</c> element with a non-empty <c>src</c>
        /// attribute and rewrites the attribute to the URL returned by <c>DownloadImage</c>.
        /// </summary>
        /// <param name="context">Page being processed; its HTML document is modified in place.</param>
        public void Analyze(PageDownloadContext context)
        {
            var imageNodes = from node in context.HtmlDocument.DocumentNode.Descendants()
                             where node.Name == "img"
                             let src = node.Attributes["src"]
                             where src != null && !string.IsNullOrEmpty(src.Value)
                             select node;

            foreach (var imageNode in imageNodes)
            {
                var source      = imageNode.Attributes["src"];
                var absoluteUrl = UriHelper.GetInsideAbsoluteUrl(context.PageLevel.Url, source.Value);

                // An empty result means the helper produced no URL to download; leave the element as is.
                if (string.IsNullOrEmpty(absoluteUrl))
                {
                    continue;
                }

                source.Value = DownloadImage(context, absoluteUrl);
            }
        }
Пример #7
0
        /// <summary>
        /// Downloads every style sheet referenced by a <c>link</c> element whose <c>rel</c>
        /// attribute contains the <c>stylesheet</c> token and rewrites its <c>href</c> to the URL
        /// returned by <c>DownloadStyleSheet</c>.
        /// </summary>
        /// <param name="context">Page being processed; its HTML document is modified in place.</param>
        public void Analyze(PageDownloadContext context)
        {
            var links = context.HtmlDocument.DocumentNode.Descendants()
                        .Where(lnks => lnks.Name == "link" &&
                               lnks.Attributes["href"] != null &&
                               lnks.Attributes["rel"] != null &&
                               HasStyleSheetToken(lnks.Attributes["rel"].Value));

            foreach (var link in links)
            {
                var href = link.Attributes["href"];
                var url  = UriHelper.GetInsideAbsoluteUrl(context.PageLevel.Url, href.Value);

                // An empty result means the helper produced no URL to download; leave the element as is.
                if (string.IsNullOrEmpty(url))
                {
                    continue;
                }

                href.Value = DownloadStyleSheet(context, url);
            }
        }

        /// <summary>
        /// Tells whether a <c>rel</c> attribute value contains the <c>stylesheet</c> link type.
        /// The value is treated as a whitespace-separated token list and compared ordinally,
        /// ignoring case, so values such as <c>"alternate stylesheet"</c> or <c>" StyleSheet "</c> match
        /// regardless of the current culture.
        /// </summary>
        private static bool HasStyleSheetToken(string rel)
        {
            if (string.IsNullOrEmpty(rel))
            {
                return false;
            }

            // A null separator array makes Split break on any white-space character.
            var tokens = rel.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            foreach (var token in tokens)
            {
                if (string.Equals(token, "stylesheet", StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }
Пример #8
0
        /// <summary>
        /// Rewrites the <c>href</c> of anchors that have visible text and queues the pages they
        /// point to for download. Nothing is changed when the current page's level is not below
        /// <c>Options.Deep</c>.
        /// </summary>
        /// <remarks>
        /// Until <c>Options.Pages</c> new pages have been queued from this page, each resolved link
        /// is rewritten to the local preview path and its target is queued unless it is the current
        /// page, already downloaded, or already queued. Once that budget is used up, remaining
        /// links are set to the resolved absolute URL instead.
        /// </remarks>
        /// <param name="context">Page being processed; its HTML document and the site's download queue are modified.</param>
        public void Analyze(PageDownloadContext context)
        {
            var links = context.HtmlDocument.DocumentNode.Descendants()
                        .Where(lnks => lnks.Name == "a" &&
                               lnks.Attributes["href"] != null &&
                               lnks.InnerText.Trim().Length > 0);

            var siteContext = context.SiteDownloadContext;

            bool canGoDeeper = context.PageLevel.Level < siteContext.Options.Deep;
            if (!canGoDeeper)
            {
                return;
            }

            // One comparer serves every equality / membership check below.
            var comparer = new PageLevelComparer();
            int count    = 0;

            foreach (var link in links)
            {
                var href      = link.Attributes["href"];
                var insideUrl = UriHelper.GetInsideAbsoluteUrl(context.PageLevel.Url, href.Value);

                // An empty result means the helper produced no URL to follow; leave the anchor as is.
                if (string.IsNullOrEmpty(insideUrl))
                {
                    continue;
                }

                bool hasBudget = count < siteContext.Options.Pages;
                if (!hasBudget)
                {
                    // Page budget used up: point at the resolved absolute URL instead of a local copy.
                    href.Value = insideUrl;
                    continue;
                }

                var absolutePath = new Uri(insideUrl).AbsolutePath;
                href.Value = "/" + SiteExtensions.PREFIX_FRONT_PREVIEW_URL + siteContext.Site.AbsoluteName + absolutePath;

                var nextPageLevel = new PageLevel(insideUrl, context.PageLevel.Level + 1);

                // Only pages that are neither the current one, nor downloaded, nor queued count against the budget.
                bool alreadyKnown = comparer.Equals(context.PageLevel, nextPageLevel) ||
                                    siteContext.DownloadedPages.Contains(nextPageLevel, comparer) ||
                                    siteContext.DownloadQueue.Contains(nextPageLevel, comparer);
                if (!alreadyKnown)
                {
                    siteContext.DownloadQueue.Enqueue(nextPageLevel);
                    count++;
                }
            }
        }