Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the page referenced by <paramref name="pageLevel"/>, runs every
        /// registered analyzer over its markup and stores the resulting page, unless
        /// the page provider already returns an entry for it.
        /// </summary>
        /// <param name="pageLevel">Supplies the URL to fetch and its level; level 0 marks the page as the default one.</param>
        /// <param name="siteDownloadContext">Supplies the target site and receives <paramref name="pageLevel"/> in its downloaded-pages collection on success.</param>
        public void Download(PageLevel pageLevel, SiteDownloadContext siteDownloadContext)
        {
            // The URL's absolute path is used both as the route identifier and,
            // with slashes replaced by dashes, as the page name.
            var routePath = new Uri(pageLevel.Url).AbsolutePath;

            var page = new Page(siteDownloadContext.Site, routePath.Replace("/", "-"));
            page.Routes = new[] { new PageRoute() { Identifier = routePath } };
            page.IsDefault = pageLevel.Level == 0;

            // Nothing to do when the provider already knows this page.
            if (_pageProvider.Get(page) != null)
            {
                return;
            }

            var markup = _httpClient.DownloadString(pageLevel.Url);
            if (string.IsNullOrEmpty(markup))
            {
                return;
            }

            var downloadContext = new PageDownloadContext(siteDownloadContext, pageLevel, markup);
            foreach (var pageAnalyzer in _analyzers)
            {
                pageAnalyzer.Analyze(downloadContext);
            }

            // Persist the document's inner HTML as it stands after all analyzers have run.
            page.Html = downloadContext.HtmlDocument.DocumentNode.InnerHtml;
            _pageProvider.Add(page);
            siteDownloadContext.DownloadedPages.Add(pageLevel);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates the context for a single downloaded page and parses its markup
        /// into an <c>HtmlDocument</c>.
        /// </summary>
        /// <param name="siteDownloadContext">The site-wide download context this page belongs to.</param>
        /// <param name="pageLevel">The page level entry being downloaded.</param>
        /// <param name="pageHtml">The raw HTML text of the page; it is stored and loaded into the document.</param>
        public PageDownloadContext(SiteDownloadContext siteDownloadContext, PageLevel pageLevel, string pageHtml)
        {
            // Parse the markup up front so the document is ready as soon as the context exists.
            var parsedDocument = new HtmlDocument();
            parsedDocument.LoadHtml(pageHtml);

            SiteDownloadContext = siteDownloadContext;
            PageLevel           = pageLevel;
            PageHtml            = pageHtml;
            HtmlDocument        = parsedDocument;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Scans the page's anchors and, while the page is above the configured depth,
        /// rewrites links that resolve to an inside URL and queues the pages they point to.
        /// </summary>
        /// <remarks>
        /// Until the per-page limit (<c>Options.Pages</c>) of newly queued links is reached,
        /// each inside link is rewritten to the preview URL of the site and its target is
        /// enqueued unless it equals the current page, was already downloaded or is already
        /// queued. Once the limit is reached, remaining inside links are set to the absolute
        /// URL returned by <c>UriHelper.GetInsideAbsoluteUrl</c>.
        /// </remarks>
        /// <param name="context">The download context of the page being analyzed.</param>
        public void Analyze(PageDownloadContext context)
        {
            // Lazily evaluated: anchors that carry an href and have non-blank text.
            var anchors = from node in context.HtmlDocument.DocumentNode.Descendants()
                          where node.Name == "a"
                                && node.Attributes["href"] != null
                                && node.InnerText.Trim().Length > 0
                          select node;

            var currentLevel = context.PageLevel;
            var siteContext  = context.SiteDownloadContext;

            // Pages at or beyond the configured depth keep their links untouched.
            if (!(currentLevel.Level < siteContext.Options.Deep))
            {
                return;
            }

            int enqueued = 0;
            foreach (var anchor in anchors)
            {
                var href      = anchor.Attributes["href"];
                var insideUrl = UriHelper.GetInsideAbsoluteUrl(currentLevel.Url, href.Value);

                // NOTE(review): presumably empty means the link is not inside the site - confirm against UriHelper.
                if (string.IsNullOrEmpty(insideUrl))
                {
                    continue;
                }

                // Limit reached: set the link to the resolved absolute URL instead.
                if (!(enqueued < siteContext.Options.Pages))
                {
                    href.Value = insideUrl;
                    continue;
                }

                var targetPath = new Uri(insideUrl).AbsolutePath;
                href.Value = "/" + SiteExtensions.PREFIX_FRONT_PREVIEW_URL + siteContext.Site.AbsoluteName + targetPath;

                var candidate = new PageLevel(insideUrl, currentLevel.Level + 1);

                // Skip links back to the current page.
                if (new PageLevelComparer().Equals(currentLevel, candidate))
                {
                    continue;
                }

                // Skip pages that have already been downloaded.
                if (siteContext.DownloadedPages.Contains(candidate, new PageLevelComparer()))
                {
                    continue;
                }

                // Skip pages that are already waiting in the queue.
                if (siteContext.DownloadQueue.Contains(candidate, new PageLevelComparer()))
                {
                    continue;
                }

                siteContext.DownloadQueue.Enqueue(candidate);
                enqueued++;
            }
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Creates the event arguments carrying the given page level entry.
 /// </summary>
 /// <param name="downloadPage">The page level entry exposed through <c>DownloadedPage</c>.</param>
 public PageDownloadedEventArgs(PageLevel downloadPage)
 {
     DownloadedPage = downloadPage;
 }