Exemplo n.º 1
0
        /// <summary>
        /// Processes a single page and then recursively processes every internal link
        /// of that page that has not been visited yet.
        /// </summary>
        /// <param name="page">The page to process; its <c>Url</c> is used to generate the flat page.</param>
        /// <param name="siteDictionary">Shared dictionary passed on to the merge step and to recursive calls.</param>
        private void CrawlWebPage(Page page, SiteDictionary siteDictionary)
        {
            Console.WriteLine("Processing Page {0}", page.Url);

            // Generate the flat view of this page and fold it into the page and the site dictionary.
            MergeFlatPage(page, GenerateFlatPage(page.Url), siteDictionary);

            // NOTE(review): this is plain recursion, so the call depth follows the length of the
            // chain of unvisited links being followed.
            foreach (var child in page.InternalLinks)
            {
                if (child.Visited)
                {
                    continue;
                }

                CrawlWebPage(child, siteDictionary);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Crawls a site starting from <paramref name="url"/> and returns a summary of what was found.
        /// </summary>
        /// <param name="url">URL of the root page; also handed to the web page parser as the root URL.</param>
        /// <returns>
        /// A <c>SiteMap</c> holding the root page and the entry counts of the site dictionary's
        /// external link, image and internal link collections after the crawl.
        /// </returns>
        public SiteMap CrawlWebSite(string url)
        {
            var root = new Page();
            root.Url = url;

            // NOTE(review): the root page is not added to crawlState.Links before crawling, so a
            // page that links back to the root URL appears to get a separate Page instance for it
            // - confirm whether that is intended.
            var crawlState = new SiteDictionary();

            _webPageParser.SetRootUrl(url);

            CrawlWebPage(root, crawlState);

            var siteMap = new SiteMap();
            siteMap.Root = root;
            siteMap.TotalExternalLinks = crawlState.ExternalLinks.Count;
            siteMap.TotalImages = crawlState.Images.Count;
            siteMap.TotalInternalLinks = crawlState.Links.Count;
            return siteMap;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Attaches the internal links, images and external links listed in
        /// <paramref name="flatPage"/> to <paramref name="page"/>, then marks the page as visited.
        /// </summary>
        /// <remarks>
        /// Each URL is looked up in the matching collection of <paramref name="siteDictionary"/>.
        /// When an entry already exists that instance is reused; otherwise a new object is created
        /// for the URL and added to the dictionary before being attached to the page.
        /// </remarks>
        /// <param name="page">The page that receives the merged items and is flagged as visited.</param>
        /// <param name="flatPage">Source of the link, image and external link entries; each entry's <c>Value</c> is used as the URL.</param>
        /// <param name="siteDictionary">Site-wide lookup of pages, images and external links keyed by URL.</param>
        private void MergeFlatPage(Page page, FlatPage flatPage, SiteDictionary siteDictionary)
        {
            // Internal links.
            foreach (var linkEntry in flatPage.Links)
            {
                string linkUrl = linkEntry.Value;
                Page linkedPage;

                if (!siteDictionary.Links.TryGetValue(linkUrl, out linkedPage))
                {
                    // First time this URL is seen: create the page and register it.
                    linkedPage = new Page { Url = linkUrl };
                    siteDictionary.Links.Add(linkUrl, linkedPage);
                }

                page.InternalLinks.Add(linkedPage);
            }

            // Images.
            foreach (var imageEntry in flatPage.Images)
            {
                string imageUrl = imageEntry.Value;
                Image pageImage;

                if (!siteDictionary.Images.TryGetValue(imageUrl, out pageImage))
                {
                    pageImage = new Image { Url = imageUrl };
                    siteDictionary.Images.Add(imageUrl, pageImage);
                }

                page.Images.Add(pageImage);
            }

            // External links.
            foreach (var externalEntry in flatPage.ExternalLinks)
            {
                string externalUrl = externalEntry.Value;
                ExternalLink externalLink;

                if (!siteDictionary.ExternalLinks.TryGetValue(externalUrl, out externalLink))
                {
                    externalLink = new ExternalLink { Url = externalUrl };
                    siteDictionary.ExternalLinks.Add(externalUrl, externalLink);
                }

                page.ExternalLinks.Add(externalLink);
            }

            // Flag the page so the crawler does not process it again.
            page.Visited = true;
        }