Example #1
        public void Crawl(int depth, string url)
        {
            // Stop recursing once the configured depth limit is exceeded.
            if (depth > _depth)
            {
                return;
            }

            var node   = GetNode(url);
            var status = OpenPage(node);

            // Mark the node as visited and record the HTTP status of the page.
            node.MakeVisited()
                .SetStatusCode(status);
            _pages[url] = node;

            // Collect all link targets from the page, normalize them,
            // and drop the ones that point back to already-known pages.
            var urls = GetAttributes("a", "href").ToList();
            urls = Sanitizer.SanitizeUrls(urls, _baseUrl, _pages).ToList();

            var urlsToDelete = AddParentNode(url, urls);
            urls = urls.Except(urlsToDelete).ToList();

            // Crawl the remaining links with at most two parallel workers.
            // The lambda parameter is named childUrl so it does not shadow
            // the method's url parameter, which would be a compile error in C#.
            Parallel.ForEach(
                urls,
                new ParallelOptions { MaxDegreeOfParallelism = 2 },
                childUrl =>
                {
                    if (_pages[childUrl].GetVisited())
                    {
                        return;
                    }
                    if (!childUrl.StartsWith(_baseUrl))
                    {
                        return;
                    }
                    Crawl(depth + 1, childUrl);
                });
        }
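
One detail worth noting: _pages is written to from the method body while the same method is re-entered concurrently via Parallel.ForEach, so a plain Dictionary would race. A minimal sketch of declaring the shared map with a thread-safe collection, assuming the node type is called PageNode (the real type name is not shown in the snippet):

        using System.Collections.Concurrent;

        // Hypothetical field declaration: PageNode stands in for the node type
        // returned by GetNode, which this snippet does not show. Using a
        // ConcurrentDictionary makes the "_pages[url] = node" assignments from
        // parallel Crawl calls safe without extra locking.
        private readonly ConcurrentDictionary<string, PageNode> _pages =
            new ConcurrentDictionary<string, PageNode>();

With this declaration the indexer reads and writes in Crawl stay as they are; ConcurrentDictionary supports the same _pages[key] syntax while guaranteeing thread-safe access.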