Esempio n. 1
0
        /// <summary>
        /// Resets the crawl state, crawls starting from <paramref name="uri"/>, and hands the
        /// collected page lists back to the caller.
        /// </summary>
        /// <param name="uri">Address of the page the crawl starts from. It is recorded as already crawled.</param>
        /// <param name="internalPages">
        /// Receives the list of crawled page addresses (the same list instance held by this object,
        /// not a copy).
        /// </param>
        /// <param name="otherPages">
        /// Receives the external-pages list (the same list instance held by this object, not a copy).
        /// It is created empty here; whatever fills it is outside this method.
        /// </param>
        public void StartCrawl(string uri, out List<string> internalPages, out List<string> otherPages)
        {
            // Fresh state per crawl; the start address is seeded so it is never queued again.
            this.crawledPages = new List<string> { uri };
            this.externalPages = new List<string>();

            // Start from the address the caller asked for. This used to be a hard-coded URL,
            // which made the uri parameter ineffective for anything but logging.
            var startPage = new WebPage(uri);

            Console.WriteLine("Starting Crawl with: {0}", uri);
            this.Crawl(startPage);

            internalPages = this.crawledPages;
            otherPages = this.externalPages;
        }
Esempio n. 2
0
        /// <summary>
        /// Crawls <paramref name="page"/>: collects its filtered URIs, records the ones not seen
        /// before as crawled, and then crawls each of those pages in parallel.
        /// </summary>
        /// <param name="page">The page whose links are followed.</param>
        private void Crawl(WebPage page)
        {
            Console.WriteLine("Crawling to: {0}", page.Address);

            var uris = page.GetFilteredUris(this.UriFilter);
            var unseenUris = new List<string>();

            // This method is re-entered concurrently through Parallel.ForEach below, and
            // List<T> is not safe for concurrent writes. The "seen before?" check and the
            // "mark as seen" write must therefore happen atomically; otherwise two workers
            // can both queue the same page or corrupt the list.
            var visited = this.crawledPages;
            lock (visited)
            {
                foreach (var uri in uris)
                {
                    // Skip pages we have already been to, to avoid a circular loop.
                    if (visited.Contains(uri))
                    {
                        continue;
                    }

                    visited.Add(uri);
                    unseenUris.Add(uri);
                }
            }

            // Pages are constructed outside the lock so other workers are not blocked
            // while WebPage instances are being created.
            var pageList = new List<WebPage>(unseenUris.Count);
            foreach (var uri in unseenUris)
            {
                pageList.Add(new WebPage(uri));
            }

            Parallel.ForEach(pageList, this.Crawl);
        }