Beispiel #1
0
        static void Main(string[] args)
        {
            CrawlConfiguration config = new CrawlConfiguration();

            config.MaxConcurrentThreads = 1; // Web Extractor is not currently thread-safe.

            // Create the PhantomJS instance. This will spawn a new PhantomJS process using phantomjs.exe.
            // Make sure to dispose this instance or you will have a zombie process!
            IWebDriver driver = CreatePhantomJsDriver(config);

            // Create the content extractor that uses PhantomJS.
            IWebContentExtractor extractor = new JavaScriptContentExtractor(driver);

            // Create a PageRequester that will use the extractor.
            IPageRequester requester = new PageRequester(config, extractor);

            using (IWebCrawler crawler = new PoliteWebCrawler(config, null, null, null, requester, null, null, null, null)) {
                crawler.PageCrawlCompleted += OnPageCrawlCompleted;

                CrawlResult result = crawler.Crawl(new Uri("http://wvtesting2.com/"));
                if (result.ErrorOccurred)
                {
                    Console.WriteLine("Crawl of {0} completed with error: {1}", result.RootUri.AbsoluteUri, result.ErrorException.Message);
                }
                else
                {
                    Console.WriteLine("Crawl of {0} completed without error.", result.RootUri.AbsoluteUri);
                }
            }

            Console.Read();
        }
Beispiel #2
0
        static void Main(string[] args)
        {
            CrawlConfiguration config = new CrawlConfiguration();
            config.MaxConcurrentThreads = 1; // Web Extractor is not currently thread-safe.

            // Create the PhantomJS instance. This will spawn a new PhantomJS process using phantomjs.exe.
            // Make sure to dispose this instance or you will have a zombie process!
            IWebDriver driver = CreatePhantomJsDriver(config);

            // Create the content extractor that uses PhantomJS.
            IWebContentExtractor extractor = new JavaScriptContentExtractor(driver);

            // Create a PageRequester that will use the extractor.
            IPageRequester requester = new PageRequester(config, extractor);

            using (IWebCrawler crawler = new PoliteWebCrawler(config, null, null, null, requester, null, null, null, null)) {
                crawler.PageCrawlCompleted += OnPageCrawlCompleted;

                CrawlResult result = crawler.Crawl(new Uri("http://wvtesting2.com/"));
                if (result.ErrorOccurred)
                    Console.WriteLine("Crawl of {0} completed with error: {1}", result.RootUri.AbsoluteUri, result.ErrorException.Message);
                else
                    Console.WriteLine("Crawl of {0} completed without error.", result.RootUri.AbsoluteUri);
            }

            Console.Read();
        }