예제 #1
0
        // by Noseratio - http://stackoverflow.com/a/22262976/1768303

        /// <summary>
        /// Main logic: navigates to each URL in turn using a single WebBrowser created
        /// through a MessageLoopApartment, writing each URL and the HTML returned by
        /// NavigateAsync to the console.
        /// </summary>
        /// <param name="urls">URLs to navigate to; they are processed sequentially.</param>
        /// <param name="token">
        /// Caller's cancellation token. Each navigation uses a token linked to this one
        /// that is additionally cancelled after 30 seconds.
        /// </param>
        static async Task ScrapSitesAsync(string[] urls, CancellationToken token)
        {
            using (var apartment = new MessageLoopApartment())
            {
                // create WebBrowser inside MessageLoopApartment
                var webBrowser = apartment.Invoke(() => new WebBrowser());
                try
                {
                    foreach (var url in urls)
                    {
                        Console.WriteLine("URL:\n" + url);

                        // cancel in 30s or when the main token is signalled;
                        // dispose the linked source after each navigation so its timer and
                        // its registration on the caller's token do not pile up per URL
                        using (var navigationCts = CancellationTokenSource.CreateLinkedTokenSource(token))
                        {
                            navigationCts.CancelAfter(TimeSpan.FromSeconds(30));
                            var navigationToken = navigationCts.Token;

                            // run the navigation task inside MessageLoopApartment
                            string html = await apartment.Run(() =>
                                webBrowser.NavigateAsync(url, navigationToken), navigationToken);

                            Console.WriteLine("HTML:\n" + html);
                        }
                    }
                }
                finally
                {
                    // dispose of WebBrowser inside MessageLoopApartment
                    apartment.Invoke(() => webBrowser.Dispose());
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Navigate to a site and get a snapshot of its DOM HTML.
        /// </summary>
        /// <param name="url">Address passed to NavigateAsync.</param>
        /// <param name="timeout">
        /// Delay in milliseconds passed to CancelAfter once the semaphore has been acquired
        /// (time spent waiting for the semaphore is not counted). Pass
        /// <see cref="Timeout.Infinite"/> to skip the timeout.
        /// </param>
        /// <param name="token">Optional caller token; it is linked with <c>_cts.Token</c>.</param>
        /// <returns>The string produced by NavigateAsync for <paramref name="url"/>.</returns>
        public async Task <string> ScrapSiteAsync(string url, int timeout, CancellationToken token = default(CancellationToken))
        {
            // dispose the linked source on every exit path (including a cancelled semaphore wait)
            // so its registrations on token/_cts.Token and its timer are released
            using (var navigationCts = CancellationTokenSource.CreateLinkedTokenSource(token, _cts.Token))
            {
                var combinedToken = navigationCts.Token;

                // we have a limited number of WebBrowser objects available, so await the semaphore
                await _semaphore.WaitAsync(combinedToken);

                try
                {
                    if (timeout != Timeout.Infinite)
                    {
                        navigationCts.CancelAfter(timeout);
                    }

                    // run the main logic via _apartment (the STA thread, per the original author's note)
                    return await _apartment.Run(async () =>
                    {
                        // acquire the 1st available WebBrowser from the pool
                        var webBrowser = _browsers.Dequeue();
                        try
                        {
                            var task = webBrowser.NavigateAsync(url, combinedToken);
                            _pendingTasks.Add(task);     // register the pending task
                            try
                            {
                                return await task;
                            }
                            finally
                            {
                                // unregister the completed task
                                _pendingTasks.Remove(task);
                            }
                        }
                        finally
                        {
                            // return the WebBrowser to the pool
                            _browsers.Enqueue(webBrowser);
                        }
                    }, combinedToken);
                }
                finally
                {
                    _semaphore.Release();
                }
            }
        }