Ejemplo n.º 1
0
        /// <summary>
        ///     Instructs Chrome to navigate to the given <paramref name="uri" /> and blocks the calling
        ///     thread until the page reports that it has been loaded
        /// </summary>
        /// <remarks>
        ///     For a non file <paramref name="uri" /> the wait ends on the <c>Page.lifecycleEvent</c> with the
        ///     name <c>DOMContentLoaded</c> or on <c>Page.frameStoppedLoading</c>; for a file
        ///     <paramref name="uri" /> it ends on <c>Page.loadEventFired</c>. The wait also ends when the
        ///     page connection raises its <c>Closed</c> event.
        /// </remarks>
        /// <param name="uri">The location Chrome has to navigate to</param>
        /// <param name="countdownTimer">If a <see cref="CountdownTimer"/> is set then
        /// the method will raise an <see cref="ConversionTimedOutException"/> if the
        /// <see cref="CountdownTimer"/> reaches zero before finishing navigation</param>
        /// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
        /// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
        public void NavigateTo(Uri uri, CountdownTimer countdownTimer = null)
        {
            // NOTE(review): GetAwaiter() is called without GetResult(), so the send is not waited
            // for here - confirm that fire-and-forget is intended
            _pageConnection.SendAsync(new Message {
                Method = "Page.enable"
            }).GetAwaiter();

            var message = new Message {
                Method = "Page.navigate"
            };

            message.AddParameter("url", uri.ToString());

            // NOTE(review): the event is deliberately not disposed; a handler that is still running
            // when this method returns would otherwise call Set() on a disposed handle
            var waitEvent = new ManualResetEvent(false);

            // Signals the wait event as soon as Chrome reports that the page has been loaded
            void MessageReceived(object sender, string data)
            {
                var page = PageEvent.FromJson(data);

                if (!uri.IsFile)
                {
                    switch (page.Method)
                    {
                        case "Page.lifecycleEvent" when page.Params?.Name == "DOMContentLoaded":
                        case "Page.frameStoppedLoading":
                            waitEvent.Set();
                            break;
                    }
                }
                else if (page.Method == "Page.loadEventFired")
                {
                    waitEvent.Set();
                }
            }

            // Releases the waiting thread when the connection is closed, otherwise it would wait forever
            void ConnectionClosed(object sender, EventArgs args)
            {
                waitEvent.Set();
            }

            _pageConnection.MessageReceived += MessageReceived;
            _pageConnection.Closed          += ConnectionClosed;

            try
            {
                _pageConnection.SendAsync(message).GetAwaiter();

                if (countdownTimer != null)
                {
                    waitEvent.WaitOne(countdownTimer.MillisecondsLeft);
                    if (countdownTimer.MillisecondsLeft == 0)
                    {
                        throw new ConversionTimedOutException($"The {nameof(NavigateTo)} method timedout");
                    }
                }
                else
                {
                    waitEvent.WaitOne();
                }
            }
            finally
            {
                // Always remove both handlers, also when the timeout exception is thrown, so that
                // they do not pile up on the connection with every navigation
                _pageConnection.MessageReceived -= MessageReceived;
                _pageConnection.Closed          -= ConnectionClosed;
            }

            _pageConnection.SendAsync(new Message {
                Method = "Page.disable"
            }).GetAwaiter();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Instructs Chrome to navigate to the given <paramref name="uri"/> and blocks the calling
        /// thread until the page reports that it has been loaded
        /// </summary>
        /// <remarks>
        /// For a <c>file</c> uri the wait ends on <c>Page.loadEventFired</c>. For any other uri it ends on an
        /// event with the name <c>networkIdle</c> when <paramref name="waitForNetworkIdle"/> is set and
        /// otherwise on the <c>Page.lifecycleEvent</c> with the name <c>DOMContentLoaded</c>.
        /// There is no timeout; the method keeps waiting until one of these events is received.
        /// </remarks>
        /// <param name="uri">The location Chrome has to navigate to</param>
        /// <param name="waitForNetworkIdle">Wait until all external sources are loaded</param>
        /// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
        public void NavigateTo(Uri uri, bool waitForNetworkIdle)
        {
            WebSocketSend(new Message {
                Id = MessageId, Method = "Page.enable"
            }.ToJson());

            var localFile = uri.Scheme == "file";

            var message = new Message
            {
                Id     = MessageId,
                Method = "Page.navigate"
            };

            message.AddParameter("url", uri.ToString());

            // Signalled by the message handler; replaces polling a captured flag every millisecond.
            // It is not disposed because the handler below stays subscribed and still reads IsSet.
            var loadedEvent = new ManualResetEventSlim(false);

            // NOTE(review): this handler is never removed from the event. Removing it requires storing it
            // in a variable of the event's delegate type, which is declared outside this method. Until
            // then the handler makes itself inert once the navigation has finished.
            _webSocket.MessageReceived += (sender, args) =>
            {
                if (loadedEvent.IsSet)
                {
                    return;
                }

                var page = PageEvent.FromJson(args.Message);

                if (!localFile)
                {
                    if (waitForNetworkIdle)
                    {
                        if (page.Params?.Name == "networkIdle")
                        {
                            loadedEvent.Set();
                        }
                    }
                    // Params is accessed null-conditionally, just like in the networkIdle check above
                    else if (page.Method == "Page.lifecycleEvent" && page.Params?.Name == "DOMContentLoaded")
                    {
                        loadedEvent.Set();
                    }
                }
                else if (page.Method == "Page.loadEventFired")
                {
                    loadedEvent.Set();
                }
            };

            WebSocketSend(message.ToJson());

            loadedEvent.Wait();

            WebSocketSend(new Message {
                Id = MessageId, Method = "Page.disable"
            }.ToJson());
        }
Ejemplo n.º 3
0
        /// <summary>
        ///     Instructs Chrome to navigate to the given <paramref name="uri" /> and blocks the calling
        ///     thread until the page reports that it has been loaded
        /// </summary>
        /// <remarks>
        ///     When <paramref name="urlBlacklist" /> contains items, <c>Fetch</c> is enabled and every paused
        ///     request is either continued or failed with the reason <c>BlockedByClient</c>. Requests to an
        ///     url that starts with the directory part of <paramref name="uri" /> are always continued.
        ///     The wait also ends when the page connection raises its <c>Closed</c> event.
        /// </remarks>
        /// <param name="uri">The location Chrome has to navigate to</param>
        /// <param name="countdownTimer">If a <see cref="CountdownTimer"/> is set then
        ///     the method will raise an <see cref="ConversionTimedOutException"/> if the
        ///     <see cref="CountdownTimer"/> reaches zero before finishing navigation</param>
        /// <param name="mediaLoadTimeout">When set a timeout will be started after the DomContentLoaded
        ///     event has fired. After a timeout the NavigateTo method will exit as if the page
        ///     has been completely loaded</param>
        /// <param name="urlBlacklist">A list of URL's that need to be blocked (use * as a wildcard)</param>
        /// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
        /// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
        public void NavigateTo(
            Uri uri,
            CountdownTimer countdownTimer = null,
            int? mediaLoadTimeout         = null,
            List<string> urlBlacklist     = null)
        {
            // NOTE(review): the event is deliberately not disposed; a media load timeout that is still
            // pending when this method returns calls Set() on it afterwards
            var  waitEvent            = new ManualResetEvent(false);
            Task mediaLoadTimeoutTask = null;

            // Log lines are queued by the handlers and written by this method after the wait
            var asyncLogging = new ConcurrentQueue<string>();

            // The uri up to and including the last slash
            var absoluteUri = uri.AbsoluteUri.Substring(0, uri.AbsoluteUri.LastIndexOf('/') + 1);

            // Starts the media load timeout and releases the waiting thread once it has elapsed
            async Task WaitForMediaLoadTimeoutAsync()
            {
                mediaLoadTimeoutTask = Task.Delay(mediaLoadTimeout.Value);
                await mediaLoadTimeoutTask;

                asyncLogging.Enqueue($"Media load timed out after {mediaLoadTimeout.Value} milliseconds");

                waitEvent.Set();
            }

            // Answers paused Fetch requests and signals the wait event when the page has been loaded
            async Task ProcessMessageAsync(string data)
            {
                var message = Message.FromJson(data);

                switch (message.Method)
                {
                    case "Fetch.requestPaused":
                    {
                        var fetch     = Fetch.FromJson(data);
                        var requestId = fetch.Params.RequestId;
                        var url       = fetch.Params.Request.Url;

                        if (!IsRegExMatch(urlBlacklist, url, out var matchedPattern) ||
                            url.StartsWith(absoluteUri, StringComparison.InvariantCultureIgnoreCase))
                        {
                            asyncLogging.Enqueue($"The url '{url}' has been allowed");
                            var fetchContinue = new Message {
                                Method = "Fetch.continueRequest"
                            };
                            fetchContinue.Parameters.Add("requestId", requestId);
                            _pageConnection.SendAsync(fetchContinue).GetAwaiter();
                        }
                        else
                        {
                            asyncLogging.Enqueue($"The url '{url}' has been blocked by url blacklist pattern '{matchedPattern}'");

                            var fetchFail = new Message {
                                Method = "Fetch.failRequest"
                            };
                            fetchFail.Parameters.Add("requestId", requestId);
                            // Failed, Aborted, TimedOut, AccessDenied, ConnectionClosed, ConnectionReset, ConnectionRefused,
                            // ConnectionAborted, ConnectionFailed, NameNotResolved, InternetDisconnected, AddressUnreachable,
                            // BlockedByClient, BlockedByResponse
                            fetchFail.Parameters.Add("errorReason", "BlockedByClient");
                            _pageConnection.SendAsync(fetchFail).GetAwaiter();
                        }

                        break;
                    }

                    default:
                    {
                        var page = PageEvent.FromJson(data);

                        if (!uri.IsFile)
                        {
                            switch (page.Method)
                            {
                                // The DOMContentLoaded event is fired when the document has been completely loaded and parsed, without
                                // waiting for stylesheets, images, and sub frames to finish loading (the load event can be used to
                                // detect a fully-loaded page).
                                case "Page.lifecycleEvent" when page.Params?.Name == "DOMContentLoaded":
                                    if (mediaLoadTimeout.HasValue)
                                    {
                                        await WaitForMediaLoadTimeoutAsync();
                                    }
                                    else
                                    {
                                        waitEvent.Set();
                                    }

                                    break;

                                case "Page.frameStoppedLoading":
                                    waitEvent.Set();
                                    break;
                            }
                        }
                        else
                        {
                            switch (page.Method)
                            {
                                // For a file the DOM content event only starts the media load timeout; without
                                // a timeout the load event below ends the wait
                                case "Page.domContentEventFired":
                                    if (mediaLoadTimeout.HasValue)
                                    {
                                        await WaitForMediaLoadTimeoutAsync();
                                    }

                                    break;

                                case "Page.loadEventFired":
                                    waitEvent.Set();
                                    break;
                            }
                        }

                        break;
                    }
                }
            }

            // Named handlers so that the very same delegates can be removed again; removing a newly
            // created lambda does not unsubscribe anything
            async void OnMessageReceived(object sender, string data)
            {
                await ProcessMessageAsync(data);
            }

            // Releases the waiting thread when the connection is closed, otherwise it would wait forever
            void OnConnectionClosed(object sender, EventArgs args)
            {
                waitEvent.Set();
            }

            _pageConnection.MessageReceived += OnMessageReceived;
            _pageConnection.Closed          += OnConnectionClosed;

            try
            {
                // Enable Fetch when we want to blacklist certain URL's
                if (urlBlacklist?.Count > 0)
                {
                    Logger.WriteToLog("Enabling Fetch to block url's that are in the url blacklist'");
                    _pageConnection.SendAsync(new Message {
                        Method = "Fetch.enable"
                    }).GetAwaiter();
                }

                // NOTE(review): GetAwaiter() is called without GetResult(), so the sends are not waited
                // for here - confirm that fire-and-forget is intended
                _pageConnection.SendAsync(new Message {
                    Method = "Page.enable"
                }).GetAwaiter();

                var pageNavigateMessage = new Message {
                    Method = "Page.navigate"
                };

                pageNavigateMessage.AddParameter("url", uri.ToString());

                _pageConnection.SendAsync(pageNavigateMessage).GetAwaiter();

                if (countdownTimer != null)
                {
                    waitEvent.WaitOne(countdownTimer.MillisecondsLeft);
                    if (countdownTimer.MillisecondsLeft == 0)
                    {
                        throw new ConversionTimedOutException($"The {nameof(NavigateTo)} method timed out");
                    }
                }
                else
                {
                    waitEvent.WaitOne();
                }

                // A media load timeout that has been started is always waited for, also when another
                // event already ended the wait above
                mediaLoadTimeoutTask?.Wait();

                while (asyncLogging.TryDequeue(out var logLine))
                {
                    Logger.WriteToLog(logLine);
                }
            }
            finally
            {
                // Always remove both handlers, also when the timeout exception is thrown, so that stale
                // handlers do not keep answering Fetch requests of later navigations
                _pageConnection.MessageReceived -= OnMessageReceived;
                _pageConnection.Closed          -= OnConnectionClosed;
            }

            // Disable Fetch again if it was enabled
            if (urlBlacklist?.Count > 0)
            {
                _pageConnection.SendAsync(new Message {
                    Method = "Fetch.disable"
                }).GetAwaiter();
            }

            _pageConnection.SendAsync(new Message {
                Method = "Page.disable"
            }).GetAwaiter();
        }
Ejemplo n.º 4
0
        /// <summary>
        ///     Instructs Chrome to navigate to the given <paramref name="uri" /> and blocks the calling
        ///     thread until the page reports that it has been loaded
        /// </summary>
        /// <remarks>
        ///     The wait ends when the <c>networkIdle</c> lifecycle event is received after
        ///     <c>Page.frameNavigated</c>, when the <paramref name="mediaLoadTimeout" /> elapses, when a
        ///     navigation error is reported or when the page connection raises its <c>Closed</c> event.
        ///     When <paramref name="urlBlacklist" /> contains items, <c>Fetch</c> is enabled and every paused
        ///     request is either continued or failed with the reason <c>BlockedByClient</c>. Requests to an
        ///     url that starts with the directory part of <paramref name="uri" /> are always continued.
        /// </remarks>
        /// <param name="uri">The location Chrome has to navigate to</param>
        /// <param name="countdownTimer">If a <see cref="CountdownTimer"/> is set then
        ///     the method will raise an <see cref="ConversionTimedOutException"/> if the
        ///     <see cref="CountdownTimer"/> reaches zero before finishing navigation</param>
        /// <param name="mediaLoadTimeout">When set a timeout will be started after the DomContentLoaded
        ///     event has fired. After a timeout the NavigateTo method will exit as if the page
        ///     has been completely loaded</param>
        /// <param name="urlBlacklist">A list of URL's that need to be blocked (use * as a wildcard)</param>
        /// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
        /// <exception cref="ChromeNavigationException">Raised when Chrome reports an error text for the
        ///     navigation, other than <c>net::ERR_BLOCKED_BY_CLIENT</c></exception>
        /// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
        public void NavigateTo(
            Uri uri,
            CountdownTimer countdownTimer = null,
            int? mediaLoadTimeout         = null,
            List<string> urlBlacklist     = null)
        {
            var waitEvent = new ManualResetEvent(false);
            var mediaLoadTimeoutCancellationTokenSource = new CancellationTokenSource();

            // The uri up to and including the last slash
            var absoluteUri         = uri.AbsoluteUri.Substring(0, uri.AbsoluteUri.LastIndexOf('/') + 1);
            var navigationError     = string.Empty;
            var waitForNetworkIdle  = false;
            var mediaTimeoutTaskSet = false;

            var messageHandler = new EventHandler<string>(delegate(object sender, string data)
            {
                var message = Message.FromJson(data);

                switch (message.Method)
                {
                    case "Fetch.requestPaused":
                    {
                        var fetch     = Fetch.FromJson(data);
                        var requestId = fetch.Params.RequestId;
                        var url       = fetch.Params.Request.Url;

                        if (!IsRegExMatch(urlBlacklist, url, out var matchedPattern) ||
                            url.StartsWith(absoluteUri, StringComparison.InvariantCultureIgnoreCase))
                        {
                            WriteToLog($"The url '{url}' has been allowed");
                            var fetchContinue = new Message {
                                Method = "Fetch.continueRequest"
                            };
                            fetchContinue.Parameters.Add("requestId", requestId);
                            _pageConnection.SendAsync(fetchContinue).GetAwaiter();
                        }
                        else
                        {
                            WriteToLog($"The url '{url}' has been blocked by url blacklist pattern '{matchedPattern}'");

                            var fetchFail = new Message {
                                Method = "Fetch.failRequest"
                            };
                            fetchFail.Parameters.Add("requestId", requestId);

                            // Failed, Aborted, TimedOut, AccessDenied, ConnectionClosed, ConnectionReset, ConnectionRefused,
                            // ConnectionAborted, ConnectionFailed, NameNotResolved, InternetDisconnected, AddressUnreachable,
                            // BlockedByClient, BlockedByResponse
                            fetchFail.Parameters.Add("errorReason", "BlockedByClient");
                            _pageConnection.SendAsync(fetchFail).GetAwaiter();
                        }

                        break;
                    }

                    default:
                    {
                        var page = PageEvent.FromJson(data);

                        switch (page.Method)
                        {
                            // The DOMContentLoaded event is fired when the document has been completely loaded and parsed, without
                            // waiting for stylesheets, images, and sub frames to finish loading (the load event can be used to
                            // detect a fully-loaded page).
                            case "Page.lifecycleEvent" when page.Params?.Name == "DOMContentLoaded":
                                if (mediaLoadTimeout.HasValue && !mediaTimeoutTaskSet)
                                {
                                    try
                                    {
                                        // The delay is cancelled by the cleanup below when another event
                                        // ends the wait first
                                        Task.Run(async delegate
                                        {
                                            await Task.Delay(mediaLoadTimeout.Value, mediaLoadTimeoutCancellationTokenSource.Token);
                                            WriteToLog($"Media load timed out after {mediaLoadTimeout.Value} milliseconds");
                                            waitEvent?.Set();
                                        }, mediaLoadTimeoutCancellationTokenSource.Token);

                                        mediaTimeoutTaskSet = true;
                                    }
                                    catch (ObjectDisposedException)
                                    {
                                        // The token source has already been disposed by the cleanup below,
                                        // the navigation has finished so there is nothing left to time out
                                    }
                                }

                                break;

                            case "Page.frameNavigated":
                                WriteToLog("The 'Page.frameNavigated' event has been fired, waiting for the 'Page.lifecycleEvent' with name 'networkIdle'");
                                waitForNetworkIdle = true;
                                break;

                            case "Page.lifecycleEvent" when page.Params?.Name == "networkIdle" && waitForNetworkIdle:
                                WriteToLog("The 'Page.lifecycleEvent' event with name 'networkIdle' has been fired, the page is now fully loaded");
                                waitEvent?.Set();
                                break;

                            default:
                                // Any other message is checked for an error text of the navigation
                                var pageNavigateResponse = PageNavigateResponse.FromJson(data);
                                if (!string.IsNullOrEmpty(pageNavigateResponse.Result?.ErrorText) &&
                                    !pageNavigateResponse.Result.ErrorText.Contains("net::ERR_BLOCKED_BY_CLIENT"))
                                {
                                    navigationError = $"{pageNavigateResponse.Result.ErrorText} occured when navigating to the page '{uri}'";
                                    waitEvent?.Set();
                                }

                                break;
                        }

                        break;
                    }
                }
            });

            // Releases the waiting thread when the connection is closed, otherwise it would wait forever.
            // A named handler is used so that it can be removed again in the cleanup.
            void ConnectionClosed(object sender, EventArgs args)
            {
                waitEvent?.Set();
            }

            _pageConnection.MessageReceived += messageHandler;
            _pageConnection.Closed          += ConnectionClosed;

            try
            {
                // Enable Fetch when we want to blacklist certain URL's
                if (urlBlacklist?.Count > 0)
                {
                    WriteToLog("Enabling Fetch to block url's that are in the url blacklist'");
                    _pageConnection.SendAsync(new Message {
                        Method = "Fetch.enable"
                    }).GetAwaiter();
                }

                // NOTE(review): GetAwaiter() is called without GetResult(), so the sends are not waited
                // for here - confirm that fire-and-forget is intended
                _pageConnection.SendAsync(new Message {
                    Method = "Page.enable"
                }).GetAwaiter();

                var lifecycleEventEnabledMessage = new Message {
                    Method = "Page.setLifecycleEventsEnabled"
                };

                lifecycleEventEnabledMessage.AddParameter("enabled", true);
                _pageConnection.SendAsync(lifecycleEventEnabledMessage).GetAwaiter();

                var pageNavigateMessage = new Message {
                    Method = "Page.navigate"
                };

                pageNavigateMessage.AddParameter("url", uri.ToString());

                _pageConnection.SendAsync(pageNavigateMessage).GetAwaiter();

                if (countdownTimer != null)
                {
                    waitEvent.WaitOne(countdownTimer.MillisecondsLeft);
                    if (countdownTimer.MillisecondsLeft == 0)
                    {
                        throw new ConversionTimedOutException($"The {nameof(NavigateTo)} method timed out");
                    }
                }
                else
                {
                    waitEvent.WaitOne();
                }
            }
            finally
            {
                // This cleanup also runs when the timeout exception is thrown, so that the handlers,
                // the token source and the wait event are released on every path
                _pageConnection.MessageReceived -= messageHandler;
                _pageConnection.Closed          -= ConnectionClosed;

                // Cancelling is harmless when no media load timeout was started; the source is always
                // disposed, not only when a timeout task has been set
                mediaLoadTimeoutCancellationTokenSource.Cancel();
                mediaLoadTimeoutCancellationTokenSource.Dispose();

                // Clear the captured variable before disposing so that a handler which is still running
                // sees null instead of a disposed event
                var finishedEvent = waitEvent;
                waitEvent = null;
                finishedEvent.Dispose();
            }

            var lifecycleEventDisabledMessage = new Message {
                Method = "Page.setLifecycleEventsEnabled"
            };

            lifecycleEventDisabledMessage.AddParameter("enabled", false);

            _pageConnection.SendAsync(lifecycleEventDisabledMessage).GetAwaiter();
            _pageConnection.SendAsync(new Message {
                Method = "Page.disable"
            }).GetAwaiter();

            // Disable Fetch again if it was enabled
            if (urlBlacklist?.Count > 0)
            {
                _pageConnection.SendAsync(new Message {
                    Method = "Fetch.disable"
                }).GetAwaiter();
            }

            if (!string.IsNullOrEmpty(navigationError))
            {
                WriteToLog(navigationError);
                throw new ChromeNavigationException(navigationError);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        ///     Instructs Chrome to navigate to the given <paramref name="uri" /> and blocks the calling
        ///     thread until the page reports that it has been loaded
        /// </summary>
        /// <remarks>
        ///     For a non file <paramref name="uri" /> the wait ends on the <c>DOMContentLoaded</c> lifecycle
        ///     event (after the <paramref name="mediaLoadTimeout" /> when that is set) or on
        ///     <c>Page.frameStoppedLoading</c>. For a file <paramref name="uri" /> it ends on
        ///     <c>Page.loadEventFired</c> or after the <paramref name="mediaLoadTimeout" /> that is started
        ///     on <c>Page.domContentEventFired</c>. The wait also ends when the page connection raises its
        ///     <c>Closed</c> event.
        /// </remarks>
        /// <param name="uri">The location Chrome has to navigate to</param>
        /// <param name="countdownTimer">If a <see cref="CountdownTimer"/> is set then
        ///     the method will raise an <see cref="ConversionTimedOutException"/> if the
        ///     <see cref="CountdownTimer"/> reaches zero before finishing navigation</param>
        /// <param name="mediaLoadTimeout">When set a timeout will be started after the DomContentLoaded
        /// event has fired. After a timeout the NavigateTo method will exit as if the page
        /// has been completely loaded</param>
        /// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
        /// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
        public void NavigateTo(Uri uri, CountdownTimer countdownTimer = null, int? mediaLoadTimeout = null)
        {
            // NOTE(review): GetAwaiter() is called without GetResult(), so the sends are not waited
            // for here - confirm that fire-and-forget is intended
            _pageConnection.SendAsync(new Message {
                Method = "Page.enable"
            }).GetAwaiter();

            var message = new Message {
                Method = "Page.navigate"
            };

            message.AddParameter("url", uri.ToString());

            // NOTE(review): the event is deliberately not disposed; a media load timeout that is still
            // pending when this method returns calls Set() on it afterwards
            var  waitEvent            = new ManualResetEvent(false);
            Task mediaLoadTimeoutTask = null;

            // Starts the media load timeout and releases the waiting thread once it has elapsed
            async Task WaitForMediaLoadTimeoutAsync()
            {
                mediaLoadTimeoutTask = Task.Delay(mediaLoadTimeout.Value);
                await mediaLoadTimeoutTask;

                Logger.WriteToLog($"Media load timed out after {mediaLoadTimeout.Value} milliseconds");

                waitEvent.Set();
            }

            // Signals the wait event when Chrome reports that the page has been loaded
            async Task ProcessMessageAsync(string data)
            {
                var page = PageEvent.FromJson(data);

                if (!uri.IsFile)
                {
                    switch (page.Method)
                    {
                        // The DOMContentLoaded event is fired when the document has been completely loaded and parsed, without
                        // waiting for stylesheets, images, and sub frames to finish loading (the load event can be used to
                        // detect a fully-loaded page).
                        case "Page.lifecycleEvent" when page.Params?.Name == "DOMContentLoaded":
                            if (mediaLoadTimeout.HasValue)
                            {
                                await WaitForMediaLoadTimeoutAsync();
                            }
                            else
                            {
                                waitEvent.Set();
                            }

                            break;

                        case "Page.frameStoppedLoading":
                            waitEvent.Set();
                            break;
                    }
                }
                else
                {
                    switch (page.Method)
                    {
                        // For a file the DOM content event only starts the media load timeout; without
                        // a timeout the load event below ends the wait
                        case "Page.domContentEventFired":
                            if (mediaLoadTimeout.HasValue)
                            {
                                await WaitForMediaLoadTimeoutAsync();
                            }

                            break;

                        case "Page.loadEventFired":
                            waitEvent.Set();
                            break;
                    }
                }
            }

            // Named handlers so that the very same delegates can be removed again; removing a newly
            // created lambda does not unsubscribe anything
            async void OnMessageReceived(object sender, string data)
            {
                await ProcessMessageAsync(data);
            }

            // Releases the waiting thread when the connection is closed, otherwise it would wait forever
            void OnConnectionClosed(object sender, EventArgs args)
            {
                waitEvent.Set();
            }

            _pageConnection.MessageReceived += OnMessageReceived;
            _pageConnection.Closed          += OnConnectionClosed;

            try
            {
                _pageConnection.SendAsync(message).GetAwaiter();

                if (countdownTimer != null)
                {
                    waitEvent.WaitOne(countdownTimer.MillisecondsLeft);
                    if (countdownTimer.MillisecondsLeft == 0)
                    {
                        throw new ConversionTimedOutException($"The {nameof(NavigateTo)} method timed out");
                    }
                }
                else
                {
                    waitEvent.WaitOne();
                }

                // A media load timeout that has been started is always waited for, also when another
                // event already ended the wait above
                mediaLoadTimeoutTask?.Wait();
            }
            finally
            {
                // Always remove both handlers, also when the timeout exception is thrown, so that
                // they do not pile up on the connection with every navigation
                _pageConnection.MessageReceived -= OnMessageReceived;
                _pageConnection.Closed          -= OnConnectionClosed;
            }

            _pageConnection.SendAsync(new Message {
                Method = "Page.disable"
            }).GetAwaiter();
        }