/// <summary>
///     Instructs Chrome to navigate to the given <paramref name="uri" /> and blocks the calling
///     thread until Chrome reports that the page has loaded, the page connection is closed or the
///     <paramref name="countdownTimer" /> runs out
/// </summary>
/// <param name="uri">The address to navigate to</param>
/// <param name="countdownTimer">If a <see cref="CountdownTimer"/> is set then
/// the method will raise an <see cref="ConversionTimedOutException"/> if the
/// <see cref="CountdownTimer"/> reaches zero before finishing navigation</param>
/// <exception cref="ArgumentNullException">Raised when <paramref name="uri"/> is <c>null</c></exception>
/// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
/// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
public void NavigateTo(Uri uri, CountdownTimer countdownTimer = null)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    // NOTE(review): GetAwaiter() on its own neither waits for nor observes the result of SendAsync,
    // so every send in this method is fire-and-forget - confirm that this is intended
    _pageConnection.SendAsync(new Message { Method = "Page.enable" }).GetAwaiter();

    var message = new Message { Method = "Page.navigate" };
    message.AddParameter("url", uri.ToString());

    var waitEvent = new ManualResetEvent(false);

    // An event that is already being dispatched can still arrive after the clean-up below,
    // signalling a disposed handle must not blow up on the connection's thread
    void Signal()
    {
        try
        {
            waitEvent.Set();
        }
        catch (ObjectDisposedException)
        {
            // Navigation has already been completed or abandoned
        }
    }

    void MessageReceived(object sender, string data)
    {
        var page = PageEvent.FromJson(data);

        if (!uri.IsFile)
        {
            switch (page.Method)
            {
                case "Page.lifecycleEvent" when page.Params?.Name == "DOMContentLoaded":
                case "Page.frameStoppedLoading":
                    Signal();
                    break;
            }
        }
        else if (page.Method == "Page.loadEventFired")
        {
            Signal();
        }
    }

    // Named so that it can be removed again; a closed connection will never deliver a load event
    void ConnectionClosed(object sender, EventArgs args)
    {
        Signal();
    }

    _pageConnection.MessageReceived += MessageReceived;
    _pageConnection.Closed += ConnectionClosed;

    try
    {
        _pageConnection.SendAsync(message).GetAwaiter();

        if (countdownTimer != null)
        {
            // WaitOne returns false when the timeout elapsed without the event being signalled
            var signalled = waitEvent.WaitOne(countdownTimer.MillisecondsLeft);

            if (!signalled || countdownTimer.MillisecondsLeft == 0)
            {
                throw new ConversionTimedOutException($"The {nameof(NavigateTo)} method timedout");
            }
        }
        else
        {
            waitEvent.WaitOne();
        }
    }
    finally
    {
        // Also runs when the time-out exception is thrown so that no handler stays behind
        _pageConnection.MessageReceived -= MessageReceived;
        _pageConnection.Closed -= ConnectionClosed;
        waitEvent.Dispose();
    }

    _pageConnection.SendAsync(new Message { Method = "Page.disable" }).GetAwaiter();
}
/// <summary>
///     Instructs Chrome to navigate to the given <paramref name="uri"/> and blocks the calling
///     thread until a message is received that marks the page as loaded
/// </summary>
/// <param name="uri">The address to navigate to</param>
/// <param name="waitForNetworkIdle">Wait until all external sources are loaded. Only used when
/// <paramref name="uri"/> does not have the <c>file</c> scheme; local files always wait for
/// the <c>Page.loadEventFired</c> message</param>
/// <exception cref="ArgumentNullException">Raised when <paramref name="uri"/> is <c>null</c></exception>
/// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
public void NavigateTo(Uri uri, bool waitForNetworkIdle)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    WebSocketSend(new Message { Id = MessageId, Method = "Page.enable" }.ToJson());

    var localFile = uri.Scheme == "file";

    var message = new Message { Id = MessageId, Method = "Page.navigate" };
    message.AddParameter("url", uri.ToString());

    // Written by the message handler and polled by the calling thread, hence the Volatile access
    var loaded = false;

    // NOTE(review): an anonymous handler cannot be removed from the event again. It is made inert
    // once the page has loaded so that it stops parsing traffic, but it does stay subscribed
    _webSocket.MessageReceived += (sender, args) =>
    {
        if (Volatile.Read(ref loaded))
        {
            return;
        }

        var page = PageEvent.FromJson(args.Message);
        bool finished;

        if (localFile)
        {
            finished = page.Method == "Page.loadEventFired";
        }
        else if (waitForNetworkIdle)
        {
            finished = page.Params?.Name == "networkIdle";
        }
        else
        {
            // Params is not present on every message, so it has to be accessed null-safe
            finished = page.Method == "Page.lifecycleEvent" && page.Params?.Name == "DOMContentLoaded";
        }

        if (finished)
        {
            Volatile.Write(ref loaded, true);
        }
    };

    WebSocketSend(message.ToJson());

    // NOTE(review): there is no time-out here, when the load message never arrives this never returns
    while (!Volatile.Read(ref loaded))
    {
        Thread.Sleep(1);
    }

    WebSocketSend(new Message { Id = MessageId, Method = "Page.disable" }.ToJson());
}
/// <summary>
///     Instructs Chrome to navigate to the given <paramref name="uri" /> and blocks the calling
///     thread until the page is considered loaded, the media load timeout expires, the page
///     connection is closed or the <paramref name="countdownTimer" /> runs out
/// </summary>
/// <param name="uri">The address to navigate to</param>
/// <param name="countdownTimer">If a <see cref="CountdownTimer"/> is set then
/// the method will raise an <see cref="ConversionTimedOutException"/> if the
/// <see cref="CountdownTimer"/> reaches zero before finishing navigation</param>
/// <param name="mediaLoadTimeout">When set a timeout will be started after the DomContentLoaded
/// event has fired. After a timeout the NavigateTo method will exit as if the page
/// has been completely loaded</param>
/// <param name="urlBlacklist">A list of URL's that need to be blocked (use * as a wildcard).
/// URL's that start with the folder part of <paramref name="uri"/> are always allowed</param>
/// <exception cref="ArgumentNullException">Raised when <paramref name="uri"/> is <c>null</c></exception>
/// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
/// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
public void NavigateTo(
    Uri uri,
    CountdownTimer countdownTimer = null,
    int? mediaLoadTimeout = null,
    List<string> urlBlacklist = null)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    var waitEvent = new ManualResetEvent(false);
    var mediaLoadTimeoutCancellationTokenSource = new CancellationTokenSource();
    // Taken up front so that a late handler never has to touch the source after it has been disposed
    var mediaLoadTimeoutToken = mediaLoadTimeoutCancellationTokenSource.Token;
    var mediaLoadTimeoutStarted = 0;
    // Messages produced by the handlers are queued and written by the calling thread afterwards
    var asyncLogging = new ConcurrentQueue<string>();
    var absoluteUri = uri.AbsoluteUri.Substring(0, uri.AbsoluteUri.LastIndexOf('/') + 1);
    var fetchEnabled = urlBlacklist?.Count > 0;

    // An event that is already being dispatched can still arrive after the clean-up below,
    // signalling a disposed handle must not blow up on the connection's thread
    void Signal()
    {
        try
        {
            waitEvent.Set();
        }
        catch (ObjectDisposedException)
        {
            // Navigation has already been completed or abandoned
        }
    }

    // Releases the waiting thread when the page did not finish loading within the media load timeout
    async Task RunMediaLoadTimeoutAsync()
    {
        try
        {
            await Task.Delay(mediaLoadTimeout.Value, mediaLoadTimeoutToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // The navigation ended before the timeout did, nothing left to do
            return;
        }

        asyncLogging.Enqueue($"Media load timed out after {mediaLoadTimeout.Value} milliseconds");
        Signal();
    }

    void StartMediaLoadTimeout()
    {
        // Start the timeout only once, even when the triggering event is delivered more than once
        if (Interlocked.Exchange(ref mediaLoadTimeoutStarted, 1) != 0)
        {
            return;
        }

        // Runs in the background; the message handler must not be held up by the delay
        _ = RunMediaLoadTimeoutAsync();
    }

    void MessageReceived(object sender, string data)
    {
        var received = Message.FromJson(data);

        switch (received.Method)
        {
            case "Fetch.requestPaused":
            {
                var fetch = Fetch.FromJson(data);
                var requestId = fetch.Params.RequestId;
                var url = fetch.Params.Request.Url;

                if (!IsRegExMatch(urlBlacklist, url, out var matchedPattern) ||
                    url.StartsWith(absoluteUri, StringComparison.InvariantCultureIgnoreCase))
                {
                    asyncLogging.Enqueue($"The url '{url}' has been allowed");
                    var fetchContinue = new Message { Method = "Fetch.continueRequest" };
                    fetchContinue.Parameters.Add("requestId", requestId);
                    _pageConnection.SendAsync(fetchContinue).GetAwaiter();
                }
                else
                {
                    asyncLogging.Enqueue($"The url '{url}' has been blocked by url blacklist pattern '{matchedPattern}'");
                    var fetchFail = new Message { Method = "Fetch.failRequest" };
                    fetchFail.Parameters.Add("requestId", requestId);

                    // Failed, Aborted, TimedOut, AccessDenied, ConnectionClosed, ConnectionReset, ConnectionRefused,
                    // ConnectionAborted, ConnectionFailed, NameNotResolved, InternetDisconnected, AddressUnreachable,
                    // BlockedByClient, BlockedByResponse
                    fetchFail.Parameters.Add("errorReason", "BlockedByClient");
                    _pageConnection.SendAsync(fetchFail).GetAwaiter();
                }

                break;
            }

            default:
            {
                var page = PageEvent.FromJson(data);

                if (!uri.IsFile)
                {
                    switch (page.Method)
                    {
                        // The DOMContentLoaded event is fired when the document has been completely loaded and parsed, without
                        // waiting for stylesheets, images, and sub frames to finish loading (the load event can be used to
                        // detect a fully-loaded page).
                        case "Page.lifecycleEvent" when page.Params?.Name == "DOMContentLoaded":
                            if (mediaLoadTimeout.HasValue)
                            {
                                StartMediaLoadTimeout();
                            }
                            else
                            {
                                Signal();
                            }

                            break;

                        case "Page.frameStoppedLoading":
                            Signal();
                            break;
                    }
                }
                else
                {
                    switch (page.Method)
                    {
                        // For local files the media load timeout starts at domContentEventFired
                        // and the page counts as loaded at loadEventFired
                        case "Page.domContentEventFired":
                            if (mediaLoadTimeout.HasValue)
                            {
                                StartMediaLoadTimeout();
                            }

                            break;

                        case "Page.loadEventFired":
                            Signal();
                            break;
                    }
                }

                break;
            }
        }
    }

    // Named so that it can be removed again; a closed connection will never deliver a load event
    void ConnectionClosed(object sender, EventArgs args)
    {
        Signal();
    }

    _pageConnection.MessageReceived += MessageReceived;
    _pageConnection.Closed += ConnectionClosed;

    try
    {
        // Enable Fetch when we want to blacklist certain URL's
        if (fetchEnabled)
        {
            Logger.WriteToLog("Enabling Fetch to block url's that are in the url blacklist'");
            // NOTE(review): GetAwaiter() on its own neither waits for nor observes the result of
            // SendAsync, so the sends in this method are fire-and-forget - confirm that this is intended
            _pageConnection.SendAsync(new Message { Method = "Fetch.enable" }).GetAwaiter();
        }

        _pageConnection.SendAsync(new Message { Method = "Page.enable" }).GetAwaiter();

        var pageNavigateMessage = new Message { Method = "Page.navigate" };
        pageNavigateMessage.AddParameter("url", uri.ToString());
        _pageConnection.SendAsync(pageNavigateMessage).GetAwaiter();

        if (countdownTimer != null)
        {
            // WaitOne returns false when the timeout elapsed without the event being signalled
            var signalled = waitEvent.WaitOne(countdownTimer.MillisecondsLeft);

            if (!signalled || countdownTimer.MillisecondsLeft == 0)
            {
                throw new ConversionTimedOutException($"The {nameof(NavigateTo)} method timed out");
            }
        }
        else
        {
            waitEvent.WaitOne();
        }
    }
    finally
    {
        // Also runs when the time-out exception is thrown so that no handler or timer stays behind
        _pageConnection.MessageReceived -= MessageReceived;
        _pageConnection.Closed -= ConnectionClosed;

        // Stops a media load timeout that is still pending instead of waiting for it to run out
        mediaLoadTimeoutCancellationTokenSource.Cancel();
        mediaLoadTimeoutCancellationTokenSource.Dispose();
        waitEvent.Dispose();

        while (asyncLogging.TryDequeue(out var logLine))
        {
            Logger.WriteToLog(logLine);
        }
    }

    // Disable Fetch again if it was enabled
    if (fetchEnabled)
    {
        _pageConnection.SendAsync(new Message { Method = "Fetch.disable" }).GetAwaiter();
    }

    _pageConnection.SendAsync(new Message { Method = "Page.disable" }).GetAwaiter();
}
/// <summary>
///     Instructs Chrome to navigate to the given <paramref name="uri" /> and blocks the calling
///     thread until the <c>networkIdle</c> lifecycle event is received after a frame navigation,
///     the media load timeout expires, a navigation error is reported, the page connection is
///     closed or the <paramref name="countdownTimer" /> runs out
/// </summary>
/// <param name="uri">The address to navigate to</param>
/// <param name="countdownTimer">If a <see cref="CountdownTimer"/> is set then
/// the method will raise an <see cref="ConversionTimedOutException"/> if the
/// <see cref="CountdownTimer"/> reaches zero before finishing navigation</param>
/// <param name="mediaLoadTimeout">When set a timeout will be started after the DomContentLoaded
/// event has fired. After a timeout the NavigateTo method will exit as if the page
/// has been completely loaded</param>
/// <param name="urlBlacklist">A list of URL's that need to be blocked (use * as a wildcard).
/// URL's that start with the folder part of <paramref name="uri"/> are always allowed</param>
/// <exception cref="ArgumentNullException">Raised when <paramref name="uri"/> is <c>null</c></exception>
/// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
/// <exception cref="ChromeNavigationException">Raised when Chrome reports an error text for the navigation,
/// other than <c>net::ERR_BLOCKED_BY_CLIENT</c></exception>
/// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
public void NavigateTo(
    Uri uri,
    CountdownTimer countdownTimer = null,
    int? mediaLoadTimeout = null,
    List<string> urlBlacklist = null)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    var waitEvent = new ManualResetEvent(false);
    var mediaLoadTimeoutCancellationTokenSource = new CancellationTokenSource();
    // Taken up front so that a late handler never has to touch the source after it has been disposed
    var mediaLoadTimeoutToken = mediaLoadTimeoutCancellationTokenSource.Token;
    var mediaLoadTimeoutStarted = 0;
    var absoluteUri = uri.AbsoluteUri.Substring(0, uri.AbsoluteUri.LastIndexOf('/') + 1);
    var navigationError = string.Empty;
    var waitForNetworkIdle = false;
    var fetchEnabled = urlBlacklist?.Count > 0;

    // An event that is already being dispatched can still arrive after the clean-up below,
    // signalling a disposed handle must not blow up on the connection's thread
    void Signal()
    {
        try
        {
            waitEvent.Set();
        }
        catch (ObjectDisposedException)
        {
            // Navigation has already been completed or abandoned
        }
    }

    // Releases the waiting thread when the page did not finish loading within the media load timeout
    async Task RunMediaLoadTimeoutAsync()
    {
        try
        {
            await Task.Delay(mediaLoadTimeout.Value, mediaLoadTimeoutToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // The navigation ended before the timeout did, nothing left to do
            return;
        }

        WriteToLog($"Media load timed out after {mediaLoadTimeout.Value} milliseconds");
        Signal();
    }

    void StartMediaLoadTimeout()
    {
        // Start the timeout only once, even when DOMContentLoaded is delivered more than once
        if (Interlocked.Exchange(ref mediaLoadTimeoutStarted, 1) != 0)
        {
            return;
        }

        // Runs in the background; the message handler must not be held up by the delay
        _ = RunMediaLoadTimeoutAsync();
    }

    void MessageReceived(object sender, string data)
    {
        var received = Message.FromJson(data);

        switch (received.Method)
        {
            case "Fetch.requestPaused":
            {
                var fetch = Fetch.FromJson(data);
                var requestId = fetch.Params.RequestId;
                var url = fetch.Params.Request.Url;

                if (!IsRegExMatch(urlBlacklist, url, out var matchedPattern) ||
                    url.StartsWith(absoluteUri, StringComparison.InvariantCultureIgnoreCase))
                {
                    WriteToLog($"The url '{url}' has been allowed");
                    var fetchContinue = new Message { Method = "Fetch.continueRequest" };
                    fetchContinue.Parameters.Add("requestId", requestId);
                    _pageConnection.SendAsync(fetchContinue).GetAwaiter();
                }
                else
                {
                    WriteToLog($"The url '{url}' has been blocked by url blacklist pattern '{matchedPattern}'");
                    var fetchFail = new Message { Method = "Fetch.failRequest" };
                    fetchFail.Parameters.Add("requestId", requestId);

                    // Failed, Aborted, TimedOut, AccessDenied, ConnectionClosed, ConnectionReset, ConnectionRefused,
                    // ConnectionAborted, ConnectionFailed, NameNotResolved, InternetDisconnected, AddressUnreachable,
                    // BlockedByClient, BlockedByResponse
                    fetchFail.Parameters.Add("errorReason", "BlockedByClient");
                    _pageConnection.SendAsync(fetchFail).GetAwaiter();
                }

                break;
            }

            default:
            {
                var page = PageEvent.FromJson(data);

                switch (page.Method)
                {
                    // The DOMContentLoaded event is fired when the document has been completely loaded and parsed, without
                    // waiting for stylesheets, images, and sub frames to finish loading (the load event can be used to
                    // detect a fully-loaded page).
                    case "Page.lifecycleEvent" when page.Params?.Name == "DOMContentLoaded":
                        if (mediaLoadTimeout.HasValue)
                        {
                            StartMediaLoadTimeout();
                        }

                        break;

                    case "Page.frameNavigated":
                        WriteToLog("The 'Page.frameNavigated' event has been fired, waiting for the 'Page.lifecycleEvent' with name 'networkIdle'");
                        waitForNetworkIdle = true;
                        break;

                    // A networkIdle that arrives before the frame has navigated is ignored
                    case "Page.lifecycleEvent" when page.Params?.Name == "networkIdle" && waitForNetworkIdle:
                        WriteToLog("The 'Page.lifecycleEvent' event with name 'networkIdle' has been fired, the page is now fully loaded");
                        Signal();
                        break;

                    default:
                        // Every other message is inspected for an error text in its result
                        var pageNavigateResponse = PageNavigateResponse.FromJson(data);
                        if (!string.IsNullOrEmpty(pageNavigateResponse.Result?.ErrorText) &&
                            !pageNavigateResponse.Result.ErrorText.Contains("net::ERR_BLOCKED_BY_CLIENT"))
                        {
                            navigationError = $"{pageNavigateResponse.Result.ErrorText} occured when navigating to the page '{uri}'";
                            Signal();
                        }

                        break;
                }

                break;
            }
        }
    }

    // Named so that it can be removed again; a closed connection will never deliver a load event
    void ConnectionClosed(object sender, EventArgs args)
    {
        Signal();
    }

    _pageConnection.MessageReceived += MessageReceived;
    _pageConnection.Closed += ConnectionClosed;

    try
    {
        // Enable Fetch when we want to blacklist certain URL's
        if (fetchEnabled)
        {
            WriteToLog("Enabling Fetch to block url's that are in the url blacklist'");
            // NOTE(review): GetAwaiter() on its own neither waits for nor observes the result of
            // SendAsync, so the sends in this method are fire-and-forget - confirm that this is intended
            _pageConnection.SendAsync(new Message { Method = "Fetch.enable" }).GetAwaiter();
        }

        _pageConnection.SendAsync(new Message { Method = "Page.enable" }).GetAwaiter();

        var lifecycleEventEnabledMessage = new Message { Method = "Page.setLifecycleEventsEnabled" };
        lifecycleEventEnabledMessage.AddParameter("enabled", true);
        _pageConnection.SendAsync(lifecycleEventEnabledMessage).GetAwaiter();

        var pageNavigateMessage = new Message { Method = "Page.navigate" };
        pageNavigateMessage.AddParameter("url", uri.ToString());
        _pageConnection.SendAsync(pageNavigateMessage).GetAwaiter();

        if (countdownTimer != null)
        {
            // WaitOne returns false when the timeout elapsed without the event being signalled
            var signalled = waitEvent.WaitOne(countdownTimer.MillisecondsLeft);

            if (!signalled || countdownTimer.MillisecondsLeft == 0)
            {
                throw new ConversionTimedOutException($"The {nameof(NavigateTo)} method timed out");
            }
        }
        else
        {
            waitEvent.WaitOne();
        }
    }
    finally
    {
        // Also runs when the time-out exception is thrown so that no handler or timer stays behind
        _pageConnection.MessageReceived -= MessageReceived;
        _pageConnection.Closed -= ConnectionClosed;

        // Cancel before dispose so that a pending or late media load timeout ends immediately
        mediaLoadTimeoutCancellationTokenSource.Cancel();
        mediaLoadTimeoutCancellationTokenSource.Dispose();
        waitEvent.Dispose();
    }

    var lifecycleEventDisabledMessage = new Message { Method = "Page.setLifecycleEventsEnabled" };
    lifecycleEventDisabledMessage.AddParameter("enabled", false);
    _pageConnection.SendAsync(lifecycleEventDisabledMessage).GetAwaiter();

    _pageConnection.SendAsync(new Message { Method = "Page.disable" }).GetAwaiter();

    // Disable Fetch again if it was enabled
    if (fetchEnabled)
    {
        _pageConnection.SendAsync(new Message { Method = "Fetch.disable" }).GetAwaiter();
    }

    if (!string.IsNullOrEmpty(navigationError))
    {
        WriteToLog(navigationError);
        throw new ChromeNavigationException(navigationError);
    }
}
/// <summary>
///     Instructs Chrome to navigate to the given <paramref name="uri" /> and blocks the calling
///     thread until the page is considered loaded, the media load timeout expires, the page
///     connection is closed or the <paramref name="countdownTimer" /> runs out
/// </summary>
/// <param name="uri">The address to navigate to</param>
/// <param name="countdownTimer">If a <see cref="CountdownTimer"/> is set then
/// the method will raise an <see cref="ConversionTimedOutException"/> if the
/// <see cref="CountdownTimer"/> reaches zero before finishing navigation</param>
/// <param name="mediaLoadTimeout">When set a timeout will be started after the DomContentLoaded
/// event has fired. After a timeout the NavigateTo method will exit as if the page
/// has been completely loaded</param>
/// <exception cref="ArgumentNullException">Raised when <paramref name="uri"/> is <c>null</c></exception>
/// <exception cref="ChromeException">Raised when an error is returned by Chrome</exception>
/// <exception cref="ConversionTimedOutException">Raised when <paramref name="countdownTimer"/> reaches zero</exception>
public void NavigateTo(Uri uri, CountdownTimer countdownTimer = null, int? mediaLoadTimeout = null)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    // NOTE(review): GetAwaiter() on its own neither waits for nor observes the result of SendAsync,
    // so every send in this method is fire-and-forget - confirm that this is intended
    _pageConnection.SendAsync(new Message { Method = "Page.enable" }).GetAwaiter();

    var message = new Message { Method = "Page.navigate" };
    message.AddParameter("url", uri.ToString());

    var waitEvent = new ManualResetEvent(false);
    var mediaLoadTimeoutCancellationTokenSource = new CancellationTokenSource();
    // Taken up front so that a late handler never has to touch the source after it has been disposed
    var mediaLoadTimeoutToken = mediaLoadTimeoutCancellationTokenSource.Token;
    var mediaLoadTimeoutStarted = 0;

    // An event that is already being dispatched can still arrive after the clean-up below,
    // signalling a disposed handle must not blow up on the connection's thread
    void Signal()
    {
        try
        {
            waitEvent.Set();
        }
        catch (ObjectDisposedException)
        {
            // Navigation has already been completed or abandoned
        }
    }

    // Releases the waiting thread when the page did not finish loading within the media load timeout
    async Task RunMediaLoadTimeoutAsync()
    {
        try
        {
            await Task.Delay(mediaLoadTimeout.Value, mediaLoadTimeoutToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            // The navigation ended before the timeout did, nothing left to do
            return;
        }

        Logger.WriteToLog($"Media load timed out after {mediaLoadTimeout.Value} milliseconds");
        Signal();
    }

    void StartMediaLoadTimeout()
    {
        // Start the timeout only once, even when the triggering event is delivered more than once
        if (Interlocked.Exchange(ref mediaLoadTimeoutStarted, 1) != 0)
        {
            return;
        }

        // Runs in the background; the message handler must not be held up by the delay
        _ = RunMediaLoadTimeoutAsync();
    }

    void MessageReceived(object sender, string data)
    {
        var page = PageEvent.FromJson(data);

        if (!uri.IsFile)
        {
            switch (page.Method)
            {
                // The DOMContentLoaded event is fired when the document has been completely loaded and parsed, without
                // waiting for stylesheets, images, and sub frames to finish loading (the load event can be used to
                // detect a fully-loaded page).
                case "Page.lifecycleEvent" when page.Params?.Name == "DOMContentLoaded":
                    if (mediaLoadTimeout.HasValue)
                    {
                        StartMediaLoadTimeout();
                    }
                    else
                    {
                        Signal();
                    }

                    break;

                case "Page.frameStoppedLoading":
                    Signal();
                    break;
            }
        }
        else
        {
            switch (page.Method)
            {
                // For local files the media load timeout starts at domContentEventFired
                // and the page counts as loaded at loadEventFired
                case "Page.domContentEventFired":
                    if (mediaLoadTimeout.HasValue)
                    {
                        StartMediaLoadTimeout();
                    }

                    break;

                case "Page.loadEventFired":
                    Signal();
                    break;
            }
        }
    }

    // Named so that it can be removed again; a closed connection will never deliver a load event
    void ConnectionClosed(object sender, EventArgs args)
    {
        Signal();
    }

    _pageConnection.MessageReceived += MessageReceived;
    _pageConnection.Closed += ConnectionClosed;

    try
    {
        _pageConnection.SendAsync(message).GetAwaiter();

        if (countdownTimer != null)
        {
            // WaitOne returns false when the timeout elapsed without the event being signalled
            var signalled = waitEvent.WaitOne(countdownTimer.MillisecondsLeft);

            if (!signalled || countdownTimer.MillisecondsLeft == 0)
            {
                throw new ConversionTimedOutException($"The {nameof(NavigateTo)} method timed out");
            }
        }
        else
        {
            waitEvent.WaitOne();
        }
    }
    finally
    {
        // Also runs when the time-out exception is thrown so that no handler or timer stays behind
        _pageConnection.MessageReceived -= MessageReceived;
        _pageConnection.Closed -= ConnectionClosed;

        // Stops a media load timeout that is still pending instead of waiting for it to run out
        mediaLoadTimeoutCancellationTokenSource.Cancel();
        mediaLoadTimeoutCancellationTokenSource.Dispose();
        waitEvent.Dispose();
    }

    _pageConnection.SendAsync(new Message { Method = "Page.disable" }).GetAwaiter();
}