/// <summary>
/// Starts loading the document at the given url in the hosted browser.
/// The call only issues the navigation request; it does not wait for the page to complete.
/// </summary>
/// <param name="url">Address passed to the browser's Navigate call.</param>
/// <returns>
/// true if the navigation request was issued without an exception; otherwise false.
/// The same value is stored in IE_result.
/// </returns>
public bool Navigate(string url)
{
    try
    {
        IE_result = false;

        // Forget the urls recorded as completed for the previous document.
        lock (completed_urls)
        {
            completed_urls.Clear();
        }

        init_loading(url);
        init_loading_progress(url);

        // The browser call is routed through IeRoutines.Invoke.
        IeRoutines.Invoke(browser, () =>
        {
            browser.Navigate(url);
        });

        IE_result = true;
        return IE_result;
    }
    catch (ThreadAbortException)
    {
        // Intentionally not logged; falls through to the failure result below.
    }
    catch (Exception ex)
    {
        web_routine_status = WebRoutineStatus.EXCEPTION;
        Log.Error(ex.Message + "\nURL: " + url + "\nIE using");
    }

    IE_result = false;
    return IE_result;
}
//*********************************************************************************************

/// <summary>
/// Forwards an attribute-based element lookup to IeRoutines.GetHtmlElementsByAttr.
/// </summary>
/// <param name="attribute">Attribute name passed to the lookup.</param>
/// <param name="value">Attribute value passed to the lookup; null by default.</param>
/// <param name="tag">Tag name passed to the lookup; null by default.</param>
/// <param name="parent_he">Element to search under; when null, the body of HtmlDoc is used.</param>
/// <returns>The sequence returned by IeRoutines.GetHtmlElementsByAttr.</returns>
public IEnumerable<HtmlElement> GetHtmlElementsByAttr(string attribute, string value = null, string tag = null, HtmlElement parent_he = null)
{
    // Fall back to the current document's body when the caller gives no parent element.
    HtmlElement search_root = parent_he == null ? HtmlDoc.Body : parent_he;
    return IeRoutines.GetHtmlElementsByAttr(search_root, attribute, value, tag);
}
/// <summary>
/// Stops and disposes the hosted browser. The work is routed through IeRoutines.Invoke.
/// Does nothing when the browser reference has already been cleared (CloseIE sets it to null),
/// so calling Dispose after CloseIE does not hand a null control to IeRoutines.Invoke.
/// </summary>
public void Dispose()
{
    // Snapshot the field so the null check and the invoked code see the same instance.
    var current_browser = browser;
    if (current_browser == null)
    {
        return;
    }

    IeRoutines.Invoke(current_browser, () =>
    {
        try
        {
            current_browser.Stop();
            current_browser.Dispose();
        }
        catch
        {
            // Best effort: Dispose must not throw, so failures while tearing the browser down are ignored.
        }
    });
}
/// <summary>
/// Loads the document for the given url: answers from the cache when allowed and available,
/// otherwise navigates the browser and waits either for completion or for a caller-supplied condition.
/// </summary>
/// <param name="url">Address of the document to load.</param>
/// <param name="timeout_in_mss">
/// Wait timeout in milliseconds. When a condition is supplied and this value is negative,
/// Settings.Browser.PageCompletedTimeoutInSeconds * 1000 is used instead.
/// </param>
/// <param name="return_if_exists">
/// Optional condition handed to IeRoutines.WaitForCondition; the wait counts as successful
/// when that call returns a non-null value. When null, WaitForCompletion is used.
/// </param>
/// <returns>true on a cache hit or a successful wait; otherwise false. The value is also kept in IE_result.</returns>
public bool GetDoc(string url, int timeout_in_mss = -1, Func<object> return_if_exists = null)
{
    try
    {
        IE_result = false;

        bool served_from_cache = UseCache && Cache.GetCachedFile(url, null, out binary_result, out ResponseUrl, out CachedFile);
        if (served_from_cache)
        {
            web_routine_status = WebRoutineStatus.CACHED;
            Log.Write("From cache: " + url);
            IE_result = true;
            return true;
        }

        if (!Navigate(url))
        {
            return IE_result;
        }

        if (return_if_exists != null)
        {
            if (timeout_in_mss < 0)
            {
                timeout_in_mss = Settings.Browser.PageCompletedTimeoutInSeconds * 1000;
            }
            object condition_result = IeRoutines.WaitForCondition(browser, return_if_exists, timeout_in_mss);
            IE_result = (condition_result != null);
        }
        else
        {
            IE_result = WaitForCompletion(timeout_in_mss);
        }

        // The document reference is captured whether or not the wait succeeded.
        browser.Invoke(() => { HtmlDoc = browser.Document; });

        return IE_result;
    }
    catch (ThreadAbortException)
    {
        // Intentionally not logged; falls through to the failure result below.
    }
    catch (Exception ex)
    {
        web_routine_status = WebRoutineStatus.EXCEPTION;
        Log.Error(ex.Message + "\nURL: " + url + "\nIE using");
    }

    IE_result = false;
    return IE_result;
}
/// <summary>
/// Binds this routine to the given browser: optionally suppresses script errors and registers the
/// browser window with WindowInterceptor, subscribes to the browser events used by this class,
/// and stores the browser reference. All of this is routed through IeRoutines.Invoke.
/// </summary>
/// <param name="browser">The WebBrowser control this routine will drive.</param>
public IeRoutine(WebBrowser browser)
{
    IeRoutines.Invoke(browser, () =>
    {
        if (_CloseWebBrowserDialogsAutomatically)
        {
            browser.ScriptErrorsSuppressed = true;
            WindowInterceptor.AddOwnerWindow(browser.Handle);
        }

        browser.Navigating += Browser_Navigating;
        browser.DocumentCompleted += Browser_DocumentCompleted;
        browser.NewWindow += browser_NewWindow;
        browser.ProgressChanged += browser_ProgressChanged;

        this.browser = browser;

        //needed to make google (and probably other sites) work correctly
        //if (!InternetExplorerBrowserEmulation.IsBrowserEmulationSet())
        //    InternetExplorerBrowserEmulation.SetBrowserEmulationVersion();
    });
}
/// <summary>
/// Releases the hosted browser: loads "about:blank", disposes the control and clears the reference.
/// Does nothing inside the invoked code when the reference is already null.
/// Exceptions other than ThreadAbortException are logged rather than propagated.
/// </summary>
internal void CloseIE()
{
    try
    {
        IeRoutines.Invoke(browser, () =>
        {
            if (browser == null)
            {
                return;
            }

            GetDoc("about:blank");
            browser.Dispose();
            browser = null;
        });
    }
    catch (ThreadAbortException)
    {
        // Intentionally not logged.
    }
    catch (Exception ex)
    {
        Log.Error(ex);
    }
}
/// <summary>
/// Universal method to load a page over HTTP.
/// Serves the request from the cache when UseCache is set and a cached file exists; otherwise builds
/// an HttpWebRequest from <paramref name="http_request"/>, downloads the response body into
/// binary_result and hands it to Cache.CacheDownloadedFile.
/// </summary>
/// <param name="http_request">Url, method, headers and optional POST data of the request.</param>
/// <param name="send_cookies">
/// When true, CookieContainer is attached to the request (after merging IE cookies if UseIeCookies is set)
/// and the response cookies are added back to it.
/// </param>
/// <returns>
/// true when the content came from the cache or was downloaded (including a truncated download);
/// false when any exception was caught. Exceptions are logged, not propagated; ErrorMessage and
/// web_routine_status carry the failure details.
/// </returns>
bool _do(HttpRequest http_request, bool send_cookies)
{
    Stream res_stream = null;
    try
    {
        init_loading(http_request.Url);

        if (UseCache)
        {
            if (Cache.GetCachedFile(http_request.Url, http_request.PostString, out binary_result, out ResponseUrl, out CachedFile))
            {
                web_routine_status = WebRoutineStatus.CACHED;
                Log.Write("From cache: " + http_request.Url);
                return true;
            }
        }

        init_loading_progress(http_request.Url);

        if (http_request.Method == HttpRequest.RequestMethod.POST)
        {
            // NOTE(review): this is a process-wide ServicePointManager setting, not a per-request one.
            System.Net.ServicePointManager.Expect100Continue = false;
        }

        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(http_request.Url);
        req.Credentials = credential_cache;
        req.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

        if (send_cookies)
        {
            if (UseIeCookies)
            {
                Uri uri = new Uri(http_request.Url);
                string cookie = IeRoutines.RetrieveIeCookies(uri);
                if (cookie.Length > 0)
                {
                    // CookieContainer.SetCookies expects comma-separated cookies.
                    cookie = Regex.Replace(cookie, ";", ",");
                    lock (CookieContainer)
                    {
                        CookieContainer.SetCookies(uri, cookie);
                    }
                }
            }
            req.CookieContainer = CookieContainer;
        }

        if (Proxy != null)
        {
            req.Proxy = Proxy.WebProxy;
        }

        req.Timeout = Settings.Web.HttpRequestTimeoutInSeconds * 1000;
        req.ReadWriteTimeout = Settings.Web.HttpRequestTimeoutInSeconds * 1000;

        // A negative MaxAutoRedirectionCount leaves the request's redirect settings untouched.
        if (HttpRequest.MaxAutoRedirectionCount == 0)
        {
            req.AllowAutoRedirect = false;
        }
        else if (HttpRequest.MaxAutoRedirectionCount > 0)
        {
            req.AllowAutoRedirect = true;
            req.MaximumAutomaticRedirections = HttpRequest.MaxAutoRedirectionCount;
        }

        // Some headers must be set through dedicated HttpWebRequest properties;
        // everything else goes into the generic header collection.
        foreach (KeyValuePair<string, string> header in http_request.Headers)
        {
            if (header.Value == null)
            {
                continue;
            }
            switch (header.Key)
            {
                case "User-Agent":
                    req.UserAgent = header.Value;
                    break;
                case "Accept":
                    req.Accept = header.Value;
                    break;
                case "Referer":
                    req.Referer = header.Value;
                    break;
                case "Connection":
                    if (Regex.IsMatch(header.Value, "keep-alive", RegexOptions.IgnoreCase))
                    {
                        req.KeepAlive = true;
                    }
                    break;
                case "Content-Type":
                    req.ContentType = header.Value;
                    break;
                case "Expect":
                    req.Expect = header.Value;
                    break;
                default:
                    req.Headers[header.Key] = header.Value;
                    break;
            }
        }

        if (http_request.Method == HttpRequest.RequestMethod.POST)
        {
            req.Method = "POST";
            if (http_request.PostData != null)
            {
                req.ContentLength = http_request.PostData.Length;
                // 'using' closes the request stream even when Write throws.
                using (Stream req_stream = req.GetRequestStream())
                {
                    req_stream.Write(http_request.PostData, 0, http_request.PostData.Length);
                }
            }
        }

        HWResponse = (HttpWebResponse)req.GetResponse();
        ResponseUrl = HWResponse.ResponseUri.ToString();

        if (HWResponse.StatusCode == HttpStatusCode.Redirect)
        {
            web_routine_status = WebRoutineStatus.REDIRECTION;
            throw new Exception("Redirected.\nURL:" + http_request.Url);
        }

        if (send_cookies)
        {
            lock (CookieContainer)
            {
                CookieContainer.Add(HWResponse.Cookies);
            }
        }

        // In text mode (identified by the Accept header value) reject responses whose
        // Content-Type does not match the downloadable pattern.
        string accept;
        if (http_request.Headers.TryGetValue("Accept", out accept)
            && accept == Settings.Web.TextModeHttpRequestAcceptHeader
            && !Regex.IsMatch(HWResponse.ContentType, Settings.Web.TextModeDownloadableContentTypePattern, RegexOptions.Compiled | RegexOptions.IgnoreCase))
        {
            web_routine_status = WebRoutineStatus.UNACCEPTABLE_CONTENT_TYPE;
            throw new Exception("Unacceptable Content-Type:" + HWResponse.ContentType + "\nURL:" + http_request.Url);
        }

        // ContentLength is a long; clamp instead of letting the cast wrap around for very large values.
        long content_length = HWResponse.ContentLength;
        int progress_max = content_length > int.MaxValue ? int.MaxValue : (int)content_length;

        res_stream = HWResponse.GetResponseStream();
        using (MemoryStream result_ms = new MemoryStream())
        {
            byte[] buff = new byte[8192];
            int total_byte_count = 0;
            while (true)
            {
                int byte_count = res_stream.Read(buff, 0, buff.Length);
                if (byte_count < 1)
                {
                    break;
                }
                result_ms.Write(buff, 0, byte_count);
                total_byte_count += byte_count;
                show_progress(progress_max, total_byte_count);

                // The chunk that crosses the limit is kept; reading stops afterwards.
                if (Settings.Web.MaxDownloadedFileLength > 0 && total_byte_count > Settings.Web.MaxDownloadedFileLength)
                {
                    web_routine_status = WebRoutineStatus.FILE_TRUNCATED;
                    Log.Write("TRUNCATED. URL:" + http_request.Url);
                    break;
                }
            }
            binary_result = result_ms.ToArray();
        }

        // Keep a status set earlier (e.g. FILE_TRUNCATED); only an untouched status becomes OK.
        if (web_routine_status == WebRoutineStatus.UNDEFINED)
        {
            web_routine_status = WebRoutineStatus.OK;
        }

        bool content_is_text = Regex.IsMatch(HWResponse.ContentType, "text|json|xml", RegexOptions.Compiled | RegexOptions.IgnoreCase);

        CachedFile = Cache.CacheDownloadedFile(content_is_text, http_request.Url, http_request.PostString, ResponseUrl, BinaryResult, get_next_page_number(), cycle_identifier, web_routine_status);
        return true;
    }
    catch (ThreadAbortException)
    {
        //Thread.ResetAbort();
    }
    catch (System.Net.WebException error)
    {
        string proxy = "";
        if (Proxy != null && Proxy.WebProxy.Address != null && Proxy.WebProxy.Address.Authority != null)
        {
            proxy = Proxy.WebProxy.Address.Authority;
        }
        ErrorMessage = error.Message;
        if (web_routine_status == WebRoutineStatus.UNDEFINED)
        {
            web_routine_status = WebRoutineStatus.DOWNLOAD_ERROR;
        }
        Log.Write("DOWNLOAD ERROR: " + error.Message + "\n" + error.StackTrace + "\nPROXY: " + proxy);
    }
    catch (Exception error)
    {
        ErrorMessage = error.Message;
        if (web_routine_status == WebRoutineStatus.UNDEFINED)
        {
            web_routine_status = WebRoutineStatus.DOWNLOAD_ERROR;
        }
        Log.Error(error);
    }
    finally
    {
        if (res_stream != null)
        {
            res_stream.Close();
        }
        if (HWResponse != null)
        {
            HWResponse.Close();
        }
    }

    // Reached only after a caught exception.
    return false;
}