Example #1
0
 /// <summary>
 /// Begins getting the document at the given url: resets the per-load state and
 /// issues browser.Navigate through IeRoutines.Invoke.
 /// </summary>
 /// <param name="url">address handed to browser.Navigate</param>
 /// <returns>true if the navigation request was issued without an exception; otherwise false</returns>
 public bool Navigate(string url)
 {
     try
     {
         // Start from a clean state: no result yet and no completed urls recorded.
         IE_result = false;
         lock (completed_urls)
             completed_urls.Clear();

         init_loading(url);
         init_loading_progress(url);

         // The browser is only touched through IeRoutines.Invoke.
         IeRoutines.Invoke(browser, () => browser.Navigate(url));

         IE_result = true;
         return IE_result;
     }
     catch (ThreadAbortException)
     {
         // A thread abort is neither logged nor reflected in web_routine_status.
     }
     catch (Exception ex)
     {
         web_routine_status = WebRoutineStatus.EXCEPTION;
         Log.Error(string.Concat(ex.Message, "\nURL: ", url, "\nIE using"));
     }

     // Shared failure exit for both catch clauses.
     IE_result = false;
     return IE_result;
 }
Example #2
0
        //*********************************************************************************************
        //*********************************************************************************************
        //*********************************************************************************************

        /// <summary>
        /// Looks up HTML elements by attribute by delegating to IeRoutines.GetHtmlElementsByAttr.
        /// </summary>
        /// <param name="attribute">attribute name forwarded to the helper</param>
        /// <param name="value">attribute value forwarded to the helper (null by default)</param>
        /// <param name="tag">tag name forwarded to the helper (null by default)</param>
        /// <param name="parent_he">element the search starts from; when null, HtmlDoc.Body is used</param>
        /// <returns>the sequence produced by IeRoutines.GetHtmlElementsByAttr</returns>
        public IEnumerable <HtmlElement> GetHtmlElementsByAttr(string attribute, string value = null, string tag = null, HtmlElement parent_he = null)
        {
            // Fall back to the body of the current document when no parent element was supplied.
            HtmlElement search_root = parent_he == null ? HtmlDoc.Body : parent_he;

            return IeRoutines.GetHtmlElementsByAttr(search_root, attribute, value, tag);
        }
Example #3
0
 /// <summary>
 /// Stops any activity of the browser and disposes it.
 /// Does nothing when the browser field is already null, so the method can be
 /// called after the browser has been released elsewhere.
 /// </summary>
 public void Dispose()
 {
     // The field is used as the invoke target below, so it has to be checked
     // before IeRoutines.Invoke is called rather than inside the callback.
     if (browser == null)
     {
         return;
     }

     IeRoutines.Invoke(browser, () =>
     {
         try
         {
             browser.Stop();
             browser.Dispose();
         }
         catch
         {
             // Deliberate best effort: a failure while tearing the browser down
             // must not escape from Dispose.
         }
     });
 }
Example #4
0
        /// <summary>
        /// Gets the complete document for the url: it is served from the cache when caching is
        /// enabled and the url is cached; otherwise the browser navigates to it and the call
        /// waits until the load is complete (or the supplied condition is met).
        /// </summary>
        /// <param name="url">address of the document</param>
        /// <param name="timeout_in_mss">wait timeout; passed as is to WaitForCompletion, or, when
        /// return_if_exists is supplied and the value is negative, replaced by
        /// Settings.Browser.PageCompletedTimeoutInSeconds * 1000</param>
        /// <param name="return_if_exists">optional condition handed to IeRoutines.WaitForCondition;
        /// the load counts as successful when that call returns a non-null value</param>
        /// <returns>true on a cache hit or a successful wait; false otherwise</returns>
        public bool GetDoc(string url, int timeout_in_mss = -1, Func <object> return_if_exists = null)
        {
            try
            {
                IE_result = false;

                // The cache is consulted first; on a hit the browser is not used and HtmlDoc is left as it was.
                bool cache_hit = UseCache && Cache.GetCachedFile(url, null, out binary_result, out ResponseUrl, out CachedFile);
                if (cache_hit)
                {
                    web_routine_status = WebRoutineStatus.CACHED;
                    Log.Write("From cache: " + url);
                    IE_result = true;
                    return true;
                }

                // Nothing to wait for when the navigation could not even be started.
                if (!Navigate(url))
                {
                    return IE_result;
                }

                if (return_if_exists != null)
                {
                    // A negative timeout selects the configured page-completed timeout.
                    int wait_mss = timeout_in_mss < 0
                                   ? Settings.Browser.PageCompletedTimeoutInSeconds * 1000
                                   : timeout_in_mss;
                    IE_result = IeRoutines.WaitForCondition(browser, return_if_exists, wait_mss) != null;
                }
                else
                {
                    IE_result = WaitForCompletion(timeout_in_mss);
                }

                // The current document is captured whether or not the wait succeeded.
                browser.Invoke(() => { HtmlDoc = browser.Document; });
                return IE_result;
            }
            catch (ThreadAbortException)
            {
                // A thread abort is neither logged nor reflected in web_routine_status.
            }
            catch (Exception ex)
            {
                web_routine_status = WebRoutineStatus.EXCEPTION;
                Log.Error(string.Concat(ex.Message, "\nURL: ", url, "\nIE using"));
            }

            // Shared failure exit for both catch clauses.
            IE_result = false;
            return IE_result;
        }
Example #5
0
        /// <summary>
        /// Wraps the given WebBrowser: optionally suppresses its script errors and registers its
        /// window with WindowInterceptor, subscribes to its events and stores it in the browser field.
        /// All of this runs through IeRoutines.Invoke.
        /// </summary>
        /// <param name="browser">browser control this routine works with</param>
        public IeRoutine(WebBrowser browser)
        {
            IeRoutines.Invoke(browser, () =>
            {
                if (_CloseWebBrowserDialogsAutomatically)
                {
                    // Silence script error dialogs and register the browser window with WindowInterceptor.
                    browser.ScriptErrorsSuppressed = true;
                    WindowInterceptor.AddOwnerWindow(browser.Handle);
                }

                // Handlers are attached before the control is stored in the field.
                browser.Navigating        += Browser_Navigating;
                browser.DocumentCompleted += Browser_DocumentCompleted;
                browser.NewWindow         += browser_NewWindow;
                browser.ProgressChanged   += browser_ProgressChanged;

                this.browser = browser;

                // Disabled: setting the IE browser emulation version, which was noted as needed
                // to make google (and probably other sites) work correctly.
                //if (!InternetExplorerBrowserEmulation.IsBrowserEmulationSet())
                //    InternetExplorerBrowserEmulation.SetBrowserEmulationVersion();
            });
        }
Example #6
0
 /// <summary>
 /// Releases the browser: loads "about:blank" into it, disposes it and sets the field to null.
 /// Does nothing when the field is already null. Exceptions other than a thread abort are logged.
 /// </summary>
 internal void CloseIE()
 {
     try
     {
         // The field is passed to IeRoutines.Invoke as the invoke target, so it has to be
         // checked before the call; checking only inside the callback is too late.
         if (browser == null)
         {
             return;
         }

         IeRoutines.Invoke(browser, () =>
         {
             // Checked again in case the field was cleared before the callback ran.
             if (browser != null)
             {
                 GetDoc("about:blank");
                 browser.Dispose();
                 browser = null;
             }
         });
     }
     catch (ThreadAbortException)
     {
         // A thread abort is not logged.
     }
     catch (Exception error)
     {
         Log.Error(error);
     }
 }
Example #7
0
        /// <summary>
        /// Universal method to load a page over HTTP. The content is taken from the cache when
        /// caching is enabled and the request is cached; otherwise the request is sent, the
        /// response body is read into binary_result and the result is stored in the cache.
        /// </summary>
        /// <param name="http_request">request description; its Url, Method, Headers, PostData and PostString are used</param>
        /// <param name="send_cookies">when true, CookieContainer is attached to the request and
        /// receives the cookies of the response</param>
        /// <returns>true when the content came from the cache or was downloaded; false when an
        /// exception was caught (web_routine_status, and for non-abort failures ErrorMessage, describe it)</returns>
        bool _do(HttpRequest http_request, bool send_cookies)
        {
            Stream res_stream = null;

            try
            {
                init_loading(http_request.Url);
                if (UseCache)
                {
                    if (Cache.GetCachedFile(http_request.Url, http_request.PostString, out binary_result, out ResponseUrl, out CachedFile))
                    {
                        web_routine_status = WebRoutineStatus.CACHED;
                        Log.Write("From cache: " + http_request.Url);
                        return(true);
                    }
                }

                init_loading_progress(http_request.Url);

                if (http_request.Method == HttpRequest.RequestMethod.POST)
                {
                    // NOTE(review): this is a process-wide ServicePointManager setting, not a per-request one.
                    System.Net.ServicePointManager.Expect100Continue = false;
                }

                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(http_request.Url);

                req.Credentials = credential_cache;

                req.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;

                if (send_cookies)
                {
                    if (UseIeCookies)
                    {
                        // Copy the cookies retrieved by IeRoutines.RetrieveIeCookies into the container.
                        Uri    uri    = new Uri(http_request.Url);
                        string cookie = IeRoutines.RetrieveIeCookies(uri);
                        if (cookie.Length > 0)
                        {
                            // SetCookies is given a comma separated list, hence ';' is replaced by ','.
                            cookie = cookie.Replace(';', ',');
                            lock (CookieContainer)
                                CookieContainer.SetCookies(uri, cookie);
                        }
                    }

                    req.CookieContainer = CookieContainer;
                }

                if (Proxy != null)
                {
                    req.Proxy = Proxy.WebProxy;
                }

                req.Timeout          = Settings.Web.HttpRequestTimeoutInSeconds * 1000;
                req.ReadWriteTimeout = Settings.Web.HttpRequestTimeoutInSeconds * 1000;

                // A negative MaxAutoRedirectionCount leaves the redirect settings of the request untouched.
                if (HttpRequest.MaxAutoRedirectionCount == 0)
                {
                    req.AllowAutoRedirect = false;
                }
                else if (HttpRequest.MaxAutoRedirectionCount > 0)
                {
                    req.AllowAutoRedirect            = true;
                    req.MaximumAutomaticRedirections = HttpRequest.MaxAutoRedirectionCount;
                }

                // The headers listed in the switch are set through dedicated HttpWebRequest
                // properties; everything else goes into the generic header collection.
                foreach (KeyValuePair <string, string> header in http_request.Headers)
                {
                    if (header.Value == null)
                    {
                        continue;
                    }
                    switch (header.Key)
                    {
                    case "User-Agent":
                        req.UserAgent = header.Value;
                        break;

                    case "Accept":
                        req.Accept = header.Value;
                        break;

                    case "Referer":
                        req.Referer = header.Value;
                        break;

                    case "Connection":
                        // Only "keep-alive" is acted upon; other values leave KeepAlive as it is.
                        if (Regex.IsMatch(header.Value, "keep-alive", RegexOptions.IgnoreCase))
                        {
                            req.KeepAlive = true;
                        }
                        break;

                    case "Content-Type":
                        req.ContentType = header.Value;
                        break;

                    case "Expect":
                        req.Expect = header.Value;
                        break;

                    default:
                        req.Headers[header.Key] = header.Value;
                        break;
                    }
                }

                if (http_request.Method == HttpRequest.RequestMethod.POST)
                {
                    req.Method = "POST";
                    if (http_request.PostData != null)
                    {
                        req.ContentLength = http_request.PostData.Length;

                        // using releases the request stream even when Write throws.
                        using (Stream req_stream = req.GetRequestStream())
                        {
                            req_stream.Write(http_request.PostData, 0, http_request.PostData.Length);
                        }
                    }
                }

                HWResponse = (HttpWebResponse)req.GetResponse();

                ResponseUrl = HWResponse.ResponseUri.ToString();

                // NOTE(review): only HttpStatusCode.Redirect is treated as a redirection here;
                // confirm whether other redirect status codes should be handled the same way.
                if (HWResponse.StatusCode == HttpStatusCode.Redirect)
                {
                    web_routine_status = WebRoutineStatus.REDIRECTION;
                    throw new Exception("Redirected.\nURL:" + http_request.Url);
                }

                if (send_cookies)
                {
                    lock (CookieContainer)
                        CookieContainer.Add(HWResponse.Cookies);
                }

                // A request sent with the text-mode Accept header must be answered with a
                // content type matching the configured pattern.
                string accept;
                if (http_request.Headers.TryGetValue("Accept", out accept) &&
                    accept == Settings.Web.TextModeHttpRequestAcceptHeader &&
                    !Regex.IsMatch(HWResponse.ContentType, Settings.Web.TextModeDownloadableContentTypePattern, RegexOptions.Compiled | RegexOptions.IgnoreCase)
                    )
                {
                    web_routine_status = WebRoutineStatus.UNACCEPTABLE_CONTENT_TYPE;
                    throw new Exception("Unacceptable Content-Type:" + HWResponse.ContentType + "\nURL:" + http_request.Url);
                }

                // ContentLength is a long; clamp it so that a very large value does not wrap
                // around when converted to int for the progress display.
                long content_length = HWResponse.ContentLength;
                int  progress_max   = content_length > int.MaxValue ? int.MaxValue : (int)content_length;

                res_stream = HWResponse.GetResponseStream();

                using (MemoryStream result_ms = new MemoryStream())
                {
                    byte[] buff             = new byte[8192];
                    int    total_byte_count = 0;
                    while (true)
                    {
                        int byte_count = res_stream.Read(buff, 0, buff.Length);

                        if (byte_count < 1)
                        {
                            break;
                        }

                        result_ms.Write(buff, 0, byte_count);

                        total_byte_count += byte_count;

                        show_progress(progress_max, total_byte_count);

                        // Stop reading once the configured limit is exceeded; what has been
                        // read so far (including the last chunk) is kept.
                        if (Settings.Web.MaxDownloadedFileLength > 0 &&
                            total_byte_count > Settings.Web.MaxDownloadedFileLength
                            )
                        {
                            web_routine_status = WebRoutineStatus.FILE_TRUNCATED;
                            Log.Write("TRUNCATED. URL:" + http_request.Url);
                            break;
                        }
                    }

                    binary_result = result_ms.ToArray();
                }

                // A status set earlier (e.g. FILE_TRUNCATED) is preserved.
                if (web_routine_status == WebRoutineStatus.UNDEFINED)
                {
                    web_routine_status = WebRoutineStatus.OK;
                }

                bool content_is_text = false;
                if (Regex.IsMatch(HWResponse.ContentType, "text|json|xml", RegexOptions.Compiled | RegexOptions.IgnoreCase))
                {
                    content_is_text = true;
                }
                CachedFile = Cache.CacheDownloadedFile(content_is_text, http_request.Url, http_request.PostString, ResponseUrl, BinaryResult, get_next_page_number(), cycle_identifier, web_routine_status);

                return(true);
            }
            catch (ThreadAbortException)
            {
                //Thread.ResetAbort();
            }
            catch (System.Net.WebException error)
            {
                string proxy = "";
                if (Proxy != null && Proxy.WebProxy.Address != null && Proxy.WebProxy.Address.Authority != null)
                {
                    proxy = Proxy.WebProxy.Address.Authority;
                }

                ErrorMessage = error.Message;
                // A more specific status set before the failure is preserved.
                if (web_routine_status == WebRoutineStatus.UNDEFINED)
                {
                    web_routine_status = WebRoutineStatus.DOWNLOAD_ERROR;
                }

                Log.Write("DOWNLOAD ERROR: " + error.Message + "\n" + error.StackTrace + "\nPROXY: " + proxy);
            }
            catch (Exception error)
            {
                ErrorMessage = error.Message;
                // A more specific status set before the failure (e.g. REDIRECTION) is preserved.
                if (web_routine_status == WebRoutineStatus.UNDEFINED)
                {
                    web_routine_status = WebRoutineStatus.DOWNLOAD_ERROR;
                }
                Log.Error(error);
            }
            finally
            {
                // Runs on every exit path, including the successful and the cached ones.
                if (res_stream != null)
                {
                    res_stream.Close();
                }
                if (HWResponse != null)
                {
                    HWResponse.Close();
                }
            }

            // Reached only after one of the catch clauses above.
            return(false);
        }