Beispiel #1
0
        /// <summary>
        /// Walks the paginated listing that starts at <paramref name="baseURL"/> and collects the
        /// company links found on every page.
        /// </summary>
        /// <param name="baseURL">URL prefix of the listing; the 1-based page number is appended to it.</param>
        /// <returns>
        /// The dictionary filled through <c>AddWithKey</c>, called with the trimmed link text and the
        /// link's <c>href</c> value for each <c>company-item__title</c> anchor.
        /// </returns>
        /// <remarks>
        /// When a page contains the "I am not a robot" marker text, a sound is played, a CapMonster
        /// solving task is run and the same page is requested again.
        /// The CapMonster calls are awaited, so their failures surface as the underlying exception
        /// rather than wrapped in an <see cref="AggregateException"/>.
        /// </remarks>
        public async Task <Dictionary <string, string> > ParseUrls(string baseURL)
        {
            Dictionary <string, string> result = new Dictionary <string, string>();
            HtmlWeb web = new HtmlWeb();

            PagesFounded = false;
            int page    = 1;
            int maxPage = 0;

            // SoundPlayer is disposable; scope it to the whole crawl so it is released on every exit path.
            using (SoundPlayer player = new SoundPlayer(Environment.CurrentDirectory + "\\Speech On.wav"))
            {
                do
                {
                    string       pageUrl = baseURL + page;
                    HtmlDocument document;

                    // Reload the current page until it is served without the captcha marker.
                    while (true)
                    {
                        document = web.Load(pageUrl);
                        // Non-blocking pause between requests (the method is async).
                        await Task.Delay(BaseSleepLoading);

                        if (!document.Text.Contains("«Я не робот»."))
                        {
                            break;
                        }

                        player.Play();
                        Console.WriteLine("CAPTCHA");

                        string dsKey = document.DocumentNode.SelectSingleNode("//p[@class='g-recaptcha']").Attributes["data-sitekey"].Value;

                        CapMonsterCloud.CapMonsterClient antcap = new CapMonsterCloud.CapMonsterClient(CapMonsterKey);

                        NoCaptchaTask captchaTask = new NoCaptchaTask()
                        {
                            WebsiteUrl = pageUrl,
                            WebsiteKey = dsKey
                        };

                        // The balance value itself is not used; the request is kept so that a failing
                        // CapMonster call still aborts here before a solving task is created.
                        await antcap.GetBalanceAsync();
                        int taskId = await antcap.CreateTaskAsync(captchaTask);

                        // NOTE(review): the solved token is not submitted anywhere - the page is simply
                        // requested again, so this loop is unbounded if the site keeps serving the captcha.
                        await antcap.GetTaskResultAsync <NoCaptchaTaskResult>(taskId);
                    }

                    if (!PagesFounded)
                    {
                        int size = Convert.ToInt32(document.DocumentNode.SelectSingleNode("//span[@class='page-navigation__num']")
                                                   .InnerText.Replace("из ", string.Empty));

                        // Ceiling of size / 100 (presumably 100 entries per page - TODO confirm).
                        // Only a non-zero remainder adds a page, so exact multiples of 100 do not
                        // trigger a request for a page past the end.
                        maxPage      = (size / 100) + (size % 100 > 0 ? 1 : 0);
                        PagesFounded = true;
                    }

                    // SelectNodes yields null (not an empty collection) when nothing matches.
                    HtmlNodeCollection titles = document.DocumentNode.SelectNodes("//div[@class='company-item__title']/a");
                    if (titles != null)
                    {
                        foreach (HtmlNode title in titles)
                        {
                            HtmlAttribute href = title.Attributes["href"];
                            result.AddWithKey(title.InnerText.Trim(), href.Value);
                        }
                    }

                    Console.WriteLine($"Page {page} parsed");
                    page++;
                }while (page <= maxPage);
            }

            return(result);
        }
Beispiel #2
0
        /// <summary>
        /// Solves the reCAPTCHA on the page currently open in <paramref name="driver"/> through
        /// CapMonster and submits the returned token via the page's <c>g-recaptcha-response</c> field.
        /// </summary>
        /// <param name="driver">Browser session showing the captcha page; must also implement <c>IJavaScriptExecutor</c>.</param>
        /// <param name="URL">Address passed to the solving task as the website URL.</param>
        /// <remarks>
        /// A failed attempt is logged and retried, for at most five attempts in total; the exception
        /// of the final attempt propagates to the caller.
        /// </remarks>
        public void SolveCaptcha(IWebDriver driver, string URL)
        {
            // Every attempt creates a new solving task, so the retry count is capped
            // instead of looping forever on a permanent failure.
            const int maxAttempts = 5;

            string dsKey = driver.FindElement(By.XPath("//p[@class='g-recaptcha']")).GetAttribute("data-sitekey");

            CapMonsterCloud.CapMonsterClient antcap = new CapMonsterCloud.CapMonsterClient(CapMonsterKey);

            for (int attempt = 1; ; attempt++)
            {
                try
                {
                    NoCaptchaTask captchaTask = new NoCaptchaTask()
                    {
                        WebsiteUrl = URL,
                        WebsiteKey = dsKey
                    };

                    // The signature is synchronous, so the calls have to block; GetAwaiter().GetResult()
                    // rethrows the underlying exception instead of an AggregateException wrapper.
                    int taskId = antcap.CreateTaskAsync(captchaTask).GetAwaiter().GetResult();
                    Console.Write($"TaskCreated...\t");
                    NoCaptchaTaskResult taskResult = antcap.GetTaskResultAsync <NoCaptchaTaskResult>(taskId).GetAwaiter().GetResult();
                    Console.WriteLine($"Response retrieved");

                    // Remove the inline "display" style so the response field can receive keystrokes.
                    IJavaScriptExecutor js = (IJavaScriptExecutor)driver;
                    js.ExecuteScript("document.getElementById('g-recaptcha-response').style.removeProperty('display');");
                    IWebElement input = driver.FindElement(By.Id("g-recaptcha-response"));
                    input.SendKeys(taskResult.GRecaptchaResponse);
                    input.Submit();
                    return;
                }
                catch (Exception ex) when (attempt < maxAttempts)
                {
                    // Report the failure instead of swallowing it, then try again.
                    Console.WriteLine($"Captcha attempt {attempt} of {maxAttempts} failed: {ex.Message}");
                }
            }
        }