/// <summary>
/// Loads <paramref name="googleURL"/> in a PhantomJS browser and walks up to five consecutive
/// result pages, appending whatever <c>scrapOnePageCinema</c> returns for each page to
/// <c>allCinemas</c>.
/// </summary>
/// <param name="googleURL">Address of the first result page to load.</param>
public void scrapCinemaName(string googleURL)
{
    IWebDriver driver = new PhantomJSDriver();
    try
    {
        driver.Navigate().GoToUrl(googleURL);

        for (int i = 0; i < 5; i++)
        {
            // Collect the cinemas listed on the page that is currently loaded.
            allCinemas.AddRange(scrapOnePageCinema(driver));

            try
            {
                // Follow the last "Next" link on the page. When there is none we are on the final
                // page, so stop instead of scraping the same page again (which produced duplicates).
                var nextLink = driver.FindElements(By.PartialLinkText("Next")).LastOrDefault();
                var nextUrl = nextLink == null ? null : nextLink.GetAttribute("href");
                if (string.IsNullOrEmpty(nextUrl))
                {
                    break;
                }

                driver.Navigate().GoToUrl(nextUrl);
            }
            catch (InvalidOperationException)
            {
                // Kept from the original: this exception type is tolerated while paging. Since the
                // browser did not move to a new page there is nothing new left to scrape.
                break;
            }
        }
    }
    finally
    {
        // Always release the PhantomJS process, even when scraping throws.
        driver.Dispose();
    }
}
/// <summary>
/// Drives a PhantomJS browser through login, advanced search and report generation, parses the
/// rows of the resulting report table into <c>ReportCollection</c> items and stores them, sorted,
/// in <c>_tickets</c>. Whatever happens, a logout is attempted (when a login had succeeded) and
/// the driver is disposed.
/// </summary>
/// <exception cref="DriverStartupException">The PhantomJS driver could not be created.</exception>
/// <exception cref="ApplicationException">The logout button could not be located after the login attempt.</exception>
/// <exception cref="Exception">One of the expected report windows did not open.</exception>
public void LoadData()
{
    IWebDriver webDriver = null;
    State currentState = State.Ready;

    try
    {
        try
        {
            webDriver = new PhantomJSDriver(DriverPath);
        }
        catch (WebDriverException exc)
        {
            throw new DriverStartupException("Impossibile avviare il web driver", exc);
        }

        webDriver.Navigate().GoToUrl(LoginUrl);
        Timer.Wait(2);
        currentState = State.Login;

        // Perform the login.
        IWebElement t_username = webDriver.FindElement(By.Id("username-id"));
        IWebElement t_password = webDriver.FindElement(By.Id("pwd-id"));
        IWebElement accesso = webDriver.FindElement(By.Name("login"));
        t_username.SendKeys(Username);
        t_password.SendKeys(Password);
        accesso.Click();
        Timer.Wait(3);

        webDriver.Navigate().GoToUrl(ReportUrl);
        Timer.Wait(3);

        // Check that the logout button is present, to be sure we are really logged in.
        try
        {
            webDriver.FindElement(By.CssSelector(".logout.btn.btn3d.tbbtn"));
        }
        catch (Exception)
        {
            throw new ApplicationException("Login in Remedy non riuscito");
        }
        currentState = State.LoggedIn;

        // Start the search.
        IWebElement ricercaAvanzata = webDriver.FindElement(By.Id("TBadvancedsearch"));
        IWebElement queryInput = webDriver.FindElement(By.Id("arid1005"));
        IWebElement avviaRicerca = webDriver.FindElement(By.Id("TBsearchsavechanges"));
        ricercaAvanzata.Click();
        Timer.Wait(2);
        queryInput.SendKeys(ReportQuery);
        Timer.Wait(1);
        avviaRicerca.Click();
        Timer.Wait(10);

        IWebElement selezionaTicket = webDriver.FindElement(By.ClassName("SelAll"));
        IWebElement creaReport = webDriver.FindElement(By.ClassName("Rep"));
        selezionaTicket.Click();
        Timer.Wait(5);
        creaReport.Click();
        Timer.Wait(5);

        // Creating the report opens a second window (report navigation).
        if (webDriver.WindowHandles.Count > 1)
            webDriver.SwitchTo().Window(webDriver.WindowHandles.Last());
        else
            throw new Exception("Finestra navigazione report non trovata");

        IWebElement techeReportRow = webDriver.FindElement(By.XPath("//span[text()='Teche Report']"));
        new Actions(webDriver)
            .MoveToElement(techeReportRow)
            .DoubleClick()
            .Build()
            .Perform();
        Timer.Wait(5);

        // Double-clicking the report row opens a third window holding the report itself.
        if (webDriver.WindowHandles.Count > 2)
            webDriver.SwitchTo().Window(webDriver.WindowHandles.Last());
        else
            throw new Exception("Finestra report non trovata");
        currentState = State.ReportReady;

        ReadOnlyCollection<IWebElement> elem = webDriver.FindElements(By.TagName("tr"));
        Timer.Wait(5);

        var tickets = elem
            .Select(row => row.FindElements(By.TagName("td")))
            .Where(cells => cells.Count > 0) // rows without <td> cells are skipped
            .Select(cells => new
            {
                Ticket = cells[0].Text,
                Stato = cells[1].Text,
                DataFineAtt = cells[4].Text,
                Componente = cells[10].Text,
                ApertoDa = cells[12].Text,
                InCaricoA = cells[13].Text,
                Servizio = cells[14].Text,
            })
            .Select(x => new
            {
                ticket = new ReportCollection(
                    Int32.Parse(x.Ticket),
                    x.Stato,
                    String.IsNullOrEmpty((x.DataFineAtt ?? "").Trim()) ? (DateTime?)null : DateTime.Parse(x.DataFineAtt),
                    x.Componente,
                    x.ApertoDa,
                    (x.InCaricoA ?? "").Trim() == String.Empty ? null : mailingList.FindByLoginName(x.InCaricoA),
                    x.Servizio
                ),
                IsMAC = String.IsNullOrEmpty((x.DataFineAtt ?? "").Trim()) || x.Servizio == "MAC",
            });

        var result = (
            from pair in tickets
            orderby
                pair.IsMAC ? 0 : 1 ascending, // MAC tickets first
                pair.ticket.dataFineAtt ascending, // then the ones whose end date comes soonest
                pair.ticket.stato.ToLower().Contains("assegnato") ? 1 : 0 // not-yet-assigned first
            select pair.ticket)
            .ToArray();

        _tickets = new ReadOnlyCollection<ReportCollection>(result);
    }
    finally
    {
        if (webDriver != null)
        {
            try
            {
                // Log out so the session is not left open. Which window and which button to use
                // depends on how far the workflow got; in any other state there is nothing to do.
                IWebElement logoutButton = null;
                switch (currentState)
                {
                    case State.LoggedIn:
                        webDriver.SwitchTo().Window(webDriver.WindowHandles[0]);
                        logoutButton = webDriver.FindElement(By.CssSelector(".logout.btn.btn3d.tbbtn"));
                        break;
                    case State.ReportReady:
                        webDriver.SwitchTo().Window(webDriver.WindowHandles[1]);
                        logoutButton = webDriver.FindElement(By.CssSelector("[ardbn=\"btnLogout\"]"));
                        break;
                }

                if (logoutButton != null)
                {
                    new Actions(webDriver)
                        .MoveToElement(logoutButton)
                        .Click()
                        .Build()
                        .Perform();
                    Timer.Wait(4);
                }
            }
            finally
            {
                // Dispose even when the logout itself fails; previously a failing logout skipped
                // the Dispose call and left the PhantomJS process running.
                webDriver.Dispose();
            }
        }
    }
}
/// <summary>
/// Prerequisite - .
/// Opens the HTML result file returned by <c>LogFilePath()</c> in PhantomJS, derives the test
/// outcome from the presence of <c>.g-h.failed</c> elements, logs every collected entry and then
/// writes the Splunk, XML and HTML reports. Any failure is logged instead of being rethrown.
/// </summary>
public void Logger()
{
    try
    {
        string splunkLogPath = this.SplunkLogPath();
        string testResultsDirectory = LogFilePath();

        this.Testcaseinfo = new TestCaseInfo(this.GetType().GetMethod(TestContext.TestName), this);
        Testcaseinfo.StartTime = DateTime.Now;

        string logDirectory;
        string defaultSplunkLogDirectory;

        IWebDriver driver = new PhantomJSDriver("Externals");
        try
        {
            driver.Navigate().GoToUrl("file:///" + testResultsDirectory);
            List<IWebElement> tags = driver.FindElements(By.CssSelector(".g-h.passed")).ToList();

            // The outcome depends only on whether failed entries exist. Previously a missing
            // ".e-m" element threw, which reset the marker and reported a failed run as "Passed".
            var failedTags = driver.FindElements(By.CssSelector(".g-h.failed"));
            if (failedTags.Count > 0)
            {
                tags.AddRange(failedTags);
                IWebElement exception = driver.FindElements(By.ClassName("e-m")).FirstOrDefault();
                if (exception != null)
                {
                    tags.Add(exception);
                }
                Testcaseinfo.TestOutCome = "Failed";
            }
            else
            {
                Testcaseinfo.TestOutCome = "Passed";
            }

            // Create the output directories.
            logDirectory = "C:\\TestLog\\" + DateTime.Now.ToString("yyyy_MM_dd") + "\\Result_" + Testcaseinfo.TestName;
            // The literal text "null" selects the default Splunk directory.
            defaultSplunkLogDirectory = splunkLogPath.Equals("null") ? "C:\\TestLog\\SplunkLogs" : splunkLogPath;

            Directory.CreateDirectory(logDirectory);
            Directory.CreateDirectory(defaultSplunkLogDirectory);
            // Fixed: the original checked C:\Temp but then created the log directory again.
            Directory.CreateDirectory("C:\\Temp");

            if (File.Exists("C:\\Temp\\Result.txt"))
            {
                File.Delete("C:\\Temp\\Result.txt");
            }

            foreach (IWebElement element in tags)
            {
                // The trailing 8 characters are cut off each entry.
                // NOTE(review): presumably a duration suffix - confirm against the result file.
                string text = element.GetAttribute("textContent");
                string value = text.Remove(text.Length - 8);

                if (Testcaseinfo.TestName == null)
                {
                    // The first entry names the test when no name was set yet.
                    Testcaseinfo.TestName = value;
                }
                else
                {
                    Logger log = new Logger();
                    log.GenerateLog(value);
                }
            }
        }
        finally
        {
            // The elements are no longer needed; release the browser even if the steps above failed.
            driver.Dispose();
        }

        // Generating splunk log
        SplunkXmlTestRunReport splunkxmltestrunreport = new SplunkXmlTestRunReport();
        splunkxmltestrunreport.Generate(Testcaseinfo, defaultSplunkLogDirectory);

        // Generating xml log
        XmlTestRunReport xmlrunreport = new XmlTestRunReport();
        xmlrunreport.Generate(Testcaseinfo, logDirectory);

        // Html log
        File.Copy(testResultsDirectory, logDirectory + "\\Result.Html", true);

        // Kill any PhantomJS process that is still around.
        foreach (var process in Process.GetProcessesByName("PhantomJS"))
        {
            process.Kill();
        }
    }
    catch (Exception ex)
    {
        Logger log = new Logger();
        // Record the failure itself; previously only the directories were logged.
        log.GenerateLog(ex.ToString());
        log.GenerateLog(TestContext.TestResultsDirectory);
        log.GenerateLog(TestContext.TestRunResultsDirectory);
        log.GenerateLog(TestContext.TestLogsDir);
        log.GenerateLog(TestContext.ResultsDirectory);

        // Log the first HTML result file, if any. The handler must not throw when none exists.
        DirectoryInfo directory = new DirectoryInfo(this.TestContext.TestResultsDirectory);
        FileInfo firstHtml = directory.Exists ? directory.GetFiles("*.html").FirstOrDefault() : null;
        if (firstHtml != null)
        {
            log.GenerateLog(firstHtml.FullName);
        }
    }
}
/// <summary>
/// Reads ad links, titles, images, image sizes and ids from the page currently loaded in
/// <paramref name="driver"/> using fixed XPath expressions, and passes them to
/// <c>_form1.combineAdd</c> when the id, image and url counts agree. The driver is always shut
/// down before returning.
/// </summary>
/// <param name="url">Page address, forwarded to <c>combineAdd</c>.</param>
/// <param name="driver">Browser with the page already loaded; quit and disposed by this method.</param>
/// <param name="path1">Not used by this method.</param>
/// <param name="path2">Not used by this method.</param>
/// <param name="path3">Not used by this method.</param>
/// <param name="adN">Forwarded to <c>combineAdd</c>.</param>
/// <param name="agent">Forwarded to <c>combineAdd</c>.</param>
public void YahooGemini(string url, PhantomJSDriver driver, string path1, string path2, string path3, ad_network adN, string agent)
{
    const string AdItemXPath = "//li[@class='content has-image voh-parent cf']";

    Stopwatch watch = Stopwatch.StartNew();
    List<string> adIds = new List<string>();
    List<string> adUrls = new List<string>();
    List<string> adImages = new List<string>();
    List<string> adTitle = new List<string>();
    List<string> imgwidth = new List<string>();
    List<string> imgheight = new List<string>();

    try
    {
        foreach (var anchor in driver.FindElements(By.XPath(AdItemXPath + "/div/div/div/h3/a")))
        {
            adUrls.Add(anchor.GetAttribute("href"));
            try
            {
                adTitle.Add(anchor.GetAttribute("outerText"));
            }
            catch (Exception attrEx)
            {
                // Best effort: a title that cannot be read must not abort the scrape.
                Trace.TraceWarning("YahooGemini: title unreadable on " + url + ": " + attrEx.Message);
            }
        }

        foreach (var image in driver.FindElements(By.XPath(AdItemXPath + "/div/div/img")))
        {
            string src = image.GetAttribute("src");
            if (src != null)
            {
                adImages.Add(src);
            }

            try
            {
                string width = image.GetAttribute("width");
                if (width != null)
                {
                    imgwidth.Add(width);
                }
            }
            catch (Exception attrEx)
            {
                Trace.TraceWarning("YahooGemini: image width unreadable on " + url + ": " + attrEx.Message);
            }

            try
            {
                string height = image.GetAttribute("height");
                if (height != null)
                {
                    imgheight.Add(height);
                }
            }
            catch (Exception attrEx)
            {
                Trace.TraceWarning("YahooGemini: image height unreadable on " + url + ": " + attrEx.Message);
            }
        }

        foreach (var item in driver.FindElements(By.XPath(AdItemXPath)))
        {
            try
            {
                string uuid = item.GetAttribute("data-uuid");
                if (uuid != null)
                {
                    adIds.Add(uuid);
                }
            }
            catch (Exception attrEx)
            {
                Trace.TraceWarning("YahooGemini: ad id unreadable on " + url + ": " + attrEx.Message);
            }
        }

        watch.Stop();
        string time = watch.Elapsed.ToString();

        // Only hand over complete data: every ad needs an id, an image and a url.
        if (adIds.Count == adImages.Count && adIds.Count == adUrls.Count)
        {
            _form1.combineAdd(url, adIds, adUrls, adImages, adTitle, imgwidth, imgheight, adN, agent, time);
        }
    }
    catch (Exception ex)
    {
        // The scrape stays best-effort (nothing is rethrown) but the failure is now traceable.
        Trace.TraceWarning("YahooGemini: scrape of " + url + " failed: " + ex);
    }
    finally
    {
        driver.Quit();
        driver.Dispose();
    }
}
/// <summary>
/// Reads ad links, ids, titles, images and image sizes from the page currently loaded in
/// <paramref name="driver"/> and passes them to <c>_form1.combineAdd</c> when the id, image and
/// url counts agree. The driver is always shut down before returning.
/// </summary>
/// <param name="url">Page address, forwarded to <c>combineAdd</c>.</param>
/// <param name="driver">Browser with the page already loaded; quit and disposed by this method.</param>
/// <param name="path1">XPath of the ad anchors (an earlier comment gave <c>//div[@class='zergentity']/div/a</c> as example).</param>
/// <param name="path2">XPath of the ad images (an earlier comment gave <c>//div[@class='zergentity']/a/img</c> as example).</param>
/// <param name="path3">Not used by this method.</param>
/// <param name="adN">Forwarded to <c>combineAdd</c>.</param>
/// <param name="agent">Forwarded to <c>combineAdd</c>.</param>
public void ZergNetDotcom(string url, PhantomJSDriver driver, string path1, string path2, string path3, ad_network adN, string agent)
{
    Stopwatch watch = Stopwatch.StartNew();
    List<string> adIds = new List<string>();
    List<string> adUrls = new List<string>();
    List<string> adImages = new List<string>();
    List<string> adTitle = new List<string>();
    List<string> imgwidth = new List<string>();
    List<string> imgheight = new List<string>();

    try
    {
        foreach (var anchor in driver.FindElements(By.XPath(path1)))
        {
            string href = anchor.GetAttribute("href");
            adUrls.Add(href);

            // The ad id is the fifth '/'-separated segment of the link. Links that are missing
            // or too short simply contribute no id (this used to rely on a swallowed exception).
            string[] segments = href == null ? new string[0] : href.Split('/');
            if (segments.Length > 4)
            {
                adIds.Add(segments[4].Trim());
            }

            try
            {
                adTitle.Add(anchor.GetAttribute("outerText"));
            }
            catch (Exception attrEx)
            {
                // Best effort: a title that cannot be read must not abort the scrape.
                Trace.TraceWarning("ZergNetDotcom: title unreadable on " + url + ": " + attrEx.Message);
            }
        }

        foreach (var image in driver.FindElements(By.XPath(path2)))
        {
            adImages.Add(image.GetAttribute("src"));

            try
            {
                // Width and height are taken from fixed positions of the inline style, split on
                // ';' and ':'. Both are recorded together or not at all.
                string style = image.GetAttribute("style");
                string[] parts = style == null ? new string[0] : style.Split(';', ':');
                if (parts.Length > 7)
                {
                    imgwidth.Add(parts[5]);
                    imgheight.Add(parts[7]);
                }
            }
            catch (Exception attrEx)
            {
                Trace.TraceWarning("ZergNetDotcom: image style unreadable on " + url + ": " + attrEx.Message);
            }
        }

        watch.Stop();
        string time = watch.Elapsed.ToString();

        // Only hand over complete data: every ad needs an id, an image and a url.
        if (adIds.Count == adImages.Count && adIds.Count == adUrls.Count)
        {
            _form1.combineAdd(url, adIds, adUrls, adImages, adTitle, imgwidth, imgheight, adN, agent, time);
        }
    }
    catch (Exception ex)
    {
        // The scrape stays best-effort (nothing is rethrown) but the failure is now traceable.
        Trace.TraceWarning("ZergNetDotcom: scrape of " + url + " failed: " + ex);
    }
    finally
    {
        driver.Quit();
        driver.Dispose();
    }
}
/// <summary>
/// Reads ad links, images, ids and titles from the page currently loaded in
/// <paramref name="driver"/> and passes them to <c>_form1.combineAdd</c> when the id, image and
/// url counts agree. The driver is always shut down before returning.
/// </summary>
/// <param name="url">Page address, forwarded to <c>combineAdd</c>.</param>
/// <param name="driver">Browser with the page already loaded; quit and disposed by this method.</param>
/// <param name="path1">XPath of the ad anchors (an earlier comment gave <c>//a[contains(@onmousedown,'paid.outbrain.com/')]</c> as example).</param>
/// <param name="path2">XPath of the ad images (an earlier comment gave <c>//img[@class='ob-rec-image ob-show']</c> as example).</param>
/// <param name="path3">XPath of the elements whose <c>title</c> attribute holds the ad title (an earlier comment gave <c>//span[@class='ob-unit ob-rec-text']</c> as example).</param>
/// <param name="adN">Forwarded to <c>combineAdd</c>.</param>
/// <param name="agent">Forwarded to <c>combineAdd</c>.</param>
public void Outbrain(string url, PhantomJSDriver driver, string path1, string path2, string path3, ad_network adN, string agent)
{
    Stopwatch watch = Stopwatch.StartNew();
    List<string> adIds = new List<string>();
    List<string> adUrls = new List<string>();
    List<string> adImages = new List<string>();
    List<string> adTitle = new List<string>();
    // No image sizes are collected here; the lists are passed on empty.
    List<string> imgwidth = new List<string>();
    List<string> imgheight = new List<string>();

    try
    {
        foreach (var anchor in driver.FindElements(By.XPath(path1)))
        {
            adUrls.Add(anchor.GetAttribute("href"));
        }

        foreach (var image in driver.FindElements(By.XPath(path2)))
        {
            string src = image.GetAttribute("src");
            adImages.Add(src);

            // The ad id is the ninth '/'-separated segment of the image address. A missing or
            // shorter address throws and, as before, aborts the whole scrape.
            adIds.Add(src.Split('/')[8]);
        }

        foreach (var titleHolder in driver.FindElements(By.XPath(path3)))
        {
            string title = titleHolder.GetAttribute("title");
            if (title != null)
            {
                adTitle.Add(title);
            }
        }

        watch.Stop();
        string time = watch.Elapsed.ToString();

        // Only hand over complete data: every ad needs an id, an image and a url.
        if (adIds.Count == adImages.Count && adIds.Count == adUrls.Count)
        {
            _form1.combineAdd(url, adIds, adUrls, adImages, adTitle, imgwidth, imgheight, adN, agent, time);
        }
    }
    catch (Exception ex)
    {
        // The scrape stays best-effort (nothing is rethrown) but the failure is now traceable.
        Trace.TraceWarning("Outbrain: scrape of " + url + " failed: " + ex);
    }
    finally
    {
        driver.Quit();
        driver.Dispose();
    }
}
/// <summary>
/// Reads ad ids, titles, thumbnails and links from the page currently loaded in
/// <paramref name="driver"/> and passes them to <c>_form1.combineAdd</c> when the id, image and
/// url counts agree. The driver is always shut down before returning.
/// </summary>
/// <param name="url">Page address, forwarded to <c>combineAdd</c>.</param>
/// <param name="driver">Browser with the page already loaded; quit and disposed by this method.</param>
/// <param name="path1">XPath of the elements carrying the <c>data-item-*</c> attributes.</param>
/// <param name="path2">XPath of the ad anchors whose <c>href</c> is the ad url.</param>
/// <param name="path3">Not used by this method.</param>
/// <param name="adN">Forwarded to <c>combineAdd</c>.</param>
/// <param name="agent">Forwarded to <c>combineAdd</c>.</param>
public void Taboola(string url, PhantomJSDriver driver, string path1, string path2, string path3, ad_network adN, string agent)
{
    Stopwatch watch = Stopwatch.StartNew();
    List<string> adIds = new List<string>();
    List<string> adUrls = new List<string>();
    List<string> adImages = new List<string>();
    List<string> adTitle = new List<string>();
    // No image sizes are collected here; the lists are passed on empty.
    List<string> imgwidth = new List<string>();
    List<string> imgheight = new List<string>();

    try
    {
        foreach (var item in driver.FindElements(By.XPath(path1)))
        {
            try
            {
                // Ids containing '~' keep only their fifth '~'-separated part; dashes are removed.
                string itemId = item.GetAttribute("data-item-id");
                if (itemId.Contains("~"))
                {
                    itemId = itemId.Split('~')[4];
                }
                adIds.Add(itemId.Trim().Replace("-", ""));
            }
            catch (Exception attrEx)
            {
                // Best effort: an item without a usable id contributes none.
                Trace.TraceWarning("Taboola: ad id unusable on " + url + ": " + attrEx.Message);
            }

            try
            {
                // NOTE(review): this strips every space from the title, not just surrounding
                // whitespace - kept as is, confirm that it is intended.
                string title = item.GetAttribute("data-item-title").Replace(" ", "");
                if (title != "")
                {
                    adTitle.Add(title);
                }
            }
            catch (Exception attrEx)
            {
                Trace.TraceWarning("Taboola: title unusable on " + url + ": " + attrEx.Message);
            }

            try
            {
                string thumb = item.GetAttribute("data-item-thumb");
                if (thumb != null)
                {
                    adImages.Add(thumb);
                }
            }
            catch (Exception attrEx)
            {
                Trace.TraceWarning("Taboola: thumbnail unreadable on " + url + ": " + attrEx.Message);
            }
        }

        foreach (var anchor in driver.FindElements(By.XPath(path2)))
        {
            adUrls.Add(anchor.GetAttribute("href"));
        }

        watch.Stop();
        string time = watch.Elapsed.ToString();

        // Only hand over complete data: every ad needs an id, an image and a url.
        if (adIds.Count == adImages.Count && adIds.Count == adUrls.Count)
        {
            _form1.combineAdd(url, adIds, adUrls, adImages, adTitle, imgwidth, imgheight, adN, agent, time);
        }
    }
    catch (Exception ex)
    {
        // The scrape stays best-effort (nothing is rethrown) but the failure is now traceable.
        Trace.TraceWarning("Taboola: scrape of " + url + " failed: " + ex);
    }
    finally
    {
        driver.Quit();
        driver.Dispose();
    }
}
/// <summary>
/// Reads ad links, titles, images, image sizes and ids from the page currently loaded in
/// <paramref name="driver"/> and passes them to <c>_form1.combineAdd</c> when the id, image and
/// url counts agree. Links and images are located with fixed XPath expressions; only the id
/// elements come from <paramref name="path3"/>. The driver is always shut down before returning.
/// </summary>
/// <param name="url">Page address, forwarded to <c>combineAdd</c>.</param>
/// <param name="driver">Browser with the page already loaded; quit and disposed by this method.</param>
/// <param name="path1">Not used by this method.</param>
/// <param name="path2">Not used by this method.</param>
/// <param name="path3">XPath of the elements whose <c>class</c> attribute contains the ad id (an earlier comment gave <c>//div[contains(@class,'mgline teaser')]</c> as example).</param>
/// <param name="adN">Forwarded to <c>combineAdd</c>.</param>
/// <param name="agent">Forwarded to <c>combineAdd</c>.</param>
public void Mgid(string url, PhantomJSDriver driver, string path1, string path2, string path3, ad_network adN, string agent)
{
    Stopwatch watch = Stopwatch.StartNew();
    List<string> adIds = new List<string>();
    List<string> adUrls = new List<string>();
    List<string> adImages = new List<string>();
    List<string> adTitle = new List<string>();
    List<string> imgwidth = new List<string>();
    List<string> imgheight = new List<string>();

    try
    {
        foreach (var anchor in driver.FindElements(By.XPath("//div[@class='text-elements']/div/a")))
        {
            adUrls.Add(anchor.GetAttribute("href"));
            try
            {
                adTitle.Add(anchor.GetAttribute("outerText"));
            }
            catch (Exception attrEx)
            {
                // Best effort: a title that cannot be read must not abort the scrape.
                Trace.TraceWarning("Mgid: title unreadable on " + url + ": " + attrEx.Message);
            }
        }

        foreach (var image in driver.FindElements(By.XPath("//div[@class='mcimg']/a/img")))
        {
            string src = image.GetAttribute("src");
            if (src != null)
            {
                adImages.Add(src);

                // The size is taken from the image address: the part after the first '_', with
                // ".jpg" removed, is split on 'x' into width and height. An address of any other
                // shape throws and, as before, aborts the whole scrape.
                string[] dimensions = src.Split('_')[1].Replace(".jpg", "").Split('x');
                string width = dimensions[0];
                string height = dimensions[1];
                imgwidth.Add(width);
                imgheight.Add(height);
            }
        }

        foreach (var item in driver.FindElements(By.XPath(path3)))
        {
            // The id is the text between the first and second '-' of the class attribute.
            string cssClass = item.GetAttribute("class");
            if (cssClass != null)
            {
                adIds.Add(cssClass.Split('-')[1].Trim());
            }
        }

        watch.Stop();
        string time = watch.Elapsed.ToString();

        // Only hand over complete data: every ad needs an id, an image and a url.
        if (adIds.Count == adImages.Count && adIds.Count == adUrls.Count)
        {
            _form1.combineAdd(url, adIds, adUrls, adImages, adTitle, imgwidth, imgheight, adN, agent, time);
        }
    }
    catch (Exception ex)
    {
        // The scrape stays best-effort (nothing is rethrown) but the failure is now traceable.
        Trace.TraceWarning("Mgid: scrape of " + url + " failed: " + ex);
    }
    finally
    {
        driver.Quit();
        driver.Dispose();
    }
}
/// <summary>
/// Reads ad links, titles, images, image sizes and ids from the page currently loaded in
/// <paramref name="driver"/> and passes them to <c>_form1.combineAdd</c> when the id, image and
/// url counts agree. Every ad link is opened in a second, private PhantomJS instance so that the
/// landing page's <c>og:url</c> meta value can be recorded instead of the raw link. Both drivers
/// are always shut down before returning.
/// </summary>
/// <param name="url">Page address, forwarded to <c>combineAdd</c>.</param>
/// <param name="driver">Browser with the page already loaded; quit and disposed by this method.</param>
/// <param name="path1">XPath of the ad anchors.</param>
/// <param name="path2">XPath of the ad images.</param>
/// <param name="path3">XPath of the elements carrying the <c>data-adid</c> attribute.</param>
/// <param name="adN">Forwarded to <c>combineAdd</c>.</param>
/// <param name="agent">Forwarded to <c>combineAdd</c>.</param>
public void Adblade(string url, PhantomJSDriver driver, string path1, string path2, string path3, ad_network adN, string agent)
{
    Stopwatch watch = Stopwatch.StartNew();
    List<string> adIds = new List<string>();
    List<string> adUrls = new List<string>();
    List<string> adImages = new List<string>();
    List<string> adTitle = new List<string>();
    List<string> imgwidth = new List<string>();
    List<string> imgheight = new List<string>();

    PhantomJSDriver driver1 = null;
    try
    {
        // Created inside the protected region: if the second browser fails to start, the
        // exception still reaches the caller, but the caller's driver is no longer leaked.
        var service1 = PhantomJSDriverService.CreateDefaultService();
        service1.HideCommandPromptWindow = true;
        driver1 = new PhantomJSDriver(service1);

        try
        {
            foreach (var anchor in driver.FindElements(By.XPath(path1)))
            {
                try
                {
                    string href = anchor.GetAttribute("href");
                    try
                    {
                        driver1.Navigate().GoToUrl(href);
                        IWebElement canonical = driver1.FindElement(By.XPath("//meta[@property='og:url']"));
                        adUrls.Add(canonical.GetAttribute("content"));
                    }
                    catch (Exception)
                    {
                        // Landing page unreachable or without og:url: fall back to the raw link.
                        adUrls.Add(href);
                    }
                }
                catch (Exception attrEx)
                {
                    Trace.TraceWarning("Adblade: link unreadable on " + url + ": " + attrEx.Message);
                }

                try
                {
                    adTitle.Add(anchor.GetAttribute("outerText"));
                }
                catch (Exception attrEx)
                {
                    // Best effort: a title that cannot be read must not abort the scrape.
                    Trace.TraceWarning("Adblade: title unreadable on " + url + ": " + attrEx.Message);
                }
            }

            foreach (var image in driver.FindElements(By.XPath(path2)))
            {
                // Example address from the original code:
                // http://staticd.cdn.adblade.com/banners/images/298x224/4365_548f126ed8024.jpg
                string src = image.GetAttribute("src");
                if (src != null)
                {
                    adImages.Add(src);

                    // The sixth '/'-separated segment holds "<width>x<height>". An address of any
                    // other shape throws and, as before, aborts the whole scrape.
                    string[] dimensions = src.Split('/')[5].Split('x');
                    string width = dimensions[0];
                    string height = dimensions[1];
                    imgwidth.Add(width);
                    imgheight.Add(height);
                }
            }

            foreach (var item in driver.FindElements(By.XPath(path3)))
            {
                try
                {
                    string adId = item.GetAttribute("data-adid");
                    if (adId != null)
                    {
                        adIds.Add(adId);
                    }
                }
                catch (Exception attrEx)
                {
                    Trace.TraceWarning("Adblade: ad id unreadable on " + url + ": " + attrEx.Message);
                }
            }

            watch.Stop();
            string time = watch.Elapsed.ToString();

            // Only hand over complete data: every ad needs an id, an image and a url.
            if (adIds.Count == adImages.Count && adIds.Count == adUrls.Count)
            {
                _form1.combineAdd(url, adIds, adUrls, adImages, adTitle, imgwidth, imgheight, adN, agent, time);
            }
        }
        catch (Exception ex)
        {
            // The scrape stays best-effort (nothing is rethrown) but the failure is now traceable.
            Trace.TraceWarning("Adblade: scrape of " + url + " failed: " + ex);
        }
    }
    finally
    {
        try
        {
            driver.Quit();
            driver.Dispose();
        }
        finally
        {
            // Shut the private browser down even if closing the caller's driver failed.
            if (driver1 != null)
            {
                driver1.Quit();
                driver1.Dispose();
            }
        }
    }
}
/// <summary>
/// Reads ad ids, links, images, titles and image sizes from the page currently loaded in
/// <paramref name="driver"/> and passes them to <c>_form1.combineAdd</c> when the id, image and
/// url counts agree. The driver is always shut down before returning.
/// </summary>
/// <param name="url">Page address, forwarded to <c>combineAdd</c>.</param>
/// <param name="driver">Browser with the page already loaded; quit and disposed by this method.</param>
/// <param name="path1">XPath of the ad anchors (id is parsed from <c>onclick</c>, url from <c>href</c>).</param>
/// <param name="path2">Not used by this method; the images are located with a fixed XPath.</param>
/// <param name="path3">Not used by this method.</param>
/// <param name="adN">Forwarded to <c>combineAdd</c>.</param>
/// <param name="agent">Forwarded to <c>combineAdd</c>.</param>
public void ContentDotAd(string url, PhantomJSDriver driver, string path1, string path2, string path3, ad_network adN, string agent)
{
    Stopwatch watch = Stopwatch.StartNew();
    List<string> adIds = new List<string>();
    List<string> adUrls = new List<string>();
    List<string> adImages = new List<string>();
    List<string> adTitle = new List<string>();
    List<string> imgwidth = new List<string>();
    List<string> imgheight = new List<string>();

    try
    {
        foreach (var anchor in driver.FindElements(By.XPath(path1)))
        {
            // The id is the third comma-separated argument of the onclick handler, without
            // quotes. A handler of any other shape throws and aborts the whole scrape.
            string adId = anchor.GetAttribute("onclick").Split(',')[2].Replace("'", "");
            adIds.Add(adId.Trim());
            adUrls.Add(anchor.GetAttribute("href"));
        }

        foreach (var image in driver.FindElements(By.XPath("//div[@class='ac_container']/a/img")))
        {
            adImages.Add(image.GetAttribute("src"));
            adTitle.Add(image.GetAttribute("title"));

            // Fixed: width and height used to be stored in each other's list.
            imgwidth.Add(image.GetAttribute("width"));
            imgheight.Add(image.GetAttribute("height"));
        }

        watch.Stop();
        string time = watch.Elapsed.ToString();

        // Only hand over complete data: every ad needs an id, an image and a url.
        if (adIds.Count == adImages.Count && adIds.Count == adUrls.Count)
        {
            _form1.combineAdd(url, adIds, adUrls, adImages, adTitle, imgwidth, imgheight, adN, agent, time);
        }
    }
    catch (Exception ex)
    {
        // The scrape stays best-effort (nothing is rethrown) but the failure is now traceable.
        Trace.TraceWarning("ContentDotAd: scrape of " + url + " failed: " + ex);
    }
    finally
    {
        driver.Quit();
        driver.Dispose();
    }
}
/// <summary>
/// Opens <paramref name="url"/> in PhantomJS, saves a screenshot as <c>image1.png</c>, collects the
/// category links of the main menu and then walks every page of every category, gathering
/// article links, titles and thumbnail addresses.
/// </summary>
/// <remarks>
/// The gathered lists are local and are not returned or stored anywhere yet; the only lasting
/// effect is the screenshot file. Failures are traced and not rethrown.
/// </remarks>
/// <param name="url">Start page whose main menu lists the categories.</param>
public void ScrapeAdds(string url)
{
    // The service must be configured BEFORE the driver is created: the PhantomJS process is
    // launched by the driver constructor, so options set afterwards never took effect.
    var service = PhantomJSDriverService.CreateDefaultService();
    service.HideCommandPromptWindow = true;
    service.IgnoreSslErrors = true;
    service.LoadImages = false;
    service.ProxyType = "none";

    List<string> categories = new List<string>();
    List<string> Links = new List<string>();
    List<string> Images = new List<string>();
    List<string> Titles = new List<string>();

    var driver = new PhantomJSDriver(service);
    try
    {
        driver.Navigate().GoToUrl(url);
        driver.GetScreenshot().SaveAsFile("image1.png", ImageFormat.Png);

        // Read all category addresses first: navigating away invalidates the menu elements.
        foreach (var cat in driver.FindElements(By.XPath("//ul[@id='menu-menu_main']/li/a")))
        {
            string href = cat.GetAttribute("href");
            if (!string.IsNullOrEmpty(href))
            {
                categories.Add(href);
            }
        }

        foreach (var catUrl in categories)
        {
            driver.Navigate().GoToUrl(catUrl);

            // The pager text is split on spaces and its fourth word is the page count
            // (presumably "Page 1 of N" - confirm against the site). A category without a
            // readable pager is treated as a single page instead of aborting the whole run.
            int pageCount = 1;
            ReadOnlyCollection<IWebElement> pagers = driver.FindElements(By.XPath("//span[@class='pages']"));
            if (pagers.Count > 0)
            {
                string pagerText = pagers[0].GetAttribute("outerText") ?? string.Empty;
                string[] words = pagerText.Split(' ');
                int parsed;
                if (words.Length > 3 && int.TryParse(words[3], out parsed) && parsed > 0)
                {
                    pageCount = parsed;
                }
            }

            for (int page = 1; page <= pageCount; page++)
            {
                // Page 1 is already loaded. Later pages are assumed to live at
                // "<category>/page/<n>/" - the original built this address (with stray spaces)
                // but threw it away and reloaded the first page on every iteration.
                if (page > 1)
                {
                    driver.Navigate().GoToUrl(catUrl.TrimEnd('/') + "/page/" + page + "/");
                }

                foreach (var entry in driver.FindElements(By.XPath("//a[@itemprop='url']")))
                {
                    Links.Add(entry.GetAttribute("href"));
                    Titles.Add(entry.GetAttribute("title"));
                }

                foreach (var thumb in driver.FindElements(By.XPath("//img[@class='entry-thumb']")))
                {
                    Images.Add(thumb.GetAttribute("src"));
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Still best-effort (nothing is rethrown), but the failure is no longer invisible.
        System.Diagnostics.Trace.TraceWarning("ScrapeAdds: scrape of " + url + " failed: " + ex);
    }
    finally
    {
        // The driver was never shut down before, leaving a PhantomJS process behind per call.
        driver.Quit();
    }
}
/// <summary>
/// Scrapes statistics data: opens the politistatistik.dk parameter page in PhantomJS, selects a
/// fixed set of options, ticks every year checkbox, opens the result window and turns each
/// <c>dataitem</c> element into a <c>Politistatistik</c> entry. Entries are numbered with
/// consecutive years starting at 2007, in document order.
/// </summary>
/// <returns>
/// List of statistics objects. When scraping fails, the failure is traced and whatever had been
/// collected up to that point (possibly nothing) is returned.
/// </returns>
public List<Politistatistik> ScrapeStatistik()
{
    List<Politistatistik> politistatistik = new List<Politistatistik>();
    try
    {
        int aarsTal = 2007;
        using (IWebDriver webDriver = new PhantomJSDriver())
        {
            webDriver.Navigate().GoToUrl("http://www.politistatistik.dk/parameter.aspx?id=27");

            // Fixed selections made by position in the page's option lists.
            webDriver.FindElement(By.XPath("//*[@id='geo00']/optgroup/option[8]")).Click();
            webDriver.FindElement(By.XPath("//*[@id='kriminalitet01']/optgroup[2]/option[5]")).Click();
            webDriver.FindElement(By.XPath("//*[@id='rightCloBaggr']/div[5]/div[3]/div[2]/input")).Click();

            // Tick every available year.
            foreach (var aar in webDriver.FindElements(By.XPath("//*[@name='periodeYear']")))
            {
                aar.Click();
            }

            // Clicking the image opens the result in a new window; continue there.
            webDriver.FindElement(By.XPath("//*[@id='rightCol']/div[2]/div/div[3]/img")).Click();
            webDriver.SwitchTo().Window(webDriver.WindowHandles.Last());

            foreach (var item in webDriver.FindElements(By.ClassName("dataitem")))
            {
                politistatistik.Add(new Politistatistik(aarsTal++, item.Text));
            }
        }

        // NOTE(review): ToString() on a List yields its type name, not its content - kept
        // unchanged, confirm what TraceHjaelp is meant to receive here.
        TraceHjaelp(new[] { "" }, politistatistik.ToString());
    }
    catch (Exception ex)
    {
        // Callers still never see an exception, but the failure is no longer silently dropped.
        System.Diagnostics.Trace.TraceError("ScrapeStatistik failed: " + ex);
    }

    return politistatistik;
}