/// <summary>
/// Crawls a page with PhantomJS (phantomjs.exe) and returns the text of the first
/// element matched by an XPath expression.
/// Requires the NuGet packages Selenium.PhantomJS.WebDriver and Selenium.WebDriver
/// (Tools -> NuGet Package Manager).
/// </summary>
/// <param name="url">Address of the page that holds the data.</param>
/// <param name="xpath0">XPath expression locating the data.</param>
/// <returns>
/// The text of the first matching element, or the literal string "NULL" when
/// nothing matches.
/// </returns>
public string xpath_crwal(string url, string xpath0)
{
    PhantomJSDriverService service = PhantomJSDriverService.CreateDefaultService();
    service.HideCommandPromptWindow = true; // hide the console (DOS) window

    var options = new PhantomJSOptions();
    options.AddAdditionalCapability("phantomjs.page.settings.userAgent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0");

    var driver1 = new PhantomJSDriver(service, options);
    try
    {
        driver1.Navigate().GoToUrl(url);

        // A search always yields a collection; it is empty when nothing matches.
        ReadOnlyCollection<IWebElement> res = driver1.FindElementsByXPath(xpath0);
        return res.Count != 0 ? res[0].Text : "NULL";
    }
    finally
    {
        // Quit on every path (match, no match, exception) so the phantomjs
        // process started for this call is never left behind.
        driver1.Quit();
    }
}
/// <summary>
/// Calls <c>NavigateToSelectActivity()</c> and then reads every input element whose
/// name contains "MainContent$activitiesGrid" into an <see cref="Activity"/>.
/// </summary>
/// <returns>
/// One <see cref="Activity"/> per matching input, with <c>Id</c> taken from the
/// element's "id" attribute and <c>Text</c> from its "value" attribute.
/// </returns>
public IEnumerable<Activity> GetActivities()
{
    NavigateToSelectActivity();

    var webElements = PhantomJsDriver.FindElementsByXPath("//input[contains(@name,'MainContent$activitiesGrid')]");

    // Materialize immediately: a deferred query would read the attributes only when
    // the caller enumerates (possibly after the driver has left this page, making the
    // elements stale) and would repeat the WebDriver round-trips on every enumeration.
    return webElements
        .Select(x => new Activity
        {
            Id = x.GetAttribute("id"),
            Text = x.GetAttribute("value")
        })
        .ToList();
}
/// <summary>
/// Loads the ship-matrix page in PhantomJS, prints the text of the third
/// <c>div</c> directly under the element with id "shipscontainer", then waits
/// for ENTER.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var driver = new PhantomJSDriver();
    try
    {
        // Assigning Url loads the page; a bare Navigate() call only returns the
        // navigation helper and does nothing by itself, so it is not needed.
        driver.Url = "https://robertsspaceindustries.com/ship-matrix";

        // The driver has executed the page's scripts, so the rendered DOM can be queried.
        ReadOnlyCollection<IWebElement> names = driver.FindElementsByXPath("//*[@id=\"shipscontainer\"]/div");

        if (names.Count > 2)
        {
            Console.Write(names[2].Text);
        }
        else
        {
            // Guard against a page that rendered fewer entries than expected
            // instead of failing with an index-out-of-range error.
            Console.WriteLine("Expected at least 3 elements under #shipscontainer but found " + names.Count + ".");
        }
    }
    finally
    {
        // Always end the session so the phantomjs process does not outlive the program.
        driver.Quit();
    }

    Console.ReadLine();
}
/// <summary>
/// Our objective from here is to get the json data in the source and save it. We'll pull it apart later.
/// Logs in to LoopNet, reads the saved searches, and for each search records the
/// listings shown on the results page, generates the listing summary report and
/// writes the text extracted from the report page to <c>jsonOutputDirectory</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    try
    {
        Console.WriteLine("Hit enter to start the magic.....");
        Console.ReadLine();

        // NOTE(review): credentials are hard-coded in source; move them to
        // configuration or a secret store.
        string uid = "*****@*****.**", pwd = "leblanc2016";

        driver.Manage().Timeouts().ImplicitWait = implicitWait;
        driver.Manage().Timeouts().PageLoad = pageLoadWait;
        // Unclear whether the window size matters here; set it anyway.
        driver.Manage().Window.Size = new System.Drawing.Size(1920, 1080);

        driver.Navigate().GoToUrl("http://www.loopnet.com/xNet/MainSite/User/customlogin.aspx?LinkCode=31824");

        // Log in.
        driver.FindElement(By.Name("ctlLogin$LogonEmail")).SendKeys(uid);
        driver.FindElement(By.Name("ctlLogin$LogonPassword")).SendKeys(pwd);
        driver.FindElement(By.Id("ctlLogin_btnLogon")).Click();

        // Go to the searches page.
        driver.Navigate().GoToUrl("http://www.loopnet.com/xNet/MainSite/Listing/SavedSearches/MySavedSearches_FSFL.aspx?LinkCode=29400");

        // Get the search names first, then their urls.
        var submarketNamesCollection = driver.FindElement(By.ClassName("savedSearchContainer")).FindElements(By.XPath("./tbody/tr/td[2]"));
        var searchLinkElements = driver.FindElementsByXPath("//*[@id='form1']/div[5]/div/div/table/tbody/tr/td[1]/div/a[1]");

        // Names and links are paired by position below, so the counts must agree.
        if (submarketNamesCollection.Count != searchLinkElements.Count)
        {
            throw new InvalidOperationException($"Submarket/Search names count: {submarketNamesCollection.Count}. Doesn't equal recovered link elements count: {searchLinkElements.Count}");
        }

        // Collection that holds our data from here on out.
        List<BaseSearch> recoveredSearches = new List<BaseSearch>();
        for (int i = 0; i < submarketNamesCollection.Count; i++)
        {
            recoveredSearches.Add(new BaseSearch()
            {
                Name = submarketNamesCollection[i].Text,
                BaseResultsURL = searchLinkElements[i].GetAttribute("href")
            });
        }

        // Iterate through the searches and pull the report for each one.
        for (int searchIndex = 0; searchIndex < recoveredSearches.Count; searchIndex++)
        {
            var currentSearch = recoveredSearches[searchIndex];
            driver.Navigate().GoToUrl(currentSearch.BaseResultsURL);

            CollectListingsFromResultsPage(currentSearch);
            // Broker info. Deal with that later.

            // Click the create reports button.
            driver.FindElement(By.XPath("/html/body/section/main/section/div/section[1]//div[@class='toolbar-right']/div/button")).Click();

            SelectAllListingsOnEveryPage();

            // Onward to our report. Click the big red generate reports button.
            driver.FindElement(By.XPath("//button[text()='Generate Reports']")).Click();
            // Select the listing summary report radio button.
            driver.FindElement(By.Id("listingSummary")).Click();
            driver.FindElement(By.Id("btnCreateReport1")).Click();
            driver.SwitchTo().Frame("reportFrame");

            // Save the raw extracted text; its validity as JSON is not checked here.
            currentSearch.rawJSON = ExtractReportJson(driver.PageSource);

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(jsonOutputDirectory);
            File.WriteAllText(Path.Combine(jsonOutputDirectory, "SearchData_" + searchIndex), currentSearch.rawJSON);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Explosion: " + ex.Message + Environment.NewLine + ex.StackTrace + Environment.NewLine);
    }
    finally
    {
        // Quit (not Close) ends the WebDriver session and its browser process;
        // Close only closes the current window.
        driver?.Quit();
        Console.WriteLine("Tear down complete, strike [ENTER] to exit.");
    }
}

// Reads property names and building classes from the current results page into currentSearch.Listings.
private static void CollectListingsFromResultsPage(BaseSearch currentSearch)
{
    // Property name is in the title attribute of these link elements.
    var propertyNamesList = driver.FindElements(By.XPath("//*[@id='placardSec']//h5[@class = 'listing-address']/a"))
        .Select(x => x.GetAttribute("title"))
        .ToList();

    // Building class candidates, one per listing.
    var possibleBldgClasses = driver.FindElements(By.XPath("//*[@id='placardSec']/div[2]/div/article/div[1]/section[2]/div[1]/ul/li[3]/i"))
        .Select(x => x.Text.Trim())
        .ToList();

    // Make sure the classes list and names list are 1 to 1.
    if (propertyNamesList.Count != possibleBldgClasses.Count)
    {
        throw new InvalidOperationException($"The property names list count: {propertyNamesList.Count} does not match the Bldg Class candidate list count: {possibleBldgClasses.Count}");
    }

    for (int tempIndex = 0; tempIndex < propertyNamesList.Count; tempIndex++)
    {
        string candidate = possibleBldgClasses[tempIndex];
        // An empty candidate has no first character to inspect; treat it as "N/A".
        bool startsWithLetter = candidate.Length > 0 && char.IsLetter(candidate[0]);

        currentSearch.Listings.Add(new Listing()
        {
            PropertyName = propertyNamesList[tempIndex],
            BldgClass = startsWithLetter ? candidate : "N/A"
        });
    }
}

// Clicks "Select all" once on the current page and on every following page reachable through the next-page link.
private static void SelectAllListingsOnEveryPage()
{
    while (true)
    {
        // Scripted click rather than IWebElement.Click(), as in the original code.
        var selectAllButton = driver.FindElement(By.XPath("//button[text()='Select all']"));
        ((IJavaScriptExecutor)driver).ExecuteScript("arguments[0].click();", selectAllButton);

        IWebElement nextPageLink;
        // NOTE(review): FlipDriverTimeout presumably shortens the wait while probing
        // for an element that may legitimately be absent - confirm.
        FlipDriverTimeout(true);
        try
        {
            nextPageLink = driver.FindElements(By.CssSelector("a.caret-right-large"))?.FirstOrDefault();
        }
        finally
        {
            // Restore the timeout even when the lookup throws.
            FlipDriverTimeout(false);
        }

        if (nextPageLink == null)
        {
            // Last page: stop here so "Select all" is not clicked a second time on it.
            return;
        }

        nextPageLink.Click();
    }
}

// Returns the trimmed text between the "Data":{"Report": marker (inclusive) and the following Config={ marker (exclusive).
private static string ExtractReportJson(string pageSource)
{
    const string startMarker = "\"Data\":{\"Report\":";
    const string endMarker = "Config={";

    int start = pageSource.IndexOf(startMarker, StringComparison.Ordinal);
    if (start < 0)
    {
        throw new InvalidOperationException("Report page source does not contain the start marker: " + startMarker);
    }

    int end = pageSource.IndexOf(endMarker, start, StringComparison.Ordinal);
    if (end < 0)
    {
        throw new InvalidOperationException("Report page source does not contain the end marker: " + endMarker);
    }

    return pageSource.Substring(start, end - start).Trim();
}
/// <summary>
/// Collects the bet-type tabs from the "bettype-tabs" element and switches to each one.
/// Takes a screenshot, switches to the currently active tab, rebuilds <c>Bettypes</c>
/// (link text mapped to a statement taken from the link's "onmousedown" attribute),
/// then calls <c>switchBettype</c> for every collected entry.
/// </summary>
public void betTypes()
{
    takeScreenshot();

    string visibleTabsXPath = "//div[@id='bettype-tabs']//li[@style='display: block;']//a";
    string hiddenTabsXPath = "//div[@id='bettype-tabs']//li[14]//a";

    var activeLink = driver.FindElementByXPath("//div[@id='bettype-tabs']//li[@class=' active']").Text;
    switchBettype(activeLink);

    var li = driver.FindElementsByXPath(visibleTabsXPath).ToList();
    var hid = driver.FindElementsByXPath(hiddenTabsXPath).ToList();

    Bettypes = new Dictionary<string, string>();

    // Visible tabs: keep the first statement of the onmousedown handler.
    foreach (var link in li)
    {
        string js = link.GetAttribute("onmousedown");
        if (js == null)
        {
            // No handler on this link; nothing to record.
            continue;
        }

        Bettypes[link.Text] = js.Split(';')[0];
    }

    // Links under the 14th tab: keep the second statement of the handler when there is one.
    foreach (var link in hid)
    {
        try
        {
            string js = link.GetAttribute("onmousedown");
            string[] statements = js == null ? new string[0] : js.Split(';');
            if (statements.Length > 1)
            {
                Bettypes[link.Text] = statements[1];
            }
        }
        catch (Exception ex)
        {
            // Still best-effort, but no longer silent: report what went wrong.
            Console.WriteLine("/////////////////LOG: Exception reading hidden bet type link From" + url + ": " + ex.Message);
        }
    }

    foreach (var dict in Bettypes)
    {
        try
        {
            switchBettype(dict);
        }
        catch (Exception e)
        {
            Console.WriteLine("/////////////////LOG: Exception retrieving " + dict.Key + " From" + url + ": " + e.Message);
        }
    }
}