Exemplo n.º 1
0
        /// <summary>
        /// Loads a page in a headless PhantomJS browser and returns the text of the first
        /// element matching an XPath expression.
        /// Requires the NuGet packages Selenium.PhantomJS.WebDriver and Selenium.WebDriver.
        /// </summary>
        /// <param name="url">Address of the page holding the data.</param>
        /// <param name="xpath0">XPath expression locating the data.</param>
        /// <returns>
        /// The text of the first matching element, or the literal string "NULL" when
        /// nothing matches.
        /// </returns>
        public string xpath_crwal(string url, string xpath0)
        {
            PhantomJSDriverService service = PhantomJSDriverService.CreateDefaultService();
            service.HideCommandPromptWindow = true; // hide the console window

            var options = new PhantomJSOptions();
            options.AddAdditionalCapability("phantomjs.page.settings.userAgent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0");

            var driver1 = new PhantomJSDriver(service, options);
            try
            {
                driver1.Navigate().GoToUrl(url);

                // A search can match any number of elements; only the first one is used.
                ReadOnlyCollection <IWebElement> res = driver1.FindElementsByXPath(xpath0);

                return res.Count != 0 ? res[0].Text : "NULL";
            }
            finally
            {
                // Always shut the browser down, including when nothing matched or an
                // exception was thrown, so no phantomjs process is left behind.
                driver1.Quit();
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Navigates to the activity selection page and reads every input whose name
        /// contains "MainContent$activitiesGrid".
        /// </summary>
        /// <returns>
        /// One <c>Activity</c> per matching input, with <c>Id</c> taken from the element's
        /// "id" attribute and <c>Text</c> from its "value" attribute.
        /// </returns>
        public IEnumerable <Activity> GetActivities()
        {
            NavigateToSelectActivity();

            var webElements = PhantomJsDriver.FindElementsByXPath("//input[contains(@name,'MainContent$activitiesGrid')]");

            // Materialise immediately: a deferred query would read the attributes from the
            // browser only when the caller enumerates (possibly after the page has changed)
            // and would repeat those round trips on every enumeration.
            var activities = webElements.Select(x => new Activity()
            {
                Id   = x.GetAttribute("id"),
                Text = x.GetAttribute("value")
            }).ToList();

            return(activities);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Loads the ship matrix page in PhantomJS and prints the text of the third
        /// child div of the "shipscontainer" element, then waits for ENTER.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            var driver = new PhantomJSDriver();

            try
            {
                // Assigning Url loads the page; a separate Navigate() call is not needed.
                driver.Url = "https://robertsspaceindustries.com/ship-matrix";

                ReadOnlyCollection <IWebElement> names = driver.FindElementsByXPath("//*[@id=\"shipscontainer\"]/div");

                // The page may render fewer entries than expected; do not index blindly.
                if (names.Count > 2)
                {
                    Console.Write(names[2].Text);
                }
                else
                {
                    Console.Error.WriteLine("Expected at least 3 ship entries but found " + names.Count + ".");
                }
            }
            finally
            {
                // Shut the browser down so no phantomjs process is left behind.
                driver.Quit();
            }

            Console.ReadLine();
        }
Exemplo n.º 4
0
 /// <summary>
 /// Logs in to LoopNet, visits every saved search, generates the listing summary report for
 /// all of its listings and writes the JSON embedded in the report page to a file.
 /// The JSON is only saved here; it is pulled apart later.
 /// </summary>
 /// <param name="args">Command-line arguments (unused).</param>
 static void Main(string[] args)
 {
     try
     {
         Console.WriteLine("Hit enter to start the magic.....");
         Console.ReadLine();
         // NOTE(review): credentials are hard-coded in source; move them to configuration or the environment.
         string uid = "*****@*****.**", pwd = "leblanc2016";
         //
         driver.Manage().Timeouts().ImplicitWait = implicitWait;
         driver.Manage().Timeouts().PageLoad = pageLoadWait;
         // NOTE(review): the effect of the window size on this scrape is unverified.
         driver.Manage().Window.Size = new System.Drawing.Size(1920, 1080);
         driver.Navigate().GoToUrl("http://www.loopnet.com/xNet/MainSite/User/customlogin.aspx?LinkCode=31824");
         //login
         driver.FindElement(By.Name("ctlLogin$LogonEmail")).SendKeys(uid);
         driver.FindElement(By.Name("ctlLogin$LogonPassword")).SendKeys(pwd);
         driver.FindElement(By.Id("ctlLogin_btnLogon")).Click();
         //Go to the searches page
         //
         driver.Navigate().GoToUrl("http://www.loopnet.com/xNet/MainSite/Listing/SavedSearches/MySavedSearches_FSFL.aspx?LinkCode=29400");
         //
         //Get the search names first, then get their urls
         //
         var submarketNamesCollection = driver.FindElement(By.ClassName("savedSearchContainer")).FindElements(By.XPath("./tbody/tr/td[2]"));
         var searchLinkElements       = driver.FindElementsByXPath("//*[@id='form1']/div[5]/div/div/table/tbody/tr/td[1]/div/a[1]");
         //Spin up a collection to hold our data from here on out
         //
         List <BaseSearch> recoveredSearches = new List <BaseSearch>();
         if (submarketNamesCollection.Count != searchLinkElements.Count)
         {
             throw new InvalidOperationException($"Submarket/Search names count: {submarketNamesCollection.Count}. Doesn't equal recovered link elements count: {searchLinkElements.Count}");
         }
         for (int i = 0; i < submarketNamesCollection.Count; i++)
         {
             recoveredSearches.Add(new BaseSearch()
             {
                 Name = submarketNamesCollection[i].Text, BaseResultsURL = searchLinkElements[i].GetAttribute("href")
             });
         }
         //Iterate through the results and do your thing
         for (int searchIndex = 0; searchIndex < recoveredSearches.Count; searchIndex++)
         {
             var currentSearch = recoveredSearches[searchIndex];
             driver.Navigate().GoToUrl(currentSearch.BaseResultsURL);
             //Property name is in the title attribute of these link elements
             var propertyNamesList = driver.FindElements(By.XPath("//*[@id='placardSec']//h5[@class = 'listing-address']/a")).Select(x => x.GetAttribute("title")).ToList();
             //Let's get the building class since they need that.  May also need broker info.
             var possibleBldgClasses = driver.FindElements(By.XPath("//*[@id='placardSec']/div[2]/div/article/div[1]/section[2]/div[1]/ul/li[3]/i")).Select(x => x.Text.Trim()).ToList();
             //Make sure the classes list and names list are 1 to 1
             if (propertyNamesList.Count != possibleBldgClasses.Count)
             {
                 throw new InvalidOperationException($"The property names list count: {propertyNamesList.Count} does not match the Bldg Class candidate list count: {possibleBldgClasses.Count}");
             }
             for (int tempIndex = 0; tempIndex < propertyNamesList.Count; tempIndex++)
             {
                 string candidateClass = possibleBldgClasses[tempIndex];
                 currentSearch.Listings.Add(new Listing()
                 {
                     PropertyName = propertyNamesList[tempIndex],
                     //An empty candidate, or one not starting with a letter, is not a building class
                     BldgClass    = candidateClass.Length > 0 && char.IsLetter(candidateClass[0]) ? candidateClass : "N/A"
                 });
             }
             //Broker info.  Deal with that later.
             //Click the create reports button
             //
             driver.FindElement(By.XPath("/html/body/section/main/section/div/section[1]//div[@class='toolbar-right']/div/button")).Click();
             //Select all reports on every result page
             //
             bool firstPage = true;
             while (true)
             {
                 //We're already on the page for the first group we need to select, so we don't go to the next one on the first go around
                 //
                 if (!firstPage)
                 {
                     //NOTE(review): FlipDriverTimeout presumably shortens the implicit wait while probing for the link - confirm
                     FlipDriverTimeout(true);
                     var nextPageLinkContainer = driver.FindElements(By.CssSelector("a.caret-right-large"));
                     FlipDriverTimeout(false);
                     if (nextPageLinkContainer == null || nextPageLinkContainer.Count == 0)
                     {
                         //No further page: stop here so "Select all" is not clicked a second time on the last page
                         break;
                     }
                     nextPageLinkContainer[0].Click();
                 }
                 firstPage = false;
                 //Select all the elements then circle around to the next page and repeat.
                 //The click is issued through JavaScript rather than IWebElement.Click().
                 var selectAllButton = driver.FindElement(By.XPath("//button[text()='Select all']"));
                 ((IJavaScriptExecutor)driver).ExecuteScript("arguments[0].click();", selectAllButton);
             }
             //Onward to our report. Click the big red generate reports button.
             driver.FindElement(By.XPath("//button[text()='Generate Reports']")).Click();
             //
             //Select listing summary report radio button
             driver.FindElement(By.Id("listingSummary")).Click();
             //
             driver.FindElement(By.Id("btnCreateReport1")).Click();
             //
             driver.SwitchTo().Frame("reportFrame");
             //Cut the report JSON out of the page source: from the "Data" marker up to the "Config={" marker
             string source = driver.PageSource;
             int dataStart = source.IndexOf("\"Data\":{\"Report\":", StringComparison.Ordinal);
             if (dataStart < 0)
             {
                 throw new InvalidOperationException($"Report data marker not found in the page source for search '{currentSearch.Name}'");
             }
             source = source.Substring(dataStart);
             int configStart = source.IndexOf("Config={", StringComparison.Ordinal);
             if (configStart < 0)
             {
                 throw new InvalidOperationException($"Report config marker not found in the page source for search '{currentSearch.Name}'");
             }
             source = source.Substring(0, configStart).Trim();
             //If this is indeed valid JSON, save it.
             currentSearch.rawJSON = source;
             //CreateDirectory does nothing when the directory already exists
             Directory.CreateDirectory(jsonOutputDirectory);
             //
             File.WriteAllText(Path.Combine(jsonOutputDirectory, "SearchData_" + searchIndex), currentSearch.rawJSON);
         }
     }
     catch (Exception ex)
     {
         Console.WriteLine("Explosion: " + ex.Message + Environment.NewLine + ex.StackTrace + Environment.NewLine);
     }
     finally
     {
         //Quit (not Close) ends the whole browser session instead of closing only the current window
         driver?.Quit();
         Console.WriteLine("Tear down complete, strike [ENTER] to exit.");
     }
 }
Exemplo n.º 5
0
        /// <summary>
        /// Collects the bet-type tabs of the current page into <c>Bettypes</c> (tab text mapped to
        /// a JavaScript statement taken from the tab's "onmousedown" attribute) and then
        /// switches to each collected bet type in turn.
        /// </summary>
        public void betTypes()
        {
            takeScreenshot();
            string visibleTabsXPath = "//div[@id='bettype-tabs']//li[@style='display: block;']//a";
            var activeLink = driver.FindElementByXPath("//div[@id='bettype-tabs']//li[@class=' active']").Text;

            switchBettype(activeLink);

            // The 14th list item is queried separately; the original code refers to it as "hidden".
            var hiddenTabsXPath = "//div[@id='bettype-tabs']//li[14]//a";
            var visibleLinks    = driver.FindElementsByXPath(visibleTabsXPath).ToList();
            var hiddenLinks     = driver.FindElementsByXPath(hiddenTabsXPath).ToList();

            Bettypes = new Dictionary <string, string>();

            // Visible tabs: the first statement of the handler is stored.
            foreach (var link in visibleLinks)
            {
                string handler = link.GetAttribute("onmousedown");
                if (handler == null)
                {
                    // A tab without an onmousedown handler has nothing to store.
                    continue;
                }
                Bettypes[link.Text] = handler.Split(';')[0];
            }

            // Links under the 14th item: the second statement of the handler is stored.
            foreach (var link in hiddenLinks)
            {
                try
                {
                    string handler = link.GetAttribute("onmousedown");
                    string[] statements = handler == null ? null : handler.Split(';');
                    if (statements == null || statements.Length < 2)
                    {
                        // No second statement: skip the link explicitly instead of relying on an exception.
                        continue;
                    }
                    Bettypes[link.Text] = statements[1];
                }
                catch (Exception w)
                {
                    // Still best-effort (a failing link must not abort the scrape), but no longer silent.
                    Console.WriteLine("/////////////////LOG: Exception reading hidden bet type link From " + url + ": " + w.Message);
                }
            }

            foreach (var dict in Bettypes)
            {
                try
                {
                    switchBettype(dict);
                }
                catch (Exception e)
                {
                    Console.WriteLine("/////////////////LOG: Exception retrieving " + dict.Key + " From " + url + ": " + e.Message);
                }
            }
        }