コード例 #1
0
ファイル: Program1.cs プロジェクト: namishmaheshwari/WebScrap
        /// <summary>
        /// Drives a Chrome WebDriver session against the "commbook" site, reads the
        /// first school on the home page together with its streams, majors and
        /// students, writes the collected schools to <c>D:/student-details.txt</c>
        /// and prints them to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main1(string[] args)
        {
            // XPath expressions used below. The major list and the student list are
            // addressed with the same expression; they are shown on different pages.
            const string schoolSelector         = "//*[@id=\"content\"]/div/div[2]/div/div";
            const string majorSelector          = "//*[@id=\"content\"]/div/div[2]/ul/li";
            const string studentSelector        = "//*[@id=\"content\"]/div/div[2]/ul/li";
            const string noStudentsMarker       = "//*[@id=\"content\"]/div/div[2]/ul/span";
            const string studentNameSelector    = "//*[@id=\"content\"]/div/ul/li[1]/p[1]/span/span";
            const string studentMajorSelector   = "//*[@id=\"content\"]/div/ul/li[2]/p/span";
            const string studentSchoolSelector  = "//*[@id=\"content\"]/div/ul/li[3]/p/span";
            const string studentDegreeSelector  = "//*[@id=\"content\"]/div/ul/li[4]/p/span";
            const string additionalInfoSelector = "//*[@id=\"content\"]/div/ul/li[5]/p/span";
            const string breadcrumbLevel1       = "//*[@id=\"breadcrumb\"]/ol/li[1]/a";
            const string breadcrumbLevel2       = "//*[@id=\"breadcrumb\"]/ol/li[2]/a";
            const string breadcrumbLevel3       = "//*[@id=\"breadcrumb\"]/ol/li[3]/a";

            DriverExtension webDriver = new DriverExtension();

            using (var driver = new ChromeDriver(@"C:\Users\chill\Downloads\WebScrap"))
            {
                // Go to the home page. The timeouts are configured after the first
                // navigation, exactly as before, so they only govern later lookups/loads.
                driver.Navigate().GoToUrl("https://camel2.usc.edu/commbook/");
                driver.Manage().Timeouts().ImplicitWait = TimeSpan.FromSeconds(5);
                driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(5);

                // The element itself is not used; the lookup is kept because it fails
                // fast when the content panel cannot be located.
                driver.FindElementByCssSelector("#content > div > div.panel-body > div");

                var schoolElements = driver.FindElementsByXPath(schoolSelector);
                List<School> schools = new List<School>();

                for (int schoolIndex = 1; schoolIndex <= schoolElements.Count; schoolIndex++)
                {
                    // Elements are re-located by position on every pass instead of
                    // reusing references obtained before the page was navigated.
                    string schoolXPath = schoolSelector + "[" + schoolIndex + "]";
                    IWebElement schoolElement = driver.FindElement(By.XPath(schoolXPath));

                    // First span is used as the school name, second as the dean. A header
                    // with fewer spans yields empty strings instead of an
                    // IndexOutOfRangeException.
                    var headerSpans = schoolElement.FindElements(By.XPath("div[1]/div/a/span"));
                    School school = new School();
                    school.Name = headerSpans.Count > 0 ? headerSpans[0].Text : string.Empty;
                    school.Dean = headerSpans.Count > 1 ? headerSpans[1].Text : string.Empty;

                    // Click the school header link (presumably expands its stream list).
                    schoolElement.FindElement(By.XPath("div[1]/div/a")).Click();

                    var streamElements = schoolElement.FindElements(By.XPath("div[2]/div/ul/li"));
                    List<Stream> streamList = new List<Stream>();

                    for (int streamIndex = 1; streamIndex <= streamElements.Count; streamIndex++)
                    {
                        Stream stream = new Stream();
                        IWebElement streamItem = driver.FindElement(By.XPath(schoolXPath + "/div[2]/div/ul/li[" + streamIndex + "]"));

                        // NOTE(review): the DriverExtension calls take a numeric argument and
                        // look like explicit waits; their exact semantics are defined elsewhere.
                        webDriver.FindElement(driver, streamItem, By.XPath("a"), 100);
                        stream.Name = streamItem.FindElement(By.XPath("a")).Text;
                        streamItem.FindElement(By.XPath("a")).Click();

                        var majorElements = driver.FindElementsByXPath(majorSelector);
                        List<Major> majors = new List<Major>();

                        for (int majorIndex = 1; majorIndex <= majorElements.Count; majorIndex++)
                        {
                            IWebElement majorItem = driver.FindElement(By.XPath(majorSelector + "[" + majorIndex + "]"));
                            Major major = new Major();
                            major.Name = majorItem.FindElement(By.XPath("a")).Text;
                            majorItem.FindElement(By.XPath("a")).Click();

                            List<Student> students = new List<Student>();

                            // If the marker span is found, the "no data" message is printed and
                            // the major keeps an empty student list. Any exception raised here
                            // is treated as "students are listed", as in the original flow.
                            bool hasStudents = false;
                            try
                            {
                                webDriver.FindOrThrow(driver, By.XPath(noStudentsMarker), 20);
                                Console.WriteLine("No Student Data found for the School:{0} Stream: {1} major:{2}", school.Name, stream.Name, major.Name);
                            }
                            catch (Exception)
                            {
                                hasStudents = true;
                            }

                            if (hasStudents)
                            {
                                var studentElements = driver.FindElementsByXPath(studentSelector);
                                for (int studentIndex = 1; studentIndex <= studentElements.Count; studentIndex++)
                                {
                                    string studentXPath = studentSelector + "[" + studentIndex + "]";
                                    webDriver.ClickElement(driver, By.XPath(studentXPath), 100);
                                    driver.FindElement(By.XPath(studentXPath)).Click();

                                    Student student = new Student();

                                    webDriver.FindElement(driver, By.XPath(studentNameSelector), 100);
                                    student.Name = driver.FindElement(By.XPath(studentNameSelector)).Text;

                                    // Majors: one nested span per entry.
                                    webDriver.FindElements(driver, By.XPath(studentMajorSelector), 100);
                                    foreach (IWebElement majorEntry in driver.FindElements(By.XPath(studentMajorSelector)))
                                    {
                                        webDriver.FindElement(driver, majorEntry, By.XPath("span"), 100);
                                        student.Major.Add(majorEntry.FindElement(By.XPath("span")).Text);
                                    }

                                    // Schools: one nested span per entry.
                                    webDriver.FindElements(driver, By.XPath(studentSchoolSelector), 100);
                                    foreach (IWebElement schoolEntry in driver.FindElements(By.XPath(studentSchoolSelector)))
                                    {
                                        webDriver.FindElement(driver, schoolEntry, By.XPath("span"), 100);
                                        student.School.Add(schoolEntry.FindElement(By.XPath("span")).Text);
                                    }

                                    // Degrees: one nested span per entry.
                                    webDriver.FindElements(driver, By.XPath(studentDegreeSelector), 100);
                                    foreach (IWebElement degreeEntry in driver.FindElements(By.XPath(studentDegreeSelector)))
                                    {
                                        webDriver.FindElement(driver, degreeEntry, By.XPath("span"), 100);
                                        student.Degree.Add(degreeEntry.FindElement(By.XPath("span")).Text);
                                    }

                                    // Additional info is looked up without the preceding
                                    // DriverExtension.FindElements call used for the other lists.
                                    List<string> additionalInfoList = new List<string>();
                                    foreach (IWebElement infoEntry in driver.FindElementsByXPath(additionalInfoSelector))
                                    {
                                        webDriver.FindElement(driver, infoEntry, By.XPath("span"), 100);
                                        additionalInfoList.Add(infoEntry.FindElement(By.XPath("span")).Text);
                                    }
                                    student.additionalInfo = additionalInfoList;
                                    students.Add(student);

                                    // Leave the student page via the third breadcrumb link.
                                    webDriver.ClickElement(driver, By.XPath(breadcrumbLevel3), 100);
                                    driver.FindElement(By.XPath(breadcrumbLevel3)).Click();
                                }
                            }

                            major.Students = students;
                            majors.Add(major);

                            // Leave the major page via the second breadcrumb link.
                            webDriver.ClickElement(driver, By.XPath(breadcrumbLevel2), 100);
                            driver.FindElement(By.XPath(breadcrumbLevel2)).Click();
                        }

                        stream.Majors = majors;
                        streamList.Add(stream);

                        // Leave the stream page via the first breadcrumb link, then click the
                        // school header again so the next stream item can be located.
                        webDriver.ClickElement(driver, By.XPath(breadcrumbLevel1), 100);
                        driver.FindElement(By.XPath(breadcrumbLevel1)).Click();

                        driver.FindElement(By.XPath(schoolXPath + "/div[1]/div/a")).Click();
                    }

                    school.Streams = streamList;
                    schools.Add(school);

                    // NOTE(review): only the first school is processed because of this break;
                    // confirm whether that is intentional before removing it.
                    break;
                }

                // Relies on School's string conversion for the file contents.
                System.IO.File.WriteAllText(@"D:/student-details.txt", string.Join(" \n ", schools));

                foreach (School school in schools)
                {
                    Console.WriteLine(school.Name);
                    Console.WriteLine(school.Dean);
                    foreach (Stream stream in school.Streams)
                    {
                        Console.WriteLine(stream.Name);
                        foreach (Major major in stream.Majors)
                        {
                            Console.WriteLine(major.Name);
                            foreach (Student student in major.Students)
                            {
                                Console.WriteLine(student.Name);
                                Console.WriteLine(string.Join(",", student.Degree));
                                Console.WriteLine(string.Join(",", student.Major));
                                Console.WriteLine(string.Join(",", student.School));
                                Console.WriteLine(string.Join(",", student.additionalInfo));
                            }
                        }
                    }
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Drives a Chrome WebDriver session against the "CommBookNg" site, reads the
        /// first school in the school list together with its streams, majors and
        /// students, writes the collected schools to
        /// <c>/Users/namishmaheshwari/Documents/work/student-details-new.txt</c>
        /// and prints them to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // NOTE(review): the gecko driver variable looks like a leftover from a Firefox
            // setup (the session below uses ChromeDriver); confirm whether it is still needed.
            Environment.SetEnvironmentVariable("webdriver.gecko.driver", "/usr/local/Cellar/geckodriver/0.23.0");
            Environment.SetEnvironmentVariable("ASPNETCORE_ENVIRONMENT", "Development");
            DriverExtension webDriver = new DriverExtension();

            // Absolute XPath expressions for each list page and for the student detail card.
            const string selectorSchool         = "/html/body/app-root/div/div[2]/app-school-list/mat-card/mat-nav-list/mat-list-item";
            const string selectorStream         = "/html/body/app-root/div/div[2]/app-degree-list/mat-card/mat-nav-list/mat-list-item";
            const string selectorMajor          = "/html/body/app-root/div/div[2]/app-major-list/mat-card/mat-nav-list/mat-list-item";
            const string selectorStudent        = "/html/body/app-root/div/div[2]/app-degree-candidate-list/mat-card/mat-nav-list/mat-list-item";
            const string selectorStudentDetails = "/html/body/app-root/div/div[2]/app-degree-candidate-detail/mat-card/mat-card-content/div/div/mat-card";

            // Expressions relative to a school list item.
            const string selectorSchoolName = "div/div[2]/span[1]/span";
            const string selectorSchoolDean = "div/div[2]/span[2]";

            // Suffixes appended to selectorStudentDetails.
            const string selectorStudentMajor  = "/mat-list[1]/mat-list-item";
            const string selectorStudentSchool = "/mat-list[2]/mat-list-item";
            const string selectorStudentDegree = "/mat-list[3]/mat-list-item";

            using (var driver = new ChromeDriver(@"/Users/namishmaheshwari/Documents/work/"))
            {
                // Go to the home page. The timeouts are configured after the first
                // navigation, exactly as before, so they only govern later lookups/loads.
                driver.Navigate().GoToUrl("http://oweb7-vm.usc.edu/CommBookNg/schools");
                driver.Manage().Timeouts().ImplicitWait = TimeSpan.FromSeconds(55);
                driver.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(20);

                var elements = driver.FindElementsByXPath(selectorSchool);
                List<School> schools = new List<School>();

                foreach (IWebElement element in elements)
                {
                    School school = new School();
                    school.Name = element.FindElement(By.XPath(selectorSchoolName)).Text;

                    // The dean is taken from the text after the first ':' of the label.
                    // A label without a colon is used as-is instead of throwing
                    // IndexOutOfRangeException.
                    string deanLabel = element.FindElement(By.XPath(selectorSchoolDean)).Text;
                    string[] deanParts = deanLabel.Split(":");
                    school.Dean = deanParts.Length > 1 ? deanParts[1].Trim() : deanLabel.Trim();

                    element.Click();

                    var elementsStream = driver.FindElementsByXPath(selectorStream);
                    List<Stream> streamList = new List<Stream>();

                    for (int streamCount = 1; streamCount <= elementsStream.Count; streamCount++)
                    {
                        // Items are re-located by position on every pass instead of reusing
                        // references obtained before navigating away.
                        string streamXPath = selectorStream + "[" + streamCount + "]";

                        Stream stream = new Stream();
                        stream.Name = driver.FindElementByXPath(streamXPath + "/div/div[2]/span").Text;

                        driver.FindElementByXPath(streamXPath).Click();

                        var elementsMajor = driver.FindElementsByXPath(selectorMajor);
                        List<Major> majorList = new List<Major>();

                        for (int majorCount = 1; majorCount <= elementsMajor.Count; majorCount++)
                        {
                            string majorXPath = selectorMajor + "[" + majorCount + "]";

                            Major major = new Major();
                            major.Name = driver.FindElementByXPath(majorXPath + "/div/div[2]/span").Text;

                            // NOTE(review): the DriverExtension calls take a numeric argument and
                            // look like explicit waits; their exact semantics are defined elsewhere.
                            webDriver.ClickElement(driver, By.XPath(majorXPath), 100);
                            driver.FindElementByXPath(majorXPath).Click();

                            var elementsStudent = driver.FindElementsByXPath(selectorStudent);
                            List<Student> studentList = new List<Student>();

                            for (int studentCount = 1; studentCount <= elementsStudent.Count; studentCount++)
                            {
                                string studentXPath = selectorStudent + "[" + studentCount + "]";

                                Student student = new Student();

                                webDriver.ClickElement(driver, By.XPath(studentXPath), 1000);
                                driver.FindElementByXPath(studentXPath).Click();

                                webDriver.FindElement(driver, By.XPath(selectorStudentDetails + "/mat-card-title"), 1000);
                                student.Name = driver.FindElementByXPath(selectorStudentDetails + "/mat-card-title").Text;

                                // Majors: text of the div inside each item of the first list.
                                webDriver.FindElements(driver, By.XPath(selectorStudentDetails + selectorStudentMajor), 1000);
                                foreach (IWebElement majorEntry in driver.FindElementsByXPath(selectorStudentDetails + selectorStudentMajor))
                                {
                                    webDriver.FindElement(driver, majorEntry, By.XPath("div"), 1000);
                                    student.Major.Add(majorEntry.FindElement(By.XPath("div")).Text);
                                }

                                // Schools: text of the div inside each item of the second list.
                                webDriver.FindElements(driver, By.XPath(selectorStudentDetails + selectorStudentSchool), 1000);
                                foreach (IWebElement schoolEntry in driver.FindElementsByXPath(selectorStudentDetails + selectorStudentSchool))
                                {
                                    webDriver.FindElement(driver, schoolEntry, By.XPath("div"), 1000);
                                    student.School.Add(schoolEntry.FindElement(By.XPath("div")).Text);
                                }

                                // Degrees: text of the div inside each item of the third list.
                                webDriver.FindElements(driver, By.XPath(selectorStudentDetails + selectorStudentDegree), 1000);
                                foreach (IWebElement degreeEntry in driver.FindElementsByXPath(selectorStudentDetails + selectorStudentDegree))
                                {
                                    webDriver.FindElement(driver, degreeEntry, By.XPath("div"), 1000);
                                    student.Degree.Add(degreeEntry.FindElement(By.XPath("div")).Text);
                                }

                                // Return from the student detail page using browser history.
                                driver.Navigate().Back();
                                studentList.Add(student);
                            }

                            // Return from the student list page.
                            driver.Navigate().Back();
                            major.Students = studentList;
                            majorList.Add(major);
                        }

                        // Return from the major list page.
                        driver.Navigate().Back();
                        stream.Majors = majorList;
                        streamList.Add(stream);
                    }

                    // Return from the stream list page.
                    driver.Navigate().Back();
                    school.Streams = streamList;
                    schools.Add(school);

                    // NOTE(review): only the first school is processed because of this break.
                    // The remaining items in `elements` were located before navigating away,
                    // so continuing the loop may require re-locating them; confirm before
                    // removing the break.
                    break;
                }

                // Relies on School's string conversion for the file contents.
                System.IO.File.WriteAllText(@"/Users/namishmaheshwari/Documents/work/student-details-new.txt", string.Join(" \n ", schools));

                foreach (School school in schools)
                {
                    Console.WriteLine(school.Name);
                    Console.WriteLine(school.Dean);
                    foreach (Stream stream in school.Streams)
                    {
                        Console.WriteLine(stream.Name);
                        foreach (Major major in stream.Majors)
                        {
                            Console.WriteLine(major.Name);
                            foreach (Student student in major.Students)
                            {
                                Console.WriteLine(student.Name);
                                Console.WriteLine(string.Join(",", student.Degree));
                                Console.WriteLine(string.Join(",", student.Major));
                                Console.WriteLine(string.Join(",", student.School));

                                // additionalInfo is never populated by this method; string.Join
                                // throws ArgumentNullException for a null sequence, so print an
                                // empty line in that case.
                                Console.WriteLine(student.additionalInfo == null
                                    ? string.Empty
                                    : string.Join(",", student.additionalInfo));
                            }
                        }
                    }
                }
            }
        }