示例#1
0
        /// <summary>
        /// Reads every <c>link</c> value from <c>idp.course_names</c>, loads the matching
        /// topic-group pages from catalog.stcloudstate.edu and stores the courses found there
        /// in <c>idp.course_collection</c>, <c>idp.seasons</c> and <c>idp.prereqs</c>.
        /// </summary>
        /// <remarks>
        /// Relies on <c>openDatabase</c>, <c>digitsOnly</c> and <c>rgx</c>, which are defined
        /// elsewhere in this file. All values are sent to MySQL as command parameters, so
        /// scraped text containing quotes cannot break or alter the statements.
        /// Page-layout problems (missing tables, malformed headings) are reported on the
        /// console and the affected entry is skipped; database and network errors still
        /// propagate to the caller.
        /// </remarks>
        private static void getUniversityCatalog()
        {
            Console.WriteLine("Login to get links from database");

            List<string> topicGroupIds = new List<string>();

            // Phase 1: collect the topic group ids. The nested using blocks dispose the
            // reader before the command and the connection, even when an exception is thrown.
            using (MySqlConnection conn = openDatabase())
            {
                Console.WriteLine("Starting collection");

                using (MySqlCommand command = new MySqlCommand())
                {
                    command.Connection  = conn;
                    command.CommandText = "SELECT link FROM idp.course_names";

                    using (MySqlDataReader reader = command.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            if (reader.IsDBNull(0))
                            {
                                continue;
                            }

                            // NOTE(review): digitsOnly is defined outside this method; presumably it
                            // strips non-digit characters, and the first remaining character is not
                            // part of the topic group id - confirm against its declaration.
                            string cleaned = digitsOnly.Replace(reader.GetString(0), "");
                            if (cleaned.Length == 0)
                            {
                                // Substring(1) would throw on an empty string.
                                Console.WriteLine("Skipping link without a usable id");
                                continue;
                            }

                            topicGroupIds.Add(cleaned.Substring(1));
                        }
                    }
                }
            }

            // Phase 2: scrape each topic group and push the results to the database.
            HtmlWeb web = new HtmlWeb();

            using (MySqlConnection comm = openDatabase())
            using (MySqlCommand cmd = new MySqlCommand())
            {
                cmd.Connection = comm;

                foreach (string item in topicGroupIds)
                {
                    string url = $"http://catalog.stcloudstate.edu/Catalog/ViewCatalog.aspx?pageid=viewcatalog&catalogid=8&topicgroupid={item}";
                    Console.WriteLine("Current url: " + url);

                    HtmlDocument document = web.Load(url);
                    Console.WriteLine("Webpage loaded");

                    // The page count is read from the third-from-last child of the pager span.
                    // Without a pager, or when that child is missing or not a positive number,
                    // fall back to a single page.
                    HtmlNode pager    = document.DocumentNode.SelectSingleNode("//body//span[@id='ctl00_ctl00_mainLayoutContent_mainContent_pager']");
                    string   maxPages = pager?.LastChild?.PreviousSibling?.PreviousSibling?.InnerText;
                    int      pageCount;
                    if (!int.TryParse(maxPages, out pageCount) || pageCount < 1)
                    {
                        pageCount = 1;
                    }

                    for (int page = 1; page <= pageCount; page++)
                    {
                        document = web.Load(url + $"&pg={page}");

                        // SelectNodes returns null (not an empty collection) when nothing matches.
                        HtmlNodeCollection tables = document.DocumentNode.SelectNodes("//table[@class='DeAcFormTable']");
                        if (tables == null)
                        {
                            Console.WriteLine("No course tables found on page " + page);
                            continue;
                        }

                        // Each course occupies two consecutive tables, hence the step of 2.
                        for (int i = 0; i < tables.Count; i += 2)
                        {
                            importCatalogCourse(cmd, document, i);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Parses the course described by the table pair starting at <paramref name="tableIndex"/>
        /// and inserts the course, its offered seasons and its prerequisites.
        /// </summary>
        /// <param name="cmd">Command bound to an open connection; its text and parameters are overwritten.</param>
        /// <param name="document">The loaded catalog page.</param>
        /// <param name="tableIndex">Zero-based index of the first table of the pair.</param>
        private static void importCatalogCourse(MySqlCommand cmd, HtmlDocument document, int tableIndex)
        {
            //Section to be parsed
            HtmlNode nameNode        = document.DocumentNode.SelectSingleNode($"//table[{tableIndex + 1}]//h3");
            HtmlNode descriptionNode = document.DocumentNode.SelectSingleNode($"//table[{tableIndex + 2}]//td[2]");
            if (nameNode == null || descriptionNode == null)
            {
                Console.WriteLine("Skipping table pair without a heading or description");
                return;
            }

            HtmlNodeCollection tempPrereqs = document.DocumentNode.SelectNodes($"//table[{tableIndex + 2}]//tr[2]//a[@class='topictooltip']");
            HtmlNodeCollection tempOffered = document.DocumentNode.SelectNodes($"//table[{tableIndex + 2}]//tr[last()]//li");

            string name        = nameNode.InnerText;
            string description = descriptionNode.InnerText;

            // The heading is split on spaces: first token is the short code, second the number.
            string[] nameParts = name.Split(' ');
            if (nameParts.Length < 2)
            {
                Console.WriteLine("Skipping course with unexpected heading: " + name);
                return;
            }

            string shortVersion = nameParts[0];
            int    courseNumber;
            // A non-numeric second token leaves courseNumber at 0.
            int.TryParse(nameParts[1].Replace(".", String.Empty), out courseNumber);
            // Keep only the text after the last '.' as the course title.
            name = name.Substring(name.LastIndexOf('.') + 1).TrimStart();

            Console.WriteLine(name + " " + courseNumber);

            //Database push
            Console.WriteLine("Trying to input this into the database:");
            Console.WriteLine($"Course number: {courseNumber}, Short Version: {shortVersion}, Name: {name}\nDescription: {description}");
            // NOTE(review): rgx is defined outside this method; it is still applied so the stored
            // text stays the same as before - confirm whether it is still wanted now that the
            // values are passed as parameters.
            executeCatalogInsert(
                cmd,
                "INSERT IGNORE INTO idp.course_collection(course_number,short,name,description) VALUES(@p0,@p1,@p2,@p3)",
                courseNumber,
                rgx.Replace(shortVersion, ""),
                rgx.Replace(name, ""),
                rgx.Replace(description, ""));

            if (tempOffered != null)
            {
                foreach (HtmlNode offered in tempOffered)
                {
                    Console.WriteLine(offered.InnerText + ", ");
                    if (!offered.InnerText.Contains("GOAL"))
                    {
                        executeCatalogInsert(
                            cmd,
                            "INSERT  INTO idp.seasons(course_number,seasons) VALUES(@p0,@p1)",
                            courseNumber,
                            offered.InnerText);
                    }
                    else
                    {
                        // List items containing "GOAL" are only logged, not stored as seasons.
                        Console.WriteLine($"********************{offered.InnerText}**************************");
                    }
                }
            }
            else
            {
                // No list items at all: record the course with the season "Demand".
                executeCatalogInsert(
                    cmd,
                    "INSERT INTO idp.seasons(course_number,seasons) VALUES(@p0,@p1)",
                    courseNumber,
                    "Demand");
            }

            if (tempPrereqs != null)
            {
                foreach (HtmlNode prereq in tempPrereqs)
                {
                    Console.Write(prereq.InnerText + ", ");
                    // An anchor without href is stored with an empty link instead of throwing.
                    string link = prereq.Attributes["href"]?.Value ?? string.Empty;
                    executeCatalogInsert(
                        cmd,
                        "INSERT IGNORE INTO idp.prereqs(course_number,prereq,link) VALUES(@p0,@p1,@p2)",
                        courseNumber,
                        prereq.InnerText,
                        link);
                }
            }
            Console.WriteLine();
        }

        /// <summary>
        /// Runs <paramref name="sql"/> on <paramref name="cmd"/>, binding <paramref name="args"/>
        /// in order to the placeholders <c>@p0</c>, <c>@p1</c>, ...
        /// </summary>
        /// <param name="cmd">Command to reuse; previous parameters are cleared first.</param>
        /// <param name="sql">Statement text using <c>@pN</c> placeholders.</param>
        /// <param name="args">Values for the placeholders, in placeholder order.</param>
        private static void executeCatalogInsert(MySqlCommand cmd, string sql, params object[] args)
        {
            cmd.CommandText = sql;
            cmd.Parameters.Clear();
            for (int k = 0; k < args.Length; k++)
            {
                cmd.Parameters.AddWithValue("@p" + k, args[k]);
            }
            cmd.ExecuteNonQuery();
        }