/// <summary>
/// Parses the raw course-listing HTML, stores every course found through the Raven session
/// and records each stored course's id on the supplied department.
/// </summary>
/// <param name="content">The HTML of the course-listing page.</param>
/// <param name="department">
/// The department that receives the ids of the parsed courses; its SemesterID is copied onto
/// every course that is created.
/// </param>
/// <returns>The same <paramref name="department"/> instance that was passed in.</returns>
private Department ParseRawCourseData(string content, Department department)
{
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(content);
    HtmlNodeCollection htmlNodes = doc.DocumentNode.SelectNodes("//table[2]/tr");

    // Nothing to do without rows (SelectNodes gives null when nothing matches) or without a
    // department to attach the courses to.
    if (department == null || htmlNodes == null || htmlNodes.Count == 0)
    {
        return department;
    }

    // 'course' is non-null exactly while a course header has been read and not yet stored.
    Course course = null;
    // 'section' is non-null only while it holds a section that is not yet in course.Sections.
    Section section = null;

    foreach (HtmlNode node in htmlNodes)
    {
        if (node.NodeType != HtmlNodeType.Element || !node.HasAttributes)
        {
            continue;
        }

        // GetAttributeValue tolerates rows that carry attributes but no bgcolor.
        string rowColor = node.GetAttributeValue("bgcolor", string.Empty);

        if (rowColor == "#DFE4FF")
        {
            // Course header row: flush the previously collected course first.
            if (course != null)
            {
                StoreParsedCourse(department, course, section);
                course = null;
            }
            section = null;

            // NOTE(review): the " " literals passed to Replace are reproduced verbatim; if they are
            // plain spaces rather than non-breaking spaces this strips every space - confirm.
            string[] value = node.InnerText.Trim().Replace(" ", string.Empty).Split(new string[] { "\r\n\t\t\t" }, StringSplitOptions.RemoveEmptyEntries);
            if (value.Length == 3)
            {
                course = new Course();
                course.SemesterID = department.SemesterID;

                //Department
                course.DepartmentID = value[0].Trim().Replace(@" ", string.Empty);

                //Course number and name; split once so a dash inside the name is kept
                string[] courseIDandName = value[1].Split(new string[] { "–" }, 2, StringSplitOptions.None);
                course.CourseNumber = courseIDandName[0].Trim();
                course.CourseName = courseIDandName.Length > 1 ? courseIDandName[1].Trim() : string.Empty;

                //Number of credits
                course.Credits = value[2].RemoveValues("(", "Credits", ")");

                // NOTE(review): CourseID is never assigned in this method; presumably Course
                // derives it from the fields set above - confirm.
                //Course Pre-requisite
                course.Prerequisites = GetCoursePrerequisitesByID(course.CourseID);

                //Course rating
                course.CourseRating = GetCourseRatingByID(course.CourseID);
            }
        }
        else if (course != null && (rowColor == "#E1E1CC" || rowColor == "#FFFFFF"))
        {
            //Read Additional information
            HtmlNodeCollection htmlTDNodes = node.SelectNodes("td");
            if (htmlTDNodes == null)
            {
                // Row without cells: nothing to read.
                continue;
            }

            int currentCell = 1;
            int columnsSkipped = 0;
            bool continuesStoredSection = false;
            section = new Section();

            foreach (HtmlNode tdNode in htmlTDNodes)
            {
                //Check if there is colspan skipping data blocks
                string colspanText = tdNode.GetAttributeValue("colspan", string.Empty);
                int colspan = 0;
                if (!string.IsNullOrEmpty(colspanText) && int.TryParse(colspanText, out colspan) && colspan > 0)
                {
                    // Only a positive span may move the cell index; a bad value must not move it backwards.
                    columnsSkipped = colspan;
                    currentCell = currentCell + (colspan - 1);

                    //We are continuing the previous section, get the last section
                    if (colspan == 3 && course.Sections.Count > 0)
                    {
                        section = course.Sections[course.Sections.Count - 1];
                        continuesStoredSection = true;
                    }
                }

                string innerText = HttpUtility.HtmlDecode(tdNode.InnerText).Trim();

                switch (currentCell)
                {
                    case 1:
                        section.SectionID = string.IsNullOrEmpty(section.SectionID) ? innerText : section.SectionID;
                        break;
                    case 2:
                        break;
                    case 3:
                        section.GradeMethod = string.IsNullOrEmpty(section.GradeMethod) ? innerText : section.GradeMethod;
                        break;
                    case 4:
                        // Puts a single space between the characters of the text.
                        section.Days.Add(string.Join(" ", (IEnumerable<char>)innerText.Replace(" ", string.Empty)));
                        break;
                    case 5:
                        section.Time.Add(innerText);
                        break;
                    case 6:
                        section.Dates.Add(innerText);
                        break;
                    case 7:
                        section.Room.Add(innerText);
                        break;
                    case 8:
                        section.Instructor.Add(innerText);
                        break;
                    case 9:
                        int size = 0;
                        int.TryParse(innerText, out size);
                        section.Size = size;
                        break;
                    case 10:
                        int enrolled = 0;
                        int.TryParse(innerText, out enrolled);
                        section.Enrolled = enrolled;
                        break;
                    case 11:
                        section.Status = innerText;
                        break;
                    case 12:
                        //END of ROW
                        if (columnsSkipped == 0)
                        {
                            course.Sections.Add(section);
                            section = null;
                        }
                        else if (columnsSkipped == 12)
                        {
                            // Full-width row: its text is a note for the last stored section.
                            // With no stored section there is nothing to attach it to.
                            if (course.Sections.Count > 0)
                            {
                                course.Sections[course.Sections.Count - 1].AdditionalNotes.Add(innerText);
                            }
                            // The Section created for this row holds no data; do not keep it pending.
                            section = null;
                        }
                        break;
                    default:
                        break;
                }

                currentCell++;
            }

            if (continuesStoredSection)
            {
                // The row only extended a section that is already in course.Sections;
                // clearing the reference keeps it from being added a second time.
                section = null;
            }
        }
    }

    //Add last collected information
    if (course != null)
    {
        StoreParsedCourse(department, course, section);
    }

    // The department itself is not stored by this method.
    return department;
}

/// <summary>
/// Adds a still-pending section (if any) to the course, stores the course through the Raven
/// session and records the course id on the department.
/// </summary>
/// <param name="department">The department whose CourseIds list receives the course id.</param>
/// <param name="course">The course to store.</param>
/// <param name="pendingSection">A section not yet in course.Sections, or null.</param>
private void StoreParsedCourse(Department department, Course course, Section pendingSection)
{
    if (pendingSection != null)
    {
        course.Sections.Add(pendingSection);
    }

    //Store Course to database
    m_RavenSession.Store(course);
    department.CourseIds.Add(course.Id);
}
/// <summary>
/// Posts the course-search form to www3.mnsu.edu, parses the rows matched by //table[2]/tr in
/// the response into courses and renders them with the "Lab7" view.
/// </summary>
/// <returns>The "Lab7" view bound to a <c>Lab7ViewModel</c> holding the parsed courses.</returns>
public ActionResult Lab7()
{
    Lab7ViewModel model = new Lab7ViewModel();

    RestClientSettings settings = new RestClientSettings();
    settings.URL = "http://www3.mnsu.edu/courses/selectform.asp";
    settings.Method = Method.POST;

    //add valid header for kicks although they are not looking for it
    string[][] requestHeaders =
    {
        new string[] { "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" },
        new string[] { "Accept-Encoding", "gzip, deflate" },
        new string[] { "Accept-Language", "en-US,en;q=0.5" },
        new string[] { "Host", "www3.mnsu.edu" },
        new string[] { "Referer", "http://www3.mnsu.edu/courses/" },
        new string[] { "User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:19.0) Gecko/20100101 Firefox/19.0" }
    };
    foreach (string[] header in requestHeaders)
    {
        settings.Parameters.Add(new Parameter() { Name = header[0], Value = header[1], Type = ParameterType.HttpHeader });
    }

    // adds to POST or URL query string based on Method
    string[][] formFields =
    {
        new string[] { "All", "All Sections" },
        new string[] { "campus", "1,2,3,4,5,6,7,9,A,B,C,I,L,M,N,P,Q,R,S,T,W,U,V,X,Z" },
        new string[] { "college", "" },
        new string[] { "courseid", "" },
        new string[] { "courselevel", "" },
        new string[] { "coursenum", "" },
        new string[] { "days", "ALL" },
        new string[] { "endTime", "2359" },
        new string[] { "semester", "20143Fall 2013" },
        new string[] { "startTime", "0600" },
        new string[] { "subject", "CS" }
    };
    foreach (string[] field in formFields)
    {
        settings.Parameters.Add(new Parameter() { Name = field[0], Value = field[1], Type = ParameterType.GetOrPost });
    }

    string content = CommonFunctions.MakeRestSharpRequest(settings);

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(content);
    HtmlNodeCollection htmlNodes = doc.DocumentNode.SelectNodes("//table[2]/tr");

    List<Course> courses = new List<Course>();
    // 'course' is non-null from the first header row onwards.
    Course course = null;
    // NOTE(review): every row of a course is folded into this one Section; confirm that is
    // what this lab view is meant to show.
    Section section = null;

    // SelectNodes gives null when nothing matches, so an unexpected page yields an empty list.
    if (htmlNodes != null)
    {
        foreach (HtmlNode node in htmlNodes)
        {
            if (node.NodeType != HtmlNodeType.Element || !node.HasAttributes)
            {
                continue;
            }

            // GetAttributeValue tolerates rows that carry attributes but no bgcolor.
            string rowColor = node.GetAttributeValue("bgcolor", string.Empty);

            if (rowColor == "#DFE4FF")
            {
                //Add previously collected course information
                if (course != null)
                {
                    course.Sections.Add(section);
                    courses.Add(course);
                }

                course = new Course();
                section = new Section();

                // NOTE(review): the " " literals passed to Replace are reproduced verbatim; if they are
                // plain spaces rather than non-breaking spaces this strips every space - confirm.
                string[] value = node.InnerText.Trim().Replace(" ", string.Empty).Split(new string[] { "\r\n\t\t\t" }, StringSplitOptions.RemoveEmptyEntries);
                if (value.Length == 3)
                {
                    //Department
                    course.DepartmentID = value[0].Trim().Replace(@" ", string.Empty);

                    //Course number and name; split once so a dash inside the name is kept
                    string[] courseIDandName = value[1].Split(new string[] { "–" }, 2, StringSplitOptions.None);
                    course.CourseNumber = courseIDandName[0].Trim();
                    course.CourseName = courseIDandName.Length > 1 ? courseIDandName[1].Trim() : string.Empty;

                    //Number of credits
                    course.Credits = value[2].Replace("(", string.Empty).Replace(" Credits)", string.Empty).Trim();
                }
            }
            else if (course != null && (rowColor == "#E1E1CC" || rowColor == "#FFFFFF"))
            {
                //Read Additional information
                HtmlNodeCollection htmlTDNodes = node.SelectNodes("td");
                if (htmlTDNodes == null)
                {
                    // Row without cells: nothing to read.
                    continue;
                }

                int currentCell = 1;
                foreach (HtmlNode tdNode in htmlTDNodes)
                {
                    //Check if there is colspan skipping data blocks
                    string colspanText = tdNode.GetAttributeValue("colspan", string.Empty);
                    int columnsSkipped = 0;
                    if (!string.IsNullOrEmpty(colspanText) && int.TryParse(colspanText, out columnsSkipped) && columnsSkipped > 0)
                    {
                        // Only a positive span may move the cell index; a bad value must not move it backwards.
                        currentCell = currentCell + (columnsSkipped - 1);
                    }

                    string innerText = HttpUtility.HtmlDecode(tdNode.InnerText).Trim();
                    if (!string.IsNullOrEmpty(innerText))
                    {
                        switch (currentCell)
                        {
                            case 1:
                                section.SectionID = string.IsNullOrEmpty(section.SectionID) ? innerText : section.SectionID;
                                break;
                            case 2:
                                break;
                            case 3:
                                section.GradeMethod = string.IsNullOrEmpty(section.GradeMethod) ? innerText : section.GradeMethod;
                                break;
                            case 4:
                                // Puts a single space between the characters of the text.
                                section.Days.Add(string.Join(" ", (IEnumerable<char>)innerText.Replace(" ", string.Empty)));
                                break;
                            case 5:
                                section.Time.Add(innerText);
                                break;
                            case 6:
                                section.Dates.Add(innerText);
                                break;
                            case 7:
                                section.Room.Add(innerText);
                                break;
                            case 8:
                                section.Instructor.Add(innerText);
                                break;
                            case 9:
                                int size = 0;
                                int.TryParse(innerText, out size);
                                section.Size = size;
                                break;
                            case 10:
                                int enrolled = 0;
                                int.TryParse(innerText, out enrolled);
                                section.Enrolled = enrolled;
                                break;
                            case 11:
                                section.Status = innerText;
                                break;
                            default:
                                break;
                        }
                    }

                    currentCell++;
                }
            }
        }
    }

    // The loop only flushes a course when the next header row shows up, so the final course
    // has to be added here or it would be missing from the result.
    if (course != null)
    {
        course.Sections.Add(section);
        courses.Add(course);
    }

    model.Courses = courses;

    return View("Lab7", model);
}