/// <summary>
/// Scrapes the recordings listed on every page linked from the given course instance.
/// </summary>
/// <param name="course">
/// Course instance whose <c>PageLinks</c> entries are appended to <c>BASE_URL</c> and loaded in turn.
/// </param>
/// <param name="callback">
/// Optional progress sink. After each page has been handled it receives the percentage
/// of pages processed so far, ending at 100 once the last page is done.
/// </param>
/// <returns>
/// The recordings parsed from all pages that could be loaded, in page order.
/// The list is empty when no recordings were found.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="course"/> is null.</exception>
/// <exception cref="FormatException">
/// A recording heading does not match the expected "dd MMM yyyy - HH:mm" timestamp format.
/// </exception>
public List<Recording> GetRecordings(CourseInstance course, IProgress<Double> callback = null)
{
    if (course == null)
    {
        throw new ArgumentNullException("course");
    }

    var recordingList = new List<Recording>();

    // The page list is not modified inside the loop, so count it once.
    var pageCount = course.PageLinks.Count();

    // Retrieve recordings from each page associated with the given course instance
    for (var i = 0; i < pageCount; i++)
    {
        var link = BASE_URL + course.PageLinks[i];
        HtmlDocument document = LoadDocument(link);

        // TODO: Figure out what to do on document load failure
        if (document != null)
        {
            // Each table of class 'mainindex' holds the data for one recording
            var recordingNodes = document.DocumentNode.SelectNodes(
                "//table[@class='mainindex']");

            if (recordingNodes != null)
            {
                foreach (var node in recordingNodes)
                {
                    var headingNode = node.SelectSingleNode(
                        ".//tr[@class='sectionHeading']//h3");
                    if (headingNode == null)
                    {
                        // Not a recording table (no heading); skip instead of crashing
                        continue;
                    }

                    var anchorNode = headingNode.SelectSingleNode(".//a");
                    var durationNode = node.SelectSingleNode(
                        ".//tr[@class='sectionHeading']/td[2]");
                    if (anchorNode == null || durationNode == null)
                    {
                        // Malformed entry: the ID anchor or duration cell is missing
                        continue;
                    }

                    // Retrieve timestamp.
                    // NOTE(review): the first pattern is reproduced verbatim from the original;
                    // it may originally have been an HTML entity such as &nbsp; - confirm
                    // against the raw file.
                    var timestamp = headingNode.InnerText;
                    timestamp = Regex.Replace(timestamp, " ", "");
                    timestamp = Regex.Replace(timestamp, @"\s+", " ").Trim();

                    // The page content is machine-generated, so parse it with the invariant
                    // culture rather than whatever culture the current thread happens to use.
                    var recordingDate = DateTime.ParseExact(
                        timestamp, "dd MMM yyyy - HH:mm", CultureInfo.InvariantCulture);

                    // Retrieve recording ID (falls back to 0 when the anchor has no usable id)
                    var recordingID = anchorNode.GetAttributeValue("id", 0);

                    // Retrieve recording duration
                    var duration = durationNode.InnerText.Trim();

                    // Construct recording instance
                    var recording = new Recording(recordingID, recordingDate, duration);

                    // Attach the list of file formats available for this recording
                    var formatList = GetRecordingFormats(node);
                    foreach (var format in formatList)
                    {
                        recording.Formats.Add(format);
                    }

                    recordingList.Add(recording);
                }
            }
        }

        // Report progress once the page has been handled, whether or not it loaded,
        // so the reported value always advances and finishes at 100.
        if (callback != null)
        {
            lock (callback)
            {
                callback.Report((double)(i + 1) / pageCount * 100);
            }
        }
    }

    return recordingList;
}
/// <summary>
/// Attempts to load the Lectopia recording list with the given ID. On a successful load,
/// the page is parsed into a course instance holding the subject title, the timestamp of
/// the first listed recording (if any) and the links to every page of the listing.
/// </summary>
/// <param name="id">Identifier appended to <c>RECORDINGS_URL</c> to form the listing URL.</param>
/// <returns>
/// The parsed course instance, or <c>null</c> when the page could not be loaded or does
/// not contain a subject title (i.e. it is not a valid listing).
/// </returns>
/// <exception cref="FormatException">
/// The recording heading does not match the expected "dd MMM yyyy - HH:mm" timestamp format.
/// </exception>
public CourseInstance ReadCourseInformation(int id)
{
    string url = RECORDINGS_URL + id;

    HtmlDocument document = null;
    try
    {
        document = LoadDocument(url);
    }
    // On a web failure (e.g. a connection timeout), record the attempted ID for a later retry
    catch (WebException)
    {
        // TODO: Log error
        lock (FailedReads)
        {
            FailedReads.Add(id);
        }
    }

    if (document == null)
    {
        return null;
    }

    // Retrieve subject title; a page without one is not a valid listing
    var titleNode = document.DocumentNode.SelectSingleNode("//table[@id='header']//h2");
    if (titleNode == null)
    {
        return null;
    }

    var title = titleNode.InnerText.Trim();

    // Retrieve the timestamp shown in the heading of the first recording table, if present
    DateTime? lastUpdated = null;
    var lastRecorded = document.DocumentNode.SelectSingleNode(
        "//table[@class='mainindex'][1]//tr[@class='sectionHeading']//h3");

    if (lastRecorded != null)
    {
        // NOTE(review): the first pattern is reproduced verbatim from the original;
        // it may originally have been an HTML entity such as &nbsp; - confirm
        // against the raw file.
        var timestamp = lastRecorded.InnerText;
        timestamp = Regex.Replace(timestamp, " ", "");
        timestamp = Regex.Replace(timestamp, @"\s+", " ").Trim();

        // The page content is machine-generated, so parse it with the invariant
        // culture rather than whatever culture the current thread happens to use.
        lastUpdated = DateTime.ParseExact(
            timestamp, "dd MMM yyyy - HH:mm", CultureInfo.InvariantCulture);
    }

    // Construct course instance; the page just loaded is always its first page link
    var course = new CourseInstance(id, title, lastUpdated);
    course.PageLinks.Add(TruncatePageURL(url));

    // Extract the links to the remaining pages of the listing
    var pageNodes = document.DocumentNode.SelectNodes("(//td[@class='noNesting'])[1]/a");
    if (pageNodes != null)
    {
        var pageLinks = from a in pageNodes
                        select TruncatePageURL(BASE_URL + a.GetAttributeValue("href", ""));
        course.PageLinks.AddRange(pageLinks);
    }

    return course;
}