/// <summary>
/// Reads the schedule name from the <c>h2.pageTitle</c> element of the schedule page.
/// </summary>
/// <param name="reference">The schedule being read; its <c>Href</c> is only used in the error message.</param>
/// <param name="scheduleDocument">The parsed schedule page.</param>
/// <returns>The trimmed text content of the title element.</returns>
/// <exception cref="ConnectorRideException">Thrown when the page has no <c>h2.pageTitle</c> element.</exception>
private static string ExtractName(ScheduleReference reference, IDocument scheduleDocument)
{
    var titleElement = scheduleDocument.QuerySelector("h2.pageTitle");
    if (titleElement != null)
    {
        return titleElement.TextContent.Trim();
    }

    throw new ConnectorRideException($"The schedule name could not be found on the schedule page: {reference.Href}");
}
/// <summary>
/// Finds the first anchor on the schedule page whose resolved <c>Href</c> contains
/// <c>Schedules/Map?name=</c> and wraps that URL in a <see cref="MapReference"/>.
/// </summary>
/// <param name="reference">The schedule being read; its <c>Href</c> is only used in the error message.</param>
/// <param name="scheduleDocument">The parsed schedule page.</param>
/// <returns>A <see cref="MapReference"/> whose <c>Href</c> is the matching anchor's <c>Href</c>.</returns>
/// <exception cref="ConnectorRideException">Thrown when no anchor on the page links to the map.</exception>
private MapReference ExtractMapReference(ScheduleReference reference, IDocument scheduleDocument)
{
    var anchors = scheduleDocument
        .QuerySelectorAll("a[href]")
        .OfType<IHtmlAnchorElement>();

    // Document order matters: the first matching anchor wins.
    foreach (var anchor in anchors)
    {
        if (anchor.Href.Contains("Schedules/Map?name="))
        {
            return new MapReference { Href = anchor.Href };
        }
    }

    throw new ConnectorRideException($"The map link could not be found on the schedule page: {reference.Href}");
}
/// <summary>
/// Loads the schedule page referenced by <paramref name="reference"/> and assembles a
/// <see cref="Schedule"/> from its name, schedule table and map link.
/// </summary>
/// <param name="reference">The schedule to load; its <c>Href</c> is passed to <c>GetDocumentAsync</c>.</param>
/// <returns>A task producing the populated <see cref="Schedule"/>.</returns>
/// <exception cref="ConnectorRideException">
/// Thrown by the extraction helpers when the name, table or map link cannot be found on the page.
/// </exception>
public async Task<Schedule> GetScheduleAsync(ScheduleReference reference)
{
    // Get the schedule page.
    var document = await GetDocumentAsync(reference.Href);

    // Extract the pieces in a fixed order (name, table, map link) so the first
    // missing piece is the one that gets reported.
    var scheduleName = ExtractName(reference, document);
    var scheduleTable = ExtractTable(reference, document);
    var scheduleMap = ExtractMapReference(reference, document);

    return new Schedule
    {
        Name = scheduleName,
        Table = scheduleTable,
        MapReference = scheduleMap
    };
}
/// <summary>
/// Parses the schedule table (the first <c>table</c> matched by the selector <c>h2+div table</c>)
/// into its list of stops and its list of trips.
/// </summary>
/// <param name="reference">The schedule being read; its <c>Href</c> is only used in error messages.</param>
/// <param name="scheduleDocument">The parsed schedule page.</param>
/// <returns>
/// A <see cref="Table"/> whose <c>Stops</c> come from the table's header cells and whose <c>Trips</c>
/// hold one entry per body row, with stop times expressed on a 24-hour clock.
/// </returns>
/// <exception cref="ConnectorRideException">
/// Thrown when the table cannot be found, when a row's cell count does not match the number of stops,
/// or when a cell's text is neither <c>----</c> nor a time recognized by <c>StopTimeRegex</c>.
/// </exception>
private static Table ExtractTable(ScheduleReference reference, IDocument scheduleDocument)
{
    var scheduleTable = scheduleDocument
        .QuerySelectorAll("h2+div table")
        .OfType<IHtmlTableElement>()
        .FirstOrDefault();
    if (scheduleTable == null)
    {
        throw new ConnectorRideException($"The schedule table could not be found on the schedule page: {reference.Href}");
    }

    // Every header cell that is not flagged with the "IsHeading" class names a stop.
    var stops = scheduleTable
        .QuerySelectorAll("th")
        .Where(headerCell => !headerCell.ClassList.Contains("IsHeading"))
        .Select(headerCell => new TableStop
        {
            Name = headerCell.TextContent.Trim(),
            IsPick = headerCell.ClassList.Contains("ispick"),
            IsHub = headerCell.ClassList.Contains("ishub")
        })
        .ToList();

    var trips = new List<TableTrip>();

    // Only direct TR children of the table's direct TBODY child are considered; rows stays
    // null when the table has no TBODY, in which case the table has stops but no trips.
    var rows = scheduleTable
        .Children
        .FirstOrDefault(x => x.NodeName == "TBODY")?
        .Children?
        .Where(x => x.NodeName == "TR")
        .ToList();

    if (rows != null)
    {
        var rowCells = new List<List<IHtmlTableDataCellElement>>();
        foreach (var row in rows)
        {
            // The first TD of each row is skipped (presumably the row's label column, the
            // counterpart of the "IsHeading" header cell -- confirm against the page markup).
            var cells = row
                .Children
                .Where(x => x.NodeName == "TD")
                .OfType<IHtmlTableDataCellElement>()
                .Skip(1)
                .ToList();
            rowCells.Add(cells);
        }

        // When every row agrees on a cell count that is smaller than the number of header
        // stops, the surplus trailing stops are dropped so the columns line up.
        if (rowCells.Any()
            && rowCells.All(x => x.Count == rowCells[0].Count)
            && rowCells[0].Count < stops.Count)
        {
            stops = stops.GetRange(0, rowCells[0].Count);
        }

        foreach (var cells in rowCells)
        {
            if (cells.Count != stops.Count)
            {
                throw new ConnectorRideException($"One of rows of the schedule table does not have the right number of columns on the schedule page: {reference.Href}");
            }

            var stopTimes = new List<TableStopTime>();
            for (int i = 0; i < cells.Count; i++)
            {
                var cellText = cells[i].TextContent.Trim();

                // "----" marks a stop that this trip does not serve.
                if (cellText == "----")
                {
                    continue;
                }

                // Report unparsable cells as a ConnectorRideException, like every other
                // failure in this method, instead of leaking a FormatException from int.Parse.
                var stopTimeMatch = StopTimeRegex.Match(cellText);
                int hour = 0;
                int minute = 0;
                var parsed = stopTimeMatch.Success
                    && int.TryParse(
                        stopTimeMatch.Groups["Hour"].Value,
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out hour)
                    && int.TryParse(
                        stopTimeMatch.Groups["Minute"].Value,
                        System.Globalization.NumberStyles.Integer,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out minute);
                if (!parsed)
                {
                    throw new ConnectorRideException($"The stop time '{cellText}' could not be parsed on the schedule page: {reference.Href}");
                }

                // Convert from the 12-hour clock: 12 PM stays 12 (not 24) and 12 AM becomes 0.
                // Hours that are already >= 12 with "PM", or that carry no period, are left alone.
                string period = stopTimeMatch.Groups["Period"].Value.ToUpperInvariant();
                if (period == "PM" && hour < 12)
                {
                    hour += 12;
                }
                else if (period == "AM" && hour == 12)
                {
                    hour = 0;
                }

                stopTimes.Add(new TableStopTime
                {
                    StopName = stops[i].Name,
                    Hour = hour,
                    Minute = minute
                });
            }

            trips.Add(new TableTrip { StopTimes = stopTimes });
        }
    }

    return new Table
    {
        Stops = stops,
        Trips = trips
    };
}