/// <summary>
/// Loads an HTML page, tokenises it and displays the body of the table whose
/// <c>id</c> attribute is "TrainTable".
/// The page is read from the file named in <c>lblFilename</c> when <c>txtURL</c> is empty,
/// otherwise it is downloaded from the address in <c>txtURL</c>.
/// A message box is shown (and nothing is displayed) when the page does not tokenise to a
/// single root item or the expected table section cannot be found.
/// </summary>
private void populateTreeview()
{
    string html = "";

    if (txtURL.Text == "")
    {
        // No URL supplied: fall back to the local file. The using block guarantees the
        // reader is closed even if ReadToEnd throws.
        FileInfo fi = new FileInfo(lblFilename.Text);
        using (StreamReader sr = fi.OpenText())
        {
            html = sr.ReadToEnd();
        }
    }
    else
    {
        Uri site = new Uri(txtURL.Text);
        WebRequest wReq = WebRequest.Create(site);

        // Dispose the response (and with it the connection) once the body has been read.
        using (WebResponse wResp = wReq.GetResponse())
        {
            Stream respStream = wResp == null ? null : wResp.GetResponseStream();
            if (respStream != null)
            {
                // NOTE(review): the page is decoded as ASCII, so any non-ASCII characters
                // are lost - confirm this is intended.
                using (StreamReader reader = new StreamReader(respStream, Encoding.ASCII))
                {
                    html = reader.ReadToEnd();
                }
            }
        }
    }

    Tokeniser tokeniser = new Tokeniser();
    List<WebItem> webItems = tokeniser.Tokenise(html);
    if (webItems.Count != 1)
    {
        MessageBox.Show("No single root to webitem");
        return;
    }

    // ExtractSection can come back empty-handed (other callers check for null), so stop
    // here rather than passing null further down.
    WebItem detailRows = Tokeniser.ExtractSection(webItems[0], "table", "id", "TrainTable");
    if (detailRows == null)
    {
        MessageBox.Show("Could not find the TrainTable section");
        return;
    }

    detailRows = Tokeniser.ExtractSection(detailRows, "tbody", "", "");
    if (detailRows == null)
    {
        MessageBox.Show("Could not find the body of the TrainTable section");
        return;
    }

    showWebitem(null, detailRows);
}
/// <summary>
/// Reads a train's detail page and returns the rows of its "Previous Calling Points" table
/// whose station is both present in the database and contained in <paramref name="fromStations"/>.
/// </summary>
/// <param name="parialUrlForStationList">
/// Partial URL of the station list. Only used when offline: with "term.aspx?", "train.aspx?"
/// and ";" removed it is matched against the names of the saved page files.
/// </param>
/// <param name="fromStations">Stations of interest; calling points are matched against these by id.</param>
/// <param name="details">
/// Detail fragment that is appended (with double quotes and semicolons removed) to the
/// configured <c>URLDetailFragment</c> when online.
/// </param>
/// <param name="isConnected">
/// True to download the page; false to read the most recently created matching file from the
/// folder returned by <c>GetLatestWebPagesFolder</c>.
/// </param>
/// <returns>
/// One <c>FromStation</c> per matching calling point. Empty when the page yields no tokens or
/// the table (or its body) cannot be found.
/// </returns>
/// <exception cref="Exception">
/// Thrown when a row's scheduled time is missing or is not exactly four characters long after trimming.
/// </exception>
private IEnumerable<FromStation> ConfirmStartingStations(string parialUrlForStationList, List<Station> fromStations, string details, bool isConnected)
{
    ConsoleMsg("ConfirmStartingStations", MsgType.FunctionCall);
    var rv = new List<FromStation>();
    var html = "";

    //Get HTML
    if (isConnected)
    {
        var url = Properties.Settings.Default.URLDetailFragment;
        url += details.Replace("\"", "").Replace(";", "");
        html = GetHtmlFromURL(url);
    }
    else
    {
        var partFileName = parialUrlForStationList
            .Replace("term.aspx?", "")
            .Replace("train.aspx?", "")
            .Replace(";", "");

        // Pick the most recently created saved page whose name contains the fragment.
        FileInfo fiStationList = null;
        foreach (var fi in GetLatestWebPagesFolder().GetFiles())
        {
            if (!fi.Name.Contains(partFileName))
            {
                continue;
            }
            if (fiStationList == null || fiStationList.CreationTime < fi.CreationTime)
            {
                fiStationList = fi;
            }
        }

        if (fiStationList != null)
        {
            // using guarantees the file handle is released even if the read fails.
            using (var sr = fiStationList.OpenText())
            {
                html = sr.ReadToEnd();
            }
        }
    }

    //Get "Previous Calling Points" stations
    var tokeniser = new Tokeniser();
    var pageItems = tokeniser.Tokenise(html);
    if (pageItems == null || pageItems.Count == 0)
    {
        // Nothing to parse (e.g. offline with no saved page): no calling points.
        return rv;
    }

    var detailPage = pageItems[0];
    var detailRows = Tokeniser.ExtractSection(detailPage, "table", "title", "Previous Calling Points");
    if (detailRows == null)
    {
        return rv;
    }
    detailRows = Tokeniser.ExtractSection(detailRows, "tbody", "", "");
    if (detailRows == null)
    {
        return rv;
    }

    foreach (var detailRow in detailRows.Children)
    {
        if (!string.Equals(detailRow.Text, "tr", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }
        if (detailRow.Children.Count < 2)
        {
            continue;
        }

        var wiStation = detailRow.Children[0];
        if (string.Equals(wiStation.Text, "th", StringComparison.OrdinalIgnoreCase))
        {
            continue; // header row
        }
        var wiSchedule = detailRow.Children[1];

        //Station Name
        // When the cell has more than one child, the name is taken from the first child;
        // otherwise from the cell itself.
        var stationName = GetContent(wiStation.Children.Count > 1 ? wiStation.Children[0] : wiStation);
        if (stationName == " ")
        {
            continue;
        }

        //Scheduled Time
        if (wiSchedule.Children.Count == 0)
        {
            throw new Exception("Couldn't get sceduled departure");
        }
        // Trim once and parse the trimmed value, so the length check and the Substring
        // calls look at the same text.
        var strTime = (wiSchedule.Children[0].Text ?? "").Trim();
        if (strTime.Length != 4)
        {
            throw new Exception("Couldn't get sceduled departure");
        }
        var hours = int.Parse(strTime.Substring(0, 2));
        var minutes = int.Parse(strTime.Substring(2));
        var scheduledDep = DateTime.Today.AddHours(hours).AddMinutes(minutes);
        if (DateTime.Now < scheduledDep)
        {
            // DateTime is immutable: AddDays returns a new value, which must be assigned.
            // A departure time later than "now" is moved back one day.
            scheduledDep = scheduledDep.AddDays(-1);
        }

        //Station Code
        var station = (from s in _db.Stations
                       where s.StationName == stationName
                       select s).FirstOrDefault();
        if (station == null)
        {
            continue;
        }

        //Is this one from the fromlist?
        if (fromStations.Any(fs => fs.Id == station.Id))
        {
            rv.Add(new FromStation { ScheduledDeparture = scheduledDep, StationId = station.Id });
        }
    }

    return rv;
}
/// <summary>
/// For every station returned by <c>GetDistinctListOfStations</c>, reads that station's main
/// page, walks the rows of the table whose <c>id</c> is "TrainTable" and records each train
/// journey (identified by the "J=" parameter of the row's link) together with its destination
/// and the starting stations confirmed by <c>ConfirmStartingStations</c>.
/// A journey is only saved when its destination has at least one starting station.
/// </summary>
/// <param name="isConnected">
/// Passed through to <c>getMainPageInfo</c> and <c>ConfirmStartingStations</c> to select
/// between live and saved pages.
/// </param>
private void HarvestTrainJourneyInfo(bool isConnected)
{
    ConsoleMsg("HarvestTrainJourneyInfo", MsgType.FunctionCall);
    IEnumerable<Station> stations = GetDistinctListOfStations();

    foreach (Station destinationStation in stations)
    {
        //Get HTML
        string html = getMainPageInfo(destinationStation, isConnected);

        //Tokenise
        var tokeniser = new Tokeniser();
        List<WebItem> page = tokeniser.Tokenise(html);
        // NOTE(review): these returns end the harvest for ALL remaining stations, not just
        // this one - confirm that is intended rather than skipping to the next station.
        if (page.Count == 0)
        {
            return;
        }

        //Rip out section we want (table, id, "TrainTable")
        WebItem section = Tokeniser.ExtractSection(page[0], "table", "id", "TrainTable");
        if (section == null)
        {
            return;
        }

        //Rows
        WebItem rows = Tokeniser.ExtractSection(section, "tbody", "", "");
        if (rows == null)
        {
            // Same handling as a missing table, instead of a NullReferenceException below.
            return;
        }

        // The stations of interest depend only on the destination station, so they are
        // loaded at most once per station (on the first row that needs them).
        List<Station> fromStations = null;

        foreach (WebItem row in rows.Children)
        {
            if (!string.Equals(row.Text, "tr", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }
            if (row.Children.Count < 5)
            {
                continue;
            }

            WebItem firstCell = row.Children[0];
            if (string.Equals((GetContent(firstCell) ?? "").Trim(), "from", StringComparison.OrdinalIgnoreCase))
            {
                continue; // header row
            }

            // Five-column rows carry the timetabled time in column 1, wider rows in column 2.
            string timetable = row.Children.Count == 5
                ? GetContent(row.Children[1])
                : GetContent(row.Children[2]);
            string trainOperator = GetContent(row.Children[4]);

            // The first attribute of the first child of the first cell holds the link.
            if (firstCell.Children.Count == 0 || !firstCell.Children[0].Attributes.Any())
            {
                continue;
            }

            // NOTE(review): as written this Replace swaps "&" for "&" and so changes nothing;
            // presumably it was meant to decode an HTML-encoded ampersand - confirm.
            var details = firstCell.Children[0].Attributes[0].Content.Replace("&", "&");

            // Strip the surrounding quotes: drop the first and the last character when the
            // value starts with a double quote.
            var partialUrlForStationList = details;
            if (partialUrlForStationList.StartsWith("\"", StringComparison.Ordinal))
            {
                partialUrlForStationList = partialUrlForStationList.Length >= 2
                    ? partialUrlForStationList.Substring(1, partialUrlForStationList.Length - 2)
                    : "";
            }

            //Get J-Code (seems to be unique id for a train journey)
            string jCode = "";
            foreach (string t in partialUrlForStationList.Split('&'))
            {
                if (t.StartsWith("J=", StringComparison.OrdinalIgnoreCase))
                {
                    jCode = t.Substring(2);
                }
                else if (t.StartsWith(";J=", StringComparison.OrdinalIgnoreCase))
                {
                    jCode = t.Substring(3);
                }
            }
            if (String.IsNullOrEmpty(jCode))
            {
                continue;
            }
            if (String.IsNullOrEmpty(timetable))
            {
                continue;
            }

            //Get Train Journey
            Journey journey = (from j in _db.Journeys
                               where j.JCode == jCode
                               select j).FirstOrDefault()
                              ?? new Journey { JCode = jCode, TrainOperator = trainOperator };

            //Get Destination
            Destination destination = (from d in journey.Destinations
                                       where d.Station.Id == destinationStation.Id
                                       select d).FirstOrDefault();
            if (destination == null)
            {
                //Timetabled arrival time
                // Four characters are read as "HHMM"; anything longer takes the minutes from
                // position 3 (i.e. one separator character after the hours).
                if (timetable.Length < 4)
                {
                    continue;
                }
                string hourText = timetable.Substring(0, 2);
                string minuteText = timetable.Length == 4
                    ? timetable.Substring(2, 2)
                    : timetable.Substring(3, 2);

                int hour;
                int minute;
                bool parsed =
                    int.TryParse(hourText, System.Globalization.NumberStyles.Integer,
                                 System.Globalization.CultureInfo.InvariantCulture, out hour)
                    && int.TryParse(minuteText, System.Globalization.NumberStyles.Integer,
                                    System.Globalization.CultureInfo.InvariantCulture, out minute);
                if (!parsed || hour < 0 || hour > 23 || minute < 0 || minute > 59)
                {
                    // Not a usable time: skip the row instead of aborting the whole harvest.
                    continue;
                }

                DateTime now = DateTime.Now;
                var timetabledArrival = new DateTime(now.Year, now.Month, now.Day, hour, minute, 0);
                // A time more than 12 hours in the past is treated as tomorrow's arrival.
                if (timetabledArrival.AddHours(12) < now)
                {
                    timetabledArrival = timetabledArrival.AddDays(1);
                }

                destination = new Destination
                {
                    Station = destinationStation,
                    ScheduledArrival = timetabledArrival
                };
            }
            destination.ActualArrival = DateTime.Now;

            //Which starting stations are we interested in for this destination?
            if (fromStations == null)
            {
                fromStations = new List<Station>();

                List<Station> viaHomeStation = (from u in _db.Users
                                                where u.HomeStation.Id == destinationStation.Id
                                                select u.DestinationStation).ToList();
                List<Station> viaDestinationStation = (from u in _db.Users
                                                       where u.DestinationStation.Id == destinationStation.Id
                                                       select u.HomeStation).ToList();

                // Merge both lists, keeping one entry per station id.
                foreach (Station candidate in viaHomeStation.Concat(viaDestinationStation))
                {
                    if (candidate == null)
                    {
                        continue;
                    }
                    if (!fromStations.Any(s => s.Id == candidate.Id))
                    {
                        fromStations.Add(candidate);
                    }
                }
            }

            //Look for fromStations in "Previous Calling Points"
            IEnumerable<FromStation> validStartingStations =
                ConfirmStartingStations(partialUrlForStationList, fromStations, details, isConnected);
            foreach (FromStation fs in validStartingStations)
            {
                // Entries produced by ConfirmStartingStations are created with StationId only,
                // so fall back to StationId when the Station reference is not set; otherwise
                // the second entry added in this loop would dereference a null Station.
                bool alreadyLinked = destination.FromStations.Any(
                    fr => (fr.Station != null ? fr.Station.Id : fr.StationId) == fs.StationId);
                if (!alreadyLinked)
                {
                    destination.FromStations.Add(fs);
                }
            }

            //Is this worth saving?
            if (destination.FromStations.Count <= 0)
            {
                continue;
            }
            if (destination.Journey == null)
            {
                journey.Destinations.Add(destination);
            }
            if (journey.Id == 0)
            {
                _db.Journeys.Add(journey);
            }
            _db.SaveChanges();
        }
    }
}