/// <summary>
/// Parses the HTML of a multi-object tracking page into one <see cref="TrackingEvent"/> per table row,
/// keyed by tracking number.
/// </summary>
/// <param name="html">Downloaded page markup; may be null, empty or whitespace.</param>
/// <returns>
/// A dictionary mapping each tracking number to its event. The dictionary is empty when
/// <paramref name="html"/> is blank, or when the page has no &lt;tr&gt; element but contains
/// <c>PACKAGE_NOT_FOUND_MESSAGE</c>.
/// </returns>
/// <exception cref="Exception">
/// The page has no &lt;tr&gt; element and does not contain <c>PACKAGE_NOT_FOUND_MESSAGE</c>.
/// </exception>
private Dictionary<String, TrackingEvent> ParseMultiObjectTrackHTML(String html)
{
    var ret = new Dictionary<String, TrackingEvent>();

    // Nothing was downloaded, so there is nothing to parse.
    if (String.IsNullOrWhiteSpace(html))
    {
        return ret;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    // Select every <tr> anywhere in the document.
    var trs = doc.DocumentNode.SelectNodes("//tr");
    if (trs == null)
    {
        // IndexOf returns -1 when the text is absent, so the previous "!= 0" test was true for
        // almost every page and made the throw below unreachable. ">= 0" is the real
        // "message is present" check.
        if (html.IndexOf(PACKAGE_NOT_FOUND_MESSAGE) >= 0)
        {
            // An unknown package is reported as an empty result rather than an exception.
            return ret;
        }

        throw new Exception("no <tr> found");
    }

    // Skips the first <tr> as it's a header.
    foreach (HtmlNode tr in trs.Skip(1))
    {
        // NOTE(review): only even child indexes are read; presumably the odd ones are
        // whitespace text nodes between the <td> elements - confirm against a real page.
        HtmlNodeCollection tds = tr.ChildNodes;

        var ev = new TrackingEvent();
        ev.TrackingNumber = tds[0].FirstChild.InnerText;
        ev.Description = tds[2].FirstChild.InnerText; // Status
        ev.Date = DateTime.Parse(tds[4].InnerText, dateCulture);
        ev.Place = tds[6].InnerText;

        // A place containing '/' is split into city and UF; UF is only set when there is
        // exactly one separator.
        if (ev.Place.Contains("/"))
        {
            string[] city = ev.Place.Trim().Split('/');
            ev.City = city[0];
            if (city.Length == 2)
            {
                ev.UF = city[1];
            }
        }

        ret.Add(ev.TrackingNumber, ev);
    }

    return ret;
}
/// <summary>
/// Parses the HTML of a single-object tracking page into the list of events found in its table rows.
/// </summary>
/// <param name="html">Downloaded page markup; may be null, empty or whitespace.</param>
/// <param name="lastOnly">
/// When true, parsing stops after the first event row has been added, so at most one event is returned.
/// </param>
/// <returns>
/// The parsed events in document order. The list is empty when <paramref name="html"/> is blank, or
/// when the page has no &lt;tr&gt; element but contains <c>PACKAGE_NOT_FOUND_MESSAGE</c>.
/// </returns>
/// <exception cref="Exception">
/// The page has no &lt;tr&gt; element and does not contain <c>PACKAGE_NOT_FOUND_MESSAGE</c>.
/// </exception>
private List<TrackingEvent> ParseSingleObjectTrackHTML(String html, bool lastOnly)
{
    var ret = new List<TrackingEvent>();

    // Nothing was downloaded, so there is nothing to parse.
    if (String.IsNullOrWhiteSpace(html))
    {
        return ret;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    // Select every <tr> anywhere in the document.
    var trs = doc.DocumentNode.SelectNodes("//tr");
    if (trs == null)
    {
        // IndexOf returns -1 when the text is absent, so the previous "!= 0" test was true for
        // almost every page and made the throw below unreachable. ">= 0" is the real
        // "message is present" check.
        if (html.IndexOf(PACKAGE_NOT_FOUND_MESSAGE) >= 0)
        {
            // JC: an empty result is returned instead of throwing "Package not found." because
            // an exception costs more than a plain return, which keeps tracking faster.
            return ret;
        }

        throw new Exception("no <tr> found");
    }

    // The tracking number is the 13 characters starting at index 1 of the first <font> element's
    // first child text.
    var fonts = doc.DocumentNode.SelectNodes("//font");
    var trackingNumber = fonts[0].FirstChild.InnerText.Substring(1, 13);

    // Skips the first <tr> as it's a header.
    foreach (HtmlNode tr in trs.Skip(1))
    {
        // A row whose first cell has colspan="2" is a destination "semirow", not a new event row;
        // it is skipped here.
        HtmlNode firstCell = tr.FirstChild;
        if (firstCell.Attributes.Contains("colspan") && firstCell.Attributes["colspan"].Value == "2")
        {
            continue;
        }

        // If only the most recent event must be returned, then it's time to stop.
        if (lastOnly && ret.Count > 0)
        {
            break;
        }

        HtmlNodeCollection tds = tr.ChildNodes;

        var ev = new TrackingEvent();
        ev.TrackingNumber = trackingNumber;
        ev.Date = DateTime.Parse(tds[0].InnerText, dateCulture);

        // The place cell is split on '-'; city and UF are only filled in when there is exactly
        // one '-' separator, and UF only when the city part has exactly one '/'.
        string[] place = tds[1].InnerText.Split('-');
        ev.Place = place[0].Trim();
        if (place.Length == 2)
        {
            string[] city = place[1].Trim().Split('/');
            ev.City = city[0];
            if (city.Length == 2)
            {
                ev.UF = city[1];
            }
        }

        ev.Description = tds[2].FirstChild.InnerText;

        ret.Add(ev);
    }

    return ret;
}
/// <summary>
/// Parses the HTML of a single-object tracking page whose data cells are
/// <c>&lt;td bgcolor="#f5f5f5"&gt;</c> elements into one <see cref="TrackingEvent"/>.
/// </summary>
/// <param name="html">Downloaded page markup; may be null, empty or whitespace.</param>
/// <returns>
/// The parsed event, or null when <paramref name="html"/> is blank or when no matching cell exists
/// but the page contains <c>PACKAGE_NOT_FOUND_MESSAGE</c>.
/// </returns>
/// <exception cref="Exception">
/// No matching cell exists and the page does not contain <c>PACKAGE_NOT_FOUND_MESSAGE</c>.
/// </exception>
private TrackingEvent ParseSingleObjectTrackHTML(String html)
{
    // Nothing was downloaded, so there is nothing to parse.
    if (String.IsNullOrWhiteSpace(html))
    {
        return null;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    // Select every <td> whose bgcolor attribute is "#f5f5f5".
    var tds = doc.DocumentNode.SelectNodes("//td[@bgcolor=\"#f5f5f5\"]");
    if (tds == null)
    {
        // IndexOf returns -1 when the text is absent, so the previous "!= 0" test was true for
        // almost every page and made the throw below unreachable. ">= 0" is the real
        // "message is present" check.
        if (html.IndexOf(PACKAGE_NOT_FOUND_MESSAGE) >= 0)
        {
            return null;
        }

        throw new Exception("CN: Unable to find the track info table");
    }

    var ev = new TrackingEvent();

    // The first 13 characters of the first cell are the tracking number.
    ev.TrackingNumber = tds[0].InnerText.Trim().Substring(0, 13);
    ev.Description = tds[2].InnerText.Trim();

    // Only the first 10 characters of the sixth cell are parsed as the date.
    // NOTE(review): parsing uses the current culture; presumably the text is a culture-neutral
    // date such as yyyy-MM-dd - confirm, and pass an explicit culture if not.
    String dataString = tds[5].InnerText.Trim().Substring(0, 10);
    ev.Date = DateTime.Parse(dataString);

    ev.Place = "CHINA-" + tds[4].InnerText.Trim();

    return ev;
}