/// <summary>
/// Downloads the BBC World News listing for the requested date range and
/// converts every tab-separated line of the response into a <see cref="Show"/>.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>FromDate</c> and <c>ToDate</c>.</param>
/// <returns>The parsed shows, with start times converted from Central European time to UTC.</returns>
List<Show> Grab(GrabParametersBase p)
{
    var pp = (GrabParameters)p;

    // "/" in a custom date format is replaced by the current culture's date
    // separator, so format with the invariant culture to always emit slashes.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var url = string.Format(URL, pp.FromDate.ToString("dd/MM/yyyy", invariant), pp.ToDate.ToString("dd/MM/yyyy", invariant));
    var wr = WebRequest.Create(url);
    _logger.WriteEntry("Grabbing BBCW", LogType.Info);

    // Dispose the response as well as the reader so the connection is released.
    using (var res = (HttpWebResponse)wr.GetResponse())
    using (var sr = new StreamReader(res.GetResponseStream()))
    {
        var lst = new List<Show>();

        // Resolved once rather than for every line.
        var sourceZone = TimeZoneInfo.FindSystemTimeZoneById("Central European Standard Time");

        sr.ReadLine(); // the first line is skipped
        while (!sr.EndOfStream)
        {
            var line = sr.ReadLine();
            if (string.IsNullOrEmpty(line) || line.Length <= 10)
            {
                continue;
            }

            var tokens = line.Split('\t');
            if (tokens.Length < 5)
            {
                // tokens[4] is read below; a short line would otherwise throw and abort the whole grab.
                _logger.WriteEntry("invalid line in bbcw grabber : " + line, LogType.Warning);
                continue;
            }

            var show = new Show();
            show.Channel = "BBC World News";

            // tokens[0] supplies the date and tokens[1] the time of day.
            var localStart = DateTime.SpecifyKind(Convert.ToDateTime(tokens[0]) + Convert.ToDateTime(tokens[1]).TimeOfDay, DateTimeKind.Unspecified);
            show.StartTime = TimeZoneInfo.ConvertTime(localStart, sourceZone, TimeZoneInfo.Utc);
            show.Title = tokens[2];
            // tokens[3] is not used: per the original note it is not numeric, so it cannot fill Show.Episode.
            show.Description = tokens[4];
            lst.Add(show);
        }

        return lst;
    }
}
/// <summary>
/// Downloads the schedule page of one channel and creates a <see cref="Show"/>
/// for every list item found inside the div of each <c>Day</c> value.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>Channel</c>.</param>
/// <param name="logger">Receives an informational entry naming the channel being grabbed.</param>
/// <returns>The parsed shows, with start times converted from Central European time to UTC.</returns>
List<Show> Grab(GrabParametersBase p, ILogger logger)
{
    var pp = (GrabParameters)p;

    // The channel name is rewritten for the URL: "_" becomes "-", and the
    // placeholders AANNDD / PPLLUUSS become the URL-encoded "&" and "+".
    var channelSlug = pp.Channel.ToString().Replace("_", "-").Replace("AANNDD", "%26").Replace("PPLLUUSS", "%2B");
    var url = string.Format(URL, channelSlug);
    var wr = WebRequest.Create(url);

    var doc = new HtmlAgilityPack.HtmlDocument();
    // Dispose the response once the document is loaded so the connection is released.
    using (var res = (HttpWebResponse)wr.GetResponse())
    {
        logger.WriteEntry(string.Format("Grabbing Channel {0}", pp.Channel), LogType.Info);
        doc.Load(res.GetResponseStream());
    }

    var shows = new List<Show>();
    // Resolved once rather than for every list item.
    var sourceZone = TimeZoneInfo.FindSystemTimeZoneById("Central European Standard Time");

    foreach (Day d in Enum.GetValues(typeof(Day)))
    {
        // Each day is expected in a div whose id equals the enum member's name.
        var dayId = d.ToString();
        var div = doc.DocumentNode.Descendants("div").FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value == dayId);
        if (div == null)
        {
            continue;
        }

        var date = NextDateOfDayOfWeek((DayOfWeek)d);
        foreach (var ul in div.Descendants("ul"))
        {
            foreach (var li in ul.Descendants("li"))
            {
                // The first <p> holds the time text and the first <a> the title.
                var par = li.Descendants("p").First();
                var a = li.Descendants("a").First();

                var show = new Show();
                show.Channel = pp.Channel.ToString();
                show.Title = a.InnerText.Trim();

                var localStart = DateTime.SpecifyKind(date + Convert.ToDateTime(par.InnerText.Trim()).TimeOfDay, DateTimeKind.Unspecified);
                show.StartTime = TimeZoneInfo.ConvertTime(localStart, sourceZone, TimeZoneInfo.Utc);
                shows.Add(show);
            }
        }
    }

    return shows;
}
/// <summary>
/// Downloads the page for one channel id, converts the HTML to XML, lets
/// <c>FillShows</c> extract the shows and then fills in channel and end times.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>ChannelId</c>.</param>
/// <returns>
/// The shows collected so far. Any exception is logged and swallowed, so the
/// list may be empty or partially filled.
/// </returns>
List<Show> Grab(GrabParametersBase p)
{
    var shows = new List<Show>();
    try
    {
        var param = (GrabParameters)p;
        var wr = WebRequest.Create(string.Format(urlFormat, (int)param.ChannelId));
        _logger.WriteEntry(string.Format("Grabbing Channel {0} ...", param.ChannelId), LogType.Info);

        var doc = new HtmlAgilityPack.HtmlDocument();
        // Dispose the response once the document is loaded so the connection is released.
        using (var res = (HttpWebResponse)wr.GetResponse())
        {
            doc.Load(res.GetResponseStream());
        }

        // Round-trip through a string so the HTML can be loaded as an XDocument.
        doc.OptionOutputAsXml = true;
        XDocument xml;
        using (var writer = new StringWriter())
        {
            doc.Save(writer);
            using (var reader = new StringReader(writer.ToString()))
            {
                xml = XDocument.Load(reader);
            }
        }

        FillShows(xml, shows);

        for (int i = shows.Count - 1; i >= 0; i--)
        {
            var show = shows[i];
            show.Channel = param.ChannelId.ToString();
            if (i == shows.Count - 1)
            {
                // The last show has no successor; per the original note it is usually
                // 3-4 days ahead, so an approximate end time is good enough.
                show.EndTime = show.StartTime.AddHours(12);
            }
            else
            {
                // A show ends when the next one starts.
                show.EndTime = shows[i + 1].StartTime;
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the failure and return whatever was collected.
        _logger.WriteEntry(ex.Message, LogType.Error);
    }

    _logger.WriteEntry(string.Format("Found {0} Shows", shows.Count), LogType.Info);
    return shows;
}
/// <summary>
/// Downloads the Cyfra+ JSON guide for one date and reads it token by token
/// into a list of <see cref="Show"/> objects.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>CyfraPlus.GrabParameters</c> to read <c>Date</c>.</param>
/// <param name="logger">Receives the start entry and roughly one progress entry per second of download.</param>
/// <returns>The shows read from the response.</returns>
List<Show> Grab(GrabParametersBase p, ILogger logger)
{
    var pp = (CyfraPlus.GrabParameters)p;
    var shows = new List<Show>();
    var wr = WebRequest.Create(string.Format(urlFormat, pp.Date.ToString(DateFormat)));
    logger.WriteEntry(string.Format("Grabbing Cyfra+ date {0} ...", pp.Date.ToString(DateFormat)), LogType.Info);

    const int ChannelDepth = 2;
    const int BlockSize = 16384;
    var data = new StringBuilder();

    // Dispose the response as well as the reader so the connection is released.
    using (var res = (HttpWebResponse)wr.GetResponse())
    using (var sr = new StreamReader(res.GetResponseStream()))
    {
        var lastProgressLog = DateTime.Now;
        var buf = new char[BlockSize]; // one buffer reused for every block
        while (!sr.EndOfStream)
        {
            var totalRead = sr.ReadBlock(buf, 0, BlockSize);

            // Append only the characters actually read; appending the whole
            // buffer would add unread NUL characters after a short final read.
            data.Append(buf, 0, totalRead);

            if (DateTime.Now - lastProgressLog > TimeSpan.FromSeconds(1))
            {
                lastProgressLog = DateTime.Now;
                logger.WriteEntry(string.Format("Downloaded {0:#,##0} bytes so far", data.Length), LogType.Info);
            }
        }
    }

    // NOTE(review): the token sequence below presumably matches a layout of
    // [channelNumber, channelName, [[programId, title, start, duration, num, ...], ...]]
    // per channel - confirm against a real response. The order of reads is significant.
    using (var r = new Newtonsoft.Json.JsonTextReader(new StringReader(data.ToString())))
    {
        while (r.Read())
        {
            r.Read();
            r.ReadAsInt32(); // channel number, not used
            var channelName = r.ReadAsString();
            r.Read();
            r.Read();

            while (r.Depth > ChannelDepth)
            {
                var show = new Show();
                show.Channel = channelName.Trim();
                r.ReadAsInt32(); // program id, not used
                show.Title = Tools.CleanupText(r.ReadAsString());

                // Start is given in seconds counted from 1970-01-01, followed by a length in seconds.
                show.StartTime = new DateTime(1970, 1, 1).Add(TimeSpan.FromSeconds(r.ReadAsInt32().Value));
                show.EndTime = show.StartTime.Add(TimeSpan.FromSeconds(Convert.ToDouble(r.ReadAsInt32())));
                r.ReadAsInt32(); // trailing number, not used
                shows.Add(show);

                // Skip the remaining tokens at the current depth, then step past the closing token.
                var depth = r.Depth;
                while (r.Depth == depth)
                {
                    r.Read();
                }

                r.Read();
            }
        }
    }

    return shows;
}
/// <summary>
/// Fetches the Cyfra+ JSON guide for a single date and walks its tokens to
/// build the list of <see cref="Show"/> objects.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>CyfraPlus.GrabParameters</c> to read <c>Date</c>.</param>
/// <param name="logger">Receives the start entry and roughly one progress entry per second of download.</param>
/// <returns>The shows read from the response.</returns>
List<Show> Grab(GrabParametersBase p,ILogger logger)
{
    var pp = (CyfraPlus.GrabParameters)p;
    var shows = new List<Show>();
    var wr = WebRequest.Create(string.Format(urlFormat, pp.Date.ToString(DateFormat)));
    logger.WriteEntry(string.Format("Grabbing Cyfra+ date {0} ...", pp.Date.ToString(DateFormat)), LogType.Info);

    const int ChannelDepth = 2;
    const int BlockSize = 16384;
    var data = new StringBuilder();

    // Dispose the response together with the reader so the connection is released.
    using (var res = (HttpWebResponse)wr.GetResponse())
    using (var sr = new StreamReader(res.GetResponseStream()))
    {
        var lastProgressLog = DateTime.Now;
        var buf = new char[BlockSize]; // allocated once and reused
        while (!sr.EndOfStream)
        {
            var totalRead = sr.ReadBlock(buf, 0, BlockSize);

            // Only totalRead characters are valid; the rest of the buffer is
            // stale or NUL and must not end up in the JSON text.
            data.Append(buf, 0, totalRead);

            if (DateTime.Now - lastProgressLog > TimeSpan.FromSeconds(1))
            {
                lastProgressLog = DateTime.Now;
                logger.WriteEntry(string.Format("Downloaded {0:#,##0} bytes so far", data.Length), LogType.Info);
            }
        }
    }

    // NOTE(review): the reads below presumably follow a per-channel layout of
    // [channelNumber, channelName, [[programId, title, start, duration, num, ...], ...]]
    // - confirm against a real response. Do not reorder the reads.
    using (var r = new Newtonsoft.Json.JsonTextReader(new StringReader(data.ToString())))
    {
        while (r.Read())
        {
            r.Read();
            r.ReadAsInt32(); // channel number, not used
            var channelName = r.ReadAsString();
            r.Read();
            r.Read();

            while (r.Depth > ChannelDepth)
            {
                var show = new Show();
                show.Channel = channelName.Trim();
                r.ReadAsInt32(); // program id, not used
                show.Title = Tools.CleanupText(r.ReadAsString());

                // Start is expressed in seconds counted from 1970-01-01; the next value is the length in seconds.
                show.StartTime = new DateTime(1970, 1, 1).Add(TimeSpan.FromSeconds(r.ReadAsInt32().Value));
                show.EndTime = show.StartTime.Add(TimeSpan.FromSeconds(Convert.ToDouble(r.ReadAsInt32())));
                r.ReadAsInt32(); // trailing number, not used
                shows.Add(show);

                // Consume whatever is left at this depth, then move past the closing token.
                var depth = r.Depth;
                while (r.Depth == depth)
                {
                    r.Read();
                }

                r.Read();
            }
        }
    }

    return shows;
}
/// <summary>
/// Downloads the BBC World News listing for the requested date range and
/// builds a <see cref="Show"/> from each tab-separated line that has at least five fields.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>FromDate</c> and <c>ToDate</c>.</param>
/// <returns>The parsed shows, with start times converted from Central European time to UTC.</returns>
List<Show> Grab(GrabParametersBase p)
{
    var pp = (GrabParameters)p;

    // "/" in a custom date format stands for the current culture's date
    // separator; the invariant culture guarantees literal slashes in the URL.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var url = string.Format(URL, pp.FromDate.ToString("dd/MM/yyyy", invariant), pp.ToDate.ToString("dd/MM/yyyy", invariant));
    var wr = WebRequest.Create(url);
    _logger.WriteEntry("Grabbing BBCW", LogType.Info);

    // Dispose the response as well as the reader so the connection is released.
    using (var res = (HttpWebResponse)wr.GetResponse())
    using (var sr = new StreamReader(res.GetResponseStream()))
    {
        var lst = new List<Show>();

        // Resolved once rather than for every line.
        var sourceZone = TimeZoneInfo.FindSystemTimeZoneById("Central European Standard Time");

        sr.ReadLine(); // the first line is skipped
        while (!sr.EndOfStream)
        {
            var line = sr.ReadLine();
            if (string.IsNullOrEmpty(line) || line.Length <= 10)
            {
                continue;
            }

            var tokens = line.Split('\t');
            if (tokens.Length < 5)
            {
                // Fields 0, 1, 2 and 4 are needed; report and skip anything shorter.
                _logger.WriteEntry("invalid line in bbcw grabber : " + line, LogType.Warning);
                continue;
            }

            var show = new Show();
            show.Channel = "BBC World News";

            // tokens[0] supplies the date and tokens[1] the time of day.
            var localStart = DateTime.SpecifyKind(Convert.ToDateTime(tokens[0]) + Convert.ToDateTime(tokens[1]).TimeOfDay, DateTimeKind.Unspecified);
            show.StartTime = TimeZoneInfo.ConvertTime(localStart, sourceZone, TimeZoneInfo.Utc);
            show.Title = tokens[2];
            show.Description = tokens[4];
            lst.Add(show);
        }

        return lst;
    }
}
/// <summary>
/// Fetches the page of one channel id, re-serialises the HTML as XML, hands it
/// to <c>FillShows</c> and then assigns the channel and end time of every show.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>ChannelId</c>.</param>
/// <returns>
/// The shows collected so far. Exceptions are logged rather than propagated,
/// so the list may be empty or partially filled.
/// </returns>
List<Show> Grab(GrabParametersBase p)
{
    var shows = new List<Show>();
    try
    {
        var param = (GrabParameters)p;
        var wr = WebRequest.Create(string.Format(urlFormat, (int)param.ChannelId));
        _logger.WriteEntry(string.Format("Grabbing Channel {0} ...", param.ChannelId), LogType.Info);

        var doc = new HtmlAgilityPack.HtmlDocument();
        // The response is only needed while loading; dispose it to release the connection.
        using (var res = (HttpWebResponse)wr.GetResponse())
        {
            doc.Load(res.GetResponseStream());
        }

        // Save the document as XML text and parse that text into an XDocument.
        doc.OptionOutputAsXml = true;
        XDocument xml;
        using (var writer = new StringWriter())
        {
            doc.Save(writer);
            using (var reader = new StringReader(writer.ToString()))
            {
                xml = XDocument.Load(reader);
            }
        }

        FillShows(xml, shows);

        for (int i = shows.Count - 1; i >= 0; i--)
        {
            var show = shows[i];
            show.Channel = param.ChannelId.ToString();
            if (i == shows.Count - 1)
            {
                // No following show to take the end time from; per the original note this
                // entry is usually 3-4 days ahead, so an approximation is acceptable.
                show.EndTime = show.StartTime.AddHours(12);
            }
            else
            {
                // Each show ends where the next one begins.
                show.EndTime = shows[i + 1].StartTime;
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: log the failure and return what has been gathered.
        _logger.WriteEntry(ex.Message, LogType.Error);
    }

    _logger.WriteEntry(string.Format("Found {0} Shows", shows.Count), LogType.Info);
    return shows;
}
/// <summary>
/// Downloads the CyfrowyPolsat.pl page of one channel and builds a
/// <see cref="Show"/> for every "time" span found in the "main col" div.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>Channel</c>.</param>
/// <param name="logger">Receives the start entry and a warning when the listing container is missing.</param>
/// <returns>
/// The parsed shows with start times converted from Central European time to UTC;
/// empty when the page has no "main col" div.
/// </returns>
public List<Show> Grab(GrabParametersBase p, ILogger logger)
{
    // Resolved once and reused for every conversion below.
    var sourceZone = TimeZoneInfo.FindSystemTimeZoneById("Central European Standard Time");
    currentNow = TimeZoneInfo.ConvertTime(DateTime.Now, TimeZoneInfo.Local, sourceZone);

    var pp = (GrabParameters)p;
    logger.WriteEntry("grabbing CyfrowyPolsat.pl channel : " + pp.Channel, LogType.Info);

    var wr = (HttpWebRequest)WebRequest.Create(string.Format(URL, pp.Channel.ToString().Replace("_", "-")));
    wr.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0";
    wr.Timeout = 5000;

    var doc = new HtmlAgilityPack.HtmlDocument();
    // Dispose the response once the document is loaded so the connection is released.
    using (var res = (HttpWebResponse)wr.GetResponse())
    {
        doc.Load(res.GetResponseStream());
    }

    var lst = new List<Show>();

    // True when the node's class attribute is exactly the given value.
    Func<HtmlAgilityPack.HtmlNode, string, bool> hasClass =
        (node, cls) => node.Attributes.Contains("class") && node.Attributes["class"].Value == cls;

    var div = doc.DocumentNode.Descendants("div").FirstOrDefault(x => hasClass(x, "main col"));
    if (div == null)
    {
        // Without the container there is nothing to parse; avoid a NullReferenceException.
        logger.WriteEntry("CyfrowyPolsat.pl listing container not found for channel : " + pp.Channel, LogType.Warning);
        return lst;
    }

    var times = div.Descendants("span").Where(x => hasClass(x, "time")).ToList();
    var names = div.Descendants("a").Where(x => hasClass(x, "name")).ToList();
    var metas = div.Descendants("div").Where(x => hasClass(x, "meta")).ToList();

    for (int i = 0; i < times.Count; i++)
    {
        var s = new Show();
        s.Channel = pp.Channel.ToString();

        // Only the time of day comes from the page; the date comes from currentNow.
        var timeOfDay = Convert.ToDateTime(times[i].InnerText).TimeOfDay;
        var localStart = DateTime.SpecifyKind(currentNow.Date + timeOfDay, DateTimeKind.Unspecified);
        s.StartTime = TimeZoneInfo.ConvertTime(localStart, sourceZone, TimeZoneInfo.Utc);

        // A start that is not later than the previous show's start means the listing
        // crossed midnight: move this show and the reference date one day forward.
        if (lst.Count > 0 && s.StartTime <= lst[lst.Count - 1].StartTime)
        {
            s.StartTime = s.StartTime.AddDays(1);
            currentNow = currentNow.AddDays(1);
        }

        s.Title = HttpUtility.HtmlDecode(names[i].InnerText);
        // Two consecutive "meta" divs are consumed per show.
        s.Description = HttpUtility.HtmlDecode(metas[i * 2].InnerText + "\n" + metas[(i * 2) + 1].InnerText).Trim();
        lst.Add(s);
    }

    return lst;
}
/// <summary>
/// Fetches the schedule page of one channel and turns every list item inside
/// the div of each <c>Day</c> value into a <see cref="Show"/>.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>Channel</c>.</param>
/// <param name="logger">Receives an informational entry naming the channel being grabbed.</param>
/// <returns>The parsed shows, with start times converted from Central European time to UTC.</returns>
List<Show> Grab(GrabParametersBase p, ILogger logger)
{
    var pp = (GrabParameters)p;

    // Build the URL form of the channel name: "_" turns into "-", while the
    // AANNDD / PPLLUUSS placeholders turn into the URL-encoded "&" and "+".
    var channelSlug = pp.Channel.ToString().Replace("_", "-").Replace("AANNDD", "%26").Replace("PPLLUUSS", "%2B");
    var url = string.Format(URL, channelSlug);
    var wr = WebRequest.Create(url);

    var doc = new HtmlAgilityPack.HtmlDocument();
    // The response is only needed while loading; dispose it to release the connection.
    using (var res = (HttpWebResponse)wr.GetResponse())
    {
        logger.WriteEntry(string.Format("Grabbing Channel {0}", pp.Channel), LogType.Info);
        doc.Load(res.GetResponseStream());
    }

    var shows = new List<Show>();
    // Resolved once rather than for every list item.
    var sourceZone = TimeZoneInfo.FindSystemTimeZoneById("Central European Standard Time");

    foreach (Day d in Enum.GetValues(typeof(Day)))
    {
        // A day's listing is looked up by a div id equal to the enum member's name.
        var dayId = d.ToString();
        var div = doc.DocumentNode.Descendants("div").FirstOrDefault(x => x.Attributes.Contains("id") && x.Attributes["id"].Value == dayId);
        if (div == null)
        {
            continue;
        }

        var date = NextDateOfDayOfWeek((DayOfWeek)d);
        foreach (var ul in div.Descendants("ul"))
        {
            foreach (var li in ul.Descendants("li"))
            {
                // Time text is taken from the first <p>, the title from the first <a>.
                var par = li.Descendants("p").First();
                var a = li.Descendants("a").First();

                var show = new Show();
                show.Channel = pp.Channel.ToString();
                show.Title = a.InnerText.Trim();

                var localStart = DateTime.SpecifyKind(date + Convert.ToDateTime(par.InnerText.Trim()).TimeOfDay, DateTimeKind.Unspecified);
                show.StartTime = TimeZoneInfo.ConvertTime(localStart, sourceZone, TimeZoneInfo.Utc);
                shows.Add(show);
            }
        }
    }

    return shows;
}
/// <summary>
/// Fetches the CyfrowyPolsat.pl page of one channel and creates a
/// <see cref="Show"/> for each "time" span inside the "main col" div.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>Channel</c>.</param>
/// <param name="logger">Receives the start entry and a warning when the listing container is missing.</param>
/// <returns>
/// The parsed shows with start times converted from Central European time to UTC;
/// empty when the page has no "main col" div.
/// </returns>
public List<Show> Grab(GrabParametersBase p, ILogger logger)
{
    // Looked up a single time and shared by all conversions in this method.
    var sourceZone = TimeZoneInfo.FindSystemTimeZoneById("Central European Standard Time");
    currentNow = TimeZoneInfo.ConvertTime(DateTime.Now, TimeZoneInfo.Local, sourceZone);

    var pp = (GrabParameters)p;
    logger.WriteEntry("grabbing CyfrowyPolsat.pl channel : " + pp.Channel, LogType.Info);

    var wr = (HttpWebRequest)WebRequest.Create(string.Format(URL, pp.Channel.ToString().Replace("_", "-")));
    wr.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0";
    wr.Timeout = 5000;

    var doc = new HtmlAgilityPack.HtmlDocument();
    // The response is only needed while loading; dispose it to release the connection.
    using (var res = (HttpWebResponse)wr.GetResponse())
    {
        doc.Load(res.GetResponseStream());
    }

    var lst = new List<Show>();

    // Matches nodes whose class attribute equals the given value exactly.
    Func<HtmlAgilityPack.HtmlNode, string, bool> hasClass =
        (node, cls) => node.Attributes.Contains("class") && node.Attributes["class"].Value == cls;

    var div = doc.DocumentNode.Descendants("div").FirstOrDefault(x => hasClass(x, "main col"));
    if (div == null)
    {
        // FirstOrDefault yields null when the container is absent; report it instead of crashing.
        logger.WriteEntry("CyfrowyPolsat.pl listing container not found for channel : " + pp.Channel, LogType.Warning);
        return lst;
    }

    var times = div.Descendants("span").Where(x => hasClass(x, "time")).ToList();
    var names = div.Descendants("a").Where(x => hasClass(x, "name")).ToList();
    var metas = div.Descendants("div").Where(x => hasClass(x, "meta")).ToList();

    for (int i = 0; i < times.Count; i++)
    {
        var s = new Show();
        s.Channel = pp.Channel.ToString();

        // The page supplies the time of day; currentNow supplies the date.
        var timeOfDay = Convert.ToDateTime(times[i].InnerText).TimeOfDay;
        var localStart = DateTime.SpecifyKind(currentNow.Date + timeOfDay, DateTimeKind.Unspecified);
        s.StartTime = TimeZoneInfo.ConvertTime(localStart, sourceZone, TimeZoneInfo.Utc);

        // If this start is not after the previous show's start, the listing has
        // wrapped past midnight: shift this show and the reference date by one day.
        if (lst.Count > 0 && s.StartTime <= lst[lst.Count - 1].StartTime)
        {
            s.StartTime = s.StartTime.AddDays(1);
            currentNow = currentNow.AddDays(1);
        }

        s.Title = HttpUtility.HtmlDecode(names[i].InnerText);
        // Each show uses two consecutive "meta" divs.
        s.Description = HttpUtility.HtmlDecode(metas[i * 2].InnerText + "\n" + metas[(i * 2) + 1].InnerText).Trim();
        lst.Add(s);
    }

    return lst;
}
/// <summary>
/// Downloads the rashut2 schedule page for one channel and date and converts
/// the rows of its listing table into <see cref="Show"/> objects.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>Channel</c> and <c>Date</c>.</param>
/// <returns>
/// The parsed shows with UTC start times, and end times taken from the following
/// show; empty when the listing table is not found.
/// </returns>
List<Show> Grab(GrabParametersBase p)
{
    var pp = (GrabParameters)p;
    var url = GetUrl(pp);
    _logger.WriteEntry(string.Format("Grabbing rashut2 {0} for date {1}", pp.Channel, pp.Date.ToString("d")), LogType.Info);
    var wr = WebRequest.Create(string.Format(url, pp.Date.ToString(DateFormat)));

    var doc = new HtmlAgilityPack.HtmlDocument();
    // Dispose the response once the document is loaded so the connection is released.
    using (var res = (HttpWebResponse)wr.GetResponse())
    {
        doc.Load(res.GetResponseStream());
    }

    // Remove comment nodes (except those starting with "DOCTYPE") so they do
    // not occupy positions in the ChildNodes collections indexed below.
    var nodes = doc.DocumentNode.SelectNodes("//comment()");
    if (nodes != null)
    {
        foreach (HtmlAgilityPack.HtmlNode comment in nodes)
        {
            if (!comment.InnerText.StartsWith("DOCTYPE"))
            {
                comment.ParentNode.RemoveChild(comment);
            }
        }
    }

    // Channel10 uses a different table id and has its cells shifted by one.
    int cellDelta = 0;
    var tbl = doc.GetElementbyId("table5");
    if (pp.Channel == Channel.Channel10)
    {
        tbl = doc.GetElementbyId("table1");
        cellDelta = 1;
    }

    var shows = new List<Show>();
    if (tbl == null)
    {
        // GetElementbyId returns null when the id is absent; report it instead of crashing.
        _logger.WriteEntry(string.Format("rashut2 listing table not found for {0}", pp.Channel), LogType.Error);
        return shows;
    }

    const int childStart = 9;
    // Every second child starting at childStart is read as a listing row.
    for (int i = childStart; i < tbl.ChildNodes.Count; i += 2)
    {
        var row = tbl.ChildNodes[i];
        var show = new Show();
        var d = Convert.ToDateTime(Tools.CleanupText(row.ChildNodes[1].InnerText));
        show.StartTime = pp.Date.AddHours(d.Hour).AddMinutes(d.Minute);
        if (d.Hour < 6) // data is shown from 6 (AM) till next day 6 (AM) so after midnight we need to increase the date
        {
            show.StartTime = show.StartTime.AddDays(1);
        }

        try
        {
            show.StartTime = TimeZoneInfo.ConvertTimeToUtc(show.StartTime, TimeZoneInfo.Local);
        }
        catch (Exception ex) // error on the verge of daylight saving start
        {
            // The row is skipped when its local time cannot be converted.
            _logger.WriteEntry(ex.Message, LogType.Error);
            continue;
        }

        show.Title = Tools.CleanupText(row.ChildNodes[3 + cellDelta].InnerText);
        var episodeName = Tools.CleanupText(row.ChildNodes[5 + cellDelta].InnerText);
        var episodeNumber = Tools.CleanupText(row.ChildNodes[7 + cellDelta].InnerText);
        var genre = Tools.CleanupText(row.ChildNodes[9 + cellDelta].InnerText);

        // The description is assembled from whichever optional fields are present.
        show.Description = string.Empty;
        if (!string.IsNullOrEmpty(episodeName))
        {
            show.Description += string.Format("שם הפרק : {0}\n", episodeName);
        }

        if (!string.IsNullOrEmpty(episodeNumber))
        {
            show.Description += string.Format("מספר הפרק : {0}\n", episodeNumber);
            int num;
            if (int.TryParse(episodeNumber, out num))
            {
                show.Episode = num;
            }
        }

        if (!string.IsNullOrEmpty(genre))
        {
            show.Description += string.Format("סוג תכנית : {0}\n", genre);
        }

        shows.Add(show);
    }

    for (int i = shows.Count - 1; i >= 0; i--)
    {
        var show = shows[i];
        if (show.Description != null)
        {
            show.Description = show.Description.Trim();
        }

        show.Channel = pp.Channel.ToString();
        if (i == shows.Count - 1)
        {
            // The last show has no successor, so it is given a one-hour length.
            show.EndTime = show.StartTime.AddHours(1);
        }
        else
        {
            // A show ends when the next one starts.
            show.EndTime = shows[i + 1].StartTime;
        }
    }

    return shows;
}
/// <summary>
/// Fetches the rashut2 schedule page for one channel and date and builds a
/// <see cref="Show"/> from each row of its listing table.
/// </summary>
/// <param name="p">Grab parameters; cast to <c>GrabParameters</c> to read <c>Channel</c> and <c>Date</c>.</param>
/// <returns>
/// The parsed shows with UTC start times, and end times taken from the following
/// show; empty when the listing table is not found.
/// </returns>
List<Show> Grab(GrabParametersBase p)
{
    var pp = (GrabParameters)p;
    var url = GetUrl(pp);
    _logger.WriteEntry(string.Format("Grabbing rashut2 {0} for date {1}", pp.Channel, pp.Date.ToString("d")), LogType.Info);
    var wr = WebRequest.Create(string.Format(url, pp.Date.ToString(DateFormat)));

    var doc = new HtmlAgilityPack.HtmlDocument();
    // The response is only needed while loading; dispose it to release the connection.
    using (var res = (HttpWebResponse)wr.GetResponse())
    {
        doc.Load(res.GetResponseStream());
    }

    // Strip comment nodes (other than ones starting with "DOCTYPE") so that
    // they do not take up slots in the ChildNodes collections indexed below.
    var nodes = doc.DocumentNode.SelectNodes("//comment()");
    if (nodes != null)
    {
        foreach (HtmlAgilityPack.HtmlNode comment in nodes)
        {
            if (!comment.InnerText.StartsWith("DOCTYPE"))
            {
                comment.ParentNode.RemoveChild(comment);
            }
        }
    }

    // For Channel10 the table has another id and its cells are offset by one.
    int cellDelta = 0;
    var tbl = doc.GetElementbyId("table5");
    if (pp.Channel == Channel.Channel10)
    {
        tbl = doc.GetElementbyId("table1");
        cellDelta = 1;
    }

    var shows = new List<Show>();
    if (tbl == null)
    {
        // A missing table would otherwise surface as a NullReferenceException below.
        _logger.WriteEntry(string.Format("rashut2 listing table not found for {0}", pp.Channel), LogType.Error);
        return shows;
    }

    const int childStart = 9;
    // Listing rows are read from every second child, beginning at childStart.
    for (int i = childStart; i < tbl.ChildNodes.Count; i += 2)
    {
        var row = tbl.ChildNodes[i];
        var show = new Show();
        var d = Convert.ToDateTime(Tools.CleanupText(row.ChildNodes[1].InnerText));
        show.StartTime = pp.Date.AddHours(d.Hour).AddMinutes(d.Minute);
        if (d.Hour < 6) // data is shown from 6 (AM) till next day 6 (AM) so after midnight we need to increase the date
        {
            show.StartTime = show.StartTime.AddDays(1);
        }

        try
        {
            show.StartTime = TimeZoneInfo.ConvertTimeToUtc(show.StartTime, TimeZoneInfo.Local);
        }
        catch (Exception ex) // error on the verge of daylight saving start
        {
            // A row whose local time cannot be converted is logged and dropped.
            _logger.WriteEntry(ex.Message, LogType.Error);
            continue;
        }

        show.Title = Tools.CleanupText(row.ChildNodes[3 + cellDelta].InnerText);
        var episodeName = Tools.CleanupText(row.ChildNodes[5 + cellDelta].InnerText);
        var episodeNumber = Tools.CleanupText(row.ChildNodes[7 + cellDelta].InnerText);
        var genre = Tools.CleanupText(row.ChildNodes[9 + cellDelta].InnerText);

        // Optional fields are appended to the description one line each.
        show.Description = string.Empty;
        if (!string.IsNullOrEmpty(episodeName))
        {
            show.Description += string.Format("שם הפרק : {0}\n", episodeName);
        }

        if (!string.IsNullOrEmpty(episodeNumber))
        {
            show.Description += string.Format("מספר הפרק : {0}\n", episodeNumber);
            int num;
            if (int.TryParse(episodeNumber, out num))
            {
                show.Episode = num;
            }
        }

        if (!string.IsNullOrEmpty(genre))
        {
            show.Description += string.Format("סוג תכנית : {0}\n", genre);
        }

        shows.Add(show);
    }

    for (int i = shows.Count - 1; i >= 0; i--)
    {
        var show = shows[i];
        if (show.Description != null)
        {
            show.Description = show.Description.Trim();
        }

        show.Channel = pp.Channel.ToString();
        if (i == shows.Count - 1)
        {
            // Nothing follows the last show, so it is assigned a one-hour length.
            show.EndTime = show.StartTime.AddHours(1);
        }
        else
        {
            // Each show ends where the next one begins.
            show.EndTime = shows[i + 1].StartTime;
        }
    }

    return shows;
}