/// <summary>
/// Downloads the page at <paramref name="programUri"/> and extracts the quoted value
/// that follows <c>programSerieSlug:</c> in its text.
/// </summary>
/// <param name="programUri">Absolute URI of the program page to download.</param>
/// <returns>The captured slug text, returned as a <see cref="Slug"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="programUri"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="programUri"/> is not an absolute URI.</exception>
/// <exception cref="ScraperException">The page text contains no <c>programSerieSlug</c> entry.</exception>
/// <remarks>Exceptions raised by the underlying request are not caught here and propagate to the caller.</remarks>
public Slug Scrape(Uri programUri)
{
    if (programUri == null)
    {
        throw new ArgumentNullException("programUri");
    }

    if (!programUri.IsAbsoluteUri)
    {
        throw new ArgumentException("Only absolute URIs are supported.", "programUri");
    }

    var request = _webRequestWrapper.CreateHttp(programUri);

    string html;
    // Dispose the response as well as the reader so the connection is released even
    // when obtaining or wrapping the response stream fails.
    using (var response = request.GetResponse())
    using (var streamReader = new StreamReader(response.GetResponseStream()))
    {
        html = streamReader.ReadToEnd();
    }

    // [^"]* stops at the first closing quote. The previous greedy ".*" ran on to the last
    // quote on the line and swallowed any further quoted values into the slug.
    var match = Regex.Match(html, "programSerieSlug:\\s*\"(?<slug>[^\"]*)\"");
    if (!match.Success)
    {
        throw new ScraperException("Unable to find programSerieSlug.");
    }

    return match.Groups["slug"].Value;
}
/// <summary>
/// Requests the episode listing for <paramref name="slug"/> and collects the link of every
/// list item that has one.
/// </summary>
/// <param name="slug">Slug that is formatted into the <c>slug</c> query parameter of the listing URI.</param>
/// <returns>
/// One URI per <c>li</c> element whose first direct <c>a</c> child has an <c>href</c> attribute;
/// each <c>href</c> value is appended to <c>http://www.dr.dk</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="slug"/> is <c>null</c>.</exception>
/// <exception cref="ScraperException">
/// The document contains no <c>li</c> elements, or none of them produced a URI.
/// </exception>
public IEnumerable<Uri> Scrape(Slug slug)
{
    if (slug == null)
    {
        throw new ArgumentNullException("slug");
    }

    // NOTE(review): only page 1 with 100 episodes per page is requested; presumably a series
    // with more episodes would be truncated - confirm against the endpoint's behaviour.
    var episodesUri = new Uri(string.Format("http://www.dr.dk/TV/play/AllEpisodes?slug={0}&episodesperpage=100&pagenumber=1", slug));
    var request = _webRequestWrapper.CreateHttp(episodesUri);

    var htmlDocument = new HtmlDocument();
    // HtmlDocument.Load is handed the stream but nothing here closed it afterwards, so the
    // response and its stream are disposed explicitly once parsing is done.
    using (var response = request.GetResponse())
    using (var responseStream = response.GetResponseStream())
    {
        htmlDocument.Load(responseStream);
    }

    var liElements = htmlDocument.DocumentNode.SelectNodes("//li");
    if (liElements == null)
    {
        throw new ScraperException("No li elements found.");
    }

    var uris = new List<Uri>();
    foreach (var liElement in liElements)
    {
        // Only the first direct <a> child is considered; list items without one are skipped.
        var anchor = liElement.ChildNodes.FirstOrDefault(child => child.Name == "a");
        if (anchor == null)
        {
            continue;
        }

        var href = anchor.Attributes["href"];
        if (href == null)
        {
            continue;
        }

        uris.Add(new Uri(string.Format("http://www.dr.dk{0}", href.Value)));
    }

    if (uris.Count == 0)
    {
        throw new ScraperException("No program URIs found.");
    }

    return uris;
}
/// <summary>
/// Downloads the content at <paramref name="resourceUri"/> and deserializes it as JSON
/// into a <see cref="Resource"/>.
/// </summary>
/// <param name="resourceUri">Absolute URI of the resource document to download.</param>
/// <returns>The result of deserializing the downloaded text via the JSON convert wrapper.</returns>
/// <exception cref="ArgumentNullException"><paramref name="resourceUri"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="resourceUri"/> is not an absolute URI.</exception>
/// <remarks>Exceptions raised by the request or by deserialization are not caught here.</remarks>
public Resource Scrape(Uri resourceUri)
{
    if (resourceUri == null)
    {
        throw new ArgumentNullException("resourceUri");
    }

    if (!resourceUri.IsAbsoluteUri)
    {
        throw new ArgumentException("Only absolute URIs are supported.", "resourceUri");
    }

    var request = _webRequestWrapper.CreateHttp(resourceUri);

    string json;
    // Dispose the response together with the reader so the connection is released even
    // when obtaining or wrapping the response stream fails.
    using (var response = request.GetResponse())
    using (var streamReader = new StreamReader(response.GetResponseStream()))
    {
        json = streamReader.ReadToEnd();
    }

    return _jsonConvertWrapper.DeserializeObject<Resource>(json);
}
/// <summary>
/// Downloads the page at <paramref name="programUri"/> and extracts the quoted value
/// that follows <c>resource:</c> in its text as a URI.
/// </summary>
/// <param name="programUri">Absolute URI of the program page to download.</param>
/// <returns>A <see cref="Uri"/> built from the captured resource value.</returns>
/// <exception cref="ArgumentNullException"><paramref name="programUri"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="programUri"/> is not an absolute URI.</exception>
/// <exception cref="ScraperException">
/// The page text contains no <c>resource</c> entry, or a <see cref="WebException"/> was thrown
/// while requesting or reading the page (it is attached as the inner exception).
/// </exception>
/// <remarks>
/// The captured value is passed to the <see cref="Uri"/> constructor unchecked, so a value that
/// is not a valid URI surfaces as the constructor's own exception rather than a
/// <see cref="ScraperException"/>.
/// </remarks>
public Uri Scrape(Uri programUri)
{
    if (programUri == null)
    {
        throw new ArgumentNullException("programUri");
    }

    if (!programUri.IsAbsoluteUri)
    {
        throw new ArgumentException("Only absolute URIs are supported.", "programUri");
    }

    try
    {
        var request = _webRequestWrapper.CreateHttp(programUri);

        string html;
        // Dispose the response as well as the reader so the connection is released even
        // when obtaining or wrapping the response stream fails.
        using (var response = request.GetResponse())
        using (var streamReader = new StreamReader(response.GetResponseStream()))
        {
            html = streamReader.ReadToEnd();
        }

        // [^"]* stops at the first closing quote. The previous greedy ".*" ran on to the last
        // quote on the line, producing a string that is not the resource address.
        var match = Regex.Match(html, "resource:\\s*\"(?<uri>[^\"]*)\"");
        if (!match.Success)
        {
            throw new ScraperException("Unable to find resource.");
        }

        return new Uri(match.Groups["uri"].Value);
    }
    catch (WebException exception)
    {
        // Transport failures are reported the same way as a page without a resource entry.
        throw new ScraperException("Unable to find resource.", exception);
    }
}