Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the page at <paramref name="programUri"/> and extracts the quoted value of its
        /// <c>programSerieSlug: "..."</c> entry.
        /// </summary>
        /// <param name="programUri">Absolute URI of the page to download.</param>
        /// <returns>
        /// The text between the quotes that follow <c>programSerieSlug:</c>. The matched string is
        /// returned directly, so <c>Slug</c> is presumably implicitly convertible from
        /// <see cref="string"/> (NOTE(review): confirm against the Slug type).
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="programUri"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="programUri"/> is not an absolute URI.</exception>
        /// <exception cref="ScraperException">No <c>programSerieSlug</c> entry was found in the page.</exception>
        public Slug Scrape(Uri programUri)
        {
            if (programUri == null)
            {
                throw new ArgumentNullException("programUri");
            }
            if (!programUri.IsAbsoluteUri)
            {
                throw new ArgumentException("Only absolute URIs are supported.", "programUri");
            }

            var request  = _webRequestWrapper.CreateHttp(programUri);
            var response = request.GetResponse();

            string html;

            // Disposing the reader also closes the underlying response stream.
            using (var streamReader = new StreamReader(response.GetResponseStream()))
            {
                html = streamReader.ReadToEnd();
            }

            // Capture up to the FIRST closing quote. A greedy ".*" would run to the last quote on
            // the line and swallow any quoted values that follow the slug. "\n" stays excluded so
            // the capture is still confined to a single line, as it was with ".".
            var match = Regex.Match(html, "programSerieSlug:\\s*\"(?<slug>[^\"\\n]*)\"");

            if (!match.Success)
            {
                throw new ScraperException("Unable to find programSerieSlug.");
            }

            return match.Groups["slug"].Value;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Downloads the "AllEpisodes" listing for <paramref name="slug"/> and collects the link
        /// target of the first <c>a</c> child of every <c>li</c> element in it.
        /// </summary>
        /// <param name="slug">Value formatted into the <c>slug</c> query parameter of the listing URL.</param>
        /// <returns>
        /// One URI per <c>li</c> element that has a direct <c>a</c> child with an <c>href</c>
        /// attribute, each built by appending that <c>href</c> to <c>http://www.dr.dk</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="slug"/> is <c>null</c>.</exception>
        /// <exception cref="ScraperException">
        /// The document contains no <c>li</c> elements, or none of them yields a link.
        /// </exception>
        public IEnumerable<Uri> Scrape(Slug slug)
        {
            if (slug == null)
            {
                throw new ArgumentNullException("slug");
            }

            var episodesUri = new Uri(string.Format("http://www.dr.dk/TV/play/AllEpisodes?slug={0}&episodesperpage=100&pagenumber=1", slug));
            var request     = _webRequestWrapper.CreateHttp(episodesUri);
            var response    = request.GetResponse();

            var htmlDocument = new HtmlDocument();

            // Close the response stream once the document has been loaded; previously it was
            // handed to Load and never disposed.
            using (var responseStream = response.GetResponseStream())
            {
                htmlDocument.Load(responseStream);
            }

            var liElements = htmlDocument.DocumentNode.SelectNodes("//li");

            // SelectNodes is checked for null here rather than for an empty collection.
            if (liElements == null)
            {
                throw new ScraperException("No li elements found.");
            }

            var uris = new List<Uri>();

            foreach (var liElement in liElements)
            {
                // Only direct children are considered, and only the first anchor among them.
                var aElement = liElement.ChildNodes.FirstOrDefault(node => node.Name == "a");
                if (aElement == null)
                {
                    continue;
                }

                var href = aElement.Attributes["href"];
                if (href == null)
                {
                    continue;
                }

                uris.Add(new Uri(string.Format("http://www.dr.dk{0}", href.Value)));
            }

            if (uris.Count == 0)
            {
                throw new ScraperException("No program URIs found.");
            }

            return uris;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Fetches the document at <paramref name="resourceUri"/> and hands its body text to
        /// <c>_jsonConvertWrapper.DeserializeObject</c> to produce a <c>Resource</c>.
        /// </summary>
        /// <param name="resourceUri">Absolute URI of the document to fetch.</param>
        /// <returns>The object produced by deserializing the response body.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="resourceUri"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="resourceUri"/> is not an absolute URI.</exception>
        public Resource Scrape(Uri resourceUri)
        {
            if (resourceUri == null)
            {
                throw new ArgumentNullException("resourceUri");
            }

            if (resourceUri.IsAbsoluteUri == false)
            {
                throw new ArgumentException("Only absolute URIs are supported.", "resourceUri");
            }

            var webResponse = _webRequestWrapper.CreateHttp(resourceUri).GetResponse();

            // Read the whole body first; the reader (and its stream) is released before deserializing.
            string payload;
            using (var bodyReader = new StreamReader(webResponse.GetResponseStream()))
            {
                payload = bodyReader.ReadToEnd();
            }

            var resource = _jsonConvertWrapper.DeserializeObject<Resource>(payload);

            return resource;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Downloads the page at <paramref name="programUri"/> and extracts the quoted value of its
        /// <c>resource: "..."</c> entry as a URI.
        /// </summary>
        /// <param name="programUri">Absolute URI of the page to download.</param>
        /// <returns>A <see cref="Uri"/> built from the text between the quotes that follow <c>resource:</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="programUri"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="programUri"/> is not an absolute URI.</exception>
        /// <exception cref="ScraperException">
        /// No <c>resource</c> entry was found in the page, or a <see cref="WebException"/> occurred
        /// while fetching it (kept as the inner exception).
        /// </exception>
        /// <exception cref="UriFormatException">The extracted value is not a valid absolute URI.</exception>
        public Uri Scrape(Uri programUri)
        {
            if (programUri == null)
            {
                throw new ArgumentNullException("programUri");
            }
            if (!programUri.IsAbsoluteUri)
            {
                throw new ArgumentException("Only absolute URIs are supported.", "programUri");
            }

            try
            {
                var request  = _webRequestWrapper.CreateHttp(programUri);
                var response = request.GetResponse();

                string html;
                // Disposing the reader also closes the underlying response stream.
                using (var streamReader = new StreamReader(response.GetResponseStream()))
                {
                    html = streamReader.ReadToEnd();
                }

                // Capture up to the FIRST closing quote. A greedy ".*" would run to the last quote
                // on the line and fold any following quoted values into the URI string. "\n" stays
                // excluded so the capture is still confined to a single line, as it was with ".".
                var match = Regex.Match(html, "resource:\\s*\"(?<uri>[^\"\\n]*)\"");
                if (!match.Success)
                {
                    throw new ScraperException("Unable to find resource.");
                }

                return new Uri(match.Groups["uri"].Value);
            }
            catch (WebException exception)
            {
                // Network failures are reported with the same message as a missing entry.
                throw new ScraperException("Unable to find resource.", exception);
            }
        }