示例#1
0
        /// <summary>
        /// Fetches the detail page for <paramref name="movie"/> and copies the
        /// original title and IMDb id found in the page's details table onto it.
        /// </summary>
        /// <remarks>
        /// A property is only assigned when its row is present in the table. If the
        /// details table cannot be located, or a row has no cells, nothing is
        /// assigned for it and no exception is raised.
        /// </remarks>
        /// <param name="movie">
        /// Movie to enrich; its <c>CrawledMovieUrl</c> is appended to the site root
        /// to build the address that is crawled.
        /// </param>
        private void CrawlDetails(CrawledMovie movie)
        {
            string html =
                CrawlerUtility.Crawl("http://www.paradisbio.dk/" + movie.CrawledMovieUrl);

            const string detailsSelector =
                @"table#top_tabel td.forside_bg table table";
            var htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(html);

            // The last match of the selector is treated as the details table.
            // LastOrDefault avoids an InvalidOperationException when the page
            // layout does not contain it.
            HtmlNode detailsNode =
                htmlDocument.DocumentNode.QuerySelectorAll(detailsSelector).LastOrDefault();
            if (detailsNode == null)
            {
                return;
            }

            foreach (HtmlNode rowNode in detailsNode.QuerySelectorAll("tr"))
            {
                // Materialize once so the query is not re-evaluated for every
                // access to the first/last cell.
                var columnNodes = rowNode.QuerySelectorAll("td").ToList();
                if (columnNodes.Count == 0)
                {
                    // Row without <td> cells (e.g. a header row): nothing to read.
                    continue;
                }

                // First cell holds the label, last cell holds the value.
                HtmlNode keyNode = columnNodes[0];
                HtmlNode valueNode = columnNodes[columnNodes.Count - 1];
                string key = keyNode.InnerText;

                // The labels are fixed page text, so compare ordinally rather
                // than with the current culture.
                if (key.StartsWith("Originaltitel", System.StringComparison.Ordinal))
                {
                    movie.OriginalTitle = valueNode.InnerText;
                }
                else if (key.StartsWith("Læs mere", System.StringComparison.Ordinal))
                {
                    foreach (HtmlNode link in valueNode.QuerySelectorAll("a"))
                    {
                        if (!link.InnerText.Contains("IMDb"))
                        {
                            continue;
                        }

                        // An anchor without href would otherwise cause a
                        // NullReferenceException on .Value.
                        var hrefAttribute = link.Attributes["href"];
                        if (hrefAttribute == null)
                        {
                            continue;
                        }

                        // If several links mention IMDb, the last one wins.
                        movie.ImdbId = GoogleImdbCrawler.ExtractImdbId(hrefAttribute.Value);
                    }
                }
            }
        }
示例#2
0
 /// <summary>
 /// Sets the IMDb id of <paramref name="movie"/> from the result of querying
 /// <c>_googleImdbCrawler</c> with the movie's original title, falling back
 /// to its regular title when the original title is null.
 /// </summary>
 /// <param name="movie">Movie whose <c>ImdbId</c> is assigned.</param>
 private void CrawlImdbId(CrawledMovie movie)
 {
     // Prefer the original title; use the regular title only when it is null.
     var searchTitle = movie.OriginalTitle ?? movie.Title;
     var imdbId = _googleImdbCrawler.QueryImdbId(searchTitle);
     movie.ImdbId = imdbId;
 }