/// <summary>
/// Creates the crawler for www.imdb.com (port 80) and wires its events so
/// that every received document is scanned for the poster image inside
/// <c>&lt;table id="principal"&gt;</c>; the image source is reported through
/// <c>AddEntry</c>.
/// </summary>
public BasicIMDBPosterSearch()
{
    this.Crawler = new Library.BasicWebCrawler("www.imdb.com", 80)
    {
        CoralEnabled = true
    };

    // Template instance: only fixes the shape of the anonymous type returned by ParseImage.
    var DefaultImage = new { Source = "", Alt = "", Title = "" };

    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

            return new { Source, Alt, Title };
        }
    );

    // Last value reported by LocationReceived for the current response
    // (presumably a redirect target - confirm against BasicWebCrawler).
    string location = null;

    // A new request starts: forget the location of the previous response.
    this.Crawler.AllHeadersSent += () => { location = null; };

    this.Crawler.LocationReceived += value => { location = value; };

    this.Crawler.DataReceived +=
        document =>
        {
            // A location was announced: crawl it instead of parsing this document.
            if (!string.IsNullOrEmpty(location))
            {
                var u = new Uri(location);

                this.Crawler.Crawl(u.PathAndQuery);
                return;
            }

            var poster_tag = "<table id=\"principal\">";
            var poster_i = document.IndexOf(poster_tag);

            // No poster table on this page: nothing to report. Without this
            // check the -1 would be used as a start index below and throw.
            if (poster_i < 0)
            {
                return;
            }

            var poster_close_tag = "</table>";
            var poster_close_i = document.IndexOf(poster_close_tag, poster_i);

            // Truncated document: the table never closes, so it cannot be sliced out.
            if (poster_close_i < 0)
            {
                return;
            }

            var poster_table = document.Substring(
                poster_i,
                poster_close_i + poster_close_tag.Length - poster_i
            );

            var poster = ParseImage(
                BasicElementParser.GetContent(poster_table, "td")
            );

            if (this.AddEntry != null)
            {
                this.AddEntry(poster.Source);
            }
        };
}
/// <summary>
/// Searches www.movieposterdb.com for movies matching <paramref name="title"/>
/// and invokes <paramref name="handler"/> once per result row with the link,
/// title text and year text parsed from that row.
/// </summary>
/// <param name="title">Movie title to search for; it is URL-encoded and appended to the search query.</param>
/// <param name="handler">Callback receiving one <c>AliasEntry</c> per parsed result row.</param>
public static void Search(string title, Action<AliasEntry> handler)
{
    var t = new Uri("http://www.movieposterdb.com/browse/search?search_type=movies&title=");
    var c = new BasicWebCrawler(t.Host, 80);

    // Template instances: they only fix the shape of the anonymous types returned by the parsers below.
    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultSpan = new { Text = "", Title = "" };

    var ParseSpan = DefaultSpan.ToAnonymousConstructor(
        (string element) =>
        {
            var Text = "";
            var Title = "";

            element.
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse("span");

            return new { Text, Title };
        }
    );

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse("a");

            return new { Link, Title, Text };
        }
    );

    c.DataReceived +=
        document =>
        {
            var trigger = "Movies</h3>";
            var trigger_i = document.IndexOf(trigger);

            // The results heading is missing (for example no matches were found):
            // there is nothing to parse, and Substring(-1) would throw.
            if (trigger_i < 0)
            {
                return;
            }

            var data = BasicElementParser.GetContent(document.Substring(trigger_i), "table");

            BasicElementParser.Parse(data, "tr",
                (element, index) =>
                {
                    /*
                     * Sample result row:
                     *
                     * <td valign="middle" style="font-size: 0pt; border-bottom: 1px solid #D2D2D2; height: 54px; width: 44px;">
                     *   <img src="http://www.movieposterdb.com/posters/08_09/2008/1179855/m_1179855_4fb9999f.jpg" style="margin-right: 8px; padding: 2px; border: 1px solid #D2D2D2; float: left;" />
                     * </td>
                     * <td valign="middle" style="border-bottom: 1px solid #D2D2D2; width: 60%;">
                     *   <b><a class="bbg" href="http://www.movieposterdb.com/movie/1179855/Go-Fast.html">Go Fast</a><br /><span style="color: #8C8C8C;">2008</span></b>
                     * </td>
                     * <td style="border-bottom: 1px solid #D2D2D2; font-size: 8pt; color: #808080;">
                     *
                     * </td>
                     */
                    BasicElementParser.Parse(element, "td",
                        (tdelement, tdindex) =>
                        {
                            // Only the second cell carries the title link and the year span.
                            if (tdindex != 1)
                            {
                                return;
                            }

                            // <b><a class="bbg" href="http://www.movieposterdb.com/movie/1179855/Go-Fast.html">Go Fast</a><br /><span style="color: #8C8C8C;">2008</span></b>
                            var _title = ParseLink(tdelement);
                            var _year = ParseSpan(tdelement);

                            handler(
                                new AliasEntry
                                {
                                    Link = _title.Link,
                                    Title = _title.Text,
                                    Year = _year.Text
                                }
                            );
                        }
                    );
                }
            );
        };

    c.Crawl(t.PathAndQuery + title.URLEncode());
}
/// <summary>
/// Creates the crawler for www.imdb.com (port 80) and wires
/// <c>DataReceived</c> so that each received document is scraped into an
/// <c>Entry</c> (title, year, poster, user rating, genres, runtime, tagline)
/// which is then passed to <c>AddEntry</c>.
/// Any section whose markers are missing or incomplete is left empty
/// instead of aborting the whole entry.
/// </summary>
public BasicIMDBCrawler()
{
    this.Crawler = new Library.BasicWebCrawler("www.imdb.com", 80)
    {
        //CoralEnabled = true
    };

    // Template instances: they only fix the shape of the anonymous types returned by the parsers below.
    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultImage = new { Source = "", Alt = "", Title = "" };

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );

    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

            return new { Source, Alt, Title };
        }
    );

    this.Crawler.DataReceived +=
        document =>
        {
            var entry = new Entry();

            #region Title and Year
            // The <title> content is expected to look like: Some Title (2008)
            var title = BasicElementParser.GetContent(document, "title");
            var title_i = title.IndexOf("(");

            if (title_i < 0)
            {
                // No "(year)" suffix: keep the whole text as the title.
                entry.Title = title.Trim();
                entry.Year = "";
            }
            else
            {
                entry.Title = title.Substring(0, title_i).Trim();

                var year_i = title_i + 1;
                var year_close_i = title.IndexOf(")", year_i);

                entry.Year = year_close_i < 0
                    ? ""
                    : title.Substring(year_i, year_close_i - year_i);
            }

            // Remove quotes from the title.
            // NOTE(review): the literal originally used here was unreadable in the
            // source; stripping both entity spellings and the raw quote character
            // is an assumption - confirm against real page titles.
            entry.Title = entry.Title.
                Replace("&quot;", "").
                Replace("&#34;", "").
                Replace("\"", "");
            #endregion

            #region Poster
            var poster_i = document.IndexOf("name=\"poster\"");

            // No poster - the poster may be found on other services.
            if (poster_i >= 0)
            {
                // The poster attribute sits inside an anchor: widen to the enclosing <a ...>...</a>.
                var poster_j = document.Substring(0, poster_i).LastIndexOf("<a");
                var poster_q = document.IndexOf("</a>", poster_i);

                if (poster_j >= 0 && poster_q >= 0)
                {
                    var poster = ParseLink(document.Substring(poster_j, poster_q - poster_j + 4));
                    var poster_image = ParseImage(poster.Text);

                    entry.MediumPosterImageProvider = "imdb";
                    entry.MediumPosterImage = poster_image.Source;
                    entry.MediumPosterImagePage = poster.Link;
                    entry.MediumPosterTitle = poster.Title;
                }
            }
            #endregion

            #region UserRating
            var meta_tag = "<div class=\"meta\">";
            var meta_i = document.IndexOf(meta_tag);

            entry.UserRating = "";

            if (meta_i >= 0)
            {
                var meta_start = meta_i + meta_tag.Length;
                var meta_close_i = document.IndexOf("</div>", meta_start);

                if (meta_close_i >= 0)
                {
                    var meta = document.Substring(meta_start, meta_close_i - meta_start);

                    entry.UserRating = BasicElementParser.GetContent(meta, "b");
                }
            }
            #endregion

            #region Genres
            var genre_tag = "<h5>Genre:</h5>";
            var genre_i = document.IndexOf(genre_tag);
            var genres = new List<string>();

            if (genre_i >= 0)
            {
                var genre_start = genre_i + genre_tag.Length;
                var genre_close_i = document.IndexOf("</div>", genre_start);

                if (genre_close_i >= 0)
                {
                    var genre = document.Substring(genre_start, genre_close_i - genre_start);

                    BasicElementParser.Parse(genre, "a",
                        (text, index) =>
                        {
                            // Skip the trailing "more" link; it is not a genre.
                            if (text == "more")
                            {
                                return;
                            }

                            genres.Add(text);
                        }
                    );
                }
            }

            entry.Genres = genres.ToArray();
            #endregion

            #region Runtime
            var runtime_tag = "<h5>Runtime:</h5>";

            // Runtime and Tagline are searched from the genre section onwards,
            // or from the start of the document when there is no genre section.
            if (genre_i < 0)
            {
                genre_i = 0;
            }

            var runtime_i = document.IndexOf(runtime_tag, genre_i);

            entry.Runtime = "";

            if (runtime_i >= 0)
            {
                var runtime_start = runtime_i + runtime_tag.Length;
                var runtime_close_i = document.IndexOf("</div>", runtime_start);

                if (runtime_close_i >= 0)
                {
                    entry.Runtime = document.Substring(runtime_start, runtime_close_i - runtime_start).Trim();
                }
            }
            #endregion

            #region Tagline
            var Tagline_tag = "<h5>Tagline:</h5>";
            var Tagline_i = document.IndexOf(Tagline_tag, genre_i);

            entry.Tagline = "";

            if (Tagline_i >= 0)
            {
                // The tagline is the text between the heading and the next tag.
                var Tagline_start = Tagline_i + Tagline_tag.Length;
                var Tagline_close_i = document.IndexOf("<", Tagline_start);

                if (Tagline_close_i >= 0)
                {
                    entry.Tagline = document.Substring(Tagline_start, Tagline_close_i - Tagline_start).Trim();
                }
            }
            #endregion

            if (AddEntry != null)
            {
                AddEntry(entry);
            }
        };
}
/// <summary>
/// Creates the crawler for tinyurl.com (port 80) and wires
/// <c>DataReceived</c> to turn each response into an <c>Entry</c> that is
/// passed to <c>AddEntry</c>.
/// When <c>APIMode</c> is set the whole response body is taken as the alias;
/// otherwise the "TinyURL was created!" HTML page is scraped for the original
/// URL and the alias.
/// </summary>
public BasicTinyURLCrawler()
{
    this.Crawler = new Library.BasicWebCrawler("tinyurl.com", 80)
    {
        //CoralEnabled = true
    };

    this.Crawler.DataReceived +=
        document =>
        {
            var entry = new Entry();

            if (APIMode)
            {
                entry.Alias = document;
            }
            else
            {
                var trigger = "<h1>TinyURL was created!</h1>";
                var trigger_i = document.IndexOf(trigger);

                if (trigger_i < 0)
                {
                    return;
                }

                // we are still in the business...

                //<h1>TinyURL was created!</h1>
                //<p>The following URL:
                //<blockquote><b>http://thepiratebay.org<br />
                //</b></blockquote>
                //has a length of 23 characters and resulted in the following TinyURL which has a length of 24 characters:
                //<blockquote><b>http://tinyurl.com/5umsn</b><br><small>[<a href="http://tinyurl.com/5umsn" target="_blank">Open in new window</a>]</small></blockquote>
                //Or, give your recipients confidence with a preview TinyURL:
                //<blockquote><b>http://preview.tinyurl.com/5umsn</b><br><small>[<a href="http://preview.tinyurl.com/5umsn" target="_blank">Open in new window</a>]</small>
                //</blockquote>
                //</p>

                var start_tag = "<p>";
                var start_i = document.IndexOf(start_tag, trigger_i);

                if (start_i < 0)
                {
                    return;
                }

                var end_tag = "</p>";
                var data_i = start_i + start_tag.Length;
                var end_i = document.IndexOf(end_tag, data_i);

                // Truncated page: the paragraph never closes, so it cannot be sliced out.
                if (end_i < 0)
                {
                    return;
                }

                // Content strictly between <p> and </p>. The length is measured
                // from the end of the opening tag, so the tag length is subtracted
                // (adding it overruns the paragraph and can run past the document end).
                var data = document.Substring(data_i, end_i - data_i);

                BasicElementParser.Parse(data, "blockquote",
                    (value, index) =>
                    {
                        // First blockquote: the original URL, possibly followed by a <br />.
                        if (index == 0)
                        {
                            entry.URL = BasicElementParser.GetContent(value, "b");

                            var br_tag = "<br />";
                            var br_i = entry.URL.IndexOf(br_tag);

                            if (br_i >= 0)
                            {
                                entry.URL = entry.URL.Substring(0, br_i);
                            }

                            return;
                        }

                        // Second blockquote: the generated alias.
                        if (index == 1)
                        {
                            entry.Alias = BasicElementParser.GetContent(value, "b");
                            return;
                        }
                    }
                );
            }

            if (this.AddEntry != null)
            {
                this.AddEntry(entry);
            }
        };
}