/// <summary>
/// Creates a crawler for www.imdb.com (port 80) and attaches a page scraper to it.
/// Each received document is scraped into an <c>Entry</c> (title, year, poster,
/// user rating, genres, runtime, tagline) and handed to <c>AddEntry</c> when that
/// delegate is set.
/// </summary>
/// <remarks>
/// Scraping is plain string scanning for fixed markers. A marker that is missing or
/// unterminated leaves the corresponding field empty instead of letting
/// <c>Substring</c>/<c>IndexOf</c> throw from inside the crawler callback.
/// </remarks>
public BasicIMDBCrawler()
{
    this.Crawler = new Library.BasicWebCrawler("www.imdb.com", 80)
    {
        //CoralEnabled = true
    };

    // Prototype instances: they only fix the anonymous type that the parsers return.
    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultImage = new { Source = "", Alt = "", Title = "" };

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );

    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

            return new { Source, Alt, Title };
        }
    );

    // Returns the text between the end of the marker located at marker_i and the
    // next occurrence of terminator; null when the marker was not found
    // (marker_i < 0) or the terminator never follows it.
    Func<string, int, string, string, string> SliceAfter =
        (text, marker_i, marker, terminator) =>
        {
            if (marker_i < 0)
                return null;

            var from = marker_i + marker.Length;
            var to = text.IndexOf(terminator, from);

            if (to < 0)
                return null;

            return text.Substring(from, to - from);
        };

    this.Crawler.DataReceived +=
        document =>
        {
            var entry = new Entry();

            #region Title and Year
            // The page title is expected to look like: Some Title (2008)
            var title = BasicElementParser.GetContent(document, "title") ?? "";
            var title_i = title.IndexOf("(");

            if (title_i < 0)
            {
                // no "(year)" suffix - keep the whole title, leave the year empty
                entry.Title = title.Trim();
                entry.Year = "";
            }
            else
            {
                entry.Title = title.Substring(0, title_i).Trim();

                var year_i = title_i + 1;
                var year_end = title.IndexOf(")", year_i);

                entry.Year = year_end < 0
                    ? title.Substring(year_i).Trim()
                    : title.Substring(year_i, year_end - year_i);
            }

            // remove quotes from the title
            // NOTE(review): the literal in the reviewed source was garbled to three bare
            // quote characters, which is not valid C#. It was presumably an HTML entity for
            // the double quote, so both common entity spellings and the raw character are
            // stripped here - confirm against the original file.
            entry.Title = entry.Title.
                Replace("&quot;", "").
                Replace("&#34;", "").
                Replace("\"", "");
            #endregion

            #region Poster
            var poster_i = document.IndexOf("name=\"poster\"");

            // no poster - the poster may be found on other services
            if (poster_i >= 0)
            {
                // widen from the name attribute to the whole enclosing <a ...>...</a>
                var poster_j = document.Substring(0, poster_i).LastIndexOf("<a");
                var poster_q = document.IndexOf("</a>", poster_i);

                if (poster_j >= 0 && poster_q >= 0)
                {
                    var poster = ParseLink(document.Substring(poster_j, poster_q - poster_j + 4));
                    var poster_image = ParseImage(poster.Text);

                    entry.MediumPosterImageProvider = "imdb";
                    entry.MediumPosterImage = poster_image.Source;
                    entry.MediumPosterImagePage = poster.Link;
                    entry.MediumPosterTitle = poster.Title;
                }
            }
            #endregion

            #region UserRating
            var meta_tag = "<div class=\"meta\">";
            var meta = SliceAfter(document, document.IndexOf(meta_tag), meta_tag, "</div>");

            entry.UserRating = meta == null ? "" : BasicElementParser.GetContent(meta, "b");
            #endregion

            #region Genres
            var genre_tag = "<h5>Genre:</h5>";
            var genre_i = document.IndexOf(genre_tag);
            var genre = SliceAfter(document, genre_i, genre_tag, "</div>");
            var genres = new List<string>();

            if (genre != null)
            {
                BasicElementParser.Parse(genre, "a",
                    (text, index) =>
                    {
                        // the trailing "more" link is navigation, not a genre
                        if (text == "more")
                            return;

                        genres.Add(text);
                    }
                );
            }

            entry.Genres = genres.ToArray();
            #endregion

            // Runtime and Tagline are searched from the Genre heading onwards, or from
            // the start of the document when there is no Genre heading.
            var scan_from = genre_i < 0 ? 0 : genre_i;

            #region Runtime
            var runtime_tag = "<h5>Runtime:</h5>";
            var runtime = SliceAfter(document, document.IndexOf(runtime_tag, scan_from), runtime_tag, "</div>");

            entry.Runtime = runtime == null ? "" : runtime.Trim();
            #endregion

            #region Tagline
            var Tagline_tag = "<h5>Tagline:</h5>";
            // the tagline is the bare text up to the next tag of any kind
            var Tagline = SliceAfter(document, document.IndexOf(Tagline_tag, scan_from), Tagline_tag, "<");

            entry.Tagline = Tagline == null ? "" : Tagline.Trim();
            #endregion

            if (AddEntry != null)
                AddEntry(entry);
        };
}
/// <summary>
/// Scans the markup of a single movie item and fills a <c>MovieItem</c> from its
/// <c>h2</c>, <c>div</c> and <c>a</c> elements, then passes the item to
/// <paramref name="handler"/>.
/// </summary>
/// <param name="data">Markup to scan.</param>
/// <param name="handler">Called once with the populated item after the scan.</param>
public static void ParseMovieItem(this string data, Action<MovieItem> handler)
{
    var n = new MovieItem();

    // Prototype instances: they only fix the anonymous type that the parsers return.
    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultImage = new { Source = "", Alt = "", Title = "" };
    var DefaultHeader = new { Title = "", Text = "" };

    var ParseHeader = DefaultHeader.ToAnonymousConstructor(
        (string element) =>
        {
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Title, Text };
        }
    );

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse("a");

            return new { Link, Title, Text };
        }
    );

    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse("img");

            return new { Source, Alt, Title };
        }
    );

    data.ParseElements(
        (tag, index, element) =>
        {
            if (tag == "h2")
            {
                // <h2><a href="comments link">smart title</a></h2>
                var h = ParseHeader(element);
                var a = ParseLink(h.Text);

                if (a != null)
                {
                    n.TorrentCommentLink = a.Link;
                    n.SmartTitle = a.Text;
                }
            }
            else if (tag == "div")
            {
                // <div title="field name">field value</div>
                // The title values are matched verbatim against the markup,
                // including the "raiting" spelling.
                var h = ParseHeader(element);

                switch (h.Title)
                {
                    case "raiting": n.IMDBRaiting = h.Text; break;
                    case "runtime": n.IMDBRuntime = h.Text; break;
                    case "tagline": n.IMDBTagline = h.Text; break;
                    case "genres": n.IMDBGenres = h.Text; break;
                    case "episode": n.Episode = h.Text; break;
                }
            }
            else if (tag == "a")
            {
                var a = ParseLink(element);

                // The h2 branch treats a null link as "nothing parsed"; apply the
                // same rule here before a.Text is dereferenced.
                if (a == null)
                    return;

                var img = ParseImage(a.Text);

                if (img == null)
                    return;

                // The link target decides what the anchor/image pair means.
                if (a.Link.StartsWith("http://www.youtube.com"))
                {
                    n.YouTubeKey = img.Alt;
                }
                else if (a.Link.StartsWith("http://www.imdb.com"))
                {
                    n.IMDBLink = a.Link;
                    n.PosterLink = img.Source;
                }
                else if (a.Link.StartsWith("http://tinyurl.com"))
                {
                    n.TorrentLink = a.Link;
                    // the torrent name is whatever follows the first '>' in the anchor content
                    n.TorrentName = a.Text.Substring(a.Text.IndexOf(">") + 1).Trim();
                }
            }
        }
    );

    handler(n);
}
/// <summary>
/// Creates a crawler for www.imdb.com (port 80, <c>CoralEnabled</c> set) that follows
/// redirects and reports the <c>src</c> of the image found in the first cell of the
/// <c>&lt;table id="principal"&gt;</c> element through <c>AddEntry</c>.
/// </summary>
/// <remarks>
/// When the page has no principal table (or it is unterminated) nothing is reported;
/// previously the -1 index was passed on to <c>IndexOf</c>/<c>Substring</c>, which throw.
/// </remarks>
public BasicIMDBPosterSearch()
{
    this.Crawler = new Library.BasicWebCrawler("www.imdb.com", 80)
    {
        CoralEnabled = true
    };

    // Prototype instance: it only fixes the anonymous type that ParseImage returns.
    var DefaultImage = new { Source = "", Alt = "", Title = "" };

    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

            return new { Source, Alt, Title };
        }
    );

    // Redirect target of the current response; cleared for every new request.
    string location = null;

    this.Crawler.AllHeadersSent +=
        () =>
        {
            location = null;
        };

    this.Crawler.LocationReceived +=
        value =>
        {
            location = value;
        };

    this.Crawler.DataReceived +=
        document =>
        {
            if (!string.IsNullOrEmpty(location))
            {
                // redirected - crawl the new path on the same host instead of parsing
                var u = new Uri(location);

                this.Crawler.Crawl(u.PathAndQuery);
                return;
            }

            var poster_tag = "<table id=\"principal\">";
            var poster_close_tag = "</table>";

            var poster_i = document.IndexOf(poster_tag);
            if (poster_i < 0)
                return;

            var poster_close_i = document.IndexOf(poster_close_tag, poster_i);
            if (poster_close_i < 0)
                return;

            var table = document.Substring(poster_i, poster_close_i + poster_close_tag.Length - poster_i);
            var poster = ParseImage(BasicElementParser.GetContent(table, "td"));

            if (this.AddEntry != null)
                this.AddEntry(poster.Source);
        };
}
/// <summary>
/// Creates a downloader/uploader crawler pair: whatever the downloader streams in is
/// forwarded as a multipart/form-data POST to bayimg.com ("/upload"), and the link
/// found in the upload response is reported through <c>AddEntry</c>.
/// </summary>
/// <remarks>
/// The upload response is scanned for <c>&lt;div id="extra2"&gt;</c> ... <c>&lt;br/&gt;</c>.
/// When those markers or the link are missing, no entry is reported; previously the
/// -1 index or empty link made <c>IndexOf</c>/<c>Substring</c> throw.
/// </remarks>
public BasicPirateBayImage()
{
    this.CrawlerDownloader = new Library.BasicWebCrawler("", 80)
    {
        //CoralEnabled = true
    };

    this.CrawlerUploader = new Library.BasicWebCrawler("bayimg.com", 80)
    {
        Method = "POST"
    };

    #region parser
    // Prototype instance: it only fixes the anonymous type that ParseLink returns.
    var DefaultLink = new { Link = "", Title = "", Text = "" };

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );
    #endregion

    // http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
    var boundary = "---------------------------" + int.MaxValue.Random();
    var current_filename = "_" + int.MaxValue.Random();

    #region WriteMultipartBody
    // Writes the complete multipart body: the file part followed by the "code" and
    // "tags" form fields. With source == null nothing is copied; the base stream
    // position is advanced by sourcelength instead, which lets the same routine
    // measure the body length against a VoidStream before the real upload.
    Action<StreamWriter, Stream, int, string> WriteMultipartBody =
        (stream, source, sourcelength, filename) =>
        {
            stream.AutoFlush = true;

            stream.WriteLine("--" + boundary);
            stream.WriteLine("Content-Disposition: form-data; name=\"file\"; filename=\"" + filename + "\"");
            stream.WriteLine("Content-Type: application/octet-stream");
            stream.WriteLine();

            if (source == null)
            {
                stream.BaseStream.Position += sourcelength;
            }
            else
            {
                // raw bytes bypass the text writer; AutoFlush keeps the ordering intact
                var buffer = new byte[0x1000];

                for (var size = source.Read(buffer, 0, buffer.Length);
                     size > 0;
                     size = source.Read(buffer, 0, buffer.Length))
                {
                    stream.BaseStream.Write(buffer, 0, size);
                }
            }

            stream.WriteLine();

            stream.WriteLine("--" + boundary);
            stream.WriteLine("Content-Disposition: form-data; name=\"code\"");
            stream.WriteLine();
            stream.WriteLine("tpb");

            stream.WriteLine("--" + boundary);
            stream.WriteLine("Content-Disposition: form-data; name=\"tags\"");
            stream.WriteLine();
            stream.WriteLine("");

            stream.WriteLine("--" + boundary + "--");
        };
    #endregion

    // NOTE(review): the two uploader handlers below are subscribed from inside
    // downloader callbacks, so each download adds another subscription. If one
    // instance is used for several downloads and the crawler does not clear its
    // handlers, earlier subscriptions will run again - confirm against BasicWebCrawler.
    this.CrawlerDownloader.ContentLengthReceived +=
        ContentLength =>
        {
            current_filename = "_" + int.MaxValue.Random();

            var value = int.Parse(ContentLength);

            this.CrawlerUploader.HeaderWriter +=
                stream =>
                {
                    stream.WriteLine("Content-Type: multipart/form-data; boundary=" + boundary);

                    // dry run to learn the exact body length for the Content-Length header
                    using (var v = new StreamWriter(new VoidStream()))
                    {
                        WriteMultipartBody(v, null, value, current_filename);

                        stream.WriteLine("Content-Length: " + v.BaseStream.Position);
                    }
                };
        };

    this.CrawlerDownloader.StreamReader +=
        source =>
        {
            this.CrawlerUploader.StreamWriter +=
                stream => WriteMultipartBody(stream, source, 0, current_filename);

            this.CrawlerUploader.Crawl("/upload");
        };

    this.CrawlerUploader.DataReceived +=
        document =>
        {
            var result_tag = "<div id=\"extra2\">";
            var result_end_tag = "<br/>";

            var result_i = document.IndexOf(result_tag);
            if (result_i < 0)
                return;

            var data_i = result_i + result_tag.Length;
            var result_end_i = document.IndexOf(result_end_tag, data_i);
            if (result_end_i < 0)
                return;

            var data = document.Substring(data_i, result_end_i - data_i).Trim();

            // http://bayimg.com/image/eaofgaabg.jpg
            var Link = ParseLink(data);

            if (Link == null || string.IsNullOrEmpty(Link.Link))
                return;

            // the first character of the link is skipped; the rest is reported in lower case
            if (this.AddEntry != null)
                this.AddEntry(new Entry(Link.Link.Substring(1).ToLower()));
        };
}
/// <summary>
/// Searches www.omdb.si for <paramref name="stitle"/> and calls
/// <paramref name="handler"/> once for every result parsed from the first result page.
/// </summary>
/// <param name="stitle">Title to search for; URL-encoded into the query string.</param>
/// <param name="handler">Receives one <c>AliasEntry</c> per parsed result.</param>
/// <remarks>
/// Scanning stops at the first result block that does not have the expected shape;
/// previously a missing marker made <c>IndexOf</c>/<c>Substring</c> throw.
/// </remarks>
public static void Search(string stitle, Action<AliasEntry> handler)
{
    var c = new BasicWebCrawler("www.omdb.si", 80);

    // Prototype instance: it only fixes the anonymous type that ParseLink returns.
    var DefaultLink = new { Link = "", Title = "", Text = "" };

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );

    c.DataReceived +=
        document =>
        {
            var trigger_tag = "<table width=\"100%\" class=\"fW\">";
            var trigger_end_tag = "</table>";

            // Parses one result table at or after offset. Returns the offset just past
            // that table on success, or the unchanged offset when nothing (more) could
            // be parsed - the chained call below compares the two values.
            Func<int, int> scan =
                offset =>
                {
                    var trigger_i = document.IndexOf(trigger_tag, offset);
                    if (trigger_i < 0)
                        return offset;

                    var trigger_end_i = document.IndexOf(trigger_end_tag, trigger_i);
                    if (trigger_end_i < 0)
                        return offset;

                    var data = document.Substring(
                        trigger_i + trigger_tag.Length,
                        trigger_end_i - trigger_i - trigger_tag.Length);

                    /* Shape of one result block (abridged):

                       <td class="bM">
                        <div align="left">
                         <table width="100%" border="0" cellspacing="0" cellpadding="0">
                          <tr>
                           <td width="444" align="left">
                            <div align="left"><a href="/index.php/ofilm/?i=401737">Lost <b>(2004)</b></a></div>
                           </td>
                           <td align="right">
                            <span class="forumozadje3"><b>8.9</b> <img src='/images/default/zvezdice/yellow.gif' class='slikevvrsti' />...</span>
                           </td></tr>
                          <tr>
                           <td align="left">
                            <span class="oddelki_forum_mala">Genre: <b>Drama, Adventure, Mystery, Thriller</b> Duration: <b>45 min</b></span>
                           </td>
                           <td align="right"><span class="oddelki_forum_mala">(174 votes)</span></td></tr>
                    */

                    var title_tag = "<div align=\"left\">";
                    var title_end_tag = "</div>";

                    // the title link sits in the SECOND <div align="left"> of the block
                    var title_i = data.IndexOf(title_tag);
                    if (title_i < 0)
                        return offset;

                    title_i = data.IndexOf(title_tag, title_i + title_tag.Length);
                    if (title_i < 0)
                        return offset;

                    var title_end_i = data.IndexOf(title_end_tag, title_i);
                    if (title_end_i < 0)
                        return offset;

                    // Lost <b>(2004)</b>
                    var title = ParseLink(
                        data.Substring(title_i + title_tag.Length, title_end_i - title_i - title_tag.Length));

                    var genre_tag = "<span class=\"oddelki_forum_mala\">";
                    var genre_end_tag = "</span>";

                    var genre_i = data.IndexOf(genre_tag, title_end_i);
                    if (genre_i < 0)
                        return offset;

                    var genre_end_i = data.IndexOf(genre_end_tag, genre_i);
                    if (genre_end_i < 0)
                        return offset;

                    // Genre: <b>Drama, Adventure, Mystery, Thriller</b> Duration: <b>45 min</b>
                    var genre = data.Substring(genre_i + genre_tag.Length, genre_end_i - genre_i - genre_tag.Length);

                    // the title is the link text up to its first nested tag, or all of it
                    var title_text_end = title.Text.IndexOf("<");
                    var title_text = title_text_end < 0 ? title.Text : title.Text.Substring(0, title_text_end);

                    var e = new AliasEntry
                    {
                        Genres = genre.Substring("Genre: <b>", "</b>").Split(new[] { ',' }).Trim(),
                        Duration = genre.Substring("Duration: <b>", "</b>"),
                        Link = "http://www.omdb.si" + title.Link,
                        Title = title_text,
                        Year = title.Text.Substring("<b>", "</b>")
                    };

                    handler(e);

                    return trigger_end_i + trigger_end_tag.Length;
                };

            var start_tag = "<td align=\"right\" class=\"bM\">";

            // The original looked start_tag up a second time starting at the index it had
            // just found, which yields the same index again; a single lookup is equivalent.
            var start_offset = document.IndexOf(start_tag);
            if (start_offset < 0)
                return;

            // presumably keeps calling scan with its own result while the predicate
            // (new offset > previous offset) holds - see ToChainedFunc
            scan.ToChainedFunc((x, y) => y > x)(start_offset);
        };

    // we will only look at the first result page
    c.Crawl("/index.php/odefault/search?sK=" + stitle.URLEncode());
}
/// <summary>
/// Crawls "/top/200" with a <c>BasicPirateBaySearch</c> and calls
/// <paramref name="Handler"/> with one <c>SearchEntry</c> per loaded entry.
/// </summary>
/// <param name="Handler">Receives each entry's name, link, torrent link, comment text, size, seeders and leechers.</param>
public static void Search(Action<SearchEntry> Handler)
{
    // Prototype instances: they fix the anonymous types returned by the parsers and
    // double as the "nothing found" defaults further down.
    var LinkShape = new { Link = "", Title = "", Text = "" };
    var ImageShape = new { Source = "", Alt = "", Title = "" };

    var ToLink = LinkShape.ToAnonymousConstructor(
        (string markup) =>
        {
            var href = "";
            var caption = "";
            var inner = "";

            markup
                .ParseAttribute("href", v => href = v)
                .ParseAttribute("title", v => caption = v)
                .ParseContent(v => inner = v)
                .Parse();

            return new { Link = href, Title = caption, Text = inner };
        }
    );

    var ToImage = ImageShape.ToAnonymousConstructor(
        (string markup) =>
        {
            var src = "";
            var alt = "";
            var caption = "";

            markup
                .ParseAttribute("src", v => src = v)
                .ParseAttribute("alt", v => alt = v)
                .ParseAttribute("title", v => caption = v)
                .ParseContent(null)
                .Parse();

            return new { Source = src, Alt = alt, Title = caption };
        }
    );

    var search = new BasicPirateBaySearch();

    search.Loaded +=
        ForEachEntry =>
        {
            ForEachEntry(
                (entry, entryindex) =>
                {
                    // The type link is parsed but its result is not used; the call is kept
                    // so the sequence of parser invocations is unchanged.
                    ToLink(entry.Type);

                    var name = ToLink(entry.Name);

                    var torrent = LinkShape;
                    var comment = ImageShape;

                    entry.Links.ParseElements(
                        (tag, index, element) =>
                        {
                            switch (tag)
                            {
                                case "a":
                                    // the last anchor seen wins
                                    torrent = ToLink(element);
                                    break;

                                case "img":
                                    var candidate = ToImage(element);

                                    // only the icon whose title mentions comments is kept
                                    if (candidate.Title.Contains("comment"))
                                        comment = candidate;
                                    break;
                            }
                        }
                    );

                    var result = new SearchEntry
                    {
                        CommentText = comment.Title,
                        Size = entry.Size,
                        Seeders = entry.Seeders,
                        Leechers = entry.Leechers,
                        Name = name.Text,
                        Link = name.Link,
                        TorrentLink = torrent.Link
                    };

                    Handler(result);
                }
            );
        };

    search.Crawler.Crawl("/top/200");
}
/// <summary>
/// Searches www.movieposterdb.com for <paramref name="title"/> and calls
/// <paramref name="handler"/> once per row of the table that follows the
/// "Movies" heading.
/// </summary>
/// <param name="title">Movie title; URL-encoded and appended to the search query.</param>
/// <param name="handler">Receives the link, title and year of each row.</param>
/// <remarks>
/// When the page has no "Movies" heading nothing is reported; previously the -1
/// index was passed to <c>Substring</c>, which throws.
/// </remarks>
public static void Search(string title, Action<AliasEntry> handler)
{
    var t = new Uri("http://www.movieposterdb.com/browse/search?search_type=movies&title=");
    var c = new BasicWebCrawler(t.Host, 80);

    // Prototype instances: they only fix the anonymous types that the parsers return.
    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultSpan = new { Text = "", Title = "" };

    var ParseSpan = DefaultSpan.ToAnonymousConstructor(
        (string element) =>
        {
            var Text = "";
            var Title = "";

            element.
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse("span");

            return new { Text, Title };
        }
    );

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse("a");

            return new { Link, Title, Text };
        }
    );

    c.DataReceived +=
        document =>
        {
            var trigger = "Movies</h3>";
            var trigger_i = document.IndexOf(trigger);

            // no "Movies" section on this page - nothing to report
            if (trigger_i < 0)
                return;

            var data = BasicElementParser.GetContent(document.Substring(trigger_i), "table");

            if (data == null)
                return;

            BasicElementParser.Parse(data, "tr",
                (element, index) =>
                {
                    /* Shape of one row:

                       <td valign="middle" style="font-size: 0pt; border-bottom: 1px solid #D2D2D2; height: 54px; width: 44px;">
                        <img src="http://www.movieposterdb.com/posters/08_09/2008/1179855/m_1179855_4fb9999f.jpg" style="margin-right: 8px; padding: 2px; border: 1px solid #D2D2D2; float: left;" />
                       </td>
                       <td valign="middle" style="border-bottom: 1px solid #D2D2D2; width: 60%;">
                        <b><a class="bbg" href="http://www.movieposterdb.com/movie/1179855/Go-Fast.html">Go Fast</a><br /><span style="color: #8C8C8C;">2008</span></b>
                       </td>
                       <td style="border-bottom: 1px solid #D2D2D2; font-size: 8pt; color: #808080;">
                       </td>
                    */

                    BasicElementParser.Parse(element, "td",
                        (tdelement, tdindex) =>
                        {
                            // only the second cell carries the title link and the year span
                            if (tdindex != 1)
                                return;

                            // <b><a class="bbg" href="http://www.movieposterdb.com/movie/1179855/Go-Fast.html">Go Fast</a><br /><span style="color: #8C8C8C;">2008</span></b>
                            var _title = ParseLink(tdelement);
                            var _year = ParseSpan(tdelement);

                            handler(
                                new AliasEntry
                                {
                                    Link = _title.Link,
                                    Title = _title.Text,
                                    Year = _year.Text
                                }
                            );
                        }
                    );
                }
            );
        };

    c.Crawl(t.PathAndQuery + title.URLEncode());
}
/// <summary>
/// Creates the crawler for the IMDB "find" page and attaches the result parser.
/// Every result row of the first available section (Popular, then Exact, then
/// Approx matches) is reported through <c>AddEntry</c> together with a running
/// index; a redirect response is reported as a single entry holding the redirect
/// target.
/// </summary>
/// <remarks>
/// Rows or sections that do not have the expected shape are skipped; previously the
/// -1 index was passed to <c>Substring</c>/<c>IndexOf</c>, which throw. The redirect
/// target is consumed when it is reported, so it cannot leak into a later response
/// handled by the same instance.
/// </remarks>
public BasicIMDBAliasSearch()
{
    var c = new BasicWebCrawler(Host, 80)
    {
        // doesnt seem to respond at 2009.03.18
        //CoralEnabled = true
    };

    this.Crawler = c;

    // Prototype instances: they only fix the anonymous types that the parsers return.
    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultImage = new { Source = "", Alt = "", Title = "", width = "", height = "" };

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );

    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";
            var width = "";
            var height = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseAttribute("width", value => width = value).
                ParseAttribute("height", value => height = value).
                ParseContent(null).
                Parse();

            return new { Source, Alt, Title, width, height };
        }
    );

    // Index of the most recently reported entry; -1 until the first one.
    var EntryIndex = -1;

    #region AddItem
    // Turns one result row (image cell + content cell) into an Entry and reports it.
    Action<string, string> AddItem =
        (ImageElement, Content) =>
        {
            var ImageSource = "";

            // the image cell only carries a poster when it is wrapped in a link
            if (ImageElement.StartsWith("<a"))
            {
                var ImageLink = ParseLink(ImageElement);
                var Image = ParseImage(ImageLink.Text);

                ImageSource = Image.Source;
            }

            /* Shape of the content cell:

             * <img src="/images/b.gif" width="1" height="6"><br>
             * <a href="/title/tt0397892/" onclick="(new Image()).src='/rg/find-title-1/title_popular/images/b.gif?link=/title/tt0397892/';">Bolt</a> (2008) <br>
             * aka <em>"Bolt - Pes pro kazdý prípad"</em> - Czech Republic<br>
             * aka <em>"Bolt - Un perro fuera de serie 3D"</em> - Chile<br>
             * aka <em>"Bolt - Supercão"</em> - Brazil<br>
             * aka <em>"Bolt - Un perro fuera de serie"</em> - Argentina, Mexico<br>
             * aka <em>"Bolt - Ein Hund für alle Fälle"</em> - Germany
             */

            var ContentLink_start = Content.IndexOf("<a");
            if (ContentLink_start < 0)
                return; // not a result row (e.g. fewer than three cells)

            var ContentLink_end = Content.IndexOf("</a>", ContentLink_start);
            if (ContentLink_end < 0)
                return;

            var ContentLink = ParseLink(
                Content.Substring(ContentLink_start, ContentLink_end - ContentLink_start + 4));

            // everything after the title link: release date first, then the aliases
            var Details = Content.Substring(ContentLink_end + 4);

            var ReleaseDate = "";
            var Alias = default(AlsoKnownAs);

            Details.Split("<br>",
                (text, index) =>
                {
                    if (index == 0)
                    {
                        ReleaseDate = text;
                        return;
                    }

                    // aliases form a linked list; each new one points at the previous
                    Alias = new AlsoKnownAs { Text = text, Alias = Alias };
                }
            );

            EntryIndex++;

            if (this.AddEntry != null)
            {
                this.AddEntry(
                    new Entry("http://" + Host + ContentLink.Link)
                    {
                        OptionalAlias = Alias,
                        OptionalReleaseDate = ReleaseDate,
                        OptionalTitle = ContentLink.Text,
                        OptionalImage = ImageSource
                    },
                    EntryIndex
                );
            }
        };
    #endregion

    // http://www.imdb.com/find?s=tt;site=aka;q=The%20Dark%20Knight

    // Location: http://www.imdb.com/title/tt1129442/
    string Redirect = null;

    c.LocationReceived +=
        href =>
        {
            Redirect = href;
        };

    c.DataReceivedWithTimeSpan +=
        (document, elapsed) =>
        {
            #region redirect
            if (!string.IsNullOrEmpty(Redirect))
            {
                // consume the redirect so it is reported only for this response
                var target = Redirect;
                Redirect = null;

                EntryIndex++;

                if (this.AddEntry != null)
                {
                    this.AddEntry(new Entry(target), EntryIndex);
                }

                return;
            }
            #endregion

            var approx_section = document.IndexOf("<b>Titles (Approx Matches)</b>");
            var exact_section = document.IndexOf("<b>Titles (Exact Matches)</b>");
            var popular_section = document.IndexOf("<b>Popular Titles</b>");

            // preference order: popular, exact, approx
            var first_section = popular_section;

            if (first_section < 0)
                first_section = exact_section;

            if (first_section < 0)
                first_section = approx_section;

            if (first_section < 0)
                return;

            var section_start = document.IndexOf("<table>", first_section);
            if (section_start < 0)
                return;

            var section_end = document.IndexOf("</table>", section_start);
            if (section_end < 0)
                return;

            // + 8 keeps the closing </table> tag in the slice
            var section = document.Substring(section_start, section_end - section_start + 8);

            BasicElementParser.Parse(section, "tr",
                (tr, tr_index) =>
                {
                    /* Shape of one row:

                       <td valign="top">
                        <a href="/title/tt0397892/" onClick="(new Image()).src='/rg/find-tiny-photo-1/title_popular/images/b.gif?link=/title/tt0397892/';"><img src="http://ia.media-imdb.com/images/M/MV5BNDQyNDE5NjQ1N15BMl5BanBnXkFtZTcwMDExMTAwMg@@._V1._SY30_SX23_.jpg" width="23" height="32" border="0"></a>
                       </td>
                       <td align="right" valign="top"><img src="/images/b.gif" width="1" height="6"><br>1.</td>
                       <td valign="top"><img src="/images/b.gif" width="1" height="6"><br><a href="/title/tt0397892/" ...>Bolt</a> (2008) <br> aka <em>"Bolt - Pes pro kazdý prípad"</em> - Czech Republic<br> ... </td>
                     */

                    var Image = "";
                    var Content = "";

                    BasicElementParser.Parse(tr, "td",
                        (td, td_index) =>
                        {
                            // cell 0: poster thumbnail, cell 2: title link and aliases
                            if (td_index == 0)
                                Image = td;

                            if (td_index == 2)
                                Content = td;
                        }
                    );

                    AddItem(Image, Content);
                }
            );
        };

    // Example queries:
    //c.Crawl("/find?s=tt;site=aka;q=" + "The Dark Knight".URLEncode());
    //c.Crawl("/find?s=tt;site=aka;q=" + "Bolt".URLEncode());
}
/// <summary>
/// Demo chain: looks the title up via <c>BasicIMDBAliasSearch</c>, crawls the first
/// hit with <c>BasicIMDBCrawler</c>, runs its medium poster through
/// <c>BasicTinEyeSearch</c>, streams the image found there into a multipart POST to
/// bayimg.com and finally writes an image element for the uploaded picture to the console.
/// </summary>
/// <param name="MovieTitle">Title to search for; also echoed to the console.</param>
/// <remarks>
/// When the upload response lacks the expected markers nothing is printed; previously
/// the -1 index was passed to <c>IndexOf</c>/<c>Substring</c>, which throw.
/// </remarks>
private static void DemoGetPosterViaTinEyeAndStreamIt(string MovieTitle)
{
    Console.WriteLine();
    Console.WriteLine(MovieTitle);

    // Prototype instance: it only fixes the anonymous type that ParseLink returns.
    var DefaultLink = new { Link = "", Title = "", Text = "" };

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );

    BasicIMDBAliasSearch.Search(MovieTitle,
        (e, i) =>
        {
            Console.WriteLine("BasicIMDBAliasSearch");

            // only the first search hit is followed
            if (i > 0)
                return;

            BasicIMDBCrawler.Search(e.Key,
                imdb =>
                {
                    Console.WriteLine("BasicIMDBCrawler");

                    BasicTinEyeSearch.Search(imdb.MediumPosterImage,
                        tineye =>
                        {
                            Console.WriteLine("BasicTinEyeSearch");

                            // yay, we have the thumbnail, get it

                            #region downloader
                            var query = tineye.QueryLink.ToUri();

                            var downloader = new BasicWebCrawler(query.Host, 80);
                            var uploader = new BasicWebCrawler("bayimg.com", 80)
                            {
                                Method = "POST"
                            };

                            // http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
                            var boundary = "---------------------------" + int.MaxValue.Random();

                            #region WriteMultipartBody
                            // Writes the complete multipart body. With source == null nothing is
                            // copied; the base stream position is advanced by sourcelength instead,
                            // so the same routine can measure the body against a VoidStream.
                            Action<StreamWriter, Stream, int> WriteMultipartBody =
                                (stream, source, sourcelength) =>
                                {
                                    stream.AutoFlush = true;

                                    stream.WriteLine("--" + boundary);
                                    stream.WriteLine("Content-Disposition: form-data; name=\"file\"; filename=\"" + tineye.Hash + "\"");
                                    stream.WriteLine("Content-Type: application/octet-stream");
                                    stream.WriteLine();

                                    if (source == null)
                                    {
                                        stream.BaseStream.Position += sourcelength;
                                    }
                                    else
                                    {
                                        var buffer = new byte[0x1000];

                                        for (var size = source.Read(buffer, 0, buffer.Length);
                                             size > 0;
                                             size = source.Read(buffer, 0, buffer.Length))
                                        {
                                            stream.BaseStream.Write(buffer, 0, size);
                                        }
                                    }

                                    stream.WriteLine();

                                    stream.WriteLine("--" + boundary);
                                    stream.WriteLine("Content-Disposition: form-data; name=\"code\"");
                                    stream.WriteLine();
                                    stream.WriteLine("tpb");

                                    stream.WriteLine("--" + boundary);
                                    stream.WriteLine("Content-Disposition: form-data; name=\"tags\"");
                                    stream.WriteLine();
                                    stream.WriteLine("");

                                    stream.WriteLine("--" + boundary + "--");
                                };
                            #endregion

                            downloader.ContentLengthReceived +=
                                ContentLength =>
                                {
                                    var value = int.Parse(ContentLength);

                                    uploader.HeaderWriter +=
                                        stream =>
                                        {
                                            stream.WriteLine("Content-Type: multipart/form-data; boundary=" + boundary);

                                            // dry run to learn the body length for Content-Length
                                            using (var v = new StreamWriter(new VoidStream()))
                                            {
                                                WriteMultipartBody(v, null, value);

                                                stream.WriteLine("Content-Length: " + v.BaseStream.Position);
                                            }
                                        };
                                };

                            downloader.StreamReader +=
                                source =>
                                {
                                    uploader.StreamWriter += stream => WriteMultipartBody(stream, source, 0);
                                    uploader.Crawl("/upload");
                                };

                            uploader.DataReceived +=
                                document =>
                                {
                                    var result_tag = "<div id=\"extra2\">";
                                    var result_end_tag = "<br/>";

                                    var result_i = document.IndexOf(result_tag);
                                    if (result_i < 0)
                                        return;

                                    var data_i = result_i + result_tag.Length;
                                    var result_end_i = document.IndexOf(result_end_tag, data_i);
                                    if (result_end_i < 0)
                                        return;

                                    var data = document.Substring(data_i, result_end_i - data_i).Trim();

                                    // http://bayimg.com/image/eaofgaabg.jpg
                                    var Link = ParseLink(data);

                                    if (Link == null || string.IsNullOrEmpty(Link.Link))
                                        return;

                                    // the matching thumbnail uses the same key under http://bayimg.com/thumb
                                    var ImageLink = "http://bayimg.com/image" + Link.Link.ToLower() + ".jpg";

                                    new IHTMLImage { src = ImageLink, title = imdb.SmartTitle }.ToString().ToConsole();
                                };

                            downloader.Crawl(query.PathAndQuery);
                            #endregion
                        }
                    );
                }
            );
        }
    );
}
/// <summary>
/// Writes an HTML page to the console: a style sheet, an embedded YouTube player and
/// an ordered list with one item per entry of the Pirate Bay "/top/200" listing. Each
/// item gets a "View Trailer" button, IMDB poster/title links when an IMDB match is
/// found, the parsed season/episode/year details and the torrent links and statistics.
/// </summary>
/// <remarks>
/// NOTE(review): values taken from crawled pages (video id, titles, links) are written
/// into the markup as-is; whether the helper extensions escape them is not visible here.
/// </remarks>
private static void ShowPirateBayWithVideo()
{
    Console.WriteLine("<style>");
    Console.WriteLine("img { border: 0; }");
    //Console.WriteLine("ol { -moz-column-count: 2; }");
    // The player fills the page behind the scrollable list (z-index 0 vs 1).
    Console.WriteLine(@" embed { width: 100%; height: 100%; position: absolute; left: 0; top: 0; z-index: 0; } ol { display: block; width: 100%; height: 100%; position: absolute; left: 0; top: 0; z-index: 1; overflow: scroll; } li { } body{ text-align: center; font-family:Verdana, Arial, Helvetica, sans-serif; font-size:.7em; margin: 10px; color: #fff; background: #000; min-width: 520px; overflow: hidden; } a{ color: #009; text-decoration: none; border-bottom: 1px dotted #4040D9; } a:hover{ text-decoration: none; border-bottom: 1px solid #009; } li { text-align: left; margin: 1em;} ");
    Console.WriteLine("</style>");

    // Prototype instances: they only fix the anonymous types that the parsers return.
    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultImage = new { Source = "", Alt = "", Title = "" };

    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );

    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

            return new { Source, Alt, Title };
        }
    );

    // Prints the medium poster and, when available, the large poster of an IMDB
    // entry, each wrapped in a link to the entry. The Title argument is not used.
    Action<string, BasicIMDBAliasSearch.Entry> SearchPoster =
        (Title, e) =>
        {
            Native.API.set_time_limit(20);

            var Link = new Uri(e.Link);
            var Segments = Link.Segments;

            // expected path: /title/tt0397892/ -> segments "/", "title/", "tt0397892/"
            if (Segments.Length < 3)
                return;

            // Strip the trailing slash only when present; cutting the last character
            // unconditionally would truncate the key of a URL without a trailing slash.
            var Key = Segments[2].TrimEnd('/');

            BasicIMDBCrawler.Search(Key,
                k =>
                {
                    IHTMLImage Image = k.MediumPosterImageCoralCache.OriginalString;

                    new IHTMLAnchor
                    {
                        Title = k.SmartTitle,
                        URL = Link.ToCoralCache().WithoutQuery().OriginalString,
                        innerHTML = Image.ToString()
                    }.ToString().ToConsole();

                    Native.API.set_time_limit(20);

                    BasicIMDBPosterSearch.Search(
                        k.MediumPosterImagePage,
                        LargePosterImage =>
                        {
                            var ur = new Uri(LargePosterImage);

                            IHTMLImage LargeImage = ur.ToCoralCache().OriginalString;

                            new IHTMLAnchor
                            {
                                Title = k.SmartTitle,
                                URL = Link.ToCoralCache().WithoutQuery().OriginalString,
                                innerHTML = LargeImage.ToString()
                            }.ToString().ToConsole();
                        }
                    );
                }
            );
        };

    var search = new BasicPirateBaySearch();

    search.Loaded +=
        ForEachEntry =>
        {
            //var logo = "http://static.thepiratebay.org/img/tpblogo_sm_ny.gif";
            //Console.WriteLine(logo.ToImage().ToLink("http://tineye.com/search?url=" + logo));

            // http://code.google.com/apis/youtube/chromeless_example_1.html
            Console.WriteLine("<embed wmode='transparent' id='tv' src='http://www.youtube.com/apiplayer?enablejsapi=1&playerapiid=tv' allowScriptAccess='always' width='400' height='300' />");

            Console.WriteLine("<ol>");

            ForEachEntry(
                (entry, entryindex) =>
                {
                    // each entry triggers several lookups; push the time limit out again
                    Native.API.set_time_limit(20);

                    var Type = ParseLink(entry.Type);
                    var Name = ParseLink(entry.Name);

                    var SmartName = new BasicFileNameParser(Name.Text);

                    // first IMDB hit for the parsed title, if any
                    var MovieInfo = default(BasicIMDBAliasSearch.Entry);

                    BasicIMDBAliasSearch.Search(SmartName.Title,
                        (e, index) =>
                        {
                            if (MovieInfo == null)
                                MovieInfo = e;
                        }
                    );

                    var c = new BasicGoogleVideoCrawler();

                    var Video = "";
                    var VideoSource = "";

                    c.VideoSourceFound +=
                        (video, src) =>
                        {
                            Video = video;
                            VideoSource = src;
                        };

                    Native.API.set_time_limit(16);
                    c.Search(SmartName.Title + " trailer");

                    Console.WriteLine("<li>");

                    new IHTMLButton
                    {
                        //onclick = "if (getElementById(\"tv\").getPlayerState() != 1) getElementById(\"tv\").loadVideoById(\"" + Video + "\")",
                        onclick = "getElementById(\"tv\").loadVideoById(\"" + Video + "\")",
                        innerHTML = "View Trailer"
                    }.ToString().ToConsole();

                    Console.WriteLine("<span style='background: white; color: black;'>");

                    if (MovieInfo != null)
                    {
                        SearchPoster(SmartName.Title, MovieInfo);

                        Console.WriteLine("<b>" + SmartName.Title.ToLink(MovieInfo.Link) + "</b>");
                    }
                    else
                    {
                        // no match - link to the IMDB search for the title instead
                        Console.WriteLine("<b>" + SmartName.Title.ToLink(k => "http://www.imdb.com/find?s=tt;site=aka;q=" + k) + "</b>");
                    }

                    if (!string.IsNullOrEmpty(SmartName.Season))
                    {
                        Console.WriteLine(" | Season <i>" + SmartName.Season + "</i>");
                    }

                    if (!string.IsNullOrEmpty(SmartName.Episode))
                    {
                        Console.WriteLine(" | Episode <i>" + SmartName.Episode + "</i>");
                    }

                    if (!string.IsNullOrEmpty(SmartName.SubTitle))
                    {
                        Console.WriteLine(" | <b>" + SmartName.SubTitle + "</b>");
                    }

                    if (!string.IsNullOrEmpty(SmartName.Year))
                    {
                        Console.WriteLine(" | <i>" + SmartName.Year + "</i>");
                    }

                    Console.WriteLine(" | ");
                    Console.WriteLine("<b>");
                    Console.WriteLine("trailer".ToLink(VideoSource, Video));
                    Console.WriteLine("</b>");

                    Console.WriteLine("<br />");

                    Console.WriteLine("<small>");

                    Console.WriteLine(SmartName.ColoredText.ToString().ToLink("http://thepiratebay.org" + Name.Link) + "<br />");
                    Console.WriteLine(Type.Text.ToLink("http://thepiratebay.org" + Type.Link));

                    entry.Links.ParseElements(
                        (tag, index, element) =>
                        {
                            if (tag == "a")
                            {
                                var a = ParseLink(element);

                                Console.WriteLine(" | " + "torrent".ToLink(a.Link));
                            }

                            if (tag == "img")
                            {
                                var img = ParseImage(element);

                                // the comments icon links to the torrent page; other icons are plain text
                                if (img.Title.Contains("comment"))
                                {
                                    Console.WriteLine(" | " + img.Title.ToLink("http://thepiratebay.org" + Name.Link));
                                }
                                else
                                {
                                    Console.WriteLine(" | " + img.Title);
                                }
                            }
                        }
                    );

                    Console.WriteLine(" | " + entry.Size);
                    Console.WriteLine(" | " + entry.Seeders);
                    Console.WriteLine(" | " + entry.Leechers + "<br />");

                    Console.WriteLine("</small>");
                    Console.WriteLine("</span>");

                    Console.WriteLine("</li>");
                }
            );

            Console.WriteLine("</ol>");
        };

    search.Crawler.Crawl("/top/200");
}