/// <summary>
/// Crawls the page addressed by <c>Link</c> (port 80) and reports the first
/// image whose source starts with the movieposterdb poster prefix.
/// </summary>
/// <param name="handler">
/// Receives the absolute poster URL. It is not invoked when the page contains
/// no such image tag or when the tag's <c>src</c> attribute is not terminated.
/// </param>
public void GetPoster(Action<string> handler)
{
    var uri = Link.ToUri();
    var c = new BasicWebCrawler(uri.Host, 80);

    c.DataReceived +=
        document =>
        {
            var prefix = "http://www.movieposterdb.com/posters/";
            var trigger = "<img src=\"" + prefix;

            var trigger_i = document.IndexOf(trigger);
            if (trigger_i < 0)
            {
                return;
            }

            var start_i = trigger_i + trigger.Length;
            var end_i = document.IndexOf("\"", start_i);
            if (end_i < 0)
            {
                // Truncated document: the attribute never closes, so there is
                // no usable URL. Without this guard Substring would throw.
                return;
            }

            handler(prefix + document.Substring(start_i, end_i - start_i));
        };

    c.Crawl(uri.PathAndQuery);
}
/// <summary>
/// Creates a crawler that issues HEAD requests to tineye.com and converts each
/// received Location value containing "/search/" into an <c>Entry</c> whose
/// <c>Hash</c> is the text following the last "/search/"; the entry is passed
/// to <c>AddEntry</c> when a handler is attached.
/// </summary>
public BasicTinEyeSearch()
{
    this.Crawler = new Library.BasicWebCrawler("tineye.com", 80)
    {
        //CoralEnabled = true,
        Method = "HEAD"
    };

    this.Crawler.LocationReceived +=
        location =>
        {
            const string SearchTag = "/search/";

            var tagIndex = location.LastIndexOf(SearchTag);
            if (tagIndex >= 0)
            {
                // Everything after the tag is the search hash.
                var entry = new Entry
                {
                    Hash = location.Substring(tagIndex + SearchTag.Length),
                };

                if (AddEntry != null)
                {
                    AddEntry(entry);
                }
            }
        };
}
/// <summary>
/// Requests a short alias for <paramref name="url"/> from the tinyarro.ws API
/// and hands the resulting address to <paramref name="handler"/>.
/// </summary>
/// <param name="url">Address to shorten; appended to the API query as given.</param>
/// <param name="handler">Receives "http://" + chosen host + "/" + the percent-encoded alias bytes.</param>
public static void Spawn(string url, Action<string> handler)
{
    var candidateHosts = new[]
    {
        "xn--hgi.ws",
        "xn--ogi.ws",
        "xn--vgi.ws",
        "xn--3fi.ws",
        "xn--egi.ws",
        "xn--9gi.ws",
        "xn--5gi.ws",
        "xn--1ci.ws",
        "xn--odi.ws",
        "xn--rei.ws",
        "xn--cwg.ws",
        "ta.gd",
    };

    // The host is picked deterministically from the url itself.
    var chosenHost = candidateHosts[url.XorBytes() % candidateHosts.Length];

    var crawler = new BasicWebCrawler("tinyarro.ws", 80);
    crawler.Buffer = new byte[100];

    crawler.BinaryDataReceived +=
        data =>
        {
            var scheme = "http://";

            // Skip the scheme-sized prefix of the response, then everything
            // up to and including the next '/'.
            var cursor = scheme.Length;
            while (cursor < data.Length)
            {
                var isSlash = data[cursor] == '/';
                cursor++;
                if (isSlash)
                {
                    break;
                }
            }

            // Every remaining byte is emitted as %XX behind the chosen host.
            var result = scheme + chosenHost + "/";
            while (cursor < data.Length)
            {
                result += "%" + data[cursor].ToHexString();
                cursor++;
            }

            handler(result);
        };

    crawler.Crawl("/api-create.php?host=" + chosenHost + "&url=" + url);
}
/// <summary>
/// Runs a TinEye search for <paramref name="uri"/> and downloads the bytes
/// behind the reported entry's <c>QueryLink</c>.
/// </summary>
/// <returns>
/// The bytes fetched for the entry passed to the callback most recently before
/// <c>Search</c> returned, or <c>null</c> when the callback was not invoked by then.
/// </returns>
public static byte[] ToBytes(Uri uri)
{
    byte[] downloaded = null;

    BasicTinEyeSearch.Search(
        uri.ToString(),
        entry =>
        {
            downloaded = BasicWebCrawler.ToBytes(new Uri(entry.QueryLink));
        }
    );

    return downloaded;
}
/// <summary>
/// Endless monitor: every 15 seconds fetches "/" from each watched host and
/// prints where the page's first frame points.
/// Yellow = gmodules wrapper around http://zproxy.planet.ee, green = gmodules
/// wrapper around anything else, red = frame is not a gmodules wrapper or no
/// frame could be found at all.
/// </summary>
static void Main(string[] args)
{
    var hosts = new[] { "zmovies.tk" /*, "zmoviez.tk" */ };

    while (true)
    {
        foreach (var h in hosts)
        {
            // Copy the loop variable: before C# 5 all lambdas created in a
            // foreach share one variable, so a late callback would otherwise
            // report the wrong host.
            var host = h;
            var c = new BasicWebCrawler(host, 80);

            c.DataReceived +=
                document =>
                {
                    var trigger = "<frame src=\"";

                    var i = document.IndexOf(trigger);
                    var j = i < 0 ? -1 : document.IndexOf("\"", i + trigger.Length);
                    if (j < 0)
                    {
                        // No (complete) frame tag: report it instead of letting
                        // Substring throw and terminate the monitor.
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine(DateTime.Now.ToString() + " " + host + " : no <frame> element found");
                        return;
                    }

                    var data = document.Substring(i + trigger.Length, j - i - trigger.Length);
                    var gmoduleprefix = "http://www.gmodules.com/ig/ifr?url=";

                    if (data.StartsWith(gmoduleprefix))
                    {
                        var module = data.Substring(gmoduleprefix.Length);

                        if (module.StartsWith("http://zproxy.planet.ee"))
                        {
                            Console.ForegroundColor = ConsoleColor.Yellow;
                        }
                        else
                        {
                            Console.ForegroundColor = ConsoleColor.Green;
                        }

                        Console.WriteLine(DateTime.Now.ToString() + " " + host + " : " + module);
                    }
                    else
                    {
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine(DateTime.Now.ToString() + " " + host + " : " + data);
                    }
                };

            c.Crawl("/");
        }

        Thread.Sleep(15000);
    }
}
// http://msdn.microsoft.com/en-us/library/aa373347(VS.85).aspx

/// <summary>
/// Endless monitor for the scheduler counter: every 5500 * skip milliseconds
/// it fetches the counter and, from the second sample on, prints it.
/// Green = counter advanced by at most <c>skip</c>, yellow = advanced by more
/// than <c>skip</c>, red = did not advance (or the response was not a number).
/// </summary>
static void Main(string[] args)
{
    var u = new Uri("http://zproxy.planet.ee/zmovies/server/tasks/Scheduler/Counter");

    var x = 0;      // previously seen counter value; 0 means "no sample yet"
    var skip = 5;

    while (true)
    {
        var c = new BasicWebCrawler(u.Host, 80);

        c.DataReceived +=
            document =>
            {
                int n;
                if (!int.TryParse(document, out n))
                {
                    // An error page or empty body must not kill the monitor;
                    // flag it and keep the last good sample for comparison.
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("?");
                    return;
                }

                if (x > 0)
                {
                    if (x < n)
                    {
                        if (x < (n - skip))
                        {
                            Console.ForegroundColor = ConsoleColor.Yellow;
                        }
                        else
                        {
                            Console.ForegroundColor = ConsoleColor.Green;
                        }
                    }
                    else
                    {
                        Console.ForegroundColor = ConsoleColor.Red;
                    }

                    Console.WriteLine(n);
                }

                x = n;
            };

        c.Crawl(u.PathAndQuery);

        Thread.Sleep(5500 * skip);
    }
}
/// <summary>
/// Treats the string as an absolute URL, crawls it on port 80 and returns the
/// received document.
/// </summary>
/// <returns>
/// The last document delivered before <c>Crawl</c> returned, or an empty
/// string when none was delivered by then.
/// </returns>
public static string ToWebString(this string e)
{
    var address = new Uri(e);
    var crawler = new BasicWebCrawler(address.Host, 80);

    var content = "";
    crawler.DataReceived +=
        document =>
        {
            content = document;
        };

    crawler.Crawl(address.PathAndQuery);

    return content;
}
/// <summary>
/// Asks the kej.tw FLV retriever about <paramref name="url"/> and reports the
/// text of the page's <c>outputfield</c> textarea.
/// </summary>
/// <param name="url">Video page address; appended to the query string as given.</param>
/// <param name="handler">
/// Receives the textarea content. It is not invoked when the textarea (or its
/// closing tag) is missing from the response.
/// </param>
public static void Search(string url, Action<string> handler)
{
    var c = new BasicWebCrawler("kej.tw", 80);

    c.DataReceived +=
        document =>
        {
            var trigger_tag = "<textarea id=\"outputfield\">";
            var trigger_end_tag = "</textarea>";

            var trigger_i = document.IndexOf(trigger_tag);
            if (trigger_i < 0)
            {
                return;
            }

            var content_i = trigger_i + trigger_tag.Length;
            var trigger_end_i = document.IndexOf(trigger_end_tag, content_i);
            if (trigger_end_i < 0)
            {
                // Truncated page; Substring would throw on a negative length.
                return;
            }

            handler(document.Substring(content_i, trigger_end_i - content_i));
        };

    c.Crawl("/flvretriever/?videoUrl=" + url);
}
/// <summary>
/// Wires up the crawler for IMDB "also known as" title searches.
/// Each result row of the first available section (Popular, then Exact, then
/// Approx matches) is reported through <c>AddEntry</c> together with a running
/// index; a redirect answer is reported as a single entry built from the
/// redirect target.
/// </summary>
public BasicIMDBAliasSearch()
{
    var c = new BasicWebCrawler(Host, 80)
    {
        // doesnt seem to respond at 2009.03.18
        //CoralEnabled = true
    };

    this.Crawler = c;

    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultImage = new { Source = "", Alt = "", Title = "", width = "", height = "" };

    // Parses an <a> element into its href, title and inner content.
    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );

    // Parses an <img> element into its attributes.
    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";
            var width = "";
            var height = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseAttribute("width", value => width = value).
                ParseAttribute("height", value => height = value).
                ParseContent(null).
                Parse();

            return new { Source, Alt, Title, width, height };
        }
    );

    var EntryIndex = -1;

    #region AddItem
    // ImageElement is the first cell of a result row, Content the third one.
    Action<string, string> AddItem =
        (ImageElement, Content) =>
        {
            var ImageSource = "";

            if (ImageElement.StartsWith("<a"))
            {
                var ImageLink = ParseLink(ImageElement);
                var Image = ParseImage(ImageLink.Text);

                ImageSource = Image.Source;
            }

            /* Content looks like:
             * <img src="/images/b.gif" width="1" height="6"><br>
             * <a href="/title/tt0397892/" onclick="...">Bolt</a> (2008) <br>
             *  aka <em>"Bolt - Un perro fuera de serie"</em> - Argentina, Mexico<br>
             *  aka <em>"Bolt - Ein Hund fuer alle Faelle"</em> - Germany
             */
            var ContentLink_start = Content.IndexOf("<a");
            var ContentLink_end = ContentLink_start < 0 ? -1 : Content.IndexOf("</a>", ContentLink_start);
            if (ContentLink_end < 0)
            {
                // A row without a title link (spacer row, fewer than three
                // cells) carries no entry; previously this threw in Substring.
                return;
            }

            var ContentLink = ParseLink(Content.Substring(ContentLink_start, ContentLink_end - ContentLink_start + 4));

            var Details = Content.Substring(ContentLink_end + 4);

            var ReleaseDate = "";
            var Alias = default(AlsoKnownAs);

            // First <br>-separated piece is the release date, every further
            // piece is prepended to the linked list of aliases.
            Details.Split("<br>",
                (text, index) =>
                {
                    if (index == 0)
                    {
                        ReleaseDate = text;
                        return;
                    }

                    Alias = new AlsoKnownAs { Text = text, Alias = Alias };
                }
            );

            EntryIndex++;

            if (this.AddEntry != null)
            {
                this.AddEntry(
                    new Entry("http://" + Host + ContentLink.Link)
                    {
                        OptionalAlias = Alias,
                        OptionalReleaseDate = ReleaseDate,
                        OptionalTitle = ContentLink.Text,
                        OptionalImage = ImageSource
                    },
                    EntryIndex
                );
            }
        };
    #endregion

    // http://www.imdb.com/find?s=tt;site=aka;q=The%20Dark%20Knight

    //const string Header_Location = "Location: ";

    string Redirect = null;

    // Location: http://www.imdb.com/title/tt1129442/
    c.LocationReceived +=
        href =>
        {
            //Console.WriteLine("LocationReceived.");
            Redirect = href;
        };

    c.DataReceivedWithTimeSpan +=
        (document, elapsed) =>
        {
            //Console.WriteLine("DataReceivedWithTimeSpan.");

            #region redirect
            if (!string.IsNullOrEmpty(Redirect))
            {
                // Consume the redirect so that a later crawl on the same
                // instance is not mistaken for a redirect as well.
                var target = Redirect;
                Redirect = null;

                EntryIndex++;

                if (this.AddEntry != null)
                {
                    this.AddEntry(
                        new Entry(target)
                        {
                        },
                        EntryIndex
                    );
                }

                return;
            }
            #endregion

            var approx_section = document.IndexOf("<b>Titles (Approx Matches)</b>");
            var exact_section = document.IndexOf("<b>Titles (Exact Matches)</b>");
            var popular_section = document.IndexOf("<b>Popular Titles</b>");

            // Only the first section that exists is reported.
            var first_section = popular_section;

            if (first_section < 0)
            {
                first_section = exact_section;
            }

            if (first_section < 0)
            {
                first_section = approx_section;
            }

            if (first_section < 0)
            {
                return;
            }

            var section_start = document.IndexOf("<table>", first_section);
            if (section_start < 0)
            {
                return;
            }

            var section_end = document.IndexOf("</table>", section_start);
            if (section_end < 0)
            {
                return;
            }

            var section = document.Substring(section_start, section_end - section_start + "</table>".Length);

            BasicElementParser.Parse(section, "tr",
                (tr, tr_index) =>
                {
                    /* A row looks like:
                     * <td valign="top"><a href="/title/tt0397892/" ...><img src="http://ia.media-imdb.com/images/M/....jpg" width="23" height="32" border="0"></a></td>
                     * <td align="right" valign="top"><img src="/images/b.gif" width="1" height="6"><br>1.</td>
                     * <td valign="top"><img src="/images/b.gif" width="1" height="6"><br><a href="/title/tt0397892/" ...>Bolt</a> (2008) <br> aka <em>"..."</em> - Germany</td>
                     */
                    var Image = "";
                    var Content = "";

                    BasicElementParser.Parse(tr, "td",
                        (td, td_index) =>
                        {
                            if (td_index == 0)
                            {
                                Image = td;
                            }

                            if (td_index == 2)
                            {
                                Content = td;
                            }
                        }
                    );

                    AddItem(Image, Content);
                }
            );
        };

    //c.Crawl("/find?s=tt;site=aka;q=" + "The Dark Knight".URLEncode());
    //c.Crawl("/find?s=tt;site=aka;q=" + "Bolt".URLEncode());
}
/// <summary>
/// Wires up the crawler for thepiratebay.org result pages.
/// When a document arrives, <c>Loaded</c> (if attached) receives an enumerator
/// delegate; invoking that delegate with a callback walks the rows of the
/// <c>searchResult</c> table and reports one <c>Entry</c> plus running index
/// per row. A document without that table yields no rows.
/// </summary>
public BasicPirateBaySearch()
{
    this.Crawler = new BasicWebCrawler("thepiratebay.org", 80);

    this.Crawler.DataReceived +=
        document =>
        {
            // Each lookup depends on the previous one; String.IndexOf throws
            // for a negative start index, so stop at the first miss.
            var results = document.IndexOf("<table id=\"searchResult\">");
            var headend = results < 0 ? -1 : document.IndexOf("</thead>", results);
            var results_end = headend < 0 ? -1 : document.IndexOf("</table>", headend);
            var has_results = results_end >= 0;

            // Shared by every invocation of ForEachEntry for this document.
            int entryindex = -1;

            Action<Action<Entry, int>> ForEachEntry =
                AddEntry =>
                {
                    if (!has_results)
                    {
                        return;
                    }

                    #region ScanSingleResultOrReturn
                    // Parses the next <tr> at or after offset and returns the
                    // offset just behind it; returns offset unchanged when no
                    // further row exists inside the result table.
                    Func<int, int> ScanSingleResultOrReturn =
                        offset =>
                        {
                            var row_end_tag = "</tr>";

                            var itemstart = document.IndexOf("<tr>", offset);
                            if (itemstart < 0)
                            {
                                return offset;
                            }

                            if (itemstart > results_end)
                            {
                                return offset;
                            }

                            var itemend = document.IndexOf(row_end_tag, itemstart);
                            if (itemend < 0)
                            {
                                return offset;
                            }

                            if (itemend > results_end)
                            {
                                return offset;
                            }

                            var itemdata = document.Substring(itemstart, itemend - itemstart);

                            // A row looks like:
                            //<tr>
                            //<td class="vertTh"><a href="/browse/205" title="More from this category">Video > TV shows</a></td>
                            //<td><a href="/torrent/4727946/Heroes.S03E16.HDTV.XviD-XOR.avi" class="detLink" ...>Heroes.S03E16.HDTV.XviD-XOR.avi</a></td>
                            //<td>Today 04:55</td>
                            //<td><a href="http://torrents.thepiratebay.org/4727946/....torrent" title="Download this torrent">...</a>...</td>
                            //<td align="right">348.97 MiB</td>
                            //<td align="right">47773</td>
                            //<td align="right">60267</td>

                            // type, name, uploaded, links, size, se, le

                            var Fields = new BasicPirateBaySearch.Entry();

                            // Curried collector: every call stores one cell in
                            // a lambda parameter and re-points SetField at the
                            // next stage. The seventh call builds the entry
                            // and turns SetField into a no-op, so rows with
                            // fewer than seven cells keep the empty entry and
                            // extra cells are ignored.
                            Action<string> SetField = null;

                            SetField =
                                Type => SetField =
                                Name => SetField =
                                Time => SetField =
                                Links => SetField =
                                Size => SetField =
                                Seeders => SetField =
                                Leechers =>
                                {
                                    Fields = new BasicPirateBaySearch.Entry
                                    {
                                        Type = Type,
                                        Name = Name,
                                        Time = Time,
                                        Links = Links,
                                        Size = Size,
                                        Seeders = Seeders,
                                        Leechers = Leechers
                                    };

                                    SetField = delegate { };
                                };

                            var ep = new BasicElementParser();

                            ep.AddContent +=
                                (value, index) =>
                                {
                                    SetField(value);
                                };

                            ep.Parse(itemdata, "td");

                            entryindex++;

                            if (AddEntry != null)
                            {
                                AddEntry(Fields, entryindex);
                            }

                            return itemend + row_end_tag.Length;
                        };
                    #endregion

                    // NOTE(review): ToChainedFunc presumably keeps feeding the
                    // scanner its own result while the result grows - confirm
                    // against the extension's definition.
                    ScanSingleResultOrReturn.ToChainedFunc((x, y) => y > x)(headend);
                };

            if (this.Loaded != null)
            {
                this.Loaded(ForEachEntry);
            }
        };
}
/// <summary>
/// Wires up the crawler for IMDB poster pages. A response that carried a
/// Location header is followed by crawling the redirect target; otherwise the
/// image source found in the first cell of the <c>principal</c> table is
/// reported through <c>AddEntry</c>. Pages without that table report nothing.
/// </summary>
public BasicIMDBPosterSearch()
{
    this.Crawler = new Library.BasicWebCrawler("www.imdb.com", 80)
    {
        CoralEnabled = true
    };

    var DefaultImage = new { Source = "", Alt = "", Title = "" };

    // Parses an <img> element into its src, alt and title attributes.
    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

            return new { Source, Alt, Title };
        }
    );

    string location = null;

    // Forget the redirect of the previous request before a new answer arrives.
    this.Crawler.AllHeadersSent +=
        () =>
        {
            location = null;
        };

    this.Crawler.LocationReceived +=
        value =>
        {
            location = value;
        };

    this.Crawler.DataReceived +=
        document =>
        {
            if (!string.IsNullOrEmpty(location))
            {
                var u = new Uri(location);

                this.Crawler.Crawl(u.PathAndQuery);
                return;
            }

            var poster_tag = "<table id=\"principal\">";
            var poster_close_tag = "</table>";

            var poster_i = document.IndexOf(poster_tag);
            if (poster_i < 0)
            {
                // Not a poster page; IndexOf/Substring below would throw.
                return;
            }

            var poster_close_i = document.IndexOf(poster_close_tag, poster_i);
            if (poster_close_i < 0)
            {
                return;
            }

            var poster_table = document.Substring(poster_i, poster_close_i + poster_close_tag.Length - poster_i);
            var poster = ParseImage(BasicElementParser.GetContent(poster_table, "td"));

            if (this.AddEntry != null)
            {
                this.AddEntry(poster.Source);
            }
        };
}
/// <summary>
/// Searches www.omdb.si for <paramref name="stitle"/> and reports every title
/// found on the first result page.
/// </summary>
/// <param name="stitle">Title to look for; URL-encoded before it is sent.</param>
/// <param name="handler">Invoked once per parsed result with link, title, year, genres and duration.</param>
public static void Search(string stitle, Action<AliasEntry> handler)
{
    var c = new BasicWebCrawler("www.omdb.si", 80);

    var DefaultLink = new { Link = "", Title = "", Text = "" };

    // Parses an <a> element into its href, title and inner content.
    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );

    c.DataReceived +=
        document =>
        {
            var trigger_tag = "<table width=\"100%\" class=\"fW\">";
            var trigger_end_tag = "</table>";

            // Parses the next result box at or after offset and returns the
            // offset behind it. Returning offset unchanged ends the scan; that
            // is done whenever an expected piece of markup is missing.
            Func<int, int> scan =
                offset =>
                {
                    var trigger_i = document.IndexOf(trigger_tag, offset);
                    if (trigger_i < 0)
                    {
                        return offset;
                    }

                    var trigger_end_i = document.IndexOf(trigger_end_tag, trigger_i);
                    if (trigger_end_i < 0)
                    {
                        return offset;
                    }

                    var data = document.Substring(trigger_i + trigger_tag.Length, trigger_end_i - trigger_i - trigger_tag.Length);

                    /* The relevant part of a result box:
                     * <div align="left">
                     * <table width="100%" border="0" cellspacing="0" cellpadding="0">
                     * <tr>
                     * <td width="444" align="left">
                     * <div align="left"><a href="/index.php/ofilm/?i=401737">Lost <b>(2004)</b></a></div>
                     * </td>
                     * ...
                     * <span class="oddelki_forum_mala">Genre: <b>Drama, Adventure, Mystery, Thriller</b> Duration: <b>45 min</b></span>
                     */

                    var title_tag = "<div align=\"left\">";
                    var title_end_tag = "</div>";

                    var title_i = data.IndexOf(title_tag);
                    if (title_i < 0)
                    {
                        return offset;
                    }

                    // The title sits in the second such div.
                    title_i = data.IndexOf(title_tag, title_i + title_tag.Length);
                    if (title_i < 0)
                    {
                        return offset;
                    }

                    var title_end_i = data.IndexOf(title_end_tag, title_i);
                    if (title_end_i < 0)
                    {
                        return offset;
                    }

                    // Lost <b>(2004)</b>
                    var title = ParseLink(data.Substring(title_i + title_tag.Length, title_end_i - title_i - title_tag.Length));

                    var genre_tag = "<span class=\"oddelki_forum_mala\">";
                    var genre_end_tag = "</span>";

                    var genre_i = data.IndexOf(genre_tag, title_end_i);
                    if (genre_i < 0)
                    {
                        return offset;
                    }

                    var genre_end_i = data.IndexOf(genre_end_tag, genre_i);
                    if (genre_end_i < 0)
                    {
                        return offset;
                    }

                    // Genre: <b>Drama, Adventure, Mystery, Thriller</b> Duration: <b>45 min</b>
                    var genre = data.Substring(genre_i + genre_tag.Length, genre_end_i - genre_i - genre_tag.Length);

                    // The plain title ends where the bold year starts; a
                    // title without markup is used as is.
                    var year_i = title.Text.IndexOf("<");
                    var title_text = year_i < 0 ? title.Text : title.Text.Substring(0, year_i);

                    var e = new AliasEntry
                    {
                        Genres = genre.Substring("Genre: <b>", "</b>").Split(new[] { ',' }).Trim(),
                        Duration = genre.Substring("Duration: <b>", "</b>"),
                        Link = "http://www.omdb.si" + title.Link,
                        Title = title_text,
                        Year = title.Text.Substring("<b>", "</b>")
                    };

                    handler(e);

                    return trigger_end_i + trigger_end_tag.Length;
                };

            var start_tag = "<td align=\"right\" class=\"bM\">";

            var start_offset = document.IndexOf(start_tag);
            if (start_offset < 0)
            {
                // No result area on this page. The former second lookup from
                // the same position found the same index and threw on -1.
                return;
            }

            // NOTE(review): ToChainedFunc presumably re-invokes scan with its
            // own result while the result grows - confirm against the extension.
            scan.ToChainedFunc((x, y) => y > x)(start_offset);
        };

    // we will only look at the first result page
    c.Crawl("/index.php/odefault/search?sK=" + stitle.URLEncode());
}
/// <summary>
/// Searches www.movieposterdb.com for movies named <paramref name="title"/>
/// and reports every row of the "Movies" result table.
/// </summary>
/// <param name="title">Title to look for; URL-encoded before it is sent.</param>
/// <param name="handler">
/// Invoked once per result row with the movie link, title and year. Never
/// invoked when the page has no "Movies" heading.
/// </param>
public static void Search(string title, Action<AliasEntry> handler)
{
    var t = new Uri("http://www.movieposterdb.com/browse/search?search_type=movies&title=");
    var c = new BasicWebCrawler(t.Host, 80);

    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultSpan = new { Text = "", Title = "" };

    // Parses the <span> inside an element into its content and title.
    var ParseSpan = DefaultSpan.ToAnonymousConstructor(
        (string element) =>
        {
            var Text = "";
            var Title = "";

            element.
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse("span");

            return new { Text, Title };
        }
    );

    // Parses the <a> inside an element into its href, title and content.
    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse("a");

            return new { Link, Title, Text };
        }
    );

    c.DataReceived +=
        document =>
        {
            var trigger = "Movies</h3>";

            var trigger_i = document.IndexOf(trigger);
            if (trigger_i < 0)
            {
                // No movie results on the page; Substring(-1) would throw.
                return;
            }

            var data = BasicElementParser.GetContent(document.Substring(trigger_i), "table");

            BasicElementParser.Parse(data, "tr",
                (element, index) =>
                {
                    /* A row looks like:
                     * <td valign="middle" style="..."><img src="http://www.movieposterdb.com/posters/08_09/2008/1179855/m_1179855_4fb9999f.jpg" style="..." /></td>
                     * <td valign="middle" style="...">
                     *   <b><a class="bbg" href="http://www.movieposterdb.com/movie/1179855/Go-Fast.html">Go Fast</a><br /><span style="color: #8C8C8C;">2008</span></b>
                     * </td>
                     * <td style="..."></td>
                     */
                    BasicElementParser.Parse(element, "td",
                        (tdelement, tdindex) =>
                        {
                            // Only the second cell carries the link and year.
                            if (tdindex != 1)
                            {
                                return;
                            }

                            var _title = ParseLink(tdelement);
                            var _year = ParseSpan(tdelement);

                            handler(
                                new AliasEntry
                                {
                                    Link = _title.Link,
                                    Title = _title.Text,
                                    Year = _year.Text
                                }
                            );
                        }
                    );
                }
            );
        };

    c.Crawl(t.PathAndQuery + title.URLEncode());
}
/// <summary>
/// Wires up the crawler for video.google.com pages: the first embed tag of the
/// page is parsed and the video id found in its <c>src</c> (the text between
/// "v/" and the next "&amp;") is reported through <c>VideoSourceFound</c>
/// together with the full source address.
/// </summary>
public BasicGoogleVideoCrawler()
{
    var c = new BasicWebCrawler("video.google.com", 80);

    this.Crawler = c;

    // The page carries the player markup inside a hidden div:
    //<div class="embed_html" style="display: none">
    //  <object id="object_player_1" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" ...>
    //  <embed id="embed_player_1" width="100%" height="100%" bgcolor="#000000"
    //         type="application/x-shockwave-flash"
    //         pluginspage="http://www.macromedia.com/go/getflashplayer"
    //         allowScriptAccess="always" allowFullScreen="true"
    //         src="http://www.youtube.com/v/aDWPsoKQoOs&fs=1&hl=en&enablejsapi=1&playerapiid=embed_player_1"/>
    //  </object>
    //</div>

    var ParseEmbed =
        new
        {
            id = "",
            width = "",
            height = "",
            bgcolor = "",
            type = "",
            pluginspage = "",
            allowScriptAccess = "",
            allowFullScreen = "",
            src = "",
        }.ToAnonymousConstructor(
            (string element) =>
            {
                string
                    id = "",
                    width = "",
                    height = "",
                    bgcolor = "",
                    type = "",
                    pluginspage = "",
                    allowScriptAccess = "",
                    allowFullScreen = "",
                    src = "";

                element.
                    ParseAttribute("id", value => id = value).
                    ParseAttribute("width", value => width = value).
                    ParseAttribute("height", value => height = value).
                    ParseAttribute("bgcolor", value => bgcolor = value).
                    ParseAttribute("type", value => type = value).
                    ParseAttribute("pluginspage", value => pluginspage = value).
                    ParseAttribute("allowScriptAccess", value => allowScriptAccess = value).
                    ParseAttribute("allowFullScreen", value => allowFullScreen = value).
                    ParseAttribute("src", value => src = value).
                    ParseContent(null).
                    Parse();

                return new { id, width, height, bgcolor, type, pluginspage, allowScriptAccess, allowFullScreen, src };
            }
        );

    c.DataReceived +=
        document =>
        {
            // NOTE(review): the entity literals in this handler had been
            // decoded in the source (an uncompilable quote literal, no-op
            // replacements, and an end offset of 5 that only matches
            // "/&gt;"). They are restored here; the plain-markup form is
            // kept as a fallback - confirm against a live page.
            var embed_open = "&lt;embed";
            var embed_close = "/&gt;";

            var embed_start = document.IndexOf(embed_open);
            if (embed_start < 0)
            {
                embed_open = "<embed";
                embed_close = "/>";
                embed_start = document.IndexOf(embed_open);
            }

            if (embed_start < 0)
            {
                return;
            }

            var embed_end = document.IndexOf(embed_close, embed_start);
            if (embed_end < 0)
            {
                return;
            }

            // "&amp;" is decoded last so that an escaped entity such as
            // "&amp;lt;" is not decoded twice.
            var embed_content = document.
                Substring(embed_start, embed_end - embed_start + embed_close.Length).
                Replace("&quot;", "\"").
                Replace("&lt;", "<").
                Replace("&gt;", ">").
                Replace("&amp;", "&");

            var embed = ParseEmbed(embed_content);

            if (string.IsNullOrEmpty(embed.src))
            {
                return;
            }

            var video_start = embed.src.IndexOf("v/");
            if (video_start < 0)
            {
                return;
            }

            // An id that is not followed by further parameters runs to the end.
            var video_end = embed.src.IndexOf("&", video_start);
            if (video_end < 0)
            {
                video_end = embed.src.Length;
            }

            var video = embed.src.Substring(video_start + 2, video_end - video_start - 2);

            if (this.VideoSourceFound != null)
            {
                this.VideoSourceFound(video, embed.src);
            }
        };
}
/// <summary>
/// Wires a downloader and an uploader crawler together so that whatever the
/// downloader streams is re-posted to bayimg.com as a multipart/form-data
/// upload; the link found in the upload answer is reported through
/// <c>AddEntry</c>.
/// </summary>
/// <remarks>
/// The uploader's header and body writers are attached exactly once. They read
/// the state of the most recent download, so using the instance for several
/// downloads no longer stacks up additional writers.
/// </remarks>
public BasicPirateBayImage()
{
    this.CrawlerDownloader = new Library.BasicWebCrawler("", 80)
    {
        //CoralEnabled = true
    };

    this.CrawlerUploader = new Library.BasicWebCrawler("bayimg.com", 80)
    {
        Method = "POST"
    };

    #region parser
    var DefaultLink = new { Link = "", Title = "", Text = "" };

    // Parses an <a> element into its href, title and inner content.
    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );
    #endregion

    // http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
    var boundary = "---------------------------" + int.MaxValue.Random();

    // State of the download currently being relayed.
    var current_filename = "_" + int.MaxValue.Random();
    var current_length = -1;            // -1: no Content-Length seen yet
    Stream current_source = null;       // null: nothing to upload yet

    #region WriteMultipartBody
    // Writes the complete multipart body. With a null source nothing is
    // copied; the stream position is advanced by sourcelength instead, which
    // allows measuring the body size up front.
    Action<StreamWriter, Stream, int, string> WriteMultipartBody =
        (stream, source, sourcelength, filename) =>
        {
            stream.AutoFlush = true;

            stream.WriteLine("--" + boundary);
            stream.WriteLine("Content-Disposition: form-data; name=\"file\"; filename=\"" + filename + "\"");
            stream.WriteLine("Content-Type: application/octet-stream");
            stream.WriteLine();

            if (source == null)
            {
                stream.BaseStream.Position += sourcelength;
            }
            else
            {
                var buffer = new byte[0x1000];
                var size = source.Read(buffer, 0, buffer.Length);

                while (size > 0)
                {
                    stream.BaseStream.Write(buffer, 0, size);
                    size = source.Read(buffer, 0, buffer.Length);
                }
            }

            stream.WriteLine();
            stream.WriteLine("--" + boundary);
            stream.WriteLine("Content-Disposition: form-data; name=\"code\"");
            stream.WriteLine();
            stream.WriteLine("tpb");
            stream.WriteLine("--" + boundary);
            stream.WriteLine("Content-Disposition: form-data; name=\"tags\"");
            stream.WriteLine();
            stream.WriteLine("");
            stream.WriteLine("--" + boundary + "--");
        };
    #endregion

    this.CrawlerUploader.HeaderWriter +=
        stream =>
        {
            if (current_length < 0)
            {
                // As before, no upload headers without a known length.
                return;
            }

            stream.WriteLine("Content-Type: multipart/form-data; boundary=" + boundary);

            // Dry run against a void stream to learn the total body size.
            using (var v = new StreamWriter(new VoidStream()))
            {
                WriteMultipartBody(v, null, current_length, current_filename);

                stream.WriteLine("Content-Length: " + v.BaseStream.Position);
            }
        };

    this.CrawlerUploader.StreamWriter +=
        stream =>
        {
            if (current_source == null)
            {
                return;
            }

            WriteMultipartBody(stream, current_source, 0, current_filename);
        };

    this.CrawlerDownloader.ContentLengthReceived +=
        ContentLength =>
        {
            current_filename = "_" + int.MaxValue.Random();
            current_length = int.Parse(ContentLength);
        };

    this.CrawlerDownloader.StreamReader +=
        source =>
        {
            current_source = source;

            this.CrawlerUploader.Crawl("/upload");
        };

    this.CrawlerUploader.DataReceived +=
        document =>
        {
            var result_tag = "<div id=\"extra2\">";
            var result_end_tag = "<br/>";

            var result_i = document.IndexOf(result_tag);
            if (result_i < 0)
            {
                // Upload was not accepted (or the page layout changed).
                return;
            }

            var result_end_i = document.IndexOf(result_end_tag, result_i);
            if (result_end_i < 0)
            {
                return;
            }

            var data = document.Substring(result_i + result_tag.Length, result_end_i - (result_i + result_tag.Length)).Trim();

            // http://bayimg.com/image/eaofgaabg.jpg
            var Link = ParseLink(data);

            if (string.IsNullOrEmpty(Link.Link))
            {
                return;
            }

            if (this.AddEntry != null)
            {
                // NOTE(review): Substring(1) drops the first character of the
                // link, presumably a leading '/' - confirm.
                this.AddEntry(new Entry(Link.Link.Substring(1).ToLower()));
            }
        };
}
/// <summary>
/// Wires up the crawler for IMDB title pages. Every received document is
/// turned into an <c>Entry</c> (title, year, poster, user rating, genres,
/// runtime, tagline) that is passed to <c>AddEntry</c>; parts that cannot be
/// found are left empty instead of aborting the whole entry.
/// </summary>
public BasicIMDBCrawler()
{
    this.Crawler = new Library.BasicWebCrawler("www.imdb.com", 80)
    {
        //CoralEnabled = true
    };

    var DefaultLink = new { Link = "", Title = "", Text = "" };
    var DefaultImage = new { Source = "", Alt = "", Title = "" };

    // Parses an <a> element into its href, title and inner content.
    var ParseLink = DefaultLink.ToAnonymousConstructor(
        (string element) =>
        {
            var Link = "";
            var Title = "";
            var Text = "";

            element.
                ParseAttribute("href", value => Link = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text = value).
                Parse();

            return new { Link, Title, Text };
        }
    );

    // Parses an <img> element into its src, alt and title attributes.
    var ParseImage = DefaultImage.ToAnonymousConstructor(
        (string element) =>
        {
            var Source = "";
            var Alt = "";
            var Title = "";

            element.
                ParseAttribute("src", value => Source = value).
                ParseAttribute("alt", value => Alt = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

            return new { Source, Alt, Title };
        }
    );

    this.Crawler.DataReceived +=
        document =>
        {
            // Text between position from and the next end_tag, or null when
            // the end tag does not follow.
            Func<int, string, string> TextUntil =
                (from, end_tag) =>
                {
                    var end = document.IndexOf(end_tag, from);

                    return end < 0 ? null : document.Substring(from, end - from);
                };

            var entry = new Entry();

            #region Title and Year
            // The page title has the form: Name (Year)
            var title = BasicElementParser.GetContent(document, "title") ?? "";
            var title_i = title.IndexOf("(");

            if (title_i < 0)
            {
                entry.Title = title.Trim();
                entry.Year = "";
            }
            else
            {
                entry.Title = title.Substring(0, title_i).Trim();

                var year_i = title_i + 1;
                var year_end = title.IndexOf(")", year_i);

                entry.Year = year_end < 0
                    ? title.Substring(year_i)
                    : title.Substring(year_i, year_end - year_i);
            }

            // remove quotes from the title
            // NOTE(review): the literal had been entity-decoded in the source,
            // so which spelling the page uses is not visible - all common
            // spellings of a double quote are stripped.
            foreach (var quote in new[] { "&quot;", "&#34;", "&#x22;", "\"" })
            {
                entry.Title = entry.Title.Replace(quote, "");
            }
            #endregion

            #region Poster
            var poster_i = document.IndexOf("name=\"poster\"");

            // no poster - the poster may be found on other services
            if (poster_i >= 0)
            {
                var poster_j = document.Substring(0, poster_i).LastIndexOf("<a");
                var poster_q = document.IndexOf("</a>", poster_i);

                if (poster_j >= 0 && poster_q >= 0)
                {
                    var poster = ParseLink(document.Substring(poster_j, poster_q - poster_j + 4));
                    var poster_image = ParseImage(poster.Text);

                    entry.MediumPosterImageProvider = "imdb";
                    entry.MediumPosterImage = poster_image.Source;
                    entry.MediumPosterImagePage = poster.Link;
                    entry.MediumPosterTitle = poster.Title;
                }
            }
            #endregion

            #region UserRating
            var meta_tag = "<div class=\"meta\">";
            var meta_i = document.IndexOf(meta_tag);
            var meta = meta_i < 0 ? null : TextUntil(meta_i + meta_tag.Length, "</div>");

            entry.UserRating = meta == null ? "" : BasicElementParser.GetContent(meta, "b");
            #endregion

            #region Genres
            var genre_tag = "<h5>Genre:</h5>";
            var genre_i = document.IndexOf(genre_tag);
            var genres = new List<string>();
            var genre = genre_i < 0 ? null : TextUntil(genre_i + genre_tag.Length, "</div>");

            if (genre != null)
            {
                BasicElementParser.Parse(genre, "a",
                    (text, index) =>
                    {
                        // The trailing "more" link is not a genre.
                        if (text == "more")
                        {
                            return;
                        }

                        genres.Add(text);
                    }
                );
            }

            entry.Genres = genres.ToArray();
            #endregion

            // Runtime and tagline are searched from the genre block onwards,
            // or from the top when there is no genre block.
            if (genre_i < 0)
            {
                genre_i = 0;
            }

            #region Runtime
            var runtime_tag = "<h5>Runtime:</h5>";
            var runtime_i = document.IndexOf(runtime_tag, genre_i);
            var runtime = runtime_i < 0 ? null : TextUntil(runtime_i + runtime_tag.Length, "</div>");

            entry.Runtime = runtime == null ? "" : runtime.Trim();
            #endregion

            #region Tagline
            var Tagline_tag = "<h5>Tagline:</h5>";
            var Tagline_i = document.IndexOf(Tagline_tag, genre_i);
            var Tagline = Tagline_i < 0 ? null : TextUntil(Tagline_i + Tagline_tag.Length, "<");

            entry.Tagline = Tagline == null ? "" : Tagline.Trim();
            #endregion

            if (AddEntry != null)
            {
                AddEntry(entry);
            }
        };
}
/// <summary>
/// Wires up the crawler for tinyurl.com. In <c>APIMode</c> the whole response
/// is the alias; otherwise the "TinyURL was created!" page is parsed for the
/// original URL (first blockquote) and its alias (second blockquote). The
/// resulting <c>Entry</c> is passed to <c>AddEntry</c>; an HTML page that does
/// not have the expected structure produces no entry.
/// </summary>
public BasicTinyURLCrawler()
{
    this.Crawler = new Library.BasicWebCrawler("tinyurl.com", 80)
    {
        //CoralEnabled = true
    };

    this.Crawler.DataReceived +=
        document =>
        {
            var entry = new Entry();

            if (APIMode)
            {
                entry.Alias = document;
            }
            else
            {
                var trigger = "<h1>TinyURL was created!</h1>";
                var trigger_i = document.IndexOf(trigger);

                if (trigger_i < 0)
                {
                    return;
                }

                // we are still in the business...

                //<h1>TinyURL was created!</h1>
                //<p>The following URL:
                //<blockquote><b>http://thepiratebay.org<br />
                //</b></blockquote>
                //has a length of 23 characters and resulted in the following TinyURL which has a length of 24 characters:
                //<blockquote><b>http://tinyurl.com/5umsn</b><br><small>[<a href="http://tinyurl.com/5umsn" target="_blank">Open in new window</a>]</small></blockquote>
                //Or, give your recipients confidence with a preview TinyURL:
                //<blockquote><b>http://preview.tinyurl.com/5umsn</b><br><small>[<a href="http://preview.tinyurl.com/5umsn" target="_blank">Open in new window</a>]</small>
                //</blockquote>
                //</p>

                var start_tag = "<p>";
                var start_i = document.IndexOf(start_tag, trigger_i);

                if (start_i < 0)
                {
                    return;
                }

                var end_tag = "</p>";
                var end_i = document.IndexOf(end_tag, start_i);

                if (end_i < 0)
                {
                    return;
                }

                // Content strictly between <p> and </p>. The length used to be
                // computed with "+ start_tag.Length", which ran six characters
                // past the paragraph and could exceed the document.
                var data_i = start_i + start_tag.Length;
                var data = document.Substring(data_i, end_i - data_i);

                BasicElementParser.Parse(data, "blockquote",
                    (value, index) =>
                    {
                        if (index == 0)
                        {
                            entry.URL = BasicElementParser.GetContent(value, "b");

                            // The original URL is followed by a line break
                            // inside the bold element; cut it off.
                            var br_tag = "<br />";
                            var br_i = entry.URL.IndexOf(br_tag);

                            if (br_i >= 0)
                            {
                                entry.URL = entry.URL.Substring(0, br_i);
                            }

                            return;
                        }

                        if (index == 1)
                        {
                            entry.Alias = BasicElementParser.GetContent(value, "b");
                            return;
                        }
                    }
                );
            }

            if (this.AddEntry != null)
            {
                this.AddEntry(entry);
            }
        };
}