/// <summary>
/// Attaches a search result scanner to <paramref name="Crawler"/>.
/// Every document delivered through <c>Crawler.DataReceived</c> is searched for the
/// <c>&lt;table id="searchResult"&gt;</c> element, and <c>Loaded</c> is raised with a delegate
/// that enumerates the table rows as <c>Entry</c> values together with a zero based index.
/// </summary>
/// <param name="Crawler">The crawler whose received documents are scanned.</param>
public BasicPirateBaySearch(BasicWebCrawler Crawler)
{
    this.Crawler = Crawler;

    this.Crawler.DataReceived +=
        document =>
        {
            // Each marker is searched only when the previous one was found:
            // String.IndexOf(value, startIndex) throws for a negative startIndex.
            var results = document.IndexOf("<table id=\"searchResult\">");

            var headend = -1;
            if (results >= 0)
                headend = document.IndexOf("</thead>", results);

            var results_end = -1;
            if (headend >= 0)
                results_end = document.IndexOf("</table>", headend);

            Action<Action<Entry, int>> ForEachEntry =
                AddEntry =>
                {
                    // No complete result table in this document: there is nothing to enumerate.
                    if (results_end < 0)
                        return;

                    // Restarted for every enumeration so that each caller sees indices 0, 1, 2, ...
                    int entryindex = -1;

                    #region ScanSingleResultOrReturn
                    // Parses the first <tr>...</tr> found at or after offset and returns the position
                    // just behind it. Returns offset unchanged when no further row lies inside the table.
                    Func<int, int> ScanSingleResultOrReturn =
                        offset =>
                        {
                            var itemstart = document.IndexOf("<tr>", offset);
                            if (itemstart < 0) return offset;
                            if (itemstart > results_end) return offset;

                            var itemend = document.IndexOf("</tr>", itemstart);
                            if (itemend < 0) return offset;
                            if (itemend > results_end) return offset;

                            var itemdata = document.Substring(itemstart, itemend - itemstart);

                            // Sample row:
                            //<tr>
                            //<td class="vertTh"><a href="/browse/205" title="More from this category">Video > TV shows</a></td>
                            //<td><a href="/torrent/4727946/Heroes.S03E16.HDTV.XviD-XOR.avi" class="detLink" title="Details for Heroes.S03E16.HDTV.XviD-XOR.avi">Heroes.S03E16.HDTV.XviD-XOR.avi</a></td>
                            //<td>Today 04:55</td>
                            //<td><a href="http://torrents.thepiratebay.org/4727946/Heroes.S03E16.HDTV.XviD-XOR.avi.4727946.TPB.torrent" title="Download this torrent"><img src="http://static.thepiratebay.org/img/dl.gif" class="dl" alt="Download" /></a><img src="http://static.thepiratebay.org/img/icon_comment.gif" alt="This torrent has 22 comments." title="This torrent has 22 comments." /><img src="http://static.thepiratebay.org/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" /></td>
                            //<td align="right">348.97 MiB</td>
                            //<td align="right">47773</td>
                            //<td align="right">60267</td>

                            // column order: type, name, uploaded, links, size, se, le

                            // Stays an empty entry unless all seven columns are delivered.
                            var Fields = new BasicPirateBaySearch.Entry();

                            // Curried collector: every call stores one column value and replaces SetField
                            // with the handler for the next column. The seventh call builds the entry and
                            // turns SetField into a no-op so that extra columns are ignored.
                            Action<string> SetField = null;

                            SetField =
                                Type => SetField =
                                Name => SetField =
                                Time => SetField =
                                Links => SetField =
                                Size => SetField =
                                Seeders => SetField =
                                Leechers =>
                                {
                                    Fields = new BasicPirateBaySearch.Entry
                                    {
                                        Type = Type,
                                        Name = Name,
                                        Time = Time,
                                        Links = Links,
                                        Size = Size,
                                        Seeders = Seeders,
                                        Leechers = Leechers
                                    };

                                    SetField = delegate { };
                                };

                            // NOTE(review): BasicElementParser is defined elsewhere; presumably it raises
                            // AddContent once per <td> element in document order - confirm.
                            var ep = new BasicElementParser();

                            ep.AddContent +=
                                (value, index) =>
                                {
                                    SetField(value);
                                };

                            ep.Parse(itemdata, "td");

                            entryindex++;

                            if (AddEntry != null)
                                AddEntry(Fields, entryindex);

                            // 5 is the length of "</tr>": continue right behind this row.
                            return itemend + 5;
                        };
                    #endregion

                    // NOTE(review): ToChainedFunc is defined elsewhere; presumably it feeds each result back
                    // into the scanner for as long as the predicate (next > previous) holds - confirm.
                    ScanSingleResultOrReturn.ToChainedFunc((x, y) => y > x)(headend);
                };

            if (this.Loaded != null)
                this.Loaded(ForEachEntry);
        };
}
/// <summary>
/// Writes an HTML page to the console that lists the entries of the Pirate Bay "/top/200" page.
/// For every entry a trailer is looked up through <c>BasicGoogleVideoCrawler</c> and wired to the
/// embedded player via an <c>onmouseover</c> handler.
/// </summary>
private static void ShowPirateBay()
{
    // Page stylesheet.
    Console.WriteLine("<style>");
    Console.WriteLine("img { border: 0; }");
    Console.WriteLine(@" embed { width: 100%; height: 100%; position: absolute; left: 0; top: 0; z-index: 0; } ol { display: block; width: 100%; height: 100%; position: absolute; left: 0; top: 0; z-index: 1; overflow: scroll; } li { color: white; cursor:hand; } body{ text-align: center; font-family:Verdana, Arial, Helvetica, sans-serif; font-size:.7em; margin: 10px; color: #fff; background: #fff; min-width: 520px; overflow: hidden; } a{ color: #009; text-decoration: none; border-bottom: 1px dotted #4040D9; } a:hover{ text-decoration: none; border-bottom: 1px solid #009; } li { text-align: left; margin: 1em;} ");
    Console.WriteLine("</style>");

    // Prototype instances that fix the shape of the anonymous types produced by the parsers below.
    var linkShape = new { Link = "", Title = "", Text = "" };
    var imageShape = new { Source = "", Alt = "", Title = "" };

    // Turns an <a> element into { Link, Title, Text }.
    var ParseLink = linkShape.ToAnonymousConstructor(
        (string element) =>
        {
            string href = "", caption = "", body = "";

            element.
                ParseAttribute("href", value => href = value).
                ParseAttribute("title", value => caption = value).
                ParseContent(value => body = value).
                Parse();

            return new { Link = href, Title = caption, Text = body };
        }
    );

    // Turns an <img> element into { Source, Alt, Title }.
    var ParseImage = imageShape.ToAnonymousConstructor(
        (string element) =>
        {
            string src = "", alt = "", caption = "";

            element.
                ParseAttribute("src", value => src = value).
                ParseAttribute("alt", value => alt = value).
                ParseAttribute("title", value => caption = value).
                ParseContent(null).
                Parse();

            return new { Source = src, Alt = alt, Title = caption };
        }
    );

    var crawler = new BasicWebCrawler("thepiratebay.org", 80);
    var search = new BasicPirateBaySearch(crawler);

    crawler.AllHeadersReceived +=
        delegate
        {
            Native.API.set_time_limit(10);
        };

    search.Loaded +=
        enumerate =>
        {
            // Chromeless player, see http://code.google.com/apis/youtube/chromeless_example_1.html
            Console.WriteLine("<embed wmode='transparent' id='tv' src='http://www.youtube.com/apiplayer?enablejsapi=1&playerapiid=tv' allowScriptAccess='always' width='400' height='300' />");

            Console.WriteLine("<ol>");

            enumerate(
                (entry, position) =>
                {
                    var category = ParseLink(entry.Type);
                    var title = ParseLink(entry.Name);
                    var parsed = new BasicFileNameParser(title.Text);

                    // Look up a trailer; both values stay empty when nothing is reported.
                    var trailers = new BasicGoogleVideoCrawler();
                    var videoId = "";
                    var videoUrl = "";

                    trailers.VideoSourceFound +=
                        (video, src) =>
                        {
                            videoId = video;
                            videoUrl = src;
                        };

                    Native.API.set_time_limit(6);

                    trailers.Search(parsed.Title + " trailer");

                    // Writes prefix + value + suffix on its own line, but only for a non-empty value.
                    Action<string, string, string> WriteIfPresent =
                        (prefix, value, suffix) =>
                        {
                            if (string.IsNullOrEmpty(value))
                                return;

                            Console.WriteLine(prefix + value + suffix);
                        };

                    // Headline: title, optional season / episode / subtitle / year, trailer link.
                    Console.WriteLine("<li onmouseover='if (getElementById(\"tv\").getPlayerState() != 1) getElementById(\"tv\").loadVideoById(\"" + videoId + "\")'>");
                    Console.WriteLine("<b>" + parsed.Title.ToLink(k => "http://www.imdb.com/find?s=tt;site=aka;q=" + k) + "</b>");

                    WriteIfPresent(" | Season <i>", parsed.Season, "</i>");
                    WriteIfPresent(" | Episode <i>", parsed.Episode, "</i>");
                    WriteIfPresent(" | <b>", parsed.SubTitle, "</b>");
                    WriteIfPresent(" | <i>", parsed.Year, "</i>");

                    Console.WriteLine(" | ");
                    Console.WriteLine("<b>");
                    Console.WriteLine("trailer".ToLink(videoUrl, videoId));
                    Console.WriteLine("</b>");
                    Console.WriteLine("<br />");

                    // Detail line: original name, category, download / comment links, size, seeders, leechers.
                    Console.WriteLine("<small>");
                    Console.WriteLine(parsed.ColoredText.ToString().ToLink("http://thepiratebay.org" + title.Link) + "<br />");
                    Console.WriteLine(category.Text.ToLink("http://thepiratebay.org" + category.Link));

                    entry.Links.ParseElements(
                        (tag, index, element) =>
                        {
                            if (tag == "a")
                            {
                                var anchor = ParseLink(element);

                                Console.WriteLine(" | " + "torrent".ToLink(anchor.Link));
                            }
                            else if (tag == "img")
                            {
                                var icon = ParseImage(element);

                                // The comment counter links to the details page; other icons are shown as text.
                                if (icon.Title.Contains("comment"))
                                {
                                    Console.WriteLine(" | " + icon.Title.ToLink("http://thepiratebay.org" + title.Link));
                                }
                                else
                                {
                                    Console.WriteLine(" | " + icon.Title);
                                }
                            }
                        }
                    );

                    Console.WriteLine(" | " + entry.Size);
                    Console.WriteLine(" | " + entry.Seeders);
                    Console.WriteLine(" | " + entry.Leechers + "<br />");
                    Console.WriteLine("</small>");
                    Console.WriteLine("</li>");
                }
            );

            Console.WriteLine("</ol>");
        };

    crawler.Crawl("/top/200");
}
/// <summary>
/// Crawls the root page of example.com, counts the HTTP headers reported by the crawler and
/// writes the received document to the console with two phrases rewritten.
/// </summary>
private static void ShowExampleDotCom()
{
    var site = new BasicWebCrawler("example.com", 80);
    var headerCount = 0;

    // One notification per header.
    site.HeaderReceived +=
        delegate
        {
            headerCount += 1;
        };

    site.DataReceived +=
        page =>
        {
            // Inject the header count, then the "including examples" remark, in that order.
            var annotated = page
                .Replace(
                    "reached this web page",
                    "<b>received " + headerCount + " HTTP header(s)</b> and you have reached this web page"
                )
                .Replace(
                    "are reserved for use in documentation and",
                    "are reserved for use in documentation <b>including examples</b> and"
                );

            Console.Write(annotated);
        };

    site.Crawl("/");
}
/// <summary>
/// Creates a crawler for video.google.com and subscribes to its <c>DataReceived</c> event.
/// The first <c>&lt;embed .../&gt;</c> element of a received document is parsed, the video id is
/// cut out of its <c>src</c> attribute (the text between "v/" and the next "&amp;") and reported
/// through <c>VideoSourceFound</c> together with the complete <c>src</c> value.
/// Documents without a usable embed element are ignored and raise no event.
/// </summary>
public BasicGoogleVideoCrawler()
{
    var c = new BasicWebCrawler("video.google.com", 80);

    this.Crawler = c;

    // Sample markup:
    //<div class="embed_html" style="display: none">
    //    <object id="object_player_1" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=9,0,0,0" width="100%" height="100%"><param name="movie" value="http://www.youtube.com/v/aDWPsoKQoOs&fs=1&hl=en&enablejsapi=1&playerapiid=object_player_1"/><param name="allowFullScreen" value="true"/><param name="allowScriptAccess" value="always"/>
    //        <embed
    //            id="embed_player_1"
    //            width="100%"
    //            height="100%"
    //            bgcolor="#000000"
    //            type="application/x-shockwave-flash"
    //            pluginspage="http://www.macromedia.com/go/getflashplayer"
    //            allowScriptAccess="always"
    //            allowFullScreen="true"
    //            src="http://www.youtube.com/v/aDWPsoKQoOs&fs=1&hl=en&enablejsapi=1&playerapiid=embed_player_1"/>
    //    </object>
    //</div>

    // Turns an <embed> element into an anonymous object that carries its attributes.
    var ParseEmbed = new
    {
        id = "",
        width = "",
        height = "",
        bgcolor = "",
        type = "",
        pluginspage = "",
        allowScriptAccess = "",
        allowFullScreen = "",
        src = "",
    }.ToAnonymousConstructor(
        (string element) =>
        {
            string
                id = "",
                width = "",
                height = "",
                bgcolor = "",
                type = "",
                pluginspage = "",
                allowScriptAccess = "",
                allowFullScreen = "",
                src = "";

            element.
                ParseAttribute("id", value => id = value).
                ParseAttribute("width", value => width = value).
                ParseAttribute("height", value => height = value).
                ParseAttribute("bgcolor", value => bgcolor = value).
                ParseAttribute("type", value => type = value).
                ParseAttribute("pluginspage", value => pluginspage = value).
                ParseAttribute("allowScriptAccess", value => allowScriptAccess = value).
                ParseAttribute("allowFullScreen", value => allowFullScreen = value).
                ParseAttribute("src", value => src = value).
                ParseContent(null).
                Parse();

            return new { id, width, height, bgcolor, type, pluginspage, allowScriptAccess, allowFullScreen, src };
        }
    );

    c.DataReceived +=
        document =>
        {
            // String.IndexOf(value, startIndex) throws for a negative startIndex,
            // so stop as soon as a marker is missing.
            var embed_start = document.IndexOf("<embed");
            if (embed_start < 0)
                return;

            var embed_end = document.IndexOf("/>", embed_start);
            if (embed_end < 0)
                return;

            // The slice has always reached five characters past the start of "/>" (the two
            // characters of "/>" plus three more). NOTE(review): the reason for the extra three
            // is not visible here - confirm. It is clamped so it cannot run past the document end.
            var embed_length = embed_end - embed_start + 5;
            if (embed_start + embed_length > document.Length)
                embed_length = document.Length - embed_start;

            // Decode the HTML entities. "&amp;" goes last so that text such as "&amp;lt;"
            // becomes "&lt;" and is not decoded a second time.
            var embed_content = document.
                Substring(embed_start, embed_length).
                Replace("&quot;", "\"").
                Replace("&lt;", "<").
                Replace("&gt;", ">").
                Replace("&amp;", "&");

            var embed = ParseEmbed(embed_content);

            var video_start = embed.src.IndexOf("v/");
            if (video_start < 0)
                return;

            // The id ends at the first query separator, or at the end of src when there is none.
            var video_end = embed.src.IndexOf("&", video_start);
            if (video_end < 0)
                video_end = embed.src.Length;

            var video = embed.src.Substring(video_start + 2, video_end - video_start - 2);

            if (this.VideoSourceFound != null)
                this.VideoSourceFound(video, embed.src);
        };
}