示例#1
0
            /// <summary>
            /// Crawls the page referenced by <c>Link</c> and reports the first
            /// movieposterdb poster image URL found in it.
            /// </summary>
            /// <param name="handler">
            /// Invoked with the absolute poster URL. It is not invoked when the page
            /// contains no complete poster <c>img</c> tag.
            /// </param>
            public void GetPoster(Action <string> handler)
            {
                var uri = Link.ToUri();

                var c = new BasicWebCrawler(uri.Host, 80);

                c.DataReceived +=
                    document =>
                {
                    var prefix  = "http://www.movieposterdb.com/posters/";
                    var trigger = "<img src=\"" + prefix;

                    var trigger_i = document.IndexOf(trigger);

                    if (trigger_i < 0)
                    {
                        return;
                    }

                    var start_i = trigger_i + trigger.Length;
                    var end_i   = document.IndexOf("\"", start_i);

                    // A truncated document may lack the closing quote; without this
                    // check Substring would be called with a negative length.
                    if (end_i < 0)
                    {
                        return;
                    }

                    handler(prefix + document.Substring(start_i, end_i - start_i));
                };

                c.Crawl(uri.PathAndQuery);
            }
示例#2
0
        /// <summary>
        /// Sets up a HEAD-request crawler against tineye.com. Whenever a redirect
        /// location containing "/search/" is received, the text after the last
        /// "/search/" is reported as the hash of a new <c>Entry</c> via <c>AddEntry</c>.
        /// </summary>
        public BasicTinEyeSearch()
        {
            //CoralEnabled = true,
            this.Crawler = new Library.BasicWebCrawler("tineye.com", 80) { Method = "HEAD" };

            this.Crawler.LocationReceived +=
                value =>
            {
                var marker   = "/search/";
                var marker_i = value.LastIndexOf(marker);

                // Only locations that point at a search result carry a hash.
                if (marker_i >= 0)
                {
                    var entry = new Entry
                    {
                        Hash = value.Substring(marker_i + marker.Length),
                    };

                    if (AddEntry != null)
                    {
                        AddEntry(entry);
                    }
                }
            };
        }
示例#3
0
        /// <summary>
        /// Asks the tinyarro.ws API to create a short link for <paramref name="url"/>
        /// and reports the resulting address to <paramref name="handler"/>.
        /// </summary>
        /// <remarks>
        /// The short-link host is picked from a fixed list using <c>url.XorBytes()</c>.
        /// The response bytes after the first '/' found at or beyond index 7 (the
        /// length of "http://") are appended as %-prefixed hex values.
        /// </remarks>
        /// <param name="url">Target address; passed to the API as-is.</param>
        /// <param name="handler">Receives the rebuilt short link.</param>
        public static void Spawn(string url, Action <string> handler)
        {
            var Hosts = new[]
            {
                "xn--hgi.ws", "xn--ogi.ws", "xn--vgi.ws", "xn--3fi.ws",
                "xn--egi.ws", "xn--9gi.ws", "xn--5gi.ws", "xn--1ci.ws",
                "xn--odi.ws", "xn--rei.ws", "xn--cwg.ws", "ta.gd",
            };

            var Host = Hosts[url.XorBytes() % Hosts.Length];

            var c = new BasicWebCrawler("tinyarro.ws", 80)
            {
                Buffer = new byte[100]
            };

            c.BinaryDataReceived +=
                data =>
            {
                var scheme = "http://";

                // Step past the scheme and the host part of the response, stopping
                // right after the first '/' that follows them.
                var cursor = scheme.Length;

                while (cursor < data.Length)
                {
                    var isSlash = data[cursor] == '/';

                    cursor++;

                    if (isSlash)
                    {
                        break;
                    }
                }

                var Target = scheme + Host + "/";

                // Every remaining byte is emitted as a %-prefixed hex value.
                while (cursor < data.Length)
                {
                    Target += "%" + data[cursor].ToHexString();
                    cursor++;
                }

                handler(Target);
            };

            c.Crawl("/api-create.php?host=" + Host + "&url=" + url);
        }
示例#4
0
        /// <summary>
        /// Runs a TinEye search for <paramref name="uri"/> and downloads the bytes
        /// behind the <c>QueryLink</c> of the reported entry.
        /// </summary>
        /// <param name="uri">Address handed to <c>BasicTinEyeSearch.Search</c>.</param>
        /// <returns>
        /// The bytes fetched for the most recently reported entry, or <c>null</c>
        /// if the callback was not invoked before <c>Search</c> returned.
        /// </returns>
        public static byte[] ToBytes(Uri uri)
        {
            byte[] bytes = null;

            BasicTinEyeSearch.Search(
                uri.ToString(),
                e =>
                {
                    bytes = BasicWebCrawler.ToBytes(new Uri(e.QueryLink));
                });

            return bytes;
        }
示例#5
0
        /// <summary>
        /// Polls each monitored host every 15 seconds and prints where its first
        /// frame points: yellow when the gmodules gadget is served from
        /// zproxy.planet.ee, green for any other gadget, red when the frame is not
        /// a gmodules gadget or no frame could be found.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            var hosts = new[] { "zmovies.tk" /*, "zmoviez.tk" */ };

            while (true)
            {
                foreach (var h in hosts)
                {
                    // Copy the loop variable: on compilers older than C# 5 the lambda
                    // below would otherwise share one 'h' across all iterations.
                    var host = h;

                    var c = new BasicWebCrawler(host, 80);

                    c.DataReceived +=
                        document =>
                    {
                        var trigger = "<frame src=\"";
                        var i       = document.IndexOf(trigger);
                        var j       = i < 0 ? -1 : document.IndexOf("\"", i + trigger.Length);

                        // Without a complete frame tag there is nothing to extract;
                        // report it instead of slicing at bogus offsets.
                        if (j < 0)
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine(DateTime.Now.ToString() + " " + host + " : frame not found");
                            return;
                        }

                        var data = document.Substring(i + trigger.Length, j - i - trigger.Length);

                        var gmoduleprefix = "http://www.gmodules.com/ig/ifr?url=";

                        if (data.StartsWith(gmoduleprefix))
                        {
                            var module = data.Substring(gmoduleprefix.Length);

                            if (module.StartsWith("http://zproxy.planet.ee"))
                            {
                                Console.ForegroundColor = ConsoleColor.Yellow;
                            }
                            else
                            {
                                Console.ForegroundColor = ConsoleColor.Green;
                            }

                            Console.WriteLine(DateTime.Now.ToString() + " " + host + " : " + module);
                        }
                        else
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine(DateTime.Now.ToString() + " " + host + " : " + data);
                        }
                    };

                    c.Crawl("/");
                }

                Thread.Sleep(15000);
            }
        }
示例#6
0
        // http://msdn.microsoft.com/en-us/library/aa373347(VS.85).aspx



        /// <summary>
        /// Polls the scheduler counter URL in an endless loop and prints each new
        /// value, colored by how it compares with the previous sample: green when
        /// it grew by at most <c>skip</c>, yellow when it grew by more, red when it
        /// did not grow. The first sample only seeds the comparison.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            var u = new Uri("http://zproxy.planet.ee/zmovies/server/tasks/Scheduler/Counter");

            // Last counter value seen; 0 means "no sample yet".
            var x = 0;

            var skip = 5;

            while (true)
            {
                var c = new BasicWebCrawler(u.Host, 80);

                c.DataReceived +=
                    document =>
                {
                    int n;

                    // The body is not guaranteed to be a number (e.g. an error page).
                    // Skip such samples and keep the last good value instead of
                    // letting int.Parse throw out of the callback.
                    if (!int.TryParse(document, out n))
                    {
                        return;
                    }

                    if (x > 0)
                    {
                        if (x < n)
                        {
                            if (x < (n - skip))
                            {
                                Console.ForegroundColor = ConsoleColor.Yellow;
                            }
                            else
                            {
                                Console.ForegroundColor = ConsoleColor.Green;
                            }
                        }
                        else
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                        }

                        Console.WriteLine(n);
                    }

                    x = n;
                };

                c.Crawl(u.PathAndQuery);

                Thread.Sleep(5500 * skip);
            }
        }
示例#7
0
        /// <summary>
        /// Treats the string as an absolute URL, crawls it on port 80 and returns
        /// the document that was received.
        /// </summary>
        /// <param name="e">Absolute URL to fetch.</param>
        /// <returns>
        /// The last document delivered through <c>DataReceived</c> before
        /// <c>Crawl</c> returned, or an empty string if none was delivered.
        /// </returns>
        public static string ToWebString(this string e)
        {
            var address = new Uri(e);
            var crawler = new BasicWebCrawler(address.Host, 80);

            var content = "";

            crawler.DataReceived += document => { content = document; };
            crawler.Crawl(address.PathAndQuery);

            return content;
        }
示例#8
0
        /// <summary>
        /// Queries the kej.tw flvretriever page for <paramref name="url"/> and
        /// reports the contents of its "outputfield" textarea.
        /// </summary>
        /// <param name="url">Video URL appended to the query string as-is.</param>
        /// <param name="handler">
        /// Receives the textarea contents. It is not invoked when the response
        /// does not contain a complete "outputfield" textarea.
        /// </param>
        public static void Search(string url, Action <string> handler)
        {
            var c = new BasicWebCrawler("kej.tw", 80);

            c.DataReceived +=
                document =>
            {
                var trigger_tag = "<textarea id=\"outputfield\">";
                var trigger_i   = document.IndexOf(trigger_tag);

                // An error page or changed layout has no output field.
                if (trigger_i < 0)
                {
                    return;
                }

                var data_i = trigger_i + trigger_tag.Length;

                var trigger_end_tag = "</textarea>";
                var trigger_end_i   = document.IndexOf(trigger_end_tag, data_i);

                // Unterminated textarea: Substring would get a negative length.
                if (trigger_end_i < 0)
                {
                    return;
                }

                handler(document.Substring(data_i, trigger_end_i - data_i));
            };

            c.Crawl("/flvretriever/?videoUrl=" + url);
        }
示例#9
0
        /// <summary>
        /// Creates the IMDB alias ("aka") search and wires up its crawler. The
        /// constructor itself does not start a crawl.
        /// </summary>
        /// <remarks>
        /// When a response carries a redirect location, a single entry for that
        /// location is reported through <c>AddEntry</c>. Otherwise the first
        /// available section among "Popular Titles", "Titles (Exact Matches)" and
        /// "Titles (Approx Matches)" is located, and each row of its table is
        /// reported as an entry with title, release date, image and alias chain.
        /// Responses or rows that lack the expected markup are skipped.
        /// </remarks>
        public BasicIMDBAliasSearch()
        {
            var c = new BasicWebCrawler(Host, 80)
            {
                // doesnt seem to respond at 2009.03.18
                //CoralEnabled = true
            };

            this.Crawler = c;

            var DefaultLink  = new { Link = "", Title = "", Text = "" };
            var DefaultImage = new { Source = "", Alt = "", Title = "", width = "", height = "" };

            // Parses an <a> element into its href, title and inner content.
            var ParseLink = DefaultLink.ToAnonymousConstructor(
                (string element) =>
            {
                var Link  = "";
                var Title = "";
                var Text  = "";

                element.
                ParseAttribute("href", value => Link   = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text             = value).
                Parse();

                return(new { Link, Title, Text });
            }
                );

            // Parses an <img> element into its attributes.
            var ParseImage = DefaultImage.ToAnonymousConstructor(
                (string element) =>
            {
                var Source = "";
                var Alt    = "";
                var Title  = "";
                var width  = "";
                var height = "";

                element.
                ParseAttribute("src", value => Source    = value).
                ParseAttribute("alt", value => Alt       = value).
                ParseAttribute("title", value => Title   = value).
                ParseAttribute("width", value => width   = value).
                ParseAttribute("height", value => height = value).
                ParseContent(null).
                Parse();

                return(new { Source, Alt, Title, width, height });
            }
                );

            // Running index of reported entries; shared by all responses.
            var EntryIndex = -1;

            #region AddItem
            Action <string, string> AddItem =
                (ImageElement, Content) =>
            {
                var ImageSource = "";

                if (ImageElement.StartsWith("<a"))
                {
                    var ImageLink = ParseLink(ImageElement);
                    var Image     = ParseImage(ImageLink.Text);

                    ImageSource = Image.Source;
                }

                /*
                 * <img src="/images/b.gif" width="1" height="6"><br>
                 * <a href="/title/tt0397892/" onclick="(new Image()).src='/rg/find-title-1/title_popular/images/b.gif?link=/title/tt0397892/';">Bolt</a> (2008)    <br>
                 * &#160;aka <em>"Bolt - Pes pro kazd&#253; pr&#237;pad"</em> - Czech Republic<br>
                 * &#160;aka <em>"Bolt - Un perro fuera de serie 3D"</em> - Chile<br>
                 * &#160;aka <em>"Bolt - Superc&#227;o"</em> - Brazil<br>
                 * &#160;aka <em>"Bolt - Un perro fuera de serie"</em> - Argentina, Mexico<br>
                 * &#160;aka <em>"Bolt - Ein Hund f&#252;r alle F&#228;lle"</em> - Germany
                 */

                var ContentLink_start = Content.IndexOf("<a");
                var ContentLink_end   = Content.IndexOf("</a>");

                // Rows without a title link (e.g. fewer than three cells, so Content
                // is empty) cannot be turned into an entry; skip them instead of
                // letting Substring throw.
                if (ContentLink_start < 0 || ContentLink_end < ContentLink_start)
                {
                    return;
                }

                var ContentLink = ParseLink(Content.Substring(ContentLink_start, ContentLink_end - ContentLink_start + 4));

                var Details = Content.Substring(ContentLink_end + 4);

                var ReleaseDate = "";
                var Alias       = default(AlsoKnownAs);

                // The first <br>-separated part is the release date; every following
                // part is prepended to the alias chain.
                Details.Split("<br>",
                              (text, index) =>
                {
                    if (index == 0)
                    {
                        ReleaseDate = text;
                        return;
                    }

                    Alias = new AlsoKnownAs
                    {
                        Text  = text,
                        Alias = Alias
                    };
                }
                              );

                EntryIndex++;

                if (this.AddEntry != null)
                {
                    this.AddEntry(
                        new Entry("http://" + Host + ContentLink.Link)
                    {
                        OptionalAlias       = Alias,
                        OptionalReleaseDate = ReleaseDate,
                        OptionalTitle       = ContentLink.Text,
                        OptionalImage       = ImageSource
                    },
                        EntryIndex
                        );
                }
            };
            #endregion

            // http://www.imdb.com/find?s=tt;site=aka;q=The%20Dark%20Knight

            //const string Header_Location = "Location: ";

            string Redirect = null;

            // Location: http://www.imdb.com/title/tt1129442/
            c.LocationReceived +=
                href =>
            {
                //Console.WriteLine("LocationReceived.");

                Redirect = href;
            };


            c.DataReceivedWithTimeSpan +=
                (document, elapsed) =>
            {
                //Console.WriteLine("DataReceivedWithTimeSpan.");

                #region redirect
                if (!string.IsNullOrEmpty(Redirect))
                {
                    // Consume the redirect so that a later response handled by this
                    // same crawler is not mistaken for the stale target.
                    var RedirectTarget = Redirect;
                    Redirect = null;

                    EntryIndex++;
                    if (this.AddEntry != null)
                    {
                        this.AddEntry(
                            new Entry(RedirectTarget)
                        {
                        },
                            EntryIndex
                            );
                    }

                    return;
                }
                #endregion

                var approx_section  = document.IndexOf("<b>Titles (Approx Matches)</b>");
                var exact_section   = document.IndexOf("<b>Titles (Exact Matches)</b>");
                var popular_section = document.IndexOf("<b>Popular Titles</b>");

                // Preference order: popular, then exact, then approximate matches.
                var first_section = popular_section;

                if (first_section < 0)
                {
                    first_section = exact_section;
                }

                if (first_section < 0)
                {
                    first_section = approx_section;
                }


                if (first_section < 0)
                {
                    return;
                }

                var section_start = document.IndexOf("<table>", first_section);

                if (section_start < 0)
                {
                    return;
                }

                var section_end = document.IndexOf("</table>", section_start);

                // Truncated document: there is no complete table to parse.
                if (section_end < 0)
                {
                    return;
                }

                var section = document.Substring(section_start, section_end - section_start + 8);

                BasicElementParser.Parse(section, "tr",
                                         (tr, tr_index) =>
                {
                    /*
                     * <td valign="top">
                     * <a href="/title/tt0397892/" onClick="(new Image()).src='/rg/find-tiny-photo-1/title_popular/images/b.gif?link=/title/tt0397892/';"><img src="http://ia.media-imdb.com/images/M/MV5BNDQyNDE5NjQ1N15BMl5BanBnXkFtZTcwMDExMTAwMg@@._V1._SY30_SX23_.jpg" width="23" height="32" border="0"></a>&nbsp;</td>
                     * <td align="right" valign="top"><img src="/images/b.gif" width="1" height="6"><br>1.</td>
                     * <td valign="top"><img src="/images/b.gif" width="1" height="6"><br><a href="/title/tt0397892/" onclick="(new Image()).src='/rg/find-title-1/title_popular/images/b.gif?link=/title/tt0397892/';">Bolt</a> (2008)    <br>&#160;aka <em>"Bolt - Pes pro kazd&#253; pr&#237;pad"</em> - Czech Republic<br>&#160;aka <em>"Bolt - Un perro fuera de serie 3D"</em> - Chile<br>&#160;aka <em>"Bolt - Superc&#227;o"</em> - Brazil<br>&#160;aka <em>"Bolt - Un perro fuera de serie"</em> - Argentina, Mexico<br>&#160;aka <em>"Bolt - Ein Hund f&#252;r alle F&#228;lle"</em> - Germany </td>
                     *
                     */

                    var Image   = "";
                    var Content = "";

                    // Cell 0 holds the thumbnail link, cell 2 the title and aliases.
                    BasicElementParser.Parse(tr, "td",
                                             (td, td_index) =>
                    {
                        if (td_index == 0)
                        {
                            Image = td;
                        }

                        if (td_index == 2)
                        {
                            Content = td;
                        }
                    }
                                             );

                    AddItem(Image, Content);
                }
                                         );
            };

            //c.Crawl("/find?s=tt;site=aka;q=" + "The Dark Knight".URLEncode());
            //c.Crawl("/find?s=tt;site=aka;q=" + "Bolt".URLEncode());
        }
示例#10
0
        /// <summary>
        /// Creates the Pirate Bay search and wires up its crawler. For every
        /// received document, <c>Loaded</c> is raised with an enumerator that walks
        /// the rows of the "searchResult" table and reports each as an
        /// <c>Entry</c> together with its zero-based index.
        /// </summary>
        /// <remarks>
        /// When the document has no complete results table the enumerator reports
        /// no entries.
        /// </remarks>
        public BasicPirateBaySearch()
        {
            this.Crawler = new BasicWebCrawler("thepiratebay.org", 80);

            this.Crawler.DataReceived +=
                document =>
            {
                // Each lookup starts from the previous hit, so a miss must stop the
                // chain: IndexOf throws when given a negative start index.
                var results     = document.IndexOf("<table id=\"searchResult\">");
                var headend     = results < 0 ? -1 : document.IndexOf("</thead>", results);
                var results_end = headend < 0 ? -1 : document.IndexOf("</table>", headend);

                int entryindex = -1;

                Action <Action <Entry, int> > ForEachEntry =
                    AddEntry =>
                {
                    // No complete results table: nothing to enumerate.
                    if (results_end < 0)
                    {
                        return;
                    }

                    #region ScanSingleResultOrReturn
                    // Parses the row starting at or after 'offset'. Returns the offset
                    // just past that row, or 'offset' unchanged when no further
                    // complete row exists inside the results table.
                    Func <int, int> ScanSingleResultOrReturn =
                        offset =>
                    {
                        var itemstart = document.IndexOf("<tr>", offset);

                        if (itemstart < 0)
                        {
                            return(offset);
                        }

                        if (itemstart > results_end)
                        {
                            return(offset);
                        }

                        var itemend = document.IndexOf("</tr>", itemstart);

                        if (itemend < 0)
                        {
                            return(offset);
                        }

                        if (itemend > results_end)
                        {
                            return(offset);
                        }

                        var itemdata = document.Substring(itemstart, itemend - itemstart);



                        //<tr>
                        //<td class="vertTh"><a href="/browse/205" title="More from this category">Video &gt; TV shows</a></td>
                        //<td><a href="/torrent/4727946/Heroes.S03E16.HDTV.XviD-XOR.avi" class="detLink" title="Details for Heroes.S03E16.HDTV.XviD-XOR.avi">Heroes.S03E16.HDTV.XviD-XOR.avi</a></td>
                        //<td>Today&nbsp;04:55</td>
                        //<td><a href="http://torrents.thepiratebay.org/4727946/Heroes.S03E16.HDTV.XviD-XOR.avi.4727946.TPB.torrent" title="Download this torrent"><img src="http://static.thepiratebay.org/img/dl.gif" class="dl" alt="Download" /></a><img src="http://static.thepiratebay.org/img/icon_comment.gif" alt="This torrent has 22 comments." title="This torrent has 22 comments." /><img src="http://static.thepiratebay.org/img/vip.gif" alt="VIP" title="VIP" style="width:11px;" /></td>
                        //<td align="right">348.97&nbsp;MiB</td>
                        //<td align="right">47773</td>
                        //<td align="right">60267</td>

                        //Console.WriteLine("<h1>Most Popular video</h1>");
                        //Console.WriteLine("<table>");

                        // type, name, uploaded, links, size, se, le

                        var Fields = new BasicPirateBaySearch.Entry();

                        Action <string> SetField = null;

                        // Curried setter: each call captures one cell value and
                        // re-points SetField at the handler for the next cell. The
                        // seventh call builds the entry; any later call is a no-op.
                        SetField =
                            Type => SetField =
                            Name => SetField =
                            Time => SetField =
                            Links => SetField =
                            Size => SetField =
                            Seeders => SetField =
                            Leechers =>
                        {
                            Fields = new BasicPirateBaySearch.Entry
                            {
                                Type     = Type,
                                Name     = Name,
                                Time     = Time,
                                Links    = Links,
                                Size     = Size,
                                Seeders  = Seeders,
                                Leechers = Leechers
                            };

                            SetField = delegate { };
                        };


                        var ep = new BasicElementParser();

                        ep.AddContent +=
                            (value, index) =>
                        {
                            //Console.WriteLine("AddContent start #" + index);
                            SetField(value);
                            //Console.WriteLine("AddContent stop #" + index);
                        };

                        ep.Parse(itemdata, "td");

                        entryindex++;

                        if (AddEntry != null)
                        {
                            AddEntry(Fields, entryindex);
                        }



                        // 5 == "</tr>".Length
                        return(itemend + 5);
                    };
                    #endregion


                    ScanSingleResultOrReturn.ToChainedFunc((x, y) => y > x)(headend);
                };

                if (this.Loaded != null)
                {
                    this.Loaded(ForEachEntry);
                }
            };
        }
示例#11
0
        /// <summary>
        /// Creates the IMDB poster search and wires up its crawler for
        /// www.imdb.com with <c>CoralEnabled</c> set.
        /// </summary>
        /// <remarks>
        /// A response that carried a redirect location is followed by crawling the
        /// location's path and query. Otherwise the image in the first cell of the
        /// "principal" table is parsed and its source is reported through
        /// <c>AddEntry</c>. Documents without a complete "principal" table are
        /// ignored.
        /// </remarks>
        public BasicIMDBPosterSearch()
        {
            this.Crawler =
                new Library.BasicWebCrawler("www.imdb.com", 80)
            {
                CoralEnabled = true
            };



            var DefaultImage = new { Source = "", Alt = "", Title = "" };

            // Parses an <img> element into its src, alt and title attributes.
            var ParseImage = DefaultImage.ToAnonymousConstructor(
                (string element) =>
            {
                var Source = "";
                var Alt    = "";
                var Title  = "";

                element.
                ParseAttribute("src", value => Source  = value).
                ParseAttribute("alt", value => Alt     = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

                return(new { Source, Alt, Title });
            }
                );


            string location = null;

            // Forget the previous redirect whenever a new request goes out.
            this.Crawler.AllHeadersSent +=
                () =>
            {
                location = null;
            };

            this.Crawler.LocationReceived +=
                value =>
            {
                location = value;
            };

            this.Crawler.DataReceived +=
                document =>
            {
                if (!string.IsNullOrEmpty(location))
                {
                    var u = new Uri(location);

                    this.Crawler.Crawl(u.PathAndQuery);

                    return;
                }

                var poster_tag = "<table id=\"principal\">";
                var poster_i   = document.IndexOf(poster_tag);

                // Not a poster page (or the layout changed): nothing to report.
                if (poster_i < 0)
                {
                    return;
                }

                var poster_close_tag = "</table>";
                var poster_close_i   = document.IndexOf(poster_close_tag, poster_i);

                // Truncated document: Substring would get a negative length.
                if (poster_close_i < 0)
                {
                    return;
                }

                var poster = ParseImage(
                    BasicElementParser.GetContent(
                        document.Substring(poster_i, poster_close_i + poster_close_tag.Length - poster_i)
                        , "td")
                    );

                if (this.AddEntry != null)
                {
                    this.AddEntry(poster.Source);
                }
            };
        }
        /// <summary>
        /// Searches www.omdb.si for <paramref name="stitle"/> and reports every
        /// result found on the first result page.
        /// </summary>
        /// <param name="stitle">Title to search for; URL-encoded before it is sent.</param>
        /// <param name="handler">
        /// Invoked once per result with its genres, duration, link, title and
        /// year. Scanning stops at the first result block that lacks the expected
        /// markup.
        /// </param>
        public static void Search(string stitle, Action <AliasEntry> handler)
        {
            var c = new BasicWebCrawler("www.omdb.si", 80);

            var DefaultLink = new { Link = "", Title = "", Text = "" };

            // Parses an <a> element into its href, title and inner content.
            var ParseLink = DefaultLink.ToAnonymousConstructor(
                (string element) =>
            {
                var Link  = "";
                var Title = "";
                var Text  = "";

                element.
                ParseAttribute("href", value => Link   = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text             = value).
                Parse();

                return(new { Link, Title, Text });
            }
                );

            c.DataReceived +=
                document =>
            {
                var trigger_tag     = "<table width=\"100%\" class=\"fW\">";
                var trigger_end_tag = "</table>";

                // Parses the result block starting at or after 'offset'. Returns the
                // offset just past it, or 'offset' unchanged when no further
                // well-formed block exists (which ends the scan).
                Func <int, int> scan =
                    offset =>
                {
                    var trigger_i = document.IndexOf(trigger_tag, offset);

                    if (trigger_i < 0)
                    {
                        return(offset);
                    }

                    var tirgger_end_i = document.IndexOf(trigger_end_tag, trigger_i);

                    if (tirgger_end_i < 0)
                    {
                        return(offset);
                    }

                    var data = document.Substring(trigger_i + trigger_tag.Length, tirgger_end_i - trigger_i - trigger_tag.Length);

                    /*
                     * <tr>
                     * <td class="bTl"><img alt="" src="/images/default/Ogrodje0.gif" width="10" height="10" /></td>
                     * <td class="bT"></td><td class="bTr"></td></tr>
                     * <tr>
                     * <td class="bL"></td>
                     * <td class="bM">
                     * <div align="left">
                     *  <table width="100%"  border="0" cellspacing="0" cellpadding="0">
                     *          <tr>
                     * <td width="5" rowspan="2"></td>
                     * <td width="444" align="left">
                     * <div align="left"><a href="/index.php/ofilm/?i=401737">Lost <b>(2004)</b></a></div>
                     * </td>
                     * <td align="right">
                     * <span class="forumozadje3"><b>8.9</b>
                     * <img src='/images/default/zvezdice/yellow.gif' class='slikevvrsti' /> ... </span>
                     * </td></tr>
                     *          <tr>
                     * <td align="left">
                     * <span class="oddelki_forum_mala">Genre: <b>Drama, Adventure, Mystery, Thriller</b> Duration: <b>45 min</b></span>
                     * </td>
                     * <td align="right"><span class="oddelki_forum_mala">(174 votes)</span></td></tr>
                     */

                    var title_tag     = "<div align=\"left\">";
                    var title_end_tag = "</div>";

                    var title_i = data.IndexOf(title_tag);

                    if (title_i < 0)
                    {
                        return(offset);
                    }

                    // The title lives in the second <div align="left"> of the block.
                    title_i = data.IndexOf(title_tag, title_i + title_tag.Length);

                    if (title_i < 0)
                    {
                        return(offset);
                    }

                    var title_end_i = data.IndexOf(title_end_tag, title_i);

                    if (title_end_i < 0)
                    {
                        return(offset);
                    }

                    //  Lost <b>(2004)</b>
                    var title = ParseLink(data.Substring(title_i + title_tag.Length, title_end_i - title_i - title_tag.Length));

                    var genre_tag = "<span class=\"oddelki_forum_mala\">";
                    var genre_i   = data.IndexOf(genre_tag, title_end_i);

                    if (genre_i < 0)
                    {
                        return(offset);
                    }

                    var genre_end_tag = "</span>";
                    var genre_end_i   = data.IndexOf(genre_end_tag, genre_i);

                    if (genre_end_i < 0)
                    {
                        return(offset);
                    }

                    // Genre: <b>Drama, Adventure, Mystery, Thriller</b> Duration: <b>45 min</b>
                    var genre = data.Substring(genre_i + genre_tag.Length, genre_end_i - genre_i - genre_tag.Length);

                    // The plain title is the link text up to the first tag; when the
                    // text carries no markup the whole text is the title.
                    var title_markup_i = title.Text.IndexOf("<");
                    var title_plain    = title_markup_i < 0 ? title.Text : title.Text.Substring(0, title_markup_i);

                    var e = new AliasEntry
                    {
                        Genres   = genre.Substring("Genre: <b>", "</b>").Split(new[] { ',' }).Trim(),
                        Duration = genre.Substring("Duration: <b>", "</b>"),
                        Link     = "http://www.omdb.si" + title.Link,
                        Title    = title_plain,
                        Year     = title.Text.Substring("<b>", "</b>")
                    };

                    handler(e);

                    return(tirgger_end_i + trigger_end_tag.Length);
                };

                var start_tag    = "<td align=\"right\" class=\"bM\">";
                var start_offset = document.IndexOf(start_tag);

                // Without the anchor cell this is not a result page. (The original
                // searched again from start_offset, which returns the same index on
                // a hit and throws on a miss.)
                if (start_offset < 0)
                {
                    return;
                }

                scan.ToChainedFunc((x, y) => y > x)(start_offset);
            };

            // we will only look at the first result page
            c.Crawl("/index.php/odefault/search?sK=" + stitle.URLEncode());
        }
示例#13
0
        /// <summary>
        /// Queries movieposterdb.com for <paramref name="title"/> and reports the rows of the
        /// table that follows the "Movies" heading of the returned page.
        /// </summary>
        /// <param name="title">Title to search for; it is URL encoded and appended to the search query.</param>
        /// <param name="handler">Called for the second cell of every parsed row with the link, title and year read from it.</param>
        public static void Search(string title, Action <AliasEntry> handler)
        {
            var t = new Uri("http://www.movieposterdb.com/browse/search?search_type=movies&title=");
            var c = new BasicWebCrawler(t.Host, 80);

            var DefaultLink = new { Link = "", Title = "", Text = "" };
            var DefaultSpan = new { Text = "", Title = "" };

            // reads the title attribute and the content of a span element
            var ParseSpan = DefaultSpan.ToAnonymousConstructor(
                (string element) =>
                {
                    var Text  = "";
                    var Title = "";

                    element.
                    ParseAttribute("title", value => Title = value).
                    ParseContent(value => Text = value).
                    Parse("span");

                    return new { Text, Title };
                }
                );

            // reads the href and title attributes and the content of an anchor element
            var ParseLink = DefaultLink.ToAnonymousConstructor(
                (string element) =>
                {
                    var Link  = "";
                    var Title = "";
                    var Text  = "";

                    element.
                    ParseAttribute("href", value => Link = value).
                    ParseAttribute("title", value => Title = value).
                    ParseContent(value => Text = value).
                    Parse("a");

                    return new { Link, Title, Text };
                }
                );

            c.DataReceived +=
                document =>
                {
                    var trigger = "Movies</h3>";

                    var trigger_i = document.IndexOf(trigger);

                    // the heading is missing when the page has no movie results (or the layout
                    // changed); Substring(-1) would throw, so there is nothing to report
                    if (trigger_i < 0)
                    {
                        return;
                    }

                    var data = BasicElementParser.GetContent(document.Substring(trigger_i), "table");

                    // no table content after the heading - nothing to parse
                    if (string.IsNullOrEmpty(data))
                    {
                        return;
                    }

                    BasicElementParser.Parse(data, "tr",
                        (element, index) =>
                        {
                            /*
                             * <td valign="middle" style="font-size: 0pt; border-bottom: 1px solid #D2D2D2; height: 54px; width: 44px;">
                             *  <img src="http://www.movieposterdb.com/posters/08_09/2008/1179855/m_1179855_4fb9999f.jpg" style="margin-right: 8px; padding: 2px; border: 1px solid #D2D2D2; float: left;" />
                             * </td>
                             * <td valign="middle" style="border-bottom: 1px solid #D2D2D2; width: 60%;">
                             *  <b><a class="bbg" href="http://www.movieposterdb.com/movie/1179855/Go-Fast.html">Go Fast</a><br /><span style="color: #8C8C8C;">2008</span></b>
                             * </td>
                             * <td style="border-bottom: 1px solid #D2D2D2; font-size: 8pt; color: #808080;">
                             *
                             * </td>
                             */

                            BasicElementParser.Parse(element, "td",
                                (tdelement, tdindex) =>
                                {
                                    // only the second cell carries the link and the year
                                    if (tdindex != 1)
                                    {
                                        return;
                                    }

                                    // <b><a class="bbg" href="http://www.movieposterdb.com/movie/1179855/Go-Fast.html">Go Fast</a><br /><span style="color: #8C8C8C;">2008</span></b>
                                    var _title = ParseLink(tdelement);
                                    var _year  = ParseSpan(tdelement);

                                    handler(
                                        new AliasEntry
                                        {
                                            Link  = _title.Link,
                                            Title = _title.Text,
                                            Year  = _year.Text
                                        }
                                        );
                                }
                                );
                        }
                        );
                };

            c.Crawl(t.PathAndQuery + title.URLEncode());
        }
        /// <summary>
        /// Creates a crawler for video.google.com. When a page is received, the HTML-escaped
        /// embed element is located and parsed, and <c>VideoSourceFound</c> is raised with the
        /// video id (the text after "v/" in the src attribute) and the full src value.
        /// </summary>
        public BasicGoogleVideoCrawler()
        {
            var c = new BasicWebCrawler("video.google.com", 80);

            this.Crawler = c;

            //<div class="embed_html" style="display: none">
            // &lt;object id=&quot;object_player_1&quot; classid=&quot;clsid:D27CDB6E-AE6D-11cf-96B8-444553540000&quot; codebase=&quot;http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=9,0,0,0&quot; width=&quot;100%&quot; height=&quot;100%&quot;&gt;&lt;param name=&quot;movie&quot; value=&quot;http://www.youtube.com/v/aDWPsoKQoOs&amp;fs=1&amp;hl=en&amp;enablejsapi=1&amp;playerapiid=object_player_1&quot;/&gt;&lt;param name=&quot;allowFullScreen&quot; value=&quot;true&quot;/&gt;&lt;param name=&quot;allowScriptAccess&quot; value=&quot;always&quot;/&gt;
            // &lt;embed
            //		id=&quot;embed_player_1&quot;
            //		width=&quot;100%&quot;
            //		height=&quot;100%&quot;
            //		bgcolor=&quot;#000000&quot;
            //		type=&quot;application/x-shockwave-flash&quot;
            //		pluginspage=&quot;http://www.macromedia.com/go/getflashplayer&quot;
            //		allowScriptAccess=&quot;always&quot;
            //		allowFullScreen=&quot;true&quot;
            //		src=&quot;http://www.youtube.com/v/aDWPsoKQoOs&amp;fs=1&amp;hl=en&amp;enablejsapi=1&amp;playerapiid=embed_player_1&quot;/&gt;
            // &lt;/object&gt;
            //</div>

            // <embed id="embed_player_1" width="100%" height="100%" bgcolor="#000000" type="application/x-shockwave-flash" pluginspage="http://www.macromedia.com/go/getflashplayer" allowScriptAccess="always" allowFullScreen="true" src="http://www.youtube.com/v/aDWPsoKQoOs&fs=1&hl=en&enablejsapi=1&playerapiid=embed_player_1"/>

            // collects the attributes of an (already unescaped) embed element
            var ParseEmbed = new
            {
                id                = "",
                width             = "",
                height            = "",
                bgcolor           = "",
                type              = "",
                pluginspage       = "",
                allowScriptAccess = "",
                allowFullScreen   = "",
                src               = "",
            }.ToAnonymousConstructor(
                (string element) =>
                {
                    string id         = "",
                    width             = "",
                    height            = "",
                    bgcolor           = "",
                    type              = "",
                    pluginspage       = "",
                    allowScriptAccess = "",
                    allowFullScreen   = "",
                    src = "";

                    element.
                    ParseAttribute("id", value => id = value).
                    ParseAttribute("width", value => width = value).
                    ParseAttribute("height", value => height = value).
                    ParseAttribute("bgcolor", value => bgcolor = value).
                    ParseAttribute("type", value => type = value).
                    ParseAttribute("pluginspage", value => pluginspage = value).
                    ParseAttribute("allowScriptAccess", value => allowScriptAccess = value).
                    ParseAttribute("allowFullScreen", value => allowFullScreen = value).
                    ParseAttribute("src", value => src = value).
                    ParseContent(null).
                    Parse();

                    return new
                    {
                        id,
                        width,
                        height,
                        bgcolor,
                        type,
                        pluginspage,
                        allowScriptAccess,
                        allowFullScreen,
                        src
                    };
                }
                );

            c.DataReceived +=
                document =>
                {
                    // the embed markup is delivered HTML-escaped inside the page
                    var embed_open  = "&lt;embed";
                    var embed_close = "/&gt;";

                    var embed_start = document.IndexOf(embed_open);

                    if (embed_start < 0)
                    {
                        return;
                    }

                    var embed_end = document.IndexOf(embed_close, embed_start);

                    // an unterminated element would produce a negative Substring length
                    if (embed_end < 0)
                    {
                        return;
                    }

                    // include the closing token, then undo the HTML escaping
                    var embed_content = document.
                                        Substring(embed_start, embed_end - embed_start + embed_close.Length).
                                        Replace("&quot;", "\"").
                                        Replace("&amp;", "&").
                                        Replace("&lt;", "<").
                                        Replace("&gt;", ">");

                    var embed = ParseEmbed(embed_content);

                    if (string.IsNullOrEmpty(embed.src))
                    {
                        return;
                    }

                    // http://www.youtube.com/v/aDWPsoKQoOs&fs=1&... -> aDWPsoKQoOs
                    var video_tag   = "v/";
                    var video_start = embed.src.IndexOf(video_tag);

                    // src does not point to a "v/<id>" style url - no id to report
                    if (video_start < 0)
                    {
                        return;
                    }

                    var video_from = video_start + video_tag.Length;
                    var video_end  = embed.src.IndexOf("&", video_start);

                    // without any query arguments the id runs to the end of the src
                    if (video_end < 0)
                    {
                        video_end = embed.src.Length;
                    }

                    var video = embed.src.Substring(video_from, video_end - video_from);

                    if (this.VideoSourceFound != null)
                    {
                        this.VideoSourceFound(video, embed.src);
                    }
                };
        }
示例#15
0
        /// <summary>
        /// Wires a downloader crawler to an uploader crawler for bayimg.com: the stream read by
        /// <c>CrawlerDownloader</c> is posted as multipart/form-data to "/upload", and the link
        /// found in the upload response is reported through <c>AddEntry</c>.
        /// </summary>
        public BasicPirateBayImage()
        {
            this.CrawlerDownloader =
                new Library.BasicWebCrawler("", 80)
            {
                //CoralEnabled = true
            };

            this.CrawlerUploader =
                new Library.BasicWebCrawler("bayimg.com", 80)
            {
                Method = "POST"
            };

            #region parser
            var DefaultLink  = new { Link = "", Title = "", Text = "" };
            var DefaultImage = new { Source = "", Alt = "", Title = "", width = "", height = "" };

            // reads the href and title attributes and the content of an element
            var ParseLink = DefaultLink.ToAnonymousConstructor(
                (string element) =>
                {
                    var Link  = "";
                    var Title = "";
                    var Text  = "";

                    element.
                    ParseAttribute("href", value => Link = value).
                    ParseAttribute("title", value => Title = value).
                    ParseContent(value => Text = value).
                    Parse();

                    return new { Link, Title, Text };
                }
                );

            // reads the src, alt, title, width and height attributes of an element
            var ParseImage = DefaultImage.ToAnonymousConstructor(
                (string element) =>
                {
                    var Source = "";
                    var Alt    = "";
                    var Title  = "";
                    var width  = "";
                    var height = "";

                    element.
                    ParseAttribute("src", value => Source = value).
                    ParseAttribute("alt", value => Alt = value).
                    ParseAttribute("title", value => Title = value).
                    ParseAttribute("width", value => width = value).
                    ParseAttribute("height", value => height = value).
                    ParseContent(null).
                    Parse();

                    return new { Source, Alt, Title, width, height };
                }
                );
            #endregion

            // http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
            var boundary         = "---------------------------" + int.MaxValue.Random();
            var current_filename = "_" + int.MaxValue.Random();

            #region StreamWriter
            // Writes the multipart body: the "file" part followed by the "code" and "tags" fields.
            // When source is null no payload is copied; the target position is advanced by
            // sourcelength instead, which is how the body size is measured for Content-Length.
            Action <StreamWriter, Stream, int, string> WriteFormData =
                (stream, source, sourcelength, filename) =>
                {
                    // every line has to reach the base stream before raw bytes are written to it
                    stream.AutoFlush = true;

                    stream.WriteLine("--" + boundary);
                    stream.WriteLine("Content-Disposition: form-data; name=\"file\"; filename=\"" + filename + "\"");
                    stream.WriteLine("Content-Type: application/octet-stream");
                    stream.WriteLine();

                    if (source == null)
                    {
                        stream.BaseStream.Position += sourcelength;
                    }
                    else
                    {
                        var buffer = new byte[0x1000];
                        var size   = source.Read(buffer, 0, buffer.Length);

                        while (size > 0)
                        {
                            stream.BaseStream.Write(buffer, 0, size);
                            size = source.Read(buffer, 0, buffer.Length);
                        }
                    }

                    stream.WriteLine();

                    stream.WriteLine("--" + boundary);
                    stream.WriteLine("Content-Disposition: form-data; name=\"code\"");
                    stream.WriteLine();
                    stream.WriteLine("tpb");

                    stream.WriteLine("--" + boundary);
                    stream.WriteLine("Content-Disposition: form-data; name=\"tags\"");
                    stream.WriteLine();
                    stream.WriteLine("");

                    stream.WriteLine("--" + boundary + "--");
                };
            #endregion

            // NOTE(review): the handlers below subscribe to CrawlerUploader.HeaderWriter and
            // CrawlerUploader.StreamWriter on every download and never unsubscribe, so reusing
            // one instance for several downloads looks like it stacks handlers - confirm.
            this.CrawlerDownloader.ContentLengthReceived +=
                ContentLength =>
                {
                    current_filename = "_" + int.MaxValue.Random();

                    var value = int.Parse(ContentLength);

                    this.CrawlerUploader.HeaderWriter +=
                        stream =>
                        {
                            stream.WriteLine("Content-Type: multipart/form-data; boundary=" + boundary);

                            // dry run against a VoidStream to learn the body length up front
                            using (var v = new StreamWriter(new VoidStream()))
                            {
                                WriteFormData(v, null, value, current_filename);

                                stream.WriteLine("Content-Length: " +
                                                 v.BaseStream.Position
                                                 );
                            }
                        };
                };

            this.CrawlerDownloader.StreamReader +=
                source =>
                {
                    this.CrawlerUploader.StreamWriter +=
                        stream => WriteFormData(stream, source, 0, current_filename);

                    this.CrawlerUploader.Crawl("/upload");
                };

            this.CrawlerUploader.DataReceived +=
                document =>
                {
                    var result_tag = "<div id=\"extra2\">";
                    var result_i   = document.IndexOf(result_tag);

                    // the result container is missing, e.g. when the upload was rejected
                    if (result_i < 0)
                    {
                        return;
                    }

                    var result_end_tag = "<br/>";
                    var result_end_i   = document.IndexOf(result_end_tag, result_i);

                    // an unterminated section would produce a negative Substring length
                    if (result_end_i < 0)
                    {
                        return;
                    }

                    var data_i = result_i + result_tag.Length;
                    var data   = document.Substring(data_i, result_end_i - data_i).Trim();

                    // http://bayimg.com/image/eaofgaabg.jpg

                    var Link = ParseLink(data);

                    // the thumbnail is parsed as before but its result is not consumed yet
                    var ThumbnailImage = ParseImage(Link.Text);

                    // Substring(1) below needs at least one character to skip
                    if (string.IsNullOrEmpty(Link.Link))
                    {
                        return;
                    }

                    if (this.AddEntry != null)
                    {
                        // drop the first character of the link; the link is an identifier, so it
                        // is lower-cased independently of the current culture
                        this.AddEntry(new Entry(Link.Link.Substring(1).ToLowerInvariant()));
                    }
                    //new IHTMLImage { Source = ImageLink, Title = imdb.SmartTitle }.ToString().ToConsole();

                    //Console.WriteLine(ImageHTML);
                    //Console.WriteLine(ThumbnailImageHTML);
                };
        }
        /// <summary>
        /// Creates a crawler for www.imdb.com. Each received page is scanned for the title, year,
        /// poster, user rating, genres, runtime and tagline; the resulting <c>Entry</c> is passed
        /// to <c>AddEntry</c>. Sections that cannot be located are left empty instead of throwing.
        /// </summary>
        public BasicIMDBCrawler()
        {
            this.Crawler =
                new Library.BasicWebCrawler("www.imdb.com", 80)
            {
                //CoralEnabled = true
            };

            var DefaultLink  = new { Link = "", Title = "", Text = "" };
            var DefaultImage = new { Source = "", Alt = "", Title = "" };

            // reads the href and title attributes and the content of an element
            var ParseLink = DefaultLink.ToAnonymousConstructor(
                (string element) =>
                {
                    var Link  = "";
                    var Title = "";
                    var Text  = "";

                    element.
                    ParseAttribute("href", value => Link = value).
                    ParseAttribute("title", value => Title = value).
                    ParseContent(value => Text = value).
                    Parse();

                    return new { Link, Title, Text };
                }
                );

            // reads the src, alt and title attributes of an element
            var ParseImage = DefaultImage.ToAnonymousConstructor(
                (string element) =>
                {
                    var Source = "";
                    var Alt    = "";
                    var Title  = "";

                    element.
                    ParseAttribute("src", value => Source = value).
                    ParseAttribute("alt", value => Alt = value).
                    ParseAttribute("title", value => Title = value).
                    ParseContent(null).
                    Parse();

                    return new { Source, Alt, Title };
                }
                );

            this.Crawler.DataReceived +=
                document =>
                {
                    var entry = new Entry();

                    var section_end_tag = "</div>";

                    #region Title and Year
                    // the title element is split at the first "(" into name and year
                    var title   = BasicElementParser.GetContent(document, "title") ?? "";
                    var title_i = title.IndexOf("(");

                    if (title_i < 0)
                    {
                        // no "(year)" part - keep the whole text as the title
                        entry.Title = title.Trim();
                        entry.Year  = "";
                    }
                    else
                    {
                        entry.Title = title.Substring(0, title_i).Trim();

                        var year_i     = title_i + 1;
                        var year_end_i = title.IndexOf(")", year_i);

                        // an unterminated "(" yields the remainder of the title
                        entry.Year = year_end_i < 0
                            ? title.Substring(year_i)
                            : title.Substring(year_i, year_end_i - year_i);
                    }

                    // remove quotes from the title
                    entry.Title = entry.Title.Replace("&#34;", "");
                    #endregion

                    #region Poster
                    var poster_i = document.IndexOf("name=\"poster\"");

                    // no poster - the poster may be found on other services
                    if (poster_i >= 0)
                    {
                        // the anchor that carries the name="poster" attribute
                        var poster_j = document.Substring(0, poster_i).LastIndexOf("<a");
                        var poster_q = document.IndexOf("</a>", poster_i);

                        if (poster_j >= 0 && poster_q >= 0)
                        {
                            var poster       = ParseLink(document.Substring(poster_j, poster_q - poster_j + 4));
                            var poster_image = ParseImage(poster.Text);

                            entry.MediumPosterImageProvider = "imdb";
                            entry.MediumPosterImage         = poster_image.Source;
                            entry.MediumPosterImagePage     = poster.Link;
                            entry.MediumPosterTitle         = poster.Title;
                        }
                    }
                    #endregion

                    #region UserRating
                    var meta_tag = "<div class=\"meta\">";
                    var meta_i   = document.IndexOf(meta_tag);

                    entry.UserRating = "";

                    if (meta_i >= 0)
                    {
                        var meta_from  = meta_i + meta_tag.Length;
                        var meta_end_i = document.IndexOf(section_end_tag, meta_i);

                        if (meta_end_i >= 0)
                        {
                            var meta = document.Substring(meta_from, meta_end_i - meta_from);

                            entry.UserRating = BasicElementParser.GetContent(meta, "b");
                        }
                    }
                    #endregion

                    #region Genres
                    var genre_tag = "<h5>Genre:</h5>";
                    var genre_i   = document.IndexOf(genre_tag);
                    var genres    = new List <string>();

                    if (genre_i >= 0)
                    {
                        var genre_from  = genre_i + genre_tag.Length;
                        var genre_end_i = document.IndexOf(section_end_tag, genre_i);

                        if (genre_end_i >= 0)
                        {
                            var genre = document.Substring(genre_from, genre_end_i - genre_from);

                            BasicElementParser.Parse(genre, "a",
                                (text, index) =>
                                {
                                    // the trailing "more" link is not a genre
                                    if (text == "more")
                                    {
                                        return;
                                    }

                                    genres.Add(text);
                                }
                                );
                        }
                    }

                    entry.Genres = genres.ToArray();
                    #endregion

                    // runtime and tagline are looked up from the genre section onwards, or from
                    // the start of the document when there is no genre section
                    var details_from = genre_i < 0 ? 0 : genre_i;

                    #region Runtime
                    var runtime_tag = "<h5>Runtime:</h5>";
                    var runtime_i   = document.IndexOf(runtime_tag, details_from);

                    entry.Runtime = "";

                    if (runtime_i >= 0)
                    {
                        var runtime_from  = runtime_i + runtime_tag.Length;
                        var runtime_end_i = document.IndexOf(section_end_tag, runtime_i);

                        if (runtime_end_i >= 0)
                        {
                            entry.Runtime = document.Substring(runtime_from, runtime_end_i - runtime_from).Trim();
                        }
                    }
                    #endregion

                    #region Tagline
                    var Tagline_tag = "<h5>Tagline:</h5>";
                    var Tagline_i   = document.IndexOf(Tagline_tag, details_from);

                    entry.Tagline = "";

                    if (Tagline_i >= 0)
                    {
                        // the tagline text runs up to the next tag
                        var Tagline_from  = Tagline_i + Tagline_tag.Length;
                        var Tagline_end_i = document.IndexOf("<", Tagline_from);

                        if (Tagline_end_i >= 0)
                        {
                            entry.Tagline = document.Substring(Tagline_from, Tagline_end_i - Tagline_from).Trim();
                        }
                    }
                    #endregion

                    if (AddEntry != null)
                    {
                        AddEntry(entry);
                    }
                };
        }
示例#17
0
        /// <summary>
        /// Creates a crawler for tinyurl.com. In <c>APIMode</c> the whole response is taken as the
        /// alias; otherwise the "TinyURL was created!" page is parsed for the original URL (first
        /// blockquote) and the alias (second blockquote). The resulting <c>Entry</c> is passed to
        /// <c>AddEntry</c>; pages that do not contain the expected markup are ignored.
        /// </summary>
        public BasicTinyURLCrawler()
        {
            this.Crawler =
                new Library.BasicWebCrawler("tinyurl.com", 80)
            {
                //CoralEnabled = true
            };

            this.Crawler.DataReceived +=
                document =>
                {
                    var entry = new Entry();

                    if (APIMode)
                    {
                        entry.Alias = document;
                    }
                    else
                    {
                        var trigger = "<h1>TinyURL was created!</h1>";

                        var trigger_i = document.IndexOf(trigger);

                        if (trigger_i < 0)
                        {
                            return;
                        }

                        // we are still in the business...

                        //<h1>TinyURL was created!</h1>
                        //<p>The following URL:
                        //<blockquote><b>http://thepiratebay.org<br />
                        //</b></blockquote>
                        //has a length of 23 characters and resulted in the following TinyURL which has a length of 24 characters:
                        //<blockquote><b>http://tinyurl.com/5umsn</b><br><small>[<a href="http://tinyurl.com/5umsn" target="_blank">Open in new window</a>]</small></blockquote>
                        //Or, give your recipients confidence with a preview TinyURL:
                        //<blockquote><b>http://preview.tinyurl.com/5umsn</b><br><small>[<a href="http://preview.tinyurl.com/5umsn" target="_blank">Open in new window</a>]</small>

                        //</blockquote>
                        //</p>

                        var start_tag = "<p>";
                        var start_i   = document.IndexOf(start_tag, trigger_i);

                        if (start_i < 0)
                        {
                            return;
                        }

                        var end_tag = "</p>";
                        var end_i   = document.IndexOf(end_tag, start_i);

                        // an unterminated paragraph cannot be sliced
                        if (end_i < 0)
                        {
                            return;
                        }

                        // content strictly between <p> and </p>; the length is measured from the
                        // end of the start tag (the former "+ start_tag.Length" overshot past the
                        // closing tag and could run off the end of the document)
                        var data_i = start_i + start_tag.Length;
                        var data   = document.Substring(data_i, end_i - data_i);

                        BasicElementParser.Parse(data, "blockquote",
                            (value, index) =>
                            {
                                if (index == 0)
                                {
                                    // first blockquote: the original url, possibly followed by <br />
                                    entry.URL = BasicElementParser.GetContent(value, "b");

                                    var br_tag = "<br />";
                                    var br_i   = entry.URL.IndexOf(br_tag);

                                    if (br_i >= 0)
                                    {
                                        entry.URL = entry.URL.Substring(0, br_i);
                                    }

                                    return;
                                }

                                if (index == 1)
                                {
                                    // second blockquote: the generated alias
                                    entry.Alias = BasicElementParser.GetContent(value, "b");

                                    return;
                                }
                            }
                            );
                    }

                    if (this.AddEntry != null)
                    {
                        this.AddEntry(entry);
                    }
                };
        }