예제 #1
0
        /// <summary>
        /// Creates a poster search bound to www.imdb.com on port 80 (with
        /// <c>CoralEnabled</c> set) and wires the crawler events so that a received
        /// document is scanned for the "principal" table and the image source found
        /// in it is handed to <c>AddEntry</c>.
        /// </summary>
        /// <remarks>
        /// When a Location value was reported for the current response the handler
        /// re-crawls the path and query of that location instead of parsing the body.
        /// Documents without the principal table are ignored.
        /// </remarks>
        public BasicIMDBPosterSearch()
        {
            this.Crawler =
                new Library.BasicWebCrawler("www.imdb.com", 80)
            {
                CoralEnabled = true
            };

            // Prototype instance; presumably only used by ToAnonymousConstructor to
            // infer the anonymous return type of the lambda below - TODO confirm.
            var DefaultImage = new { Source = "", Alt = "", Title = "" };

            var ParseImage = DefaultImage.ToAnonymousConstructor(
                (string element) =>
            {
                var Source = "";
                var Alt    = "";
                var Title  = "";

                element.
                ParseAttribute("src", value => Source  = value).
                ParseAttribute("alt", value => Alt     = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

                return(new { Source, Alt, Title });
            }
                );

            // Location reported for the response currently being received, if any.
            string location = null;

            // Reset on AllHeadersSent so a location from an earlier response is not reused.
            this.Crawler.AllHeadersSent +=
                () =>
            {
                location = null;
            };

            this.Crawler.LocationReceived +=
                value =>
            {
                location = value;
            };

            this.Crawler.DataReceived +=
                document =>
            {
                if (!string.IsNullOrEmpty(location))
                {
                    // Follow the reported location on the same crawler.
                    var u = new Uri(location);

                    this.Crawler.Crawl(u.PathAndQuery);

                    return;
                }

                var poster_tag = "<table id=\"principal\">";
                var poster_i   = document.IndexOf(poster_tag);

                // No principal table: nothing to report. Without this check the
                // IndexOf call below would be given a start index of -1 and throw.
                if (poster_i < 0)
                {
                    return;
                }

                var poster_close_tag = "</table>";
                var poster_close_i   = document.IndexOf(poster_close_tag, poster_i);

                // Unterminated table: the substring length would be negative.
                if (poster_close_i < 0)
                {
                    return;
                }

                // Slice the whole table element, closing tag included.
                var table_length = poster_close_i + poster_close_tag.Length - poster_i;
                var table        = document.Substring(poster_i, table_length);

                var poster = ParseImage(BasicElementParser.GetContent(table, "td"));

                if (this.AddEntry != null)
                {
                    this.AddEntry(poster.Source);
                }
            };
        }
예제 #2
0
        /// <summary>
        /// Searches www.movieposterdb.com for movies matching <paramref name="title"/>
        /// and invokes <paramref name="handler"/> once per result cell with the link,
        /// title text and year text parsed from it.
        /// </summary>
        /// <param name="title">Movie title; it is URL-encoded and appended to the search query.</param>
        /// <param name="handler">Callback receiving one <c>AliasEntry</c> per parsed result.</param>
        /// <remarks>
        /// A response that does not contain the "Movies" heading produces no callbacks.
        /// </remarks>
        public static void Search(string title, Action <AliasEntry> handler)
        {
            var t = new Uri("http://www.movieposterdb.com/browse/search?search_type=movies&title=");
            var c = new BasicWebCrawler(t.Host, 80);

            // Prototype instances; presumably only used by ToAnonymousConstructor to
            // infer the anonymous return types of the lambdas below - TODO confirm.
            var DefaultLink = new { Link = "", Title = "", Text = "" };
            var DefaultSpan = new { Text = "", Title = "" };

            var ParseSpan = DefaultSpan.ToAnonymousConstructor(
                (string element) =>
            {
                var Text  = "";
                var Title = "";

                element.
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text             = value).
                Parse("span");

                return(new { Text, Title });
            }
                );

            var ParseLink = DefaultLink.ToAnonymousConstructor(
                (string element) =>
            {
                var Link  = "";
                var Title = "";
                var Text  = "";

                element.
                ParseAttribute("href", value => Link   = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text             = value).
                Parse("a");

                return(new { Link, Title, Text });
            }
                );

            c.DataReceived +=
                document =>
            {
                var trigger = "Movies</h3>";

                var trigger_i = document.IndexOf(trigger);

                // The heading is absent (for example on a page without results);
                // Substring(-1) would throw, so there is simply nothing to report.
                if (trigger_i < 0)
                {
                    return;
                }

                var data = BasicElementParser.GetContent(document.Substring(trigger_i), "table");

                BasicElementParser.Parse(data, "tr",
                                         (element, index) =>
                {
                    /*
                     * <td valign="middle" style="font-size: 0pt; border-bottom: 1px solid #D2D2D2; height: 54px; width: 44px;">
                     *  <img src="http://www.movieposterdb.com/posters/08_09/2008/1179855/m_1179855_4fb9999f.jpg" style="margin-right: 8px; padding: 2px; border: 1px solid #D2D2D2; float: left;" />
                     * </td>
                     * <td valign="middle" style="border-bottom: 1px solid #D2D2D2; width: 60%;">
                     *  <b><a class="bbg" href="http://www.movieposterdb.com/movie/1179855/Go-Fast.html">Go Fast</a><br /><span style="color: #8C8C8C;">2008</span></b>
                     * </td>
                     * <td style="border-bottom: 1px solid #D2D2D2; font-size: 8pt; color: #808080;">
                     *
                     * </td>
                     */

                    BasicElementParser.Parse(element, "td",
                                             (tdelement, tdindex) =>
                    {
                        // Only the second cell carries the link and the year span.
                        if (tdindex != 1)
                        {
                            return;
                        }

                        // <b><a class="bbg" href="http://www.movieposterdb.com/movie/1179855/Go-Fast.html">Go Fast</a><br /><span style="color: #8C8C8C;">2008</span></b>
                        var _title = ParseLink(tdelement);
                        var _year  = ParseSpan(tdelement);

                        handler(
                            new AliasEntry
                        {
                            Link  = _title.Link,
                            Title = _title.Text,
                            Year  = _year.Text
                        }
                            );
                    }
                                             );
                }
                                         );
            };

            c.Crawl(t.PathAndQuery + title.URLEncode());
        }
        /// <summary>
        /// Creates a crawler for www.imdb.com (port 80) whose DataReceived handler
        /// scrapes title, year, poster, user rating, genres, runtime and tagline from
        /// the received document and passes the resulting entry to <c>AddEntry</c>.
        /// </summary>
        /// <remarks>
        /// Every section is treated as optional: when a marker or its terminator is
        /// missing, the matching field is left empty (or, for the poster, unset)
        /// instead of letting <c>Substring</c> throw on a negative index or length.
        /// </remarks>
        public BasicIMDBCrawler()
        {
            this.Crawler =
                new Library.BasicWebCrawler("www.imdb.com", 80)
            {
                //CoralEnabled = true
            };

            // Prototype instances; presumably only used by ToAnonymousConstructor to
            // infer the anonymous return types of the lambdas below - TODO confirm.
            var DefaultLink  = new { Link = "", Title = "", Text = "" };
            var DefaultImage = new { Source = "", Alt = "", Title = "" };

            var ParseLink = DefaultLink.ToAnonymousConstructor(
                (string element) =>
            {
                var Link  = "";
                var Title = "";
                var Text  = "";

                element.
                ParseAttribute("href", value => Link   = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(value => Text             = value).
                Parse();

                return(new { Link, Title, Text });
            }
                );

            var ParseImage = DefaultImage.ToAnonymousConstructor(
                (string element) =>
            {
                var Source = "";
                var Alt    = "";
                var Title  = "";

                element.
                ParseAttribute("src", value => Source  = value).
                ParseAttribute("alt", value => Alt     = value).
                ParseAttribute("title", value => Title = value).
                ParseContent(null).
                Parse();

                return(new { Source, Alt, Title });
            }
                );

            this.Crawler.DataReceived +=
                document =>
            {
                var entry = new Entry();

                #region Title and year
                // The page title is expected to look like "Name (Year)".
                var title   = BasicElementParser.GetContent(document, "title");
                var title_i = title.IndexOf("(");

                if (title_i < 0)
                {
                    // No parenthesised part: the whole title is the name.
                    entry.Title = title.Trim();
                    entry.Year  = "";
                }
                else
                {
                    entry.Title = title.Substring(0, title_i).Trim();

                    var year_i = title_i + 1;
                    var year_j = title.IndexOf(")", year_i);

                    // Without a closing parenthesis take the remainder of the title.
                    entry.Year = year_j < 0
                        ? title.Substring(year_i)
                        : title.Substring(year_i, year_j - year_i);
                }

                // remove quotes from the title
                entry.Title = entry.Title.Replace("&#34;", "");
                #endregion

                #region Poster
                var poster_i = document.IndexOf("name=\"poster\"");

                // no poster - the poster may be found on other services
                if (poster_i >= 0)
                {
                    // Widen the match to the anchor element enclosing the marker.
                    var poster_j = document.Substring(0, poster_i).LastIndexOf("<a");
                    var poster_q = document.IndexOf("</a>", poster_i);

                    if (poster_j >= 0 && poster_q >= 0)
                    {
                        // + 4 keeps the closing "</a>" inside the slice.
                        var poster       = ParseLink(document.Substring(poster_j, poster_q - poster_j + 4));
                        var poster_image = ParseImage(poster.Text);

                        entry.MediumPosterImageProvider = "imdb";
                        entry.MediumPosterImage         = poster_image.Source;
                        entry.MediumPosterImagePage     = poster.Link;
                        entry.MediumPosterTitle         = poster.Title;
                    }
                }
                #endregion

                #region UserRating
                var meta_tag = "<div class=\"meta\">";
                var meta_i   = document.IndexOf(meta_tag);
                var meta_end = meta_i < 0 ? -1 : document.IndexOf("</div>", meta_i);

                if (meta_end < 0)
                {
                    entry.UserRating = "";
                }
                else
                {
                    var meta_start = meta_i + meta_tag.Length;
                    var meta       = document.Substring(meta_start, meta_end - meta_start);

                    entry.UserRating = BasicElementParser.GetContent(meta, "b");
                }
                #endregion

                #region Genres
                var genre_tag = "<h5>Genre:</h5>";
                var genre_i   = document.IndexOf(genre_tag);
                var genre_end = genre_i < 0 ? -1 : document.IndexOf("</div>", genre_i);
                var genres    = new List <string>();

                if (genre_end >= 0)
                {
                    var genre_start = genre_i + genre_tag.Length;
                    var genre       = document.Substring(genre_start, genre_end - genre_start);

                    BasicElementParser.Parse(genre, "a",
                                             (text, index) =>
                    {
                        // "more" is a navigation link, not a genre.
                        if (text == "more")
                        {
                            return;
                        }

                        genres.Add(text);
                    }
                                             );
                }

                entry.Genres = genres.ToArray();
                #endregion

                // Runtime and tagline are searched from the genre heading onwards,
                // or from the start of the document when there is no genre heading.
                var search_i = genre_i < 0 ? 0 : genre_i;

                #region Runtime
                var runtime_tag = "<h5>Runtime:</h5>";
                var runtime_i   = document.IndexOf(runtime_tag, search_i);
                var runtime_end = runtime_i < 0 ? -1 : document.IndexOf("</div>", runtime_i);

                if (runtime_end < 0)
                {
                    entry.Runtime = "";
                }
                else
                {
                    var runtime_start = runtime_i + runtime_tag.Length;

                    entry.Runtime = document.Substring(runtime_start, runtime_end - runtime_start).Trim();
                }
                #endregion

                #region Tagline
                var Tagline_tag = "<h5>Tagline:</h5>";
                var Tagline_i   = document.IndexOf(Tagline_tag, search_i);

                // The tagline text runs up to the next tag after the heading.
                var Tagline_start = Tagline_i < 0 ? -1 : Tagline_i + Tagline_tag.Length;
                var Tagline_end   = Tagline_start < 0 ? -1 : document.IndexOf("<", Tagline_start);

                if (Tagline_end < 0)
                {
                    entry.Tagline = "";
                }
                else
                {
                    entry.Tagline = document.Substring(Tagline_start, Tagline_end - Tagline_start).Trim();
                }
                #endregion

                if (AddEntry != null)
                {
                    AddEntry(entry);
                }
            };
        }
예제 #4
0
        /// <summary>
        /// Creates a crawler for tinyurl.com (port 80) whose DataReceived handler
        /// turns the response into an entry and passes it to <c>AddEntry</c>.
        /// </summary>
        /// <remarks>
        /// When <c>APIMode</c> is set the whole document is taken as the alias.
        /// Otherwise the "TinyURL was created!" page is parsed: the first blockquote
        /// supplies the original URL and the second one the alias. Pages that lack
        /// the heading or a complete paragraph after it are ignored.
        /// </remarks>
        public BasicTinyURLCrawler()
        {
            this.Crawler =
                new Library.BasicWebCrawler("tinyurl.com", 80)
            {
                //CoralEnabled = true
            };

            this.Crawler.DataReceived +=
                document =>
            {
                var entry = new Entry();

                if (APIMode)
                {
                    entry.Alias = document;
                }
                else
                {
                    var trigger = "<h1>TinyURL was created!</h1>";

                    var trigger_i = document.IndexOf(trigger);

                    if (trigger_i < 0)
                    {
                        return;
                    }

                    // we are still in the business...

                    //<h1>TinyURL was created!</h1>
                    //<p>The following URL:
                    //<blockquote><b>http://thepiratebay.org<br />
                    //</b></blockquote>
                    //has a length of 23 characters and resulted in the following TinyURL which has a length of 24 characters:
                    //<blockquote><b>http://tinyurl.com/5umsn</b><br><small>[<a href="http://tinyurl.com/5umsn" target="_blank">Open in new window</a>]</small></blockquote>
                    //Or, give your recipients confidence with a preview TinyURL:
                    //<blockquote><b>http://preview.tinyurl.com/5umsn</b><br><small>[<a href="http://preview.tinyurl.com/5umsn" target="_blank">Open in new window</a>]</small>

                    //</blockquote>
                    //</p>

                    var start_tag = "<p>";
                    var start_i   = document.IndexOf(start_tag, trigger_i);

                    if (start_i < 0)
                    {
                        return;
                    }

                    var end_tag = "</p>";
                    var end_i   = document.IndexOf(end_tag, start_i);

                    // Unterminated paragraph: there is no complete result to parse.
                    if (end_i < 0)
                    {
                        return;
                    }

                    // Content between "<p>" and "</p>". The length is measured from
                    // the end of the opening tag, so the slice stops right before the
                    // closing tag and can never run past the end of the document.
                    var data_i = start_i + start_tag.Length;
                    var data   = document.Substring(data_i, end_i - data_i);

                    BasicElementParser.Parse(data, "blockquote",
                                             (value, index) =>
                    {
                        if (index == 0)
                        {
                            entry.URL = BasicElementParser.GetContent(value, "b");

                            // The original URL is followed by a line break inside <b>; cut it off.
                            var br_tag = "<br />";
                            var br_i   = entry.URL.IndexOf(br_tag);

                            if (br_i >= 0)
                            {
                                entry.URL = entry.URL.Substring(0, br_i);
                            }

                            return;
                        }

                        if (index == 1)
                        {
                            entry.Alias = BasicElementParser.GetContent(value, "b");

                            return;
                        }
                    }
                                             );
                }

                if (this.AddEntry != null)
                {
                    this.AddEntry(entry);
                }
            };
        }