Пример #1
0
 /// <summary>
 /// Raises the <c>MovieFound</c> event with this instance as the sender.
 /// </summary>
 /// <param name="args">Event data passed unchanged to every subscriber.</param>
 protected void OnMovieFound(MovieFoundEventArgs args)
 {
     // Take a snapshot of the delegate before testing it. Invoking the event
     // field directly after the null check can throw a NullReferenceException
     // if the last subscriber detaches on another thread in between.
     var handler = MovieFound;
     if (handler != null)
     {
         handler(this, args);
     }
 }
Пример #2
0
        /// <summary>
        /// Scrapes the listings reachable from <c>RootLinks</c> and returns the movies gathered in <c>allMovies</c>.
        /// </summary>
        /// <remarks>
        /// Each root entry maps a listing URL (key) to a year (value). The page URLs of a listing are derived
        /// from the href of its <c>a.last</c> link and visited from the highest page number down to the root
        /// page itself; on every page the <c>h2.title a</c> entries are processed from last to first.
        /// Every movie is added to <c>allMovies</c> as soon as it is created; <c>OnMovieFound</c> is raised only
        /// for movies that end up with at least one link. Failures are swallowed at each nesting level so that
        /// one broken link, movie, page or year does not stop the rest of the run.
        /// </remarks>
        /// <param name="skipUrls">Link URLs that must not be added; checked before and after <c>SanitizeUrl</c>.</param>
        /// <param name="years">Optional year filter; <c>null</c> or an empty list means every year.</param>
        /// <returns>The <c>allMovies</c> list, returned early when the <c>stop</c> flag is set.</returns>
        public override List <ScrapedMovie> ScrapeMovies(List <string> skipUrls, List <int> years = null)
        {
            if (years == null)
            {
                years = new List <int>();
            }

            foreach (var entry in RootLinks)
            {
                try
                {
                    if (years.Count > 0 && !years.Contains(entry.Value))
                    {
                        continue;
                    }

                    int year     = entry.Value;
                    var rootPage = GotoUrl(entry.Key);

                    // The page count is the text between the last two '/' of the "last page" href.
                    // NOTE(review): assumes the href ends with "<count>/" and that a.last exists;
                    // otherwise this throws and the whole year is skipped by the catch below.
                    var lastPage   = SelectItem(rootPage, "a.last").Attributes["href"];
                    int ix1        = lastPage.LastIndexOf('/');
                    int ix2        = ix1 > 0 ? lastPage.LastIndexOf('/', ix1 - 1) : -1;
                    var count      = Convert.ToInt32(lastPage.Substring(ix2 + 1, lastPage.Length - ix2 - 2));
                    var pagePrefix = lastPage.Substring(0, ix2 + 1);

                    // Highest page number first, the root page (page 1) last.
                    var pageUrls = new List <string>();
                    for (var i = count; i >= 2; i--)
                    {
                        pageUrls.Add(pagePrefix + i.ToString() + "/");
                    }
                    pageUrls.Add(entry.Key);

                    foreach (var pageUrl in pageUrls)
                    {
                        try
                        {
                            var listingPage = GotoUrl(pageUrl);
                            var postBoxes   = SelectItems(listingPage, "h2.title a");
                            for (var j = postBoxes.Count - 1; j >= 0; j--)
                            {
                                try
                                {
                                    var movie = new ScrapedMovie(this);
                                    allMovies.Add(movie);
                                    movie.PageUrl = ReadAttribute(postBoxes[j], "href");
                                    OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + year.ToString()));
                                    movie.LangCode     = "te";
                                    movie.ReleasedDate = new DateTime(year, 1, 1);

                                    // Keep the movie page in its own local: redirect pages loaded further
                                    // down must not replace the document the iframe query runs against.
                                    var moviePage = GotoUrl(movie.PageUrl);
                                    movie.Name     = FixTitle(SelectItem(moviePage, "h2.title").InnerText);
                                    movie.ImageUrl = SelectItem(moviePage, "img.wp-post-image").Attributes["src"];
                                    try
                                    {
                                        movie.Description = SelectItems(moviePage, "div.entry p span")[1].InnerText;
                                    }
                                    catch
                                    {
                                        // The description is optional; a missing second span leaves it unset.
                                    }

                                    // Candidate links: URL -> caption.
                                    var links = new Dictionary <string, string>();
                                    foreach (var anchor in SelectItems(moviePage, "a"))
                                    {
                                        try
                                        {
                                            var url = anchor.Attributes["href"];
                                            if (GetScrapper(url) != null && !links.ContainsKey(url))
                                            {
                                                links.Add(url, anchor.InnerText.Replace("&nbsp;", ""));
                                            }
                                            if (url.Contains("http://www.power4link.us"))
                                            {
                                                // The real player URL is the iframe src of the redirect page,
                                                // so that src (not the redirect URL) is the key to de-duplicate on.
                                                var redirectPage = GotoUrl(url);
                                                var frameSrc     = SelectItem(redirectPage, "div.entry-content  iframe").Attributes["src"];
                                                if (!links.ContainsKey(frameSrc))
                                                {
                                                    links.Add(frameSrc, "Watch Online");
                                                }
                                            }
                                        }
                                        catch
                                        {
                                            // A single unusable anchor must not prevent the others from being read.
                                        }
                                    }

                                    foreach (var iframe in SelectItems(moviePage, "div.entry  iframe"))
                                    {
                                        // Guard the insert: an unguarded Add throws on a repeated src,
                                        // which would abort the whole movie via the outer catch.
                                        var src = iframe.Attributes["src"];
                                        if (src != null && !links.ContainsKey(src))
                                        {
                                            links.Add(src, "Watch Online");
                                        }
                                    }

                                    foreach (var link in links)
                                    {
                                        try
                                        {
                                            var linkUrl = link.Key;
                                            if (IgnoreLink(linkUrl))
                                            {
                                                continue;
                                            }

                                            var host = GetScrapper(linkUrl);
                                            if (host == null)
                                            {
                                                OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                                                continue;
                                            }

                                            // Checked twice because skipUrls may hold either the raw or the sanitized form.
                                            if (skipUrls.Contains(linkUrl))
                                            {
                                                continue;
                                            }
                                            linkUrl = host.SanitizeUrl(linkUrl);
                                            if (skipUrls.Contains(linkUrl))
                                            {
                                                continue;
                                            }

                                            // A failing validation call is treated as Success, so the link is kept.
                                            var result = MovieTube.Client.Scraper.ScraperResult.Success;
                                            try
                                            {
                                                result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                            }
                                            catch
                                            {
                                            }

                                            if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                            {
                                                // Captions mentioning "part" keep their tail ("Watch Part 2"),
                                                // everything else becomes the generic caption.
                                                var caption   = link.Value;
                                                var partIndex = caption.IndexOf("part", StringComparison.OrdinalIgnoreCase);
                                                var name      = partIndex >= 0
                                                    ? "Watch " + caption.Substring(partIndex)
                                                    : "Watch Online";

                                                movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, name));
                                            }
                                        }
                                        catch
                                        {
                                            // One bad link must not discard the remaining links of the movie.
                                        }
                                    }

                                    if (movie.Links.Count > 0)
                                    {
                                        OnMovieFound(new MovieFoundEventArgs(movie));
                                    }
                                    if (this.stop)
                                    {
                                        return allMovies;
                                    }
                                }
                                catch
                                {
                                    // Skip a movie that cannot be scraped and continue with the next one.
                                }
                            }
                        }
                        catch
                        {
                            // Skip a listing page that cannot be loaded.
                        }
                    }
                }
                catch
                {
                    // Skip a year whose root page or pager cannot be read.
                }
            }
            return allMovies;
        }
Пример #3
0
        /// <summary>
        /// Scrapes the year archive linked from <c>RootUrl</c> and returns the movies gathered in <c>allMovies</c>.
        /// </summary>
        /// <remarks>
        /// The year links (<c>#HTML5.widget div.widget-content a</c>) are processed from last to first. For each
        /// year the post URLs are collected page by page through the "older posts" link and then visited in
        /// reverse order of collection (a stack). Every link accepted for a movie is also appended to
        /// <paramref name="skipUrls"/>, so the caller's list is modified. <c>OnMovieFound</c> is raised only for
        /// movies that end up with at least one link. Failures are swallowed at each nesting level.
        /// </remarks>
        /// <param name="skipUrls">Link URLs that must not be added; extended with every link this method accepts.</param>
        /// <param name="years">Optional year filter; <c>null</c> or an empty list means every year.</param>
        /// <returns>The <c>allMovies</c> list, returned early when the <c>stop</c> flag is set.</returns>
        public override List <ScrapedMovie> ScrapeMovies(List <string> skipUrls, List <int> years = null)
        {
            try
            {
                var dom = GotoUrl(RootUrl);
                if (years == null)
                {
                    years = new List <int>();
                }

                // One anchor per year; the anchor text is the year number.
                var elems = SelectItems(dom, "#HTML5.widget div.widget-content a");

                for (var i = elems.Count - 1; i >= 0; i--)
                {
                    try
                    {
                        var elem = elems[i];
                        int year = Convert.ToInt32(elem.InnerText.Trim());
                        if (years.Count > 0 && !years.Contains(year))
                        {
                            continue;
                        }

                        var urls = new Stack <string>();
                        try
                        {
                            dom = GotoUrl(ReadAttribute(elem, "href"));

                            // Follow the "older posts" pager until a page yields no post links.
                            while (true)
                            {
                                // NOTE(review): the first four anchors are skipped — presumably navigation links; confirm.
                                var pageLinks = SelectItems(dom, "div.blog-posts a").Skip(4).ToList();
                                foreach (var pageLink in pageLinks)
                                {
                                    var href = ReadAttribute(pageLink, "href");
                                    if (!urls.Contains(href))
                                    {
                                        urls.Push(href);
                                    }
                                }
                                if (pageLinks.Count == 0)
                                {
                                    break;
                                }

                                dom = GotoUrl(ReadAttribute(SelectItem(dom, "#blog-pager-older-link a"), "href"));
                            }
                        }
                        catch
                        {
                            // Paging stopped early (missing pager link or a failed request).
                            // Keep going with the URLs gathered so far instead of dropping the
                            // whole year; if nothing was gathered the loop below is a no-op.
                        }

                        while (urls.Count > 0)
                        {
                            try
                            {
                                var u = urls.Pop();
                                dom = GotoUrl(u);
                                var title = SelectItem(dom, ".post-title.entry-title a").InnerText;

                                // "Name - Description": the first segment is the name, the second the description.
                                var titleParts = title.Split('-');
                                var hasDash    = titleParts.Length > 1;

                                ScrapedMovie movie;
                                try
                                {
                                    movie = new ScrapedMovie(this)
                                    {
                                        PageUrl      = u,
                                        Description  = hasDash ? titleParts[1] : String.Empty,
                                        Name         = FixTitle(hasDash ? titleParts[0] : title),
                                        LangCode     = "ta",
                                        ReleasedDate = new DateTime(year, 1, 1),
                                        ImageUrl     = ReadAttribute(SelectItem(dom, "div.post-body.entry-content img"), "src")
                                    };
                                }
                                catch
                                {
                                    // The movie could not be built; skip it explicitly rather than
                                    // relying on a NullReferenceException further down.
                                    continue;
                                }

                                OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + year.ToString()));
                                allMovies.Add(movie);

                                foreach (var item in SelectItems(dom, ".fullpost a"))
                                {
                                    try
                                    {
                                        var linkUrl = ReadAttribute(item, "href");
                                        if (linkUrl.Contains("links2sites"))
                                        {
                                            // Redirect page: the player URL is the src of its embed, or failing that its iframe.
                                            var redirectPage = GotoUrl(linkUrl);
                                            try
                                            {
                                                linkUrl = ReadAttribute(SelectItem(redirectPage, ".post-body.entry-content embed"), "src");
                                            }
                                            catch
                                            {
                                                try
                                                {
                                                    linkUrl = ReadAttribute(SelectItem(redirectPage, ".post-body.entry-content iframe"), "src");
                                                }
                                                catch
                                                {
                                                    // Neither element was readable; linkUrl keeps the redirect URL.
                                                }
                                            }
                                        }

                                        if (IgnoreLink(linkUrl))
                                        {
                                            continue;
                                        }

                                        var host = GetScrapper(linkUrl);
                                        if (host == null)
                                        {
                                            OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                                            continue;
                                        }

                                        // Checked twice because skipUrls may hold either the raw or the sanitized form.
                                        if (skipUrls.Contains(linkUrl))
                                        {
                                            continue;
                                        }
                                        linkUrl = host.SanitizeUrl(linkUrl);
                                        if (skipUrls.Contains(linkUrl))
                                        {
                                            continue;
                                        }
                                        // Remember the link so that later posts do not add it again.
                                        skipUrls.Add(linkUrl);

                                        // A failing validation call is treated as Success, so the link is kept.
                                        var result = MovieTube.Client.Scraper.ScraperResult.Success;
                                        try
                                        {
                                            result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                        }
                                        catch
                                        {
                                        }

                                        if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                        {
                                            movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, "Watch Full Movie"));
                                        }
                                    }
                                    catch
                                    {
                                        // One bad link must not discard the remaining links of the movie.
                                    }
                                }

                                if (movie.Links.Count > 0)
                                {
                                    OnMovieFound(new MovieFoundEventArgs(movie));
                                }
                                if (this.stop)
                                {
                                    return allMovies;
                                }
                            }
                            catch
                            {
                                // Skip a post that cannot be scraped and continue with the next one.
                            }
                        }
                    }
                    catch
                    {
                        // Skip a year link whose text is not a number or whose pages fail unexpectedly.
                    }
                }
            }
            catch (Exception)
            {
                // The root page could not be processed; return whatever has been collected.
            }
            return allMovies;
        }
Пример #4
0
        /// <summary>
        /// Scrapes all listing pages of one year and raises <c>OnMovieFound</c> for every movie that has links.
        /// </summary>
        /// <remarks>
        /// Pages are visited from the highest page number down to <paramref name="startUrl"/>, and the
        /// <c>a.clip-link</c> entries of each page from last to first. Every movie that is not skipped is added
        /// to <c>allMovies</c>. Exceptions inside the page and movie loops are reported through
        /// <c>OnScraperNotFound</c> with the label "Exception"; exceptions while reading the pager are not
        /// caught here and propagate to the caller.
        /// </remarks>
        /// <param name="dom">Already loaded first listing page; only its pager link is read from it.</param>
        /// <param name="year">Year assigned to every movie (release date is set to January 1st of that year).</param>
        /// <param name="startUrl">URL of the first listing page; visited last.</param>
        /// <param name="skipUrls">Link URLs that must not be added; checked before and after <c>SanitizeUrl</c>.</param>
        private void ScrapThread(CQ dom, int year, string startUrl, List <string> skipUrls)
        {
            // The href of the pager's "last" link is split at its final '/':
            // the part after it is the page count, the part before it the URL template.
            var last        = SelectItem(dom, ".wp-pagenavi > .last");
            var lastHref    = ReadAttribute(last, "href");
            var pos         = lastHref.LastIndexOf('/');
            var lastIndex   = Int32.Parse(lastHref.Substring(pos + 1));
            var urlTemplate = lastHref.Substring(0, pos);

            var urls = new List <string>();
            for (var i = lastIndex; i > 1; i--)
            {
                urls.Add(String.Format("{0}/{1}", urlTemplate, i));
            }
            urls.Add(startUrl);

            foreach (var url in urls)
            {
                try
                {
                    var listingPage = GotoUrl(url);
                    var elems       = SelectItems(listingPage, "a.clip-link");
                    for (var i = elems.Count - 1; i >= 0; i--)
                    {
                        try
                        {
                            var movie = new ScrapedMovie(this);
                            movie.PageUrl = ReadAttribute(elems[i], "href");
                            OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + year.ToString()));

                            // Pages whose URL carries one of these markers are skipped. The comparison is
                            // ordinal and case-insensitive so it does not depend on the current culture.
                            if (movie.PageUrl.IndexOf("-in-hindi", StringComparison.OrdinalIgnoreCase) >= 0 ||
                                movie.PageUrl.IndexOf("-hindi.", StringComparison.OrdinalIgnoreCase) >= 0)
                            {
                                continue;
                            }

                            var moviePage = GotoUrl(movie.PageUrl);

                            movie.ReleasedDate = new DateTime(year, 1, 1);
                            movie.LangCode     = "hi";
                            movie.Language     = "Hindi";
                            movie.Description  = String.Empty;
                            movie.Name         = ReadText(SelectItem(moviePage, ".entry-title")).Replace("\n", "").Replace("\t", "");
                            try
                            {
                                // The description is every leading paragraph up to the first one containing <strong>.
                                foreach (var p in SelectItems(moviePage, ".entry-content p"))
                                {
                                    if (p.InnerHTML.Contains("<strong>"))
                                    {
                                        break;
                                    }
                                    movie.Description += ReadText(p) + Environment.NewLine;
                                }
                                if (String.IsNullOrWhiteSpace(movie.Description))
                                {
                                    movie.Description = String.Empty;
                                }
                            }
                            catch
                            {
                                // The description is optional; keep whatever was gathered.
                            }

                            var imgElems = SelectItems(moviePage, "#thumb img");
                            if (imgElems.Count == 0)
                            {
                                Debug.WriteLine("No Image: " + movie.PageUrl);
                            }
                            else
                            {
                                movie.ImageUrl = ReadAttribute(imgElems[0], "src");
                            }
                            allMovies.Add(movie);

                            // Links
                            foreach (var l in SelectItems(moviePage, ".entry-content p a.external"))
                            {
                                var pageUrl = ReadAttribute(l, "href");

                                // Links to these two hosts are never followed.
                                if (pageUrl.Contains("filmshowonline.net") ||
                                    pageUrl.Contains("www.veoh.com/download"))
                                {
                                    continue;
                                }

                                var linkUrl = pageUrl;
                                if (IgnoreLink(linkUrl))
                                {
                                    continue;
                                }

                                try
                                {
                                    var host = GetScrapper(linkUrl);
                                    if (host == null)
                                    {
                                        OnScraperNotFound(new ScraperNotFound(linkUrl, pageUrl));
                                        continue;
                                    }

                                    // Checked twice because skipUrls may hold either the raw or the sanitized form.
                                    if (skipUrls.Contains(linkUrl))
                                    {
                                        continue;
                                    }
                                    linkUrl = host.SanitizeUrl(linkUrl);
                                    if (skipUrls.Contains(linkUrl))
                                    {
                                        continue;
                                    }

                                    // Do not add the same download URL twice to one movie.
                                    if (movie.Links.Any(x => String.Equals(x.DownloadUrl, linkUrl, StringComparison.OrdinalIgnoreCase)))
                                    {
                                        continue;
                                    }

                                    // A failing validation call is treated as Success, so the link is kept.
                                    var result = MovieTube.Client.Scraper.ScraperResult.Success;
                                    try
                                    {
                                        result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                    }
                                    catch
                                    {
                                    }

                                    if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                    {
                                        movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, l.InnerText));
                                    }
                                }
                                catch (Exception ex)
                                {
                                    OnScraperNotFound(new ScraperNotFound("Exception", ex.Message));
                                }
                            }

                            if (movie.Links.Count > 0)
                            {
                                OnMovieFound(new MovieFoundEventArgs(movie));
                            }
                            if (this.stop)
                            {
                                return;
                            }
                        }
                        catch (Exception ex)
                        {
                            OnScraperNotFound(new ScraperNotFound("Exception", ex.Message));
                        }
                    }
                }
                catch (Exception ex)
                {
                    OnScraperNotFound(new ScraperNotFound("Exception", ex.Message));
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Scrapes every root listing in <c>RootLinks</c> and returns the movies gathered in <c>allMovies</c>.
        /// </summary>
        /// <remarks>
        /// Each root entry maps a listing URL (key) to the language code (value) assigned to its movies.
        /// The year links of a root (<c>.video-organizer-element-wrapper a</c>) are processed from last to first;
        /// for each year every <c>.numerical-nav a</c> page is loaded and one movie is created per
        /// <c>.video-object-wrapper</c>. Each movie gets a single link pointing at its own page and is reported
        /// through <c>OnMovieFound</c>. Failures are swallowed at every level, and a root that fails does not
        /// stop the remaining roots from being scraped.
        /// </remarks>
        /// <param name="skipUrls">Movie page URLs that must not be scraped again.</param>
        /// <param name="years">Optional year filter; <c>null</c> or an empty list means every year.</param>
        /// <returns>The <c>allMovies</c> list, returned early when the <c>stop</c> flag is set.</returns>
        public override List <ScrapedMovie> ScrapeMovies(List <string> skipUrls, List <int> years = null)
        {
            if (years == null)
            {
                years = new List <int>();
            }
            try
            {
                foreach (var entry in RootLinks)
                {
                    try
                    {
                        // NOTE(review): the second GotoUrl argument (3) is presumably a retry count — confirm.
                        var rootPage = GotoUrl(entry.Key, 3);
                        var rootUri  = new Uri(entry.Key);

                        var elems = SelectItems(rootPage, ".video-organizer-element-wrapper a");
                        for (var i = elems.Count - 1; i >= 0; i--)
                        {
                            try
                            {
                                var elem = elems[i];

                                // Cheap pre-filter on the link text before loading the page. A text that
                                // is not a number leaves year at 0, which only matters when a filter is set.
                                int year;
                                Int32.TryParse(elem.InnerText, out year);
                                if (years.Count > 0 && !years.Contains(year))
                                {
                                    continue;
                                }

                                var yearPage = GotoUrl(new Uri(rootUri, ReadAttribute(elem, "href")).AbsoluteUri, 3);

                                // The authoritative year is the selected filter on the loaded page.
                                var selectedText = ReadText(SelectItems(yearPage, ".filter-selected").First());
                                if (!Int32.TryParse(Regex.Replace(selectedText, "[^0-9.]", ""), out year))
                                {
                                    continue;
                                }
                                // Only four-digit years are accepted (the parsed text cannot carry a sign).
                                if (year < 1000 || year > 9999)
                                {
                                    continue;
                                }
                                if (years.Count > 0 && !years.Contains(year))
                                {
                                    continue;
                                }

                                foreach (var navLink in SelectItems(yearPage, ".numerical-nav a"))
                                {
                                    try
                                    {
                                        var listingPage = GotoUrl(new Uri(rootUri, ReadAttribute(navLink, "href")).AbsoluteUri, 3);
                                        foreach (var subElem in SelectItems(listingPage, ".video-object-wrapper"))
                                        {
                                            try
                                            {
                                                // Query inside this single movie wrapper only.
                                                var card      = CQ.Create(subElem);
                                                var movie     = new ScrapedMovie(this);
                                                var titleElem = card.Select(".movie-title").Elements.First();
                                                movie.PageUrl = new Uri(new Uri(RootUrl), ReadAttribute(titleElem, "href").Replace("..", "")).AbsoluteUri;
                                                OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + year.ToString()));
                                                if (skipUrls.Contains(movie.PageUrl))
                                                {
                                                    continue;
                                                }

                                                movie.ImageUrl     = ReadAttribute(card.Select(".video-object-thumb img").Elements.First(), "src");
                                                movie.ReleasedDate = new DateTime(year, 1, 1);
                                                movie.LangCode     = entry.Value;
                                                movie.Description  = ReadText(card.Select(".desc_body").Elements.First()).Replace("-", "");
                                                movie.Name         = ReadText(titleElem).Replace("\n", "").Replace("\t", "");
                                                // Strip bracketed suffixes such as "(...)", "[...]" or "{...}" from the name.
                                                movie.Name         = Regex.Replace(movie.Name, @"\s*?(?:\(.*?\)|\[.*?\]|\{.*?\})", String.Empty);
                                                movie.Links.Add(new ScrapedMovieLink(movie.PageUrl, "einthusan.com", "With Subtitles"));
                                                allMovies.Add(movie);
                                                OnMovieFound(new MovieFoundEventArgs(movie));
                                                if (this.stop)
                                                {
                                                    return allMovies;
                                                }
                                            }
                                            catch
                                            {
                                                // Skip a movie entry that cannot be read.
                                            }
                                        }
                                    }
                                    catch
                                    {
                                        // Skip a listing page that cannot be loaded.
                                    }
                                }
                            }
                            catch
                            {
                                // Skip a year link that cannot be processed.
                            }
                        }
                    }
                    catch
                    {
                        // A root that cannot be loaded must not prevent the remaining roots from being scraped.
                    }
                }
            }
            catch
            {
                // Enumerating the roots failed; return whatever has been collected.
            }
            return allMovies;
        }
Пример #6
0
        /// <summary>
        /// Walks every listing in <c>RootLinks</c> (listing URL paired with a release year), visits the
        /// listing's pagination pages and each movie page linked from them, and collects the embedded
        /// video links for which a host scraper is available.
        /// </summary>
        /// <param name="skipUrls">Video link URLs that must not be collected; checked both before and after the host scraper sanitizes the URL.</param>
        /// <param name="years">Years to restrict the scrape to; <c>null</c> or an empty list means every entry of <c>RootLinks</c>.</param>
        /// <returns>The scraper's accumulated <c>allMovies</c> list.</returns>
        /// <remarks>
        /// Scraping is best effort: a failure on one listing, page, movie or link is swallowed and the
        /// loop moves on. <c>OnMovieFound</c> is called only for movies with at least one collected link,
        /// and the method returns early once <c>this.stop</c> is set.
        /// </remarks>
        public override List <ScrapedMovie> ScrapeMovies(List <string> skipUrls, List <int> years = null)
        {
            if (years == null)
            {
                years = new List <int>();
            }

            foreach (var entry in RootLinks)
            {
                try
                {
                    int year = entry.Value;
                    if (years.Count > 0 && !years.Contains(year))
                    {
                        continue;
                    }

                    var listing = GotoUrl(entry.Key);

                    // Pagination anchors are visited in reverse document order; the listing URL itself goes last.
                    var pageAnchors = SelectItems(listing, ".wp-pagenavi a").Where(x => x.Attributes["class"] == "page larger" || x.Attributes["class"] == "page smaller").ToList();
                    var pageUrls    = new List <string>();
                    for (var i = pageAnchors.Count - 1; i >= 0; i--)
                    {
                        pageUrls.Add(new Uri(new Uri(entry.Key), ReadAttribute(pageAnchors[i], "href")).AbsoluteUri);
                    }
                    pageUrls.Add(entry.Key);

                    foreach (var pageUrl in pageUrls)
                    {
                        try
                        {
                            var page      = GotoUrl(pageUrl);
                            var postBoxes = SelectItems(page, ".boxentry a");
                            for (var j = postBoxes.Count - 1; j >= 0; j--)
                            {
                                try
                                {
                                    var pb = postBoxes[j];

                                    var movie = new ScrapedMovie(this);
                                    movie.PageUrl = ReadAttribute(pb, "href");
                                    OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + year.ToString()));
                                    movie.LangCode     = "ta";
                                    movie.ReleasedDate = new DateTime(year, 1, 1);

                                    // Anchors without a title, and dubbed releases, are not scraped.
                                    var title = ReadAttribute(pb, "title");
                                    if (title == null || title.IndexOf("dubbed", StringComparison.OrdinalIgnoreCase) >= 0)
                                    {
                                        continue;
                                    }

                                    // Anchors without a child element (no thumbnail) were previously dropped through
                                    // a swallowed NullReferenceException; skip them explicitly instead.
                                    var thumb = pb.FirstElementChild;
                                    if (thumb == null)
                                    {
                                        continue;
                                    }
                                    movie.Name     = FixTitle(title);
                                    movie.ImageUrl = thumb.Attributes["src"];

                                    // Register the movie only once it is known to be kept, so skipped entries
                                    // do not linger half-populated in the returned list.
                                    allMovies.Add(movie);

                                    // Description scraping is intentionally disabled for this site.
                                    var moviePage = GotoUrl(movie.PageUrl);
                                    var links     = new List <string>();
                                    foreach (var emb in SelectItems(moviePage, ".videosection embed"))
                                    {
                                        links.Add(emb.Attributes["src"]);
                                    }
                                    foreach (var iframe in SelectItems(moviePage, ".videosection iframe"))
                                    {
                                        links.Add(iframe.Attributes["src"]);
                                    }

                                    foreach (var rawLink in links)
                                    {
                                        try
                                        {
                                            var linkUrl = rawLink;
                                            if (IgnoreLink(linkUrl))
                                            {
                                                continue;
                                            }

                                            var host = GetScrapper(linkUrl);
                                            if (host == null)
                                            {
                                                OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                                                continue;
                                            }

                                            // The skip list is consulted for both the raw and the sanitized form.
                                            if (skipUrls.Contains(linkUrl))
                                            {
                                                continue;
                                            }
                                            linkUrl = host.SanitizeUrl(linkUrl);
                                            if (skipUrls.Contains(linkUrl))
                                            {
                                                continue;
                                            }

                                            MovieTube.Client.Scraper.ScraperResult result = MovieTube.Client.Scraper.ScraperResult.Success;
                                            try
                                            {
                                                result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                            }
                                            catch
                                            {
                                                // Validation is advisory: if it throws, the default Success is kept
                                                // and the link is still collected.
                                            }

                                            if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                            {
                                                movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, "Watch Full Movie"));
                                            }
                                        }
                                        catch
                                        {
                                            // Best effort: a broken link must not stop the remaining links.
                                        }
                                    }

                                    if (movie.Links.Count > 0)
                                    {
                                        OnMovieFound(new MovieFoundEventArgs(movie));
                                    }
                                    if (this.stop)
                                    {
                                        return allMovies;
                                    }
                                }
                                catch
                                {
                                    // Best effort: a broken movie entry must not stop the rest of the page.
                                }
                            }
                        }
                        catch
                        {
                            // Best effort: a page that fails to load or parse is skipped.
                        }
                    }
                }
                catch
                {
                    // Best effort: a listing that fails to load or parse is skipped.
                }
            }
            return allMovies;
        }
Пример #7
0
        /// <summary>
        /// Scrapes the given listing pages: for each <c>.movie</c> element it reads the movie's page URL,
        /// name, year and poster, then opens the movie page and collects the video links found in its
        /// links table for which a host scraper is available.
        /// </summary>
        /// <param name="links">URLs of the listing pages to scrape.</param>
        /// <param name="langCode">Language code assigned to every movie found.</param>
        /// <param name="skipUrls">Video link URLs that must not be collected; checked both before and after the host scraper sanitizes the URL.</param>
        /// <param name="years">When non-empty, only movies whose listed year is in this list are scraped; <c>null</c> or empty means no restriction.</param>
        /// <returns>The scraper's accumulated <c>allMovies</c> list.</returns>
        /// <remarks>
        /// Loading a listing page is not guarded here, so a failure there propagates to the caller.
        /// Failures on an individual movie, table row or link are swallowed (best effort).
        /// <c>OnMovieFound</c> is called only for movies with at least one collected link, and the
        /// method returns early once <c>this.stop</c> is set.
        /// </remarks>
        private List <ScrapedMovie> DoScrapeMovies(List <string> links, string langCode, List <string> skipUrls, List <int> years = null)
        {
            const string siteRoot = "http://apnaview.com";

            if (years == null)
            {
                years = new List <int>();
            }

            foreach (var entry in links)
            {
                var listing = GotoUrl(entry);
                var movies  = SelectItems(listing, ".movie");
                foreach (var m in movies)
                {
                    try
                    {
                        // The first child of a .movie element is the anchor; its children are read
                        // positionally: [0] poster, [1] name, [2] year.
                        var anchor   = m.FirstElementChild;
                        var children = anchor.ChildElements.ToList();

                        // Honour the year filter before doing any work for this movie; the parameter
                        // used to be accepted but never applied.
                        int year = Convert.ToInt32(children[2].InnerText);
                        if (years.Count > 0 && !years.Contains(year))
                        {
                            continue;
                        }

                        var movie = new ScrapedMovie(this);
                        movie.PageUrl      = siteRoot + anchor.Attributes["href"];
                        movie.ReleasedDate = new DateTime(year, 1, 1);
                        OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + movie.ReleasedDate.Year.ToString()));
                        movie.LangCode = langCode;
                        movie.Name     = children[1].InnerText;

                        // The poster is optional: a missing src simply leaves ImageUrl unset.
                        var poster = children[0].Attributes["src"];
                        if (poster != null && poster.Contains("/img"))
                        {
                            movie.ImageUrl = siteRoot + poster;
                        }

                        // Register the movie only after its metadata was read successfully, so failed
                        // or filtered entries do not linger half-populated in the returned list.
                        allMovies.Add(movie);

                        var moviePage = GotoUrl(movie.PageUrl);
                        var vids      = SelectItems(moviePage, ".table.table-bordered tbody tr");
                        foreach (var vid in vids)
                        {
                            try
                            {
                                // The second cell of each row holds the link elements.
                                var vidLinks = vid.ChildElements.ToList()[1].ChildElements.ToList();
                                foreach (var vl in vidLinks)
                                {
                                    try
                                    {
                                        var href    = vl.Attributes["href"];
                                        var linkUrl = href;
                                        if (GetScrapper(linkUrl) == null)
                                        {
                                            // No scraper for the link itself: treat it as an intermediate
                                            // page and look for an embedded player (iframe first, then embed).
                                            linkUrl = String.Empty;
                                            var playerPage = GotoUrl(href);
                                            try
                                            {
                                                linkUrl = SelectItem(playerPage, ".videoplayer iframe").Attributes["src"];
                                            }
                                            catch { }
                                            try
                                            {
                                                if (String.IsNullOrWhiteSpace(linkUrl))
                                                {
                                                    linkUrl = SelectItem(playerPage, ".videoplayer embed").Attributes["src"];
                                                }
                                            }
                                            catch { }
                                        }

                                        // Nothing was found to resolve; do not report an empty URL as an unknown host.
                                        if (String.IsNullOrWhiteSpace(linkUrl))
                                        {
                                            continue;
                                        }
                                        if (IgnoreLink(linkUrl))
                                        {
                                            continue;
                                        }

                                        var host = GetScrapper(linkUrl);
                                        if (host == null)
                                        {
                                            OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                                            continue;
                                        }

                                        // The skip list is consulted for both the raw and the sanitized form.
                                        if (skipUrls.Contains(linkUrl))
                                        {
                                            continue;
                                        }
                                        linkUrl = host.SanitizeUrl(linkUrl);
                                        if (skipUrls.Contains(linkUrl))
                                        {
                                            continue;
                                        }

                                        MovieTube.Client.Scraper.ScraperResult result = MovieTube.Client.Scraper.ScraperResult.Success;
                                        try
                                        {
                                            result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                        }
                                        catch
                                        {
                                            // Validation is advisory: if it throws, the default Success is kept
                                            // and the link is still collected.
                                        }

                                        if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                        {
                                            var name = vl.InnerText.Trim();
                                            movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, name));
                                        }
                                    }
                                    catch
                                    {
                                        // Best effort: a broken link must not drop the remaining links of the row.
                                    }
                                }
                            }
                            catch
                            {
                                // Best effort: a malformed table row is skipped.
                            }
                        }

                        if (movie.Links.Count > 0)
                        {
                            OnMovieFound(new MovieFoundEventArgs(movie));
                        }
                        if (this.stop)
                        {
                            return allMovies;
                        }
                    }
                    catch
                    {
                        // Best effort: a broken movie entry must not stop the rest of the listing.
                    }
                }
            }

            return allMovies;
        }
Пример #8
0
        /// <summary>
        /// Scrapes every movie linked from a listing page: opens each movie page, reads its title,
        /// description and image, and collects the video links for which a host scraper is available.
        /// </summary>
        /// <param name="year">Release year assigned to every movie found (stored as January 1st of that year).</param>
        /// <param name="dom">The already loaded listing page; its <c>.itemList a</c> anchors are processed last to first.</param>
        /// <param name="skipUrls">Video link URLs that must not be collected; checked both before and after the host scraper sanitizes the URL.</param>
        /// <returns><c>false</c> if scraping was interrupted because <c>this.stop</c> is set; otherwise <c>true</c>.</returns>
        /// <remarks>
        /// Movies whose page URL is already in <c>allMovies</c>, whose page cannot be loaded, or whose page
        /// has no candidate links are skipped. <c>OnMovieFound</c> is called only for movies with at least
        /// one collected link.
        /// </remarks>
        private bool GetMovies(int year, CQ dom, List <string> skipUrls)
        {
            var items = SelectItems(dom, ".itemList a");
            Debug.Assert(items.Count != 0, "No movies");

            for (var i = items.Count - 1; i >= 0; i--)
            {
                var movieUrl = RootUrl + ReadAttribute(items[i], "href");
                OnNotify(new NotificationEventArgs("Processing " + movieUrl + ". Year: " + year.ToString()));
                if (allMovies.Any(x => x.PageUrl == movieUrl))
                {
                    continue;
                }

                // Use a dedicated local for the movie page rather than overwriting the listing parameter.
                CQ moviePage;
                try
                {
                    moviePage = GotoUrl(movieUrl);
                }
                catch { continue; }

                // Candidate link elements, tried from the most to the least specific layout.
                var links = SelectItems(moviePage, ".itemIntroText table a");
                if (links.Count == 0)
                {
                    links = SelectItems(moviePage, "div.itemFullText a");
                }
                if (links.Count == 0)
                {
                    links = SelectItems(moviePage, ".avPlayerBlock iframe");
                }
                if (links.Count == 0)
                {
                    continue;
                }

                var movie = new ScrapedMovie(this);
                movie.ReleasedDate = new DateTime(year, 1, 1);
                movie.LangCode     = "ml";
                movie.Language     = "Malayalam";
                // NOTE(review): not guarded - presumably throws when the page has no .itemTitle element,
                // and that exception propagates to the caller; confirm this is intended.
                movie.Name         = ReadText(SelectItems(moviePage, ".itemTitle")[0]).Replace("\n", "").Replace("\t", "");

                try
                {
                    // Description sources in order of preference; each fallback runs only when the
                    // previous attempt threw.
                    try
                    {
                        movie.Description = ReadText(SelectItems(moviePage, ".itemIntroText p")[0]);
                    }
                    catch
                    {
                        try
                        {
                            movie.Description = ReadText(SelectItems(moviePage, ".itemIntroText")[0]);
                        }
                        catch
                        {
                            var spans = SelectItems(moviePage, ".typeTextfield span");
                            if (spans.Count > 0)
                            {
                                movie.Description = String.Empty;
                                foreach (var span in spans)
                                {
                                    movie.Description += span.InnerText;
                                }
                            }
                        }
                    }
                    if (movie.Description != null)
                    {
                        movie.Description = movie.Description.Replace("\n", "").Replace("\t", "");
                    }
                }
                catch
                {
                    // The description is optional.
                }

                try
                {
                    // First selector that yields an image wins; evaluation is lazy, so later selectors
                    // are only queried when the earlier ones found nothing.
                    var imageSelectors = new[] { ".itemIntroText p img", ".itemImage a img", ".itemIntroText img", ".itemIntroText span img" };
                    var image = imageSelectors.Select(s => SelectItems(moviePage, s).FirstOrDefault()).FirstOrDefault(x => x != null);
                    if (image != null)
                    {
                        movie.ImageUrl = RootUrl + ReadAttribute(image, "src");
                    }
                }
                catch
                {
                    // The image is optional.
                }
                movie.PageUrl = movieUrl;

                allMovies.Add(movie);
                foreach (var link in links)
                {
                    // Anchors carry the URL in href, iframes in src. Fall back to src whenever href
                    // is unavailable, whether the read throws or just yields nothing.
                    string linkUrl = null;
                    try
                    {
                        linkUrl = ReadAttribute(link, "href");
                    }
                    catch { }
                    if (String.IsNullOrEmpty(linkUrl))
                    {
                        try
                        {
                            linkUrl = ReadAttribute(link, "src");
                        }
                        catch { }
                    }
                    if (String.IsNullOrEmpty(linkUrl))
                    {
                        // Element without any URL: nothing to resolve.
                        continue;
                    }

                    if (IgnoreLink(linkUrl))
                    {
                        continue;
                    }

                    var host = GetScrapper(linkUrl);
                    if (host == null)
                    {
                        OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                        continue;
                    }

                    // The skip list is consulted for both the raw and the sanitized form.
                    if (skipUrls.Contains(linkUrl))
                    {
                        continue;
                    }
                    try
                    {
                        linkUrl = host.SanitizeUrl(linkUrl);
                    }
                    catch { continue; }
                    if (skipUrls.Contains(linkUrl))
                    {
                        continue;
                    }

                    MovieTube.Client.Scraper.ScraperResult result = MovieTube.Client.Scraper.ScraperResult.Success;
                    try
                    {
                        result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                    }
                    catch
                    {
                        // Validation is advisory: if it throws, the default Success is kept and the
                        // link is still collected.
                    }
                    if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                    {
                        movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, link.InnerText));
                    }
                }

                if (movie.Links.Count > 0)
                {
                    OnMovieFound(new MovieFoundEventArgs(movie));
                }
                if (this.stop)
                {
                    return false;
                }
            }
            return true;
        }