Exemplo n.º 1
0
        /// <summary>
        /// Walks the listing pages of every entry in RootLinks (optionally restricted to
        /// <paramref name="years"/>), opens each movie post and collects its playable links.
        /// </summary>
        /// <param name="skipUrls">Link URLs (raw or sanitized) that must not be added to a movie.</param>
        /// <param name="years">Years to scrape; null or empty means every entry in RootLinks.</param>
        /// <returns>
        /// The allMovies collection. Every visited post is added to it, including posts for which
        /// no link was found; OnMovieFound is only raised for movies with at least one link.
        /// </returns>
        public override List<ScrapedMovie> ScrapeMovies(List<string> skipUrls, List<int> years = null)
        {
            if (years == null)
                years = new List<int>();

            foreach (var entry in RootLinks)
            {
                try
                {
                    if (years.Count > 0 && !years.Contains(entry.Value))
                        continue;
                    int year = entry.Value;
                    var dom = GotoUrl(entry.Key);

                    // The "last" pager link is split into "<prefix>/" and the page count that
                    // sits between the final two slashes (assumes the href ends with '/').
                    var lastPage = SelectItem(dom, "a.last").Attributes["href"];
                    int ix1 = lastPage.LastIndexOf('/');
                    int ix2 = ix1 > 0 ? lastPage.LastIndexOf('/', ix1 - 1) : -1;
                    var count = Convert.ToInt32(lastPage.Substring(ix2 + 1, lastPage.Length - ix2 - 2));
                    lastPage = lastPage.Substring(0, ix2 + 1);

                    // Pages are visited from the last one down to page 2, then the root page itself.
                    var pageUrls = new List<string>();
                    for (var i = count; i >= 2; i--)
                        pageUrls.Add(lastPage + i.ToString() + "/");
                    pageUrls.Add(entry.Key);

                    foreach (var elem in pageUrls)
                    {
                        try
                        {
                            var pageDom = GotoUrl(elem);
                            var postBoxes = SelectItems(pageDom, "h2.title a");
                            for (var j = postBoxes.Count - 1; j >= 0; j--)
                            {
                                try
                                {
                                    var movie = new ScrapedMovie(this);
                                    allMovies.Add(movie);
                                    movie.PageUrl = ReadAttribute(postBoxes[j], "href");
                                    OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + year.ToString()));
                                    movie.LangCode = "te";
                                    movie.ReleasedDate = new DateTime(year, 1, 1);

                                    // Keep the movie page in its own local: it is queried again after
                                    // redirect pages have been fetched below.
                                    var movieDom = GotoUrl(movie.PageUrl);
                                    movie.Name = FixTitle( SelectItem(movieDom, "h2.title").InnerText);
                                    movie.ImageUrl = SelectItem(movieDom, "img.wp-post-image").Attributes["src"];
                                    try
                                    {
                                        movie.Description = SelectItems(movieDom, "div.entry p span")[1].InnerText;
                                    }
                                    catch
                                    {
                                        // Description is optional; leave it unset when the markup differs.
                                    }

                                    // Candidate links keyed by URL, value is the display text.
                                    var links = new Dictionary<string, string>();
                                    foreach (var anchor in SelectItems(movieDom, "a"))
                                    {
                                        try
                                        {
                                            var url = anchor.Attributes["href"];
                                            if (GetScrapper(url) != null)
                                            {
                                                if (!links.ContainsKey(url))
                                                    links.Add(url, anchor.InnerText.Replace("&nbsp;", ""));
                                            }
                                            if (url.Contains("http://www.power4link.us"))
                                            {
                                                // Redirect page: the real player is the iframe inside it.
                                                var redirectDom = GotoUrl(url);
                                                var frameSrc = SelectItem(redirectDom, "div.entry-content  iframe").Attributes["src"];
                                                // Guard on the key that is actually inserted.
                                                if (!links.ContainsKey(frameSrc))
                                                    links.Add(frameSrc, "Watch Online");
                                            }
                                        }
                                        catch
                                        {
                                            // Best effort: a broken anchor must not abort the movie.
                                        }
                                    }

                                    // Players embedded directly in the movie page. Duplicates and iframes
                                    // without a src are skipped instead of throwing out of the movie.
                                    foreach (var iframe in SelectItems(movieDom, "div.entry  iframe"))
                                    {
                                        var src = iframe.Attributes["src"];
                                        if (!String.IsNullOrEmpty(src) && !links.ContainsKey(src))
                                            links.Add(src, "Watch Online");
                                    }

                                    foreach (var l in links)
                                    {
                                        try
                                        {
                                            var linkUrl = l.Key;
                                            if (IgnoreLink(linkUrl))
                                                continue;

                                            var host = GetScrapper(linkUrl);
                                            if (host == null)
                                            {
                                                OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                                                continue;
                                            }

                                            // Skip both the raw and the sanitized form of known URLs.
                                            if (skipUrls.Contains(linkUrl))
                                                continue;
                                            linkUrl = host.SanitizeUrl(linkUrl);
                                            if (skipUrls.Contains(linkUrl))
                                                continue;

                                            // A validation that throws is treated as Success (link is kept).
                                            MovieTube.Client.Scraper.ScraperResult result = MovieTube.Client.Scraper.ScraperResult.Success;
                                            try
                                            {
                                                result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                            }
                                            catch { }

                                            if (result == MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                                continue;

                                            // "… Part 2" becomes "Watch Part 2"; anything else "Watch Online".
                                            // One IndexOf decides both the test and the cut position.
                                            var name = l.Value;
                                            var ind = name.IndexOf("part", StringComparison.OrdinalIgnoreCase);
                                            name = ind >= 0 ? "Watch " + name.Substring(ind) : "Watch Online";

                                            movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, name));
                                        }
                                        catch
                                        {
                                            // Best effort: one bad link must not drop the others.
                                        }
                                    }

                                    if (movie.Links.Count > 0)
                                        OnMovieFound(new MovieFoundEventArgs(movie));

                                    if (this.stop)
                                        return allMovies;
                                }
                                catch
                                {
                                    // Best effort: skip posts whose page cannot be fetched or parsed.
                                }
                            }
                        }
                        catch
                        {
                            // Best effort: skip listing pages that cannot be fetched or parsed.
                        }
                    }
                }
                catch
                {
                    // Best effort: skip root links whose pager cannot be read.
                }
            }
            return allMovies;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Processes the movie anchors of a listing page from last to first. Each movie page not
        /// already present in allMovies is fetched, its details and hosted links are read, and
        /// OnMovieFound is raised when at least one link was kept.
        /// </summary>
        /// <param name="year">Year shown in the notification and used as the January 1st release date.</param>
        /// <param name="dom">Listing page document; the parameter is reassigned to each fetched movie page.</param>
        /// <param name="skipUrls">Link URLs (raw or sanitized) that are not added to a movie.</param>
        /// <returns>
        /// false when the stop flag is set after a movie with candidate link elements has been
        /// processed; true once every anchor has been visited.
        /// </returns>
        private bool GetMovies(int year, CQ dom, List<string> skipUrls)
        {
            var anchors = SelectItems(dom, ".itemList a");
            Debug.Assert(anchors.Count != 0, "No movies");

            for (var index = anchors.Count - 1; index >= 0; index--)
            {
                var movieUrl = RootUrl + ReadAttribute(anchors[index], "href");
                OnNotify(new NotificationEventArgs("Processing " + movieUrl + ". Year: " + year.ToString()));

                // Already collected: do not fetch the page again.
                if (allMovies.Any(known => known.PageUrl == movieUrl))
                    continue;

                try
                {
                    dom = GotoUrl(movieUrl);
                }
                catch
                {
                    continue;
                }

                // Candidate link elements: try each selector in turn until one matches.
                var linkElems = SelectItems(dom, ".itemIntroText table a");
                foreach (var fallbackSelector in new[] { "div.itemFullText a", ".avPlayerBlock iframe" })
                {
                    if (linkElems.Count != 0)
                        break;
                    linkElems = SelectItems(dom, fallbackSelector);
                }

                // Pages without any candidate link are ignored (and the stop flag is not checked).
                if (linkElems.Count == 0)
                    continue;

                var movie = new ScrapedMovie(this)
                {
                    ReleasedDate = new DateTime(year, 1, 1),
                    LangCode = "ml",
                    Language = "Malayalam",
                    Name = ReadText(SelectItems(dom, ".itemTitle")[0]).Replace("\n", "").Replace("\t", "")
                };

                // Description: first intro paragraph, else the whole intro block, else the
                // concatenated text-field spans. Any failure leaves it as it is.
                try
                {
                    try
                    {
                        movie.Description = ReadText(SelectItems(dom, ".itemIntroText p")[0]);
                    }
                    catch
                    {
                        try
                        {
                            movie.Description = ReadText(SelectItems(dom, ".itemIntroText")[0]);
                        }
                        catch
                        {
                            var textSpans = SelectItems(dom, ".typeTextfield span");
                            if (textSpans.Count > 0)
                            {
                                movie.Description = String.Empty;
                                foreach (var textSpan in textSpans)
                                    movie.Description += textSpan.InnerText;
                            }
                        }
                    }

                    if (movie.Description != null)
                        movie.Description = movie.Description.Replace("\n", "").Replace("\t", "");
                }
                catch { }

                // Poster: first image found by any of the known selectors, in this order.
                try
                {
                    var poster = SelectItems(dom, ".itemIntroText p img").FirstOrDefault()
                                 ?? SelectItems(dom, ".itemImage a img").FirstOrDefault()
                                 ?? SelectItems(dom, ".itemIntroText img").FirstOrDefault()
                                 ?? SelectItems(dom, ".itemIntroText span img").FirstOrDefault();
                    if (poster != null)
                        movie.ImageUrl = RootUrl + ReadAttribute(poster, "src");
                }
                catch { }

                movie.PageUrl = movieUrl;
                allMovies.Add(movie);

                foreach (var link in linkElems)
                {
                    // Anchors carry the URL in href; fall back to src when reading href throws.
                    string linkUrl = null;
                    try
                    {
                        linkUrl = ReadAttribute(link, "href");
                    }
                    catch
                    {
                        try
                        {
                            linkUrl = ReadAttribute(link, "src");
                        }
                        catch { }
                    }

                    if (IgnoreLink(linkUrl))
                        continue;

                    var host = GetScrapper(linkUrl);
                    if (host == null)
                    {
                        OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                        continue;
                    }

                    // Known URLs are skipped both before and after sanitizing.
                    if (skipUrls.Any(skip => skip == linkUrl))
                        continue;
                    try
                    {
                        linkUrl = host.SanitizeUrl(linkUrl);
                    }
                    catch
                    {
                        continue;
                    }
                    if (skipUrls.Any(skip => skip == linkUrl))
                        continue;

                    // A validation that throws leaves the result at Success, so the link is kept.
                    var validation = MovieTube.Client.Scraper.ScraperResult.Success;
                    try
                    {
                        validation = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                    }
                    catch { }

                    if (validation != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                        movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, link.InnerText));
                }

                if (movie.Links.Count > 0)
                    OnMovieFound(new MovieFoundEventArgs(movie));

                if (this.stop)
                    return false;
            }

            return true;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Walks the listing pages of every entry in RootLinks (optionally restricted to
        /// <paramref name="years"/>), and for each non-dubbed post collects the players embedded
        /// in its ".videosection".
        /// </summary>
        /// <param name="skipUrls">Link URLs (raw or sanitized) that must not be added to a movie.</param>
        /// <param name="years">Years to scrape; null or empty means every entry in RootLinks.</param>
        /// <returns>
        /// The allMovies collection. Posts whose title contains "dubbed" are not added to it;
        /// other posts are added even when no link was found. OnMovieFound is only raised for
        /// movies with at least one link.
        /// </returns>
        public override List<ScrapedMovie> ScrapeMovies(List<string> skipUrls, List<int> years = null)
        {
            if (years == null)
                years = new List<int>();

            foreach (var entry in RootLinks)
            {
                try
                {
                    if (years.Count > 0 && !years.Contains(entry.Value))
                        continue;
                    int year = entry.Value;
                    var dom = GotoUrl(entry.Key);

                    // Pager anchors, visited in reverse document order, followed by the root page.
                    var elems = SelectItems(dom, ".wp-pagenavi a").Where(x => x.Attributes["class"] == "page larger" || x.Attributes["class"] == "page smaller").ToList();
                    var pageUrls = new List<string>();
                    for (var i = elems.Count - 1; i >= 0; i--)
                        pageUrls.Add(new Uri(new Uri(entry.Key), ReadAttribute(elems[i], "href")).AbsoluteUri);
                    pageUrls.Add(entry.Key);

                    foreach (var elem in pageUrls)
                    {
                        try
                        {
                            var pageDom = GotoUrl(elem);
                            var postBoxes = SelectItems(pageDom, ".boxentry a");
                            for (var j = postBoxes.Count - 1; j >= 0; j--)
                            {
                                try
                                {
                                    var pb = postBoxes[j];
                                    var pageUrl = ReadAttribute(pb, "href");
                                    OnNotify(new NotificationEventArgs("Processing " + pageUrl + ". Year: " + year.ToString()));

                                    // Filter dubbed titles before the movie is registered, so excluded
                                    // posts do not end up in the returned list as nameless entries.
                                    var title = ReadAttribute(pb, "title");
                                    if (title.IndexOf("dubbed", StringComparison.OrdinalIgnoreCase) >= 0)
                                        continue;

                                    var movie = new ScrapedMovie(this);
                                    allMovies.Add(movie);
                                    movie.PageUrl = pageUrl;
                                    movie.LangCode = "ta";
                                    movie.ReleasedDate = new DateTime(year, 1, 1);
                                    movie.Name = FixTitle(title);
                                    // The anchor's first child element supplies the thumbnail src.
                                    movie.ImageUrl = pb.FirstElementChild.Attributes["src"];

                                    var movieDom = GotoUrl(movie.PageUrl);

                                    // Player URLs: embeds first, then iframes.
                                    var links = new List<string>();
                                    foreach (var emb in SelectItems(movieDom, ".videosection embed"))
                                        links.Add(emb.Attributes["src"]);
                                    foreach (var iframe in SelectItems(movieDom, ".videosection iframe"))
                                        links.Add(iframe.Attributes["src"]);

                                    foreach (var l in links)
                                    {
                                        try
                                        {
                                            var linkUrl = l;
                                            if (IgnoreLink(linkUrl))
                                                continue;

                                            var host = GetScrapper(linkUrl);
                                            if (host == null)
                                            {
                                                OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                                                continue;
                                            }

                                            // Skip both the raw and the sanitized form of known URLs.
                                            if (skipUrls.Contains(linkUrl))
                                                continue;
                                            linkUrl = host.SanitizeUrl(linkUrl);
                                            if (skipUrls.Contains(linkUrl))
                                                continue;

                                            // A validation that throws is treated as Success (link is kept).
                                            MovieTube.Client.Scraper.ScraperResult result = MovieTube.Client.Scraper.ScraperResult.Success;
                                            try
                                            {
                                                result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                            }
                                            catch { }

                                            if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                                movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, "Watch Full Movie"));
                                        }
                                        catch
                                        {
                                            // Best effort: one bad link must not drop the others.
                                        }
                                    }

                                    if (movie.Links.Count > 0)
                                        OnMovieFound(new MovieFoundEventArgs(movie));

                                    if (this.stop)
                                        return allMovies;
                                }
                                catch
                                {
                                    // Best effort: skip posts whose page cannot be fetched or parsed.
                                }
                            }
                        }
                        catch
                        {
                            // Best effort: skip listing pages that cannot be fetched or parsed.
                        }
                    }
                }
                catch
                {
                    // Best effort: skip root links that cannot be fetched or parsed.
                }
            }
            return allMovies;
        }
Exemplo n.º 4
0
 /// <summary>
 /// Callback for a found movie: passes the movie to UpdateUI and, when this object's stop
 /// flag is set, calls Stop() on the sender.
 /// </summary>
 /// <param name="sender">Object that reported the movie; cast to MovieDetailsScraperBase only when stopping.</param>
 /// <param name="e">Carries the movie handed to UpdateUI.</param>
 void scraper_MovieFound(object sender, MovieFoundEventArgs e)
 {
     UpdateUI(e.Movie);

     if (!this.stop)
         return;

     var scraper = (MovieDetailsScraperBase)sender;
     scraper.Stop();
 }
Exemplo n.º 5
0
        /// <summary>
        /// Scrapes every ".movie" card on each listing URL, opens the movie page and collects
        /// the playable links found in its link table.
        /// </summary>
        /// <param name="links">Listing page URLs to visit. A listing page that cannot be fetched throws to the caller.</param>
        /// <param name="langCode">Language code assigned to every scraped movie.</param>
        /// <param name="skipUrls">Link URLs (raw or sanitized) that must not be added to a movie.</param>
        /// <param name="years">
        /// When non-empty, only movies whose listed year is in this list are scraped. The parameter
        /// was previously accepted but ignored; null or empty keeps the scrape-everything behaviour.
        /// </param>
        /// <returns>
        /// The allMovies collection, including movies for which no link was found; OnMovieFound
        /// is only raised for movies with at least one link.
        /// </returns>
        private List<ScrapedMovie> DoScrapeMovies(List<string> links, string langCode, List<string> skipUrls, List<int> years = null)
        {
            const string siteRoot = "http://apnaview.com";

            if (years == null)
                years = new List<int>();

            foreach (var entry in links)
            {
                var dom = GotoUrl(entry);
                var movies = SelectItems(dom, ".movie");
                foreach (var m in movies)
                {
                    try
                    {
                        // Card layout as read here: children[0] image, children[1] name, children[2] year.
                        var card = m.FirstElementChild;
                        var children = card.ChildElements.ToList();
                        var releaseYear = Convert.ToInt32(children[2].InnerText);

                        // Honour the requested years before any further page is fetched.
                        if (years.Count > 0 && !years.Contains(releaseYear))
                            continue;

                        var movie = new ScrapedMovie(this);
                        allMovies.Add(movie);
                        movie.PageUrl = siteRoot + card.Attributes["href"];
                        movie.ReleasedDate = new DateTime(releaseYear, 1, 1);
                        OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + movie.ReleasedDate.Year.ToString()));
                        movie.LangCode = langCode;
                        movie.Name = children[1].InnerText;
                        // Only image sources containing "/img" are used as the poster.
                        if (children[0].Attributes["src"].Contains("/img"))
                            movie.ImageUrl = siteRoot + children[0].Attributes["src"];

                        var movieDom = GotoUrl(movie.PageUrl);
                        foreach (var vid in SelectItems(movieDom, ".table.table-bordered tbody tr"))
                        {
                            try
                            {
                                // The second cell of each row holds the link anchors.
                                var vidLinks = vid.ChildElements.ToList()[1].ChildElements.ToList();
                                foreach (var vl in vidLinks)
                                {
                                    var linkUrl = vl.Attributes["href"];
                                    if (GetScrapper(linkUrl) == null)
                                    {
                                        // Not a known host: treat the href as an intermediate page and
                                        // take the player URL from its iframe, falling back to its embed.
                                        linkUrl = String.Empty;
                                        var playerDom = GotoUrl(vl.Attributes["href"]);
                                        try
                                        {
                                            linkUrl = SelectItem(playerDom, ".videoplayer iframe").Attributes["src"];
                                        }
                                        catch { }
                                        try
                                        {
                                            if (String.IsNullOrWhiteSpace(linkUrl))
                                                linkUrl = SelectItem(playerDom, ".videoplayer embed").Attributes["src"];
                                        }
                                        catch { }
                                    }

                                    if (IgnoreLink(linkUrl))
                                        continue;

                                    try
                                    {
                                        var host = GetScrapper(linkUrl);
                                        if (host == null)
                                        {
                                            OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                                            continue;
                                        }

                                        // Skip both the raw and the sanitized form of known URLs.
                                        if (skipUrls.Contains(linkUrl))
                                            continue;
                                        linkUrl = host.SanitizeUrl(linkUrl);
                                        if (skipUrls.Contains(linkUrl))
                                            continue;

                                        // A validation that throws is treated as Success (link is kept).
                                        MovieTube.Client.Scraper.ScraperResult result = MovieTube.Client.Scraper.ScraperResult.Success;
                                        try
                                        {
                                            result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                        }
                                        catch { }

                                        if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                        {
                                            var name = vl.InnerText.Trim();
                                            movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, name));
                                        }
                                    }
                                    catch
                                    {
                                        // Best effort: one bad link must not drop the others.
                                    }
                                }
                            }
                            catch
                            {
                                // Best effort: skip table rows that do not have the expected shape.
                            }
                        }

                        if (movie.Links.Count > 0)
                            OnMovieFound(new MovieFoundEventArgs(movie));

                        if (this.stop)
                            return allMovies;
                    }
                    catch
                    {
                        // Best effort: skip cards that cannot be parsed or whose page cannot be fetched.
                    }
                }
            }

            return allMovies;
        }
Exemplo n.º 6
0
        private void ScrapThread(CQ dom, int year,string startUrl, List<string> skipUrls)
        {
            var last = SelectItem(dom, ".wp-pagenavi > .last");
            var lasthRef = ReadAttribute(last, "href");
            var pos = lasthRef.LastIndexOf('/');
            var lastIndex = Int32.Parse( lasthRef.Substring(pos + 1, lasthRef.Length - pos -1) );
            var urls = new List<string>();

            var urlTemplate = lasthRef.Substring(0, pos);
            for (var i = lastIndex; i > 1; i--)
                urls.Add(String.Format("{0}/{1}", urlTemplate, i));
            urls.Add(startUrl);

            foreach(var url in urls)
            {

                try
                {

                    dom = GotoUrl(url);
                    var elems = SelectItems(dom, "a.clip-link");
                    for (var i = elems.Count - 1; i >= 0; i--)
                    {
                        try
                        {
                            var subElem = elems[i];
                            var movie = new ScrapedMovie(this);
                            movie.PageUrl = ReadAttribute(subElem, "href");
                            OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + year.ToString()));
                            if (movie.PageUrl.ToLower().Contains("-in-hindi") ||
                               movie.PageUrl.ToLower().Contains("-hindi."))
                                continue;

                            dom = GotoUrl(movie.PageUrl);

                            movie.ReleasedDate = new DateTime(year, 1, 1);
                            movie.LangCode = "hi";
                            movie.Language = "Hindi";
                            movie.Description = String.Empty;
                            movie.Name = ReadText(SelectItem(dom, ".entry-title")).Replace("\n", "").Replace("\t", "");
                            try
                            {
                                var descElems = SelectItems(dom, ".entry-content p");
                                //var descs = descElems.Count > 5 ? descElems.Skip(3) : descElems.Skip(2);
                                foreach (var p in descElems)
                                {
                                    if (!p.InnerHTML.Contains("<strong>"))
                                    {
                                        var t = ReadText(p);
                                        movie.Description += ReadText(p) + Environment.NewLine;
                                    }
                                    else
                                    {
                                        break;
                                    }
                                }
                                if (String.IsNullOrWhiteSpace(movie.Description))
                                    movie.Description = String.Empty;
                            }
                            catch { }

                            var imgElems = SelectItems(dom, "#thumb img");

                            if (imgElems.Count == 0)
                            {
                                Debug.WriteLine("No Image: " + movie.PageUrl);
                            }
                            else
                                movie.ImageUrl = ReadAttribute(imgElems[0], "src");
                            allMovies.Add(movie);

                            //links
                            var linkPages = SelectItems(dom, ".entry-content p a.external");

                            foreach (var l in linkPages)
                            {
                                var pageUrl = ReadAttribute(l, "href");
                                string linkUrl = "";
                                if (pageUrl.Contains("filmshowonline.net"))
                                {
                                    continue;
                                    dom = GotoUrl(pageUrl);

                                    IDomElement item = null;
                                    var attrib = "src";
                                    if (dom.Document.Body.InnerHTML.Contains("id=\"cipher\""))
                                    {
                                        var html = DecryptLink(ReadAttribute(SelectItem(dom, "#key"), "value"),
                                            ReadAttribute(SelectItem(dom, "#cipher"), "value"));
                                        var doc = CsQuery.CQ.CreateDocument(html);
                                        item = SelectItem(doc, "iframe");
                                        if (item == null)
                                            item = SelectItem(doc, "embed");
                                        if (item == null)
                                        {
                                            item = SelectItems(doc, "object param").FirstOrDefault(x => x.Attributes["name"] == "movie");
                                            if (item != null)
                                                attrib = "value";
                                        }
                                        if (html.Contains("flashvars"))
                                        {
                                            html = System.Web.HttpUtility.UrlDecode(html);
                                            linkUrl = System.Web.HttpUtility.UrlDecode(SubstringBetween(html, "&url=", "&"));
                                        }
                                    }

                                    if (item == null)
                                        item = SelectItems(dom, "center embed").FirstOrDefault(x => x.HasAttribute("allowfullscreen"));
                                    if (item == null)
                                        item = SelectItems(dom, "center iframe").FirstOrDefault(x => x.HasAttribute("allowfullscreen"));

                                    if (item == null)
                                    {
                                        OnScraperNotFound(new ScraperNotFound("No Link", pageUrl));
                                        continue;
                                    }
                                    if (String.IsNullOrWhiteSpace(linkUrl))
                                        linkUrl = ReadAttribute(item, attrib);
                                }
                                else if (pageUrl.Contains("www.veoh.com/download"))
                                {
                                    continue;
                                }
                                else
                                    linkUrl = pageUrl;

                                if (IgnoreLink(linkUrl))
                                    continue;

                                try
                                {
                                    var host = GetScrapper(linkUrl);
                                    if (host != null)
                                    {
                                        if (skipUrls.Any(x => x == linkUrl))
                                            continue;
                                        linkUrl = host.SanitizeUrl(linkUrl);
                                        if (skipUrls.Any(x => x == linkUrl))
                                            continue;
                                        if (!movie.Links.Any(x => x.DownloadUrl.ToLower() == linkUrl.ToLower()))
                                        {
                                            MovieTube.Client.Scraper.ScraperResult result = MovieTube.Client.Scraper.ScraperResult.Success;
                                            try
                                            {
                                                result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                            }
                                            catch { }
                                            if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                                movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, l.InnerText));
                                        }
                                    }
                                    else
                                    {
                                        OnScraperNotFound(new ScraperNotFound(linkUrl, pageUrl));
                                    }
                                }
                                catch (Exception ex)
                                {
                                    OnScraperNotFound(new ScraperNotFound("Exception", ex.Message));
                                }
                            }
                            if (movie.Links.Count > 0)
                            {
                                var args = new MovieFoundEventArgs(movie);
                                OnMovieFound(args);

                            }
                            if (this.stop)
                                return;
                        }
                        catch (Exception ex)
                        {
                            OnScraperNotFound(new ScraperNotFound("Exception", ex.Message));
                        }
                    }
                }
                catch (Exception ex)
                {
                    OnScraperNotFound(new ScraperNotFound("Exception", ex.Message));
                }
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Scrapes movies year by year: reads the year anchors from <c>RootUrl</c>, gathers
        /// the post URLs of each requested year by following the "older posts" pager, then
        /// visits every post to build a <c>ScrapedMovie</c> (language code "ta") and to
        /// collect its validated links.
        /// </summary>
        /// <param name="skipUrls">Link URLs that must not be reported. Every sanitized link
        /// that reaches validation is appended to this list, so the caller's list is
        /// mutated. <c>null</c> is treated as an empty list.</param>
        /// <param name="years">Years to restrict the scrape to; <c>null</c> or an empty list
        /// means every year found on the root page.</param>
        /// <returns>The scraper's <c>allMovies</c> list; it also contains movies for which
        /// no link was found. Returned early when the <c>stop</c> flag is set.</returns>
        public override List<ScrapedMovie> ScrapeMovies(List<string> skipUrls, List<int> years = null)
        {
            if (years == null)
                years = new List<int>();
            if (skipUrls == null)
                skipUrls = new List<string>();

            try
            {
                var rootDom = GotoUrl(RootUrl);

                // One anchor per year; walked from the last anchor to the first.
                var yearAnchors = SelectItems(rootDom, "#HTML5.widget div.widget-content a");

                for (var i = yearAnchors.Count - 1; i >= 0; i--)
                {
                    try
                    {
                        var yearAnchor = yearAnchors[i];
                        int year = Convert.ToInt32(yearAnchor.InnerText.Trim());
                        if (years.Count > 0 && !years.Contains(year))
                            continue;

                        // Post URLs of this year. The stack reverses the gathering order when
                        // the posts are processed; the set gives O(1) duplicate detection.
                        var urls = new Stack<string>();
                        var seen = new HashSet<string>();
                        try
                        {
                            var listDom = GotoUrl(ReadAttribute(yearAnchor, "href"));

                            while (true)
                            {
                                // NOTE(review): the first four anchors are skipped, presumably
                                // because they are not post links; confirm against the page layout.
                                var pageAnchors = SelectItems(listDom, "div.blog-posts a").Skip(4).ToList();
                                foreach (var anchor in pageAnchors)
                                {
                                    var href = ReadAttribute(anchor, "href");
                                    if (seen.Add(href))
                                        urls.Push(href);
                                }
                                if (pageAnchors.Count == 0)
                                    break;

                                // A page without an "older posts" link is the last one: stop
                                // paging but keep the URLs gathered so far.
                                var older = SelectItem(listDom, "#blog-pager-older-link a");
                                if (older == null)
                                    break;
                                listDom = GotoUrl(ReadAttribute(older, "href"));
                            }
                        }
                        catch
                        {
                            // The listing of this year could not be read; move on to the next year.
                            continue;
                        }

                        while (urls.Count > 0)
                        {
                            try
                            {
                                var u = urls.Pop();
                                var movieDom = GotoUrl(u);
                                var title = SelectItem(movieDom, ".post-title.entry-title a").InnerText;

                                ScrapedMovie movie = null;
                                try
                                {
                                    // Text before the first '-' is the name; the segment after
                                    // it (up to the next '-') is the description.
                                    var parts = title.Split('-');
                                    movie = new ScrapedMovie(this)
                                    {
                                        PageUrl = u,
                                        Description = parts.Length > 1 ? parts[1] : String.Empty,
                                        Name = FixTitle(parts[0]),
                                        LangCode = "ta",
                                        ReleasedDate = new DateTime(year, 1, 1),
                                        ImageUrl = ReadAttribute(SelectItem(movieDom, "div.post-body.entry-content img"), "src")
                                    };
                                }
                                catch
                                {
                                    // The page does not have the expected movie layout;
                                    // movie stays null and the page is skipped below.
                                }

                                if (movie != null)
                                {
                                    OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + year.ToString()));
                                    allMovies.Add(movie);

                                    foreach (var item in SelectItems(movieDom, ".fullpost a"))
                                    {
                                        try
                                        {
                                            string linkUrl = ReadAttribute(item, "href");
                                            if (linkUrl.Contains("links2sites"))
                                            {
                                                // Intermediate page: the real player is embedded in
                                                // it as an <embed> or, failing that, an <iframe>.
                                                var linkDom = GotoUrl(linkUrl);
                                                try
                                                {
                                                    linkUrl = ReadAttribute(SelectItem(linkDom, ".post-body.entry-content embed"), "src");
                                                }
                                                catch
                                                {
                                                    try
                                                    {
                                                        linkUrl = ReadAttribute(SelectItem(linkDom, ".post-body.entry-content iframe"), "src");
                                                    }
                                                    catch
                                                    {
                                                        // Neither element found: keep the original URL.
                                                    }
                                                }
                                            }

                                            if (IgnoreLink(linkUrl))
                                                continue;

                                            var host = GetScrapper(linkUrl);
                                            if (host == null)
                                            {
                                                OnScraperNotFound(new ScraperNotFound(linkUrl, movie.PageUrl));
                                                continue;
                                            }

                                            // Check both the raw and the sanitized form against the skip list.
                                            if (skipUrls.Contains(linkUrl))
                                                continue;
                                            linkUrl = host.SanitizeUrl(linkUrl);
                                            if (skipUrls.Contains(linkUrl))
                                                continue;
                                            skipUrls.Add(linkUrl);

                                            // If validation itself throws, the result stays Success
                                            // and the link is kept.
                                            var result = MovieTube.Client.Scraper.ScraperResult.Success;
                                            try
                                            {
                                                result = MovieTube.Client.Scraper.VideoScraperBase.ValidateUrl(linkUrl);
                                            }
                                            catch
                                            {
                                            }

                                            if (result != MovieTube.Client.Scraper.ScraperResult.VideoDoesNotExist)
                                                movie.Links.Add(new ScrapedMovieLink(linkUrl, host.ID, "Watch Full Movie"));
                                        }
                                        catch
                                        {
                                            // Best effort: one broken link must not abort the movie.
                                        }
                                    }

                                    if (movie.Links.Count > 0)
                                        OnMovieFound(new MovieFoundEventArgs(movie));
                                }
                            }
                            catch
                            {
                                // Best effort: one broken page must not abort the year.
                            }

                            // Checked after every URL so a stop request is honoured even when
                            // the page was skipped or failed.
                            if (this.stop)
                                return allMovies;
                        }
                    }
                    catch
                    {
                        // Best effort: an unparsable year anchor is skipped.
                    }
                }
            }
            catch
            {
                // The root page could not be processed; return whatever was collected.
            }
            return allMovies;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Visits every entry of <c>RootLinks</c>, opens each year anchor found on it and
        /// reports every movie listed on the numbered result pages of that year.
        /// </summary>
        /// <param name="skipUrls">Movie page URLs that are not to be reported.</param>
        /// <param name="years">Years to keep; <c>null</c> or an empty list keeps every year.</param>
        /// <returns>The scraper's <c>allMovies</c> list; returned early when the
        /// <c>stop</c> flag is set after a movie has been reported.</returns>
        public override List<ScrapedMovie> ScrapeMovies(List<string> skipUrls, List<int> years = null)
        {
            years = years ?? new List<int>();

            // True when a non-empty year filter was supplied and it does not contain the year.
            Func<int, bool> filteredOut = y => years.Count > 0 && !years.Contains(y);

            try
            {
                foreach (var root in RootLinks)
                {
                    var rootDom = GotoUrl(root.Key, 3);
                    var yearAnchors = SelectItems(rootDom, ".video-organizer-element-wrapper a");

                    // Year anchors are walked from the last one to the first.
                    for (var pos = yearAnchors.Count - 1; pos >= 0; pos--)
                    {
                        try
                        {
                            var yearAnchor = yearAnchors[pos];

                            // First guess from the anchor text (0 when it is not a number).
                            int year;
                            int.TryParse(yearAnchor.InnerText, out year);
                            if (filteredOut(year))
                                continue;

                            var yearUrl = new Uri(new Uri(root.Key), ReadAttribute(yearAnchor, "href")).AbsoluteUri;
                            var yearDom = GotoUrl(yearUrl, 3);

                            // The selected filter on the year page is authoritative: it must
                            // reduce to a four-digit number that passes the year filter.
                            var selectedFilter = ReadText(SelectItems(yearDom, ".filter-selected").First());
                            var digits = Regex.Replace(selectedFilter, "[^0-9.]", "");
                            if (!int.TryParse(digits, out year)
                                || year.ToString().Length != 4
                                || filteredOut(year))
                            {
                                continue;
                            }

                            foreach (var pageAnchor in SelectItems(yearDom, ".numerical-nav a"))
                            {
                                try
                                {
                                    var pageUrl = new Uri(new Uri(root.Key), ReadAttribute(pageAnchor, "href")).AbsoluteUri;
                                    var pageDom = GotoUrl(pageUrl, 3);

                                    foreach (var card in SelectItems(pageDom, ".video-object-wrapper"))
                                    {
                                        try
                                        {
                                            // Each card is queried on its own, detached from the page.
                                            var cardDom = CQ.Create(card);
                                            var movie = new ScrapedMovie(this);
                                            var titleAnchor = cardDom.Select(".movie-title").Elements.First();

                                            movie.PageUrl = new Uri(new Uri(RootUrl), ReadAttribute(titleAnchor, "href").Replace("..", "")).AbsoluteUri;
                                            OnNotify(new NotificationEventArgs("Processing " + movie.PageUrl + ". Year: " + year.ToString()));
                                            if (skipUrls.Contains(movie.PageUrl))
                                                continue;

                                            var thumbnail = cardDom.Select(".video-object-thumb img").Elements.First();
                                            movie.ImageUrl = ReadAttribute(thumbnail, "src");
                                            movie.ReleasedDate = new DateTime(year, 1, 1);
                                            movie.LangCode = root.Value;

                                            var descriptionNode = cardDom.Select(".desc_body").Elements.First();
                                            movie.Description = ReadText(descriptionNode).Replace("-", "");

                                            // Strip line breaks and tabs, then any (...), [...] or {...} group.
                                            movie.Name = ReadText(titleAnchor).Replace("\n", "").Replace("\t", "");
                                            movie.Name = Regex.Replace(movie.Name, @"\s*?(?:\(.*?\)|\[.*?\]|\{.*?\})", String.Empty);

                                            // The movie page itself is registered as the only link.
                                            movie.Links.Add(new ScrapedMovieLink(movie.PageUrl, "einthusan.com", "With Subtitles"));
                                            allMovies.Add(movie);
                                            OnMovieFound(new MovieFoundEventArgs(movie));

                                            if (this.stop)
                                                return allMovies;
                                        }
                                        catch
                                        {
                                            // Best effort: a malformed card is skipped.
                                        }
                                    }
                                }
                                catch
                                {
                                    // Best effort: a result page that fails to load is skipped.
                                }
                            }
                        }
                        catch
                        {
                            // Best effort: a year that cannot be processed is skipped.
                        }
                    }
                }
            }
            catch
            {
                // Best effort: return whatever was collected before the failure.
            }
            return allMovies;
        }