예제 #1
0
        /// <summary>
        /// Downloads one page of the mymovies.it film listing for <paramref name="year"/> and stores every
        /// movie found on it that is not already in the database, together with its actors, production
        /// countries and director.
        /// </summary>
        /// <param name="year">Year segment of the listing URL; also stored as each movie's <c>Year</c>.</param>
        /// <param name="page">Value of the <c>pagina</c> query-string parameter.</param>
        /// <exception cref="EndOfPagesException">
        /// The downloaded page contains the site's "an error occurred" text, which this method treats as
        /// having walked past the last page.
        /// </exception>
        /// <exception cref="FormatException">
        /// The "Consigliato" label of a movie holds a value that is not mapped to a suggestion score.
        /// </exception>
        public void DumpPage(int year, int page)
        {
            var url = string.Format("http://www.mymovies.it/film/{0}/?pagina={1}", year, page);

            Console.WriteLine(url);

            // var pageHtml = SimpleWebRequest.GetHtmlRobust(url, Encoding: "iso-8859-1");

            // NOTE(review): the disabled call above requested iso-8859-1 explicitly. `web` is presumably a
            // WebClient whose Encoding is configured outside this method; confirm it matches the site,
            // otherwise the accented error text checked below can never match.
            web.Headers[HttpRequestHeader.UserAgent] = SimpleWebRequest.DefaultUserAgent;
            var pageHtml = web.DownloadString(url);

            // The site's error text is used as the "no more pages" signal.
            if (pageHtml.Contains("Si è verificato un errore nella pagina, riprova a collegarti facendo clic all'indirizzo qui sotto"))
            {
                throw new EndOfPagesException();
            }

            // Restrict parsing to the listing table, then cut it into one fragment per movie.
            var parser = new QuickTextParser(pageHtml.BetweenS("<table class=\"struttura\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\">", "<!--***** Inizio Sinistra *****-->"), "<a", "<div style=\"clear:both; height:10px;\"></div>");

            foreach (var movieHtmlVs in parser)
            {
                // The fragment is re-prefixed with the "<a" delimiter before being parsed as HTML.
                var movieHtml = "<a" + movieHtmlVs.AsString;
                var node      = movieHtml.AsHtmlNode();

                // Only MyMoviesId and Title are read without TryGet, so a failure there aborts the page;
                // the remaining fields are wrapped in TryGet.
                var movie = new Movie()
                {
                    MyMoviesId       = int.Parse(node.FindAll("script").ElementAt(1).InnerText.TryCapture(@"schiarisci(\d+)\(")),
                    Title            = (node.FindSingle("h2 a") ?? node.FindSingle("a")).GetText(),
                    Rating           = TryGet(() => Single.Parse(movieHtml.BetweenS("valutazione media tra critica e pubblico: ", " stelle").Replace(',', '.'), System.Globalization.NumberFormatInfo.InvariantInfo)),
                    Genre            = TryGet(() => movieHtml.TryCapture(@"Genere\s*[^>]*>([^<]+)")),
                    Year             = year,
                    Summary          = TryGet(() => node.FindSingle("p[style='margin-top:5px']").GetText()),
                    ImageCode        = TryGet(() => movieHtml.TryCapture(@"http\://pad\.mymovies\.it/filmclub\/([\d/]+)\/imm2")),
                    TrailerCode      = TryGet(() => movieHtml.TryCapture(@"videotrailer_centrale\.asp\?codicefilm=([\d/]+)&")),
                    ShortDescription = TryGet(() => node.FindSingle("h3").GetText()),
                    ShortName        = TryGet(() => node.FindSingle("h2 a").GetAttributeValue("href", string.Empty).TryCapture(@"film/\d{4}/(.+?)/"))
                };

                // Movies already stored under the same site id are reported and skipped.
                if (db.Movies.Any(x => x.MyMoviesId == movie.MyMoviesId))
                {
                    Console.WriteLine("Duplicato: {0} {1}", movie.MyMoviesId, movie.Title);
                    continue;
                }

                Console.WriteLine(movie.Title);

                // Map the textual recommendation (still HTML-entity encoded) to a 1..5 score.
                var suggested = movieHtml.TryCapture(">Consigliato: (.+?)<");
                switch (suggested)
                {
                case "Assolutamente No": movie.Suggestion = 1; break;

                case "No": movie.Suggestion = 2; break;

                case "N&igrave;": movie.Suggestion = 3; break;

                case "S&igrave;": movie.Suggestion = 4; break;

                case "Assolutamente S&igrave;": movie.Suggestion = 5; break;

                // No recommendation captured: Suggestion is left unset.
                case null: break;

                default:
                    // An unmapped label most likely means the site wording changed. Fail with the
                    // offending value so the problem can be diagnosed from the exception alone.
                    throw new FormatException(string.Format("Unexpected 'Consigliato' value \"{0}\" for movie {1} ({2}).", suggested, movie.MyMoviesId, movie.Title));
                }

                // Actors: each fragment starts right after the biography URL prefix, so the first run
                // of digits is the actor id and the text after '>' is the display name.
                var actorsHtmls = new QuickTextParser(node.FindSingle(".linkblu").InnerHtml, "http://www.mymovies.it/biografia/?a=", "</a>");
                foreach (var act in actorsHtmls)
                {
                    var s         = act.AsString;
                    var actorCode = int.Parse(s.TryCapture(@"(\d+)"));
                    var actorName = s.TryCapture(@">([^<]+)").DeEntitize();
                    var actor     = db.Actors.Where(x => x.MyMoviesId == actorCode).SingleOrDefault();
                    if (actor == null)
                    {
                        actor            = new Actor();
                        actor.MyMoviesId = actorCode;
                        actor.Name       = actorName;
                        db.Actors.AddObject(actor);

                        // Saved immediately, presumably so that the lookup above finds this actor if it
                        // appears again later on the page.
                        db.SaveChanges();
                    }
                    movie.Actors.Add(actor);
                }

                // Production countries: a comma-separated list preceding the next link.
                var countries = movieHtml.TryCapture(@"produzione ([^<]+),\s*<a");
                if (countries != null)
                {
                    foreach (var cntr in countries.DeEntitize().Split(','))
                    {
                        var name    = cntr.Trim();
                        var country = db.Countries.Where(x => x.Name == name).SingleOrDefault();
                        if (country == null)
                        {
                            country      = new Country();
                            country.Name = name;
                            db.Countries.AddObject(country);
                        }
                        movie.Countries.Add(country);

                        // Unlike the actor loop, this save runs on every iteration, also when the
                        // country already existed.
                        db.SaveChanges();
                    }
                }

                // Director: optional; the id comes from the biography link's "r" parameter.
                var directorIdString = movieHtml.TryCapture(@"biografia/\?r=(\d+)");
                if (directorIdString != null)
                {
                    var directorId   = int.Parse(directorIdString);
                    var directorName = movieHtml.TryCapture(@"biografia/\?r=\d+\x22>(.+?)<").DeEntitize();
                    var dir          = db.Directors.Where(x => x.MyMoviesId == directorId).SingleOrDefault();
                    if (dir == null)
                    {
                        dir            = new Director();
                        dir.MyMoviesId = directorId;
                        dir.Name       = directorName;
                        db.Directors.AddObject(dir);
                    }

                    movie.Director = dir;
                }

                // Add the movie explicitly only while it is still detached; presumably linking it to
                // tracked actors/countries/directors above can already have attached it to the context.
                if (movie.EntityState == System.Data.EntityState.Detached)
                {
                    db.Movies.AddObject(movie);
                }

                db.SaveChanges();
            }
        }
예제 #2
0
        /// <summary>
        /// Builds the list of TV programs described by the saved HTML (<c>_html</c>).
        /// </summary>
        /// <returns>
        /// An empty sequence when no HTML has been saved; otherwise the programs in page order. Each
        /// program's <c>Duration</c> is set to the gap up to the following program's <c>Date</c>; the
        /// last program's <c>Duration</c> is not assigned here.
        /// </returns>
        /// <exception cref="InvalidDataException">
        /// The time of day goes backwards a second time, i.e. after midnight has already been crossed.
        /// </exception>
        public IEnumerable<TvProgram> GetProgramsFromSavedHtmlIfAvailable()
        {
            if (_html == null)
            {
                return Enumerable.Empty<TvProgram>();
            }

            // Only the part of the document between the body tag and the footer marker is scanned.
            var pageBody  = _html.BetweenS("<body", "class=\"piedipagina\"");
            var fragments = new QuickTextParser(pageBody, "<div style=\"font-size:180%; text-align:right; ");

            var result              = new List<TvProgram>();
            var crossedMidnight     = false;
            double previousSeconds  = 0;
            TvProgram precedingItem = null;

            foreach (var fragment in fragments)
            {
                // Fragments without the "Ore" (time) marker are not program entries.
                if (!fragment.Contains("<strong>Ore"))
                {
                    continue;
                }

                TvProgram current;
                try
                {
                    current = TvProgram.FromHtml(fragment, Day, Channel);
                }
                catch (Exception ex)
                {
                    // Best effort: an entry that cannot be parsed is logged and skipped.
                    System.Diagnostics.Debug.WriteLine("Errore TvProgram: " + ex.Message);
                    continue;
                }

                // Captured before any day correction is applied to the program.
                double currentSeconds = current.TimeOfDay.TotalSeconds;

                // A time earlier than the previous one marks the midnight rollover; it may happen once.
                if (currentSeconds < previousSeconds)
                {
                    if (crossedMidnight)
                    {
                        throw new InvalidDataException("Channels are not correctly ordered.");
                    }

                    crossedMidnight = true;
                }

                // From the rollover onwards every program is moved to the next day.
                if (crossedMidnight)
                {
                    current.FixToNextDay();
                }

                previousSeconds = currentSeconds;

                // The preceding program lasts until the current one starts.
                if (precedingItem != null)
                {
                    precedingItem.Duration = current.Date - precedingItem.Date;
                }

                precedingItem = current;
                result.Add(current);
            }

            return result;
        }