/// <summary>
/// Downloads one listing page of mymovies.it for <paramref name="year"/> and stores every
/// movie found on it, together with its actors, production countries and director, through <c>db</c>.
/// </summary>
/// <param name="year">Year placed in the listing URL and assigned to each parsed movie.</param>
/// <param name="page">Value of the <c>pagina</c> query-string parameter.</param>
/// <exception cref="EndOfPagesException">
/// The downloaded page contains the site's generic error text (the exception name suggests
/// this is how the end of the listing is detected; confirm against the caller).
/// </exception>
/// <exception cref="FormatException">
/// A movie carries a "Consigliato" label that is not one of the five known values.
/// Numeric parsing of ids can raise the same exception type.
/// </exception>
public void DumpPage(int year, int page)
{
    var url = string.Format("http://www.mymovies.it/film/{0}/?pagina={1}", year, page);
    Console.WriteLine(url);

    web.Headers[HttpRequestHeader.UserAgent] = SimpleWebRequest.DefaultUserAgent;
    var pageHtml = web.DownloadString(url);

    if (pageHtml.Contains("Si è verificato un errore nella pagina, riprova a collegarti facendo clic all'indirizzo qui sotto"))
    {
        throw new EndOfPagesException();
    }

    // Restrict parsing to the main table, then walk it one movie entry at a time.
    // NOTE(review): presumably QuickTextParser yields the text between each "<a" and the
    // spacer <div>; that would explain why "<a" is prepended again below - confirm.
    var listingHtml = pageHtml.BetweenS(
        "<table class=\"struttura\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\">",
        "<!--***** Inizio Sinistra *****-->");
    var parser = new QuickTextParser(listingHtml, "<a", "<div style=\"clear:both; height:10px;\"></div>");

    foreach (var movieHtmlVs in parser)
    {
        var movieHtml = "<a" + movieHtmlVs.AsString;
        var node = movieHtml.AsHtmlNode();

        // MyMoviesId and Title are read directly; every other field goes through TryGet
        // (presumably so that a missing optional field does not abort the entry - confirm).
        var movie = new Movie()
        {
            MyMoviesId = int.Parse(node.FindAll("script").ElementAt(1).InnerText.TryCapture(@"schiarisci(\d+)\(")),
            Title = (node.FindSingle("h2 a") ?? node.FindSingle("a")).GetText(),
            // The site prints the rating with a decimal comma; normalise it before invariant parsing.
            Rating = TryGet(() => Single.Parse(
                movieHtml.BetweenS("valutazione media tra critica e pubblico: ", " stelle").Replace(',', '.'),
                System.Globalization.NumberFormatInfo.InvariantInfo)),
            Genre = TryGet(() => movieHtml.TryCapture(@"Genere\s*[^>]*>([^<]+)")),
            Year = year,
            Summary = TryGet(() => node.FindSingle("p[style='margin-top:5px']").GetText()),
            ImageCode = TryGet(() => movieHtml.TryCapture(@"http\://pad\.mymovies\.it/filmclub\/([\d/]+)\/imm2")),
            TrailerCode = TryGet(() => movieHtml.TryCapture(@"videotrailer_centrale\.asp\?codicefilm=([\d/]+)&")),
            ShortDescription = TryGet(() => node.FindSingle("h3").GetText()),
            ShortName = TryGet(() => node.FindSingle("h2 a").GetAttributeValue("href", string.Empty).TryCapture(@"film/\d{4}/(.+?)/"))
        };

        // Skip movies that are already stored.
        if (db.Movies.Any(x => x.MyMoviesId == movie.MyMoviesId))
        {
            Console.WriteLine("Duplicato: {0} {1}", movie.MyMoviesId, movie.Title);
            continue;
        }

        Console.WriteLine(movie.Title);

        // Map the textual recommendation onto a 1-5 scale; a missing label leaves Suggestion untouched.
        var suggested = movieHtml.TryCapture(">Consigliato: (.+?)<");
        switch (suggested)
        {
            case "Assolutamente No":
                movie.Suggestion = 1;
                break;
            case "No":
                movie.Suggestion = 2;
                break;
            case "Nì":
                movie.Suggestion = 3;
                break;
            case "Sì":
                movie.Suggestion = 4;
                break;
            case "Assolutamente Sì":
                movie.Suggestion = 5;
                break;
            case null:
                break;
            default:
                // Report the unexpected label instead of throwing a bare, message-less Exception.
                throw new FormatException(string.Format(
                    "Unrecognized 'Consigliato' value '{0}' for movie {1}.", suggested, movie.MyMoviesId));
        }

        // Actors: each chunk starts right after the biography URL prefix, so the first
        // run of digits is the actor id and the text after '>' is the display name.
        var actorsHtmls = new QuickTextParser(node.FindSingle(".linkblu").InnerHtml, "http://www.mymovies.it/biografia/?a=", "</a>");
        foreach (var act in actorsHtmls)
        {
            var s = act.AsString;
            var actorCode = int.Parse(s.TryCapture(@"(\d+)"));
            var actorName = s.TryCapture(@">([^<]+)").DeEntitize();

            var actor = db.Actors.Where(x => x.MyMoviesId == actorCode).SingleOrDefault();
            if (actor == null)
            {
                actor = new Actor();
                actor.MyMoviesId = actorCode;
                actor.Name = actorName;
                db.Actors.AddObject(actor);
                // Saved immediately, so a later lookup for the same id finds this row.
                db.SaveChanges();
            }

            movie.Actors.Add(actor);
        }

        // Production countries: a comma-separated list preceding the director link.
        var countries = movieHtml.TryCapture(@"produzione ([^<]+),\s*<a");
        if (countries != null)
        {
            foreach (var cntr in countries.DeEntitize().Split(','))
            {
                var name = cntr.Trim();
                var country = db.Countries.Where(x => x.Name == name).SingleOrDefault();
                if (country == null)
                {
                    country = new Country();
                    country.Name = name;
                    db.Countries.AddObject(country);
                }

                movie.Countries.Add(country);
                // Saved once per country, whether or not the country was new.
                db.SaveChanges();
            }
        }

        // Director: optional; a new director is persisted by the final SaveChanges below.
        var directorIdString = movieHtml.TryCapture(@"biografia/\?r=(\d+)");
        if (directorIdString != null)
        {
            var directorId = int.Parse(directorIdString);
            var directorName = movieHtml.TryCapture(@"biografia/\?r=\d+\x22>(.+?)<").DeEntitize();

            var dir = db.Directors.Where(x => x.MyMoviesId == directorId).SingleOrDefault();
            if (dir == null)
            {
                dir = new Director();
                dir.MyMoviesId = directorId;
                dir.Name = directorName;
                db.Directors.AddObject(dir);
            }

            movie.Director = dir;
        }

        // NOTE(review): linking tracked actors/countries/directors may already have pulled the
        // movie into the context, hence AddObject only when it is still detached - confirm.
        if (movie.EntityState == System.Data.EntityState.Detached)
        {
            db.Movies.AddObject(movie);
        }

        db.SaveChanges();
    }
}
/// <summary>
/// Builds the list of TV programs from the HTML held in <c>_html</c>.
/// </summary>
/// <returns>
/// An empty sequence when <c>_html</c> is null; otherwise the programs parsed from the page,
/// in page order. Every program except the last one gets its <c>Duration</c> set to the gap
/// between its <c>Date</c> and the next program's <c>Date</c>.
/// </returns>
/// <exception cref="InvalidDataException">
/// The time of day goes backwards a second time, i.e. after the listing has already
/// rolled over once.
/// </exception>
public IEnumerable<TvProgram> GetProgramsFromSavedHtmlIfAvailable()
{
    if (_html == null)
    {
        return Enumerable.Empty<TvProgram>();
    }

    var bodyHtml = _html.BetweenS("<body", "class=\"piedipagina\"");
    var chunks = new QuickTextParser(bodyHtml, "<div style=\"font-size:180%; text-align:right; ");

    var result = new List<TvProgram>();
    TvProgram last = null;
    var crossedMidnight = false;
    double lastSeconds = 0;

    foreach (var chunk in chunks)
    {
        // Only chunks carrying an "Ore" (time) marker are treated as program entries.
        if (!chunk.Contains("<strong>Ore"))
        {
            continue;
        }

        TvProgram current;
        try
        {
            current = TvProgram.FromHtml(chunk, Day, Channel);
        }
        catch (Exception ex)
        {
            // Best effort: an entry that cannot be parsed is logged and skipped.
            System.Diagnostics.Debug.WriteLine("Errore TvProgram: " + ex.Message);
            continue;
        }

        // Read the time of day before any day correction is applied to the program.
        double currentSeconds = current.TimeOfDay.TotalSeconds;

        // A time earlier than the previous entry is taken as the roll-over to the next day;
        // that is accepted only once per listing.
        if (currentSeconds < lastSeconds)
        {
            if (crossedMidnight)
            {
                throw new InvalidDataException("Channels are not correctly ordered.");
            }

            crossedMidnight = true;
        }

        if (crossedMidnight)
        {
            current.FixToNextDay();
        }

        lastSeconds = currentSeconds;

        // The previous program lasts until the current one begins.
        if (last != null)
        {
            last.Duration = current.Date - last.Date;
        }

        last = current;
        result.Add(current);
    }

    return result;
}