/// <summary>
/// Inspects one line of a cinema page and copies any field it recognises
/// (address, telephone, pass information, map URL) into <c>Cinema</c>.
/// </summary>
/// <param name="lineHtml">A single line of HTML. An empty string is ignored.</param>
private void GetCinemaInfo(string lineHtml)
{
    if (lineHtml.Equals(string.Empty))
    {
        return;
    }

    const string addressMarker = "col-xs-12 col-sm-12 col-md-3 txt";
    const string valueStart = "</strong> ";
    const string valueEnd = "</p>";

    // The marker line switches on "address mode"; the address itself is taken
    // from the first later line that is not markup noise.
    if (lineHtml.Contains(addressMarker))
    {
        _addingAddress = true;
    }

    if (_addingAddress)
    {
        string trimmed = lineHtml.Trim();
        bool isNoise = trimmed.Equals("<p>")
            || trimmed.Equals("</div>")
            || trimmed.Contains("maps")
            || trimmed.Contains(addressMarker)
            || trimmed.Length <= 0;

        if (!isNoise)
        {
            Cinema.Address = CinevoStrings.RemoveChars(lineHtml, '\t');
            _addingAddress = false;
        }
    }

    // Labelled fields: presumably GetChunk returns the text between the two
    // delimiters - confirm against CinevoStrings.
    if (lineHtml.Contains("Tel:"))
    {
        Cinema.Telephone = CinevoStrings.GetChunk(lineHtml, valueStart, valueEnd);
    }

    if (lineHtml.Contains("Venta Golfas:"))
    {
        Cinema.NightPasses = CinevoStrings.GetChunk(lineHtml, valueStart, valueEnd);
    }

    if (lineHtml.Contains("Matinales:"))
    {
        Cinema.MorningPasses = CinevoStrings.GetChunk(lineHtml, valueStart, valueEnd);
    }

    if (lineHtml.Contains("Día del espectador:"))
    {
        Cinema.CheapDay = CinevoStrings.GetChunk(lineHtml, valueStart, valueEnd);
    }

    if (lineHtml.Contains("Venta anticipada:"))
    {
        Cinema.OnlineTickets = CinevoStrings.GetChunk(lineHtml, valueStart, valueEnd);
    }

    if (lineHtml.Contains("<img src="))
    {
        Cinema.MapUrl = CinevoStrings.GetChunk(lineHtml, "<img src=", ">");
    }

    // Runs for every non-empty line once a map URL is known, not only for the
    // line that supplied it.
    if (Cinema.MapUrl != null)
    {
        SetMapValues();
    }
}
/// <summary>
/// Builds a <see cref="Town"/> from one line of HTML by extracting its id,
/// display name, tag and URL with <c>CinevoStrings.GetChunk</c>.
/// </summary>
/// <param name="lineHtml">The HTML line describing a town.</param>
/// <returns>
/// The populated town, or <c>null</c> if an exception was raised; the exception
/// is passed to <c>Error.SendError</c> before returning.
/// </returns>
private Town ConvertToObject(string lineHtml)
{
    try
    {
        var result = new Town();
        result.Id = CinevoStrings.GetChunk(lineHtml, "value=\"", "data-name", "\"");
        result.Name = CinevoStrings.GetChunk(lineHtml, "/\">", "</a>");
        result.Tag = CinevoStrings.GetChunk(lineHtml, "data-name=\"", "\" >");
        result.Url = CinevoStrings.GetChunk(lineHtml, "<a href=\"", "\">");
        return result;
    }
    catch (Exception ex)
    {
        var reporter = new Error();
        reporter.SendError(ex);
        return null;
    }
}
/// <summary>
/// Builds a <see cref="Cinema"/> from the HTML lines that describe one cinema.
/// The cinema id is taken from the running <c>_count</c> counter, which is
/// incremented on every call.
/// </summary>
/// <param name="linesPerCinema">The HTML lines of one cinema; each element is read as a string.</param>
/// <returns>
/// The populated cinema, or <c>null</c> if an exception was raised; the exception
/// is passed to <c>Error.SendError</c> before returning.
/// </returns>
private Cinema ConvertToObject(ArrayList linesPerCinema)
{
    const string paragraphEnd = "</p>";
    const string labelEnd = "</strong>";

    try
    {
        var parsed = new Cinema { CinemaId = _count++.ToString() };

        // The checks are independent: one line may satisfy several of them.
        foreach (string htmlLine in linesPerCinema)
        {
            if (htmlLine.Contains("col-xs-12 col-sm-6 col-md-6 info-cine"))
            {
                parsed.TownId = CinevoStrings.GetChunk(htmlLine, "data-poblacion=\"", "\">");
            }

            if (htmlLine.Contains("href"))
            {
                parsed.Name = CinevoStrings.GetChunk(htmlLine, "\">", "</a>");
                parsed.Url = CinevoStrings.GetChunk(htmlLine, "href=\"", "\">");
                // Tag is the lower-cased name with spaces turned into hyphens.
                parsed.Tag = parsed.Name.ToLower().Replace(' ', '-');
            }

            if (htmlLine.Contains("TEL"))
            {
                parsed.Telephone = CinevoStrings.GetChunk(htmlLine, labelEnd, paragraphEnd).TrimStart();
            }

            if (htmlLine.Contains("DIRECCIÓN"))
            {
                parsed.Address = CinevoStrings.GetChunk(htmlLine, labelEnd, paragraphEnd).TrimStart();
            }

            // Once an address is known, every line ending in </p> overwrites
            // Town, so the last such line wins.
            if (htmlLine.EndsWith(paragraphEnd) && parsed.Address != null)
            {
                parsed.Town = htmlLine.Replace(paragraphEnd, string.Empty);
            }
        }

        return parsed;
    }
    catch (Exception ex)
    {
        var reporter = new Error();
        reporter.SendError(ex);
        return null;
    }
}
/// <summary>
/// Builds a <see cref="Film"/> from the HTML lines that describe one film:
/// image, detail URL, version, name/tag and the showing times per day.
/// </summary>
/// <param name="linesPerFilm">The HTML lines of one film; each element is read as a string. Empty lines are skipped.</param>
/// <returns>
/// The populated film, or <c>null</c> if an exception was raised; the exception
/// is passed to <c>Error.SendError</c> before returning.
/// </returns>
private Film ConvertToObject(ArrayList linesPerFilm)
{
    const string baseUrl = "https://cartelera.elperiodico.com";

    try
    {
        var film = new Film();
        Day currentDay = null;
        bool addingDay = false;

        foreach (string lineHtml in linesPerFilm)
        {
            if (lineHtml.Equals(string.Empty))
            {
                continue;
            }

            if (lineHtml.Contains("<img style"))
            {
                film.Image = CinevoStrings.GetChunk(lineHtml, "src=\"", "\" alt");
            }

            if (lineHtml.Contains("Ver película"))
            {
                film.FilmUrl = baseUrl + CinevoStrings.GetChunk(lineHtml, "href=\"", "\" title");
            }

            if (lineHtml.Contains("(") && lineHtml.Contains(")") && lineHtml.Contains("V"))
            {
                film.Version = lineHtml;
                film.IsOriginalVersion = IsOriginalVersion(film.Version);
            }

            if (lineHtml.Contains("href=\"") && lineHtml.Contains("title=\"") && lineHtml.Contains("class=\""))
            {
                film.Name = CinevoStrings.GetChunk(lineHtml, "\">", "</a>");

                // Derive a tag from the name; an empty name falls back to "NOTAG".
                string rawTag = film.Name.Equals(string.Empty)
                    ? "NOTAG"
                    : film.Name.Replace(",", "").Replace(" ", "-").Replace("#", "").Trim().ToLower();

                // Drop every character that is not allowed in a file name.
                film.Tag = System.IO.Path.GetInvalidFileNameChars()
                    .Aggregate(rawTag, (current, c) => current.Replace(c.ToString(), string.Empty));
            }

            if (lineHtml.Contains("class=\"wrap\""))
            {
                addingDay = true;
            }

            if (addingDay)
            {
                string trimmed = lineHtml.Trim();

                if (trimmed.Contains("<dt>"))
                {
                    currentDay = new Day { DayOfWeek = CinevoStrings.GetChunk(lineHtml, ">", "</") };
                }

                if (trimmed.Contains("<dd>"))
                {
                    currentDay?.Times.Add(CinevoStrings.GetChunk(lineHtml, ">", "</"));
                }

                if (trimmed.Contains("</dl>"))
                {
                    addingDay = false;

                    // A list closed before any <dt> was seen has no day to
                    // record; never store a null entry in film.Days.
                    if (currentDay != null)
                    {
                        film.Days.Add(currentDay);
                    }
                }
            }
        }

        return film;
    }
    catch (Exception ex)
    {
        new Error().SendError(ex);
        return null;
    }
}
/// <summary>
/// Fills <c>Cinema.Latitude</c> and <c>Cinema.Longitude</c> from <c>Cinema.MapUrl</c>.
/// Latitude is the chunk between "|" and ","; longitude is the chunk between
/// "," and "&amp;mobile" (exact matching rules are those of <c>CinevoStrings.GetChunk</c>).
/// </summary>
private void SetMapValues()
{
    var mapUrl = Cinema.MapUrl;

    Cinema.Latitude = CinevoStrings.GetChunk(mapUrl, "|", ",");
    Cinema.Longitude = CinevoStrings.GetChunk(mapUrl, ",", "&mobile");
}
/// <summary>
/// Finds, in <paramref name="path"/>, the first file whose path contains
/// <c>Film.Tag</c> and reads it line by line, filling the detail fields of
/// <c>Film</c> (description, trailer, actors, director, release, genre,
/// duration, country).
/// </summary>
/// <param name="path">Directory holding the downloaded film pages.</param>
/// <remarks>
/// If no file matches, the method returns after printing the header and
/// leaves <c>Film</c> untouched.
/// </remarks>
public void ScrapeHtml(string path)
{
    // FirstOrDefault rather than First: a missing file must reach the guard
    // below instead of throwing InvalidOperationException.
    string filmFile = Directory.GetFiles(path).FirstOrDefault(x => x.Contains(Film.Tag));

    Console.WriteLine("Scraping film -> " + Film.Name);
    Console.WriteLine("----\n");

    if (string.IsNullOrEmpty(filmFile))
    {
        return;
    }

    // The using block releases the file handle even if parsing throws.
    using (var fileReader = new StreamReader(filmFile))
    {
        string line;
        bool updatingDescription = false;

        while ((line = fileReader.ReadLine()) != null)
        {
            // Strip tab characters before inspecting the line.
            line = CinevoStrings.RemoveChars(line, '\u0009');

            // The description is taken from the line(s) following the
            // StartDescription marker; capture stops once it is non-empty.
            if (updatingDescription)
            {
                Film.Description = CinevoStrings.StripHtml(line).Trim();
            }

            if (line.Contains(StartDescription))
            {
                updatingDescription = true;
            }

            if (!string.IsNullOrEmpty(Film.Description))
            {
                updatingDescription = false;
            }

            // Only the first trailer found is kept.
            if (line.Contains(StartTrailer) && Film.Trailer == null)
            {
                Film.Trailer = CinevoStrings.GetChunk(line, "src=\"", "\" frameborder");
            }

            // Labelled fields: strip the markup, remove the label, trim.
            if (line.Contains(StartActors))
            {
                Film.Actors = CinevoStrings.StripHtml(line).Replace(StartActors, string.Empty).Trim();
            }

            if (line.Contains(StartDirector))
            {
                Film.Director = CinevoStrings.StripHtml(line).Replace(StartDirector, string.Empty).Trim();
            }

            if (line.Contains(StartEstreno))
            {
                Film.FirstShown = CinevoStrings.StripHtml(line).Replace(StartEstreno, string.Empty).Trim();
            }

            if (line.Contains(StartGenre))
            {
                Film.Genre = CinevoStrings.StripHtml(line).Replace(StartGenre, string.Empty).Trim();
            }

            if (line.Contains(StartDuration))
            {
                Film.Durantion = CinevoStrings.StripHtml(line).Replace(StartDuration, string.Empty).Trim();
            }

            if (line.Contains(StartCountry))
            {
                Film.Country = CinevoStrings.StripHtml(line).Replace(StartCountry, string.Empty).Trim();
            }
        }
    }
}
public void Shoudl_return_the_corrent_chunk()
{
    // Arrange: a sample <option> element whose value attribute is "872".
    string html = "<option value=\"872\" data-name=\"abrera\" ><a href=\"https://cartelera.elperiodico.com/cines/abrera/\">Abrera</a></option>";
    string expectedId = "872";

    // Act: use the four-argument GetChunk overload to pull out the value attribute.
    var chunk = CinevoStrings.GetChunk(html, "value=\"", "data-name", "\"");

    // Assert
    Assert.IsTrue(chunk.Equals(expectedId));
}