/// <summary>
/// Builds an <c>HtmlBag</c> holding one html entry per day listed in
/// <c>DateBuilder.DaysOfWeek</c>.
/// </summary>
/// <returns>
/// A new bag in which each entry is stored under the day's string form and
/// contains whatever <c>GetHtml</c> returned for that day's url.
/// </returns>
public HtmlBag Load()
{
    var bag = new HtmlBag();

    foreach (var day in DateBuilder.DaysOfWeek)
    {
        // The key is computed before the html is fetched, mirroring argument evaluation order.
        var key = day.ToString();
        var html = GetHtml(Urls.GetUrlFor(day));
        bag.Add(key, html);

        LogHelper.Information(this, $"Loading of InTheaterNow html for {day.ToString()} has finished");
    }

    return bag;
}
/// <summary>
/// Enriches already scraped movies with the storyline, photo url and video url
/// found in their detail documents.
/// </summary>
/// <param name="htmlBag">
/// Bag whose <c>Bag</c> collection maps a site movie id to its loaded document.
/// </param>
/// <param name="data">
/// Scraped entries to update in place; entries are matched to documents by
/// <c>SiteMovieId</c>.
/// </param>
/// <returns>The same <paramref name="data"/> list instance that was passed in.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="htmlBag"/> or <paramref name="data"/> is null.
/// </exception>
/// <remarks>
/// A failure while processing one document is logged through <c>LogHelper.Error</c>
/// and does not prevent the remaining documents from being processed.
/// </remarks>
public List<ScrapedData> Parse(HtmlBag htmlBag, List<ScrapedData> data)
{
    // ArgumentNullException derives from ArgumentException, so existing catch blocks keep working.
    if (htmlBag == null)
    {
        throw new ArgumentNullException(nameof(htmlBag));
    }

    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    var docs = htmlBag.Bag;

    foreach (var siteMovieId in docs.Keys)
    {
        try
        {
            // Several entries may share one movie id; SingleOrDefault used to throw
            // in that case, so none of them ever received its details.
            var movies = data.Where(x => x.SiteMovieId == siteMovieId).ToList();
            if (movies.Count == 0)
            {
                continue;
            }

            // NOTE(review): assumes HtmlAgilityPack semantics, where SelectNodes returns
            // null (not an empty collection) when nothing matches - confirm the library.
            var found = docs[siteMovieId].DocumentNode.SelectNodes("//div[@class='film']/h4");
            if (found == null)
            {
                LogHelper.Error(this, $"No film sections found in movie html for {siteMovieId}");
                continue;
            }

            var nodes = found.ToList();

            // Each value is assigned as soon as it is extracted, so a failure on a
            // later section still leaves the earlier values in place (as before).
            var storyline = nodes[0].NextSibling.NextSibling.InnerText.RemoveSpecial();
            foreach (var movie in movies)
            {
                movie.Storyline = storyline;
            }

            var photoUrl = nodes[1].NextSibling
                                   .NextSibling
                                   .SelectSingleNode(".//img[@id='img1']")
                                   .GetAttributeValue("src", String.Empty);
            foreach (var movie in movies)
            {
                movie.PhotoUrl = photoUrl;
            }

            var videoUrl = nodes[2].NextSibling
                                   .NextSibling
                                   .SelectSingleNode(".//object/param[@name='movie']")
                                   .GetAttributeValue("value", String.Empty)
                                   .OnlyYtLink();
            foreach (var movie in movies)
            {
                movie.VideoUrl = videoUrl;
            }

            // Logged once per document, reading the values back from an updated entry.
            var first = movies[0];
            LogHelper.Information(this, $"Movie html parsed ({first.PhotoUrl}, {first.VideoUrl})");
        }
        catch (Exception e)
        {
            // Best effort: one malformed page must not abort the whole run.
            LogHelper.Error(this, e.ToString());
        }
    }

    return data;
}
/// <summary>
/// Builds an <c>HtmlBag</c> holding one html entry per distinct
/// <c>SiteMovieId</c> found in <paramref name="data"/>.
/// </summary>
/// <param name="data">Scraped entries whose movie ids determine which urls are requested.</param>
/// <returns>
/// A new bag in which each entry is stored under the movie id and contains
/// whatever <c>GetHtml</c> returned for that id's url.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public HtmlBag Load(List<ScrapedData> data)
{
    // ArgumentNullException derives from ArgumentException, so existing catch blocks keep working.
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    var htmlBag = new HtmlBag();

    // The same movie id can occur in several entries; request each page only once.
    var ids = data.Select(m => m.SiteMovieId).Distinct();

    foreach (var id in ids)
    {
        htmlBag.Add(id, GetHtml(Urls.GetUrlFor(id)));
        LogHelper.Information(this, $"Loading of Movie details - html for {id} has finished");
    }

    return htmlBag;
}
/// <summary>
/// Parses every movie node of every document in the bag into a <c>ScrapedData</c> entry.
/// </summary>
/// <param name="htmlBag">
/// Bag whose <c>Bag</c> collection maps a day key to its loaded document; each key is
/// converted to a <c>DayOfWeek</c> through <c>EnumHelper.GetEnum</c>.
/// </param>
/// <returns>
/// A new list with one entry per successfully parsed movie node. Nodes whose parser
/// returned null are logged and left out.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlBag"/> is null.</exception>
public List<ScrapedData> Parse(HtmlBag htmlBag)
{
    // ArgumentNullException derives from ArgumentException, so existing catch blocks keep working.
    if (htmlBag == null)
    {
        throw new ArgumentNullException(nameof(htmlBag));
    }

    var docs = htmlBag.Bag;
    var parsedData = new List<ScrapedData>();

    foreach (var day in docs.Keys)
    {
        var nodes = GetMovieNodes(docs[day]);

        // The original log line already treated the node list as possibly null
        // (nodes?.Count) while dereferencing it unguarded; skip such a day explicitly.
        if (nodes == null)
        {
            LogHelper.Error(this, $"No movie nodes found for {day}");
            continue;
        }

        foreach (var node in nodes)
        {
            var parsed = new NodeParser(EnumHelper.GetEnum<DayOfWeek>(day), node).Parse();

            // A null entry in the result would break consumers that read SiteMovieId
            // from every element, so it is reported and dropped instead of stored.
            if (parsed == null)
            {
                LogHelper.Error(this, $"Fragment for {day} could not be parsed and was skipped");
                continue;
            }

            parsedData.Add(parsed);
            LogHelper.Information(this, $"Fragment parsed {day}_{parsed.SiteMovieId}_{parsed.Showtimes?.Count}_{nodes.Count}");
        }
    }

    return parsedData;
}