/// <summary>
/// Builds an <see cref="HtmlBag"/> with one entry per item of <c>DateBuilder.DaysOfWeek</c>.
/// </summary>
/// <remarks>
/// Every entry is keyed by the day's string form and holds whatever <c>GetHtml</c>
/// returns for the URL that <c>Urls.GetUrlFor</c> yields for that day. An information
/// message is logged after each entry has been added.
/// </remarks>
/// <returns>The populated bag.</returns>
public HtmlBag Load()
        {
            var result = new HtmlBag();

            foreach (var weekday in DateBuilder.DaysOfWeek)
            {
                // The key is resolved before the URL, matching the original evaluation order.
                var key     = weekday.ToString();
                var address = Urls.GetUrlFor(weekday);
                var html    = GetHtml(address);

                result.Add(key, html);
                LogHelper.Information(this, $"Loading of InTheaterNow html for {key} has finished");
            }

            return result;
        }
Пример #2
0
        /// <summary>
        /// Fills <c>Storyline</c>, <c>PhotoUrl</c> and <c>VideoUrl</c> of the entries in
        /// <paramref name="data"/> from the documents held in <paramref name="htmlBag"/>.
        /// </summary>
        /// <remarks>
        /// Each key of the bag is matched against <c>SiteMovieId</c>; every entry carrying that
        /// id receives the extracted values. The three fields are extracted independently, so a
        /// missing fragment leaves only its own field untouched. Failures for one document are
        /// logged and do not stop the processing of the remaining documents.
        /// </remarks>
        /// <param name="htmlBag">Bag of documents keyed by movie id.</param>
        /// <param name="data">Entries to enrich; they are modified in place.</param>
        /// <returns>The same <paramref name="data"/> list instance.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="htmlBag"/> or <paramref name="data"/> is <c>null</c>.
        /// </exception>
        public List<ScrapedData> Parse(HtmlBag htmlBag, List<ScrapedData> data)
        {
            if (htmlBag == null)
            {
                throw new ArgumentNullException(nameof(htmlBag));
            }

            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            var docs = htmlBag.Bag;

            foreach (var siteMovieId in docs.Keys)
            {
                try
                {
                    // More than one entry may share a movie id (the loader de-duplicates ids
                    // before fetching), so every match is enriched instead of requiring one.
                    var targets = data.Where(x => x.SiteMovieId == siteMovieId).ToList();

                    if (targets.Count == 0)
                    {
                        continue;
                    }

                    // The query result may be null rather than empty when nothing matches.
                    var headings     = docs[siteMovieId].DocumentNode.SelectNodes("//div[@class='film']/h4");
                    var headingCount = headings == null ? 0 : headings.Count;

                    // The content of interest sits two siblings after each heading.
                    var storylineNode = headingCount > 0
                        ? headings[0].NextSibling?.NextSibling
                        : null;
                    var photoNode = headingCount > 1
                        ? headings[1].NextSibling?.NextSibling?.SelectSingleNode(".//img[@id='img1']")
                        : null;
                    var videoNode = headingCount > 2
                        ? headings[2].NextSibling?.NextSibling?.SelectSingleNode(".//object/param[@name='movie']")
                        : null;

                    if (storylineNode != null)
                    {
                        var storyline = storylineNode.InnerText.RemoveSpecial();

                        foreach (var sd in targets)
                        {
                            sd.Storyline = storyline;
                        }
                    }

                    if (photoNode != null)
                    {
                        var photoUrl = photoNode.GetAttributeValue("src", String.Empty);

                        foreach (var sd in targets)
                        {
                            sd.PhotoUrl = photoUrl;
                        }
                    }

                    if (videoNode != null)
                    {
                        var videoUrl = videoNode.GetAttributeValue("value", String.Empty).OnlyYtLink();

                        foreach (var sd in targets)
                        {
                            sd.VideoUrl = videoUrl;
                        }
                    }

                    if (storylineNode != null && photoNode != null && videoNode != null)
                    {
                        var first = targets[0];
                        LogHelper.Information(this, $"Movie html parsed ({first.PhotoUrl}, {first.VideoUrl})");
                    }
                    else
                    {
                        LogHelper.Error(this, $"Movie html for {siteMovieId} is incomplete (storyline found: {storylineNode != null}, photo found: {photoNode != null}, video found: {videoNode != null})");
                    }
                }
                catch (Exception e)
                {
                    // Best-effort: one broken document must not abort the remaining ones.
                    LogHelper.Error(this, $"Parsing of Movie html for {siteMovieId} has failed: {e}");
                }
            }

            return data;
        }
Пример #3
0
        /// <summary>
        /// Builds an <see cref="HtmlBag"/> with one entry per distinct <c>SiteMovieId</c>
        /// found in <paramref name="data"/>.
        /// </summary>
        /// <remarks>
        /// Every entry is keyed by the id and holds whatever <c>GetHtml</c> returns for the URL
        /// that <c>Urls.GetUrlFor</c> yields for that id. An information message is logged
        /// after each entry has been added.
        /// </remarks>
        /// <param name="data">Entries whose movie ids determine what is loaded.</param>
        /// <returns>The populated bag.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
        public HtmlBag Load(List<ScrapedData> data)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            var htmlBag = new HtmlBag();

            // Collapse repeated ids so that each id is added to the bag only once.
            var ids = data.GroupBy(m => m.SiteMovieId).Select(x => x.Key);

            foreach (var id in ids)
            {
                htmlBag.Add(id, GetHtml(Urls.GetUrlFor(id)));
                LogHelper.Information(this, $"Loading of Movie details - html for {id} has finished");
            }

            return htmlBag;
        }
        /// <summary>
        /// Turns every movie node of every document in <paramref name="htmlBag"/> into a
        /// <see cref="ScrapedData"/> entry.
        /// </summary>
        /// <remarks>
        /// Each bag key is converted to a <see cref="DayOfWeek"/> through <c>EnumHelper.GetEnum</c>
        /// and handed, together with the node, to a <c>NodeParser</c>. An information message is
        /// logged for every processed node. Nodes for which the parser yields <c>null</c> are
        /// logged but not added to the result, so the returned list never contains <c>null</c>.
        /// </remarks>
        /// <param name="htmlBag">Bag of documents keyed by the day's string form.</param>
        /// <returns>A new list with one entry per successfully parsed node.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="htmlBag"/> is <c>null</c>.</exception>
        public List<ScrapedData> Parse(HtmlBag htmlBag)
        {
            if (htmlBag == null)
            {
                throw new ArgumentNullException(nameof(htmlBag));
            }

            var docs       = htmlBag.Bag;
            var parsedData = new List<ScrapedData>();

            foreach (var day in docs.Keys)
            {
                var nodes = GetMovieNodes(docs[day]);

                foreach (var node in nodes)
                {
                    var parsed = new NodeParser(EnumHelper.GetEnum<DayOfWeek>(day), node).Parse();

                    // The log line below already allows for a null result; keep such results
                    // out of the list so consumers can dereference every element safely.
                    if (parsed != null)
                    {
                        parsedData.Add(parsed);
                    }

                    LogHelper.Information(this, $"Fragment parsed {day}_{parsed?.SiteMovieId}_{parsed?.Showtimes?.Count}_{nodes?.Count}");
                }
            }

            return parsedData;
        }