Beispiel #1
0
        /// <summary>
        /// Collapses a list of scraped fragments into a single <see cref="ScrapedData"/>.
        /// All descriptive fields are taken from the first fragment; the showtimes of
        /// every fragment are concatenated in list order.
        /// </summary>
        /// <param name="sameId">Fragments to merge. Must contain at least one entry.</param>
        /// <returns>A new instance holding the first fragment's fields and the combined showtimes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="sameId"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="sameId"/> is empty.</exception>
        private ScrapedData ShrinkIt(List <ScrapedData> sameId)
        {
            if (sameId == null)
            {
                throw new ArgumentNullException(nameof(sameId));
            }

            if (sameId.Count == 0)
            {
                // Same exception type the list indexer would raise, but with a usable message.
                throw new ArgumentOutOfRangeException(nameof(sameId), "At least one fragment is required.");
            }

            // The first element may itself be null; in that case every copied field stays null.
            var first = sameId[0];

            ScrapedData data = new ScrapedData
            {
                PosterLink  = first?.PosterLink,
                SiteMovieId = first?.SiteMovieId,
                TitleBA     = first?.TitleBA,
                Title       = first?.Title,
                Director    = first?.Director,
                Cast        = first?.Cast,
                Genre       = first?.Genre,
                Duration    = first?.Duration,
                Storyline   = first?.Storyline,
                PhotoUrl    = first?.PhotoUrl,
                VideoUrl    = first?.VideoUrl,
                Showtimes   = new List <Tuple <DayOfWeek, string> >()
            };

            foreach (var item in sameId)
            {
                // Null fragments and fragments without a showtime list contribute nothing.
                if (item?.Showtimes != null)
                {
                    data.Showtimes.AddRange(item.Showtimes);
                }
            }

            LogHelper.Information(this, $"Shrinked({sameId.Count})fragments({data.SiteMovieId}_{data.Showtimes.Count})");
            return data;
        }
Beispiel #2
0
        /// <summary>
        /// Creates a new <see cref="Movie"/> from the given scraped data.
        /// </summary>
        /// <remarks>
        /// The new instance is stored in the <c>movie</c> field, <c>BindData</c> is then
        /// invoked with <paramref name="scrapedData"/>, and the field's value is returned.
        /// </remarks>
        /// <param name="scrapedData">Scraped values handed to <c>BindData</c>.</param>
        /// <returns>The movie instance held in the <c>movie</c> field after binding.</returns>
        public Movie Create(ScrapedData scrapedData)
        {
            // Replace whatever the field held before with a fresh instance.
            movie = new Movie();

            BindData(scrapedData);
            return movie;
        }
Beispiel #3
0
        /// <summary>
        /// Serializes <paramref name="siteData"/> to JSON and stores it in the cache
        /// under <paramref name="url"/> with an absolute lifetime of five minutes.
        /// </summary>
        /// <param name="url">Cache key.</param>
        /// <param name="siteData">Data to serialize and store.</param>
        /// <returns>A task that completes when the cache write has finished.</returns>
        public async Task InsertSiteDataToCache(string url, ScrapedData siteData)
        {
            var payload = JsonSerializer.Serialize(siteData);

            // Entry expires five minutes after it is written.
            var entryOptions = new DistributedCacheEntryOptions();
            entryOptions.AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(5);

            await _redisCache.SetStringAsync(url, payload, entryOptions);
        }
        /// <summary>
        /// Downloads the page at <paramref name="link"/> and extracts the inner text of its
        /// <c>body</c> element, the non-blank <c>content</c> values of its <c>meta</c> tags,
        /// and every anchor <c>href</c> that is an absolute http/https URL.
        /// </summary>
        /// <param name="link">Absolute http or https URL of the page to scrape.</param>
        /// <returns>The scraped data; parts that the page does not contain are left untouched.</returns>
        /// <exception cref="BadRequestException">
        /// <paramref name="link"/> is not an absolute http/https URL (this includes null and empty).
        /// </exception>
        public async Task <ScrapedData> ScrapeData(string link)
        {
            Uri pageUri;
            var isUrl = Uri.TryCreate(link, UriKind.Absolute, out pageUri) && (pageUri.Scheme == Uri.UriSchemeHttp || pageUri.Scheme == Uri.UriSchemeHttps);

            if (!isUrl)
            {
                throw new BadRequestException("An invalid Url is received.");
            }

            var scrapedData = new ScrapedData();

            // The URL is absolute, so no BaseAddress is needed. Assigning BaseAddress on a
            // client that has already sent a request throws InvalidOperationException,
            // which made every call after the first one fail on a reused client.
            string rawHtmlData;
            using (var response = await _httpClient.GetAsync(pageUri))
            {
                rawHtmlData = await response.Content.ReadAsStringAsync();
            }

            HtmlDocument pageDocument = new HtmlDocument();
            pageDocument.LoadHtml(rawHtmlData);

            // XPath lookups yield null when nothing matches, so each result is checked before use.
            var bodyNode = pageDocument.DocumentNode.SelectSingleNode("//body");
            if (bodyNode != null)
            {
                scrapedData.BodyContent = bodyNode.InnerText;
            }

            var metaNodes = pageDocument.DocumentNode.SelectNodes("//meta");
            if (metaNodes != null)
            {
                foreach (var metaNode in metaNodes)
                {
                    var metaContent = metaNode.GetAttributeValue("content", string.Empty);

                    if (!string.IsNullOrWhiteSpace(metaContent))
                    {
                        scrapedData.MetaTags.Add(metaContent);
                    }
                }
            }

            var anchorNodes = pageDocument.DocumentNode.SelectNodes("//a[@href]");
            if (anchorNodes != null)
            {
                foreach (var anchorNode in anchorNodes)
                {
                    var href = anchorNode.GetAttributeValue("href", string.Empty);

                    // Only absolute http/https targets are collected; relative links are skipped.
                    Uri hrefUri;
                    var isExternalLink = Uri.TryCreate(href, UriKind.Absolute, out hrefUri) && (hrefUri.Scheme == Uri.UriSchemeHttp || hrefUri.Scheme == Uri.UriSchemeHttps);

                    if (isExternalLink)
                    {
                        scrapedData.Links.Add(href);
                    }
                }
            }

            return scrapedData;
        }
Beispiel #5
0
 /// <summary>
 /// Walks every element matched by the CSS selector <c>ol ol li</c> in the response,
 /// builds one record per element (language, word, word length) and appends its
 /// indented JSON to <c>Scrape/WordData.json</c>.
 /// </summary>
 /// <param name="response">Response whose matched elements are processed.</param>
 public override void Parse(Response response)
 {
     foreach (var listItem in response.Css("ol ol li"))
     {
         string word = listItem.TextContentClean;

         var record = new ScrapedData();
         record["language"] = "Swedish";
         record["Word"] = word;
         record["Length"] = word.Length;

         // One append per element; records are written back to back in the file.
         File.AppendAllText("Scrape/WordData.json", JsonConvert.SerializeObject(record, Formatting.Indented));
     }
 }
Beispiel #6
0
        /// <summary>
        /// Returns the scraped data for <paramref name="text"/>, reading it from the cache
        /// when present and otherwise scraping it and writing the result to the cache.
        /// </summary>
        /// <param name="text">Value used both as the cache key and as the scraper input.</param>
        /// <returns>
        /// An empty <see cref="ScrapedData"/> for null or empty input; otherwise the cached
        /// or freshly scraped data.
        /// </returns>
        private async Task <ScrapedData> CheckForScrapedDataExistsInCacheAsync(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return new ScrapedData();
            }

            ScrapedData cached = await _cache.GetSiteDataFromCache(text);

            if (cached is null)
            {
                // Cache miss: scrape now and remember the result for later calls.
                ScrapedData fresh = await _linkScraperService.ScrapeData(text);
                await _cache.InsertSiteDataToCache(text, fresh);

                return fresh;
            }

            return cached;
        }
Beispiel #7
0
        /// <summary>
        /// Copies the scraped values onto the instance held in the <c>movie</c> field,
        /// assigns it a newly generated GUID string as id, and logs the title and site id.
        /// </summary>
        /// <param name="scrapedData">Source of the values copied onto the movie.</param>
        private void BindData(ScrapedData scrapedData)
        {
            // Identity: a new id is generated on every call.
            movie.Id = Guid.NewGuid().ToString();

            // Values copied as-is from the scraped data.
            movie.PosterLink = scrapedData.PosterLink;
            movie.Title = scrapedData.Title;
            movie.TitleBA = scrapedData.TitleBA;
            movie.Duration = scrapedData.Duration;
            movie.Genre = scrapedData.Genre;
            movie.Director = scrapedData.Director;
            movie.Cast = scrapedData.Cast;
            movie.SiteMovieId = scrapedData.SiteMovieId;
            movie.Storyline = scrapedData.Storyline;

            // Media references are passed through the Urls helpers before being stored.
            movie.PhotoUrl = Urls.GetPhotoUrlFor(scrapedData.PhotoUrl);
            movie.VideoUrl = Urls.GetYtUrlFor(scrapedData.VideoUrl);

            LogHelper.Information(this, $"Movie created ({movie?.Title}) {movie?.SiteMovieId}");
        }
Beispiel #8
0
        /// <summary>
        /// Converts the scraped showtimes with <c>CreateShowtime</c> and adds to
        /// <c>movie.Showtimes</c> those that are not already present according to
        /// <c>ShowtimeEqualityComparer</c>.
        /// </summary>
        /// <param name="scrapedData">Scraped data whose showtimes are merged into the movie.</param>
        public void UpdateShowtimes(ScrapedData scrapedData)
        {
            var converted = new List <Showtime>();

            foreach (var rawTime in scrapedData.Showtimes)
            {
                var showtime = CreateShowtime(rawTime);
                converted.Add(showtime);
            }

            // Keep only showtimes the movie does not have yet. The result is captured in a
            // list first because movie.Showtimes is both an input of the query and the
            // collection that is modified below.
            var comparer = new ShowtimeEqualityComparer();
            var missing  = converted.Except(movie.Showtimes, comparer).ToList();

            foreach (var showtime in missing)
            {
                movie.Showtimes.Add(showtime);
            }

            LogHelper.Information(this, $"Showtimes compared fs_{converted.Count}  totals_{movie.Showtimes.Count}");
        }
        /// <summary>
        /// Builds a <see cref="ScrapedData"/> from raw text: the text becomes the body
        /// content and every match of <c>RegexExtensions.isLink</c> in it is added to the links.
        /// </summary>
        /// <param name="text">Text to inspect.</param>
        /// <returns>
        /// A new <see cref="ScrapedData"/>; nothing is assigned or added to it when
        /// <paramref name="text"/> is null or empty.
        /// </returns>
        public async Task <ScrapedData> ScrapeData(string text)
        {
            var result = new ScrapedData();

            // Nothing to extract from missing input.
            if (string.IsNullOrEmpty(text))
            {
                return result;
            }

            result.BodyContent = text;

            foreach (var match in RegexExtensions.listMatchingRegex(RegexExtensions.isLink, text))
            {
                result.Links.Add(match);
            }

            return result;
        }