Beispiel #1
0
        /// <summary>
        /// Downloads the feed at <c>FeedUrl</c>, parses it with <c>RssParser</c>, and builds an
        /// <c>Article</c> for each of the ten entries with the most recent <c>PublishDate</c>.
        /// </summary>
        /// <returns>The scraped articles, ordered from newest to oldest publish date.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the feed request returns a null or empty string.
        /// </exception>
        private async Task<List<Article>> ScrapeArticles()
        {
            var feed = await httpHelper.Get(FeedUrl);

            if (string.IsNullOrEmpty(feed))
            {
                // Throw a specific exception type rather than the base Exception;
                // existing catch (Exception) handlers still observe it.
                throw new InvalidOperationException($"Could not get feed from url {FeedUrl}");
            }

            var rss = new RssParser().Parse(feed);

            // Only the ten most recently published entries are scraped.
            var newestArticlesSchemas = rss.OrderByDescending(x => x.PublishDate).Take(10);

            var articles = new List<Article>();

            foreach (var schema in newestArticlesSchemas)
            {
                // InternalID is used as the article URL here (it is also stored as SourceUrl below).
                var document = await httpHelper.GetDocumentFromUrl(schema.InternalID);

                articles.Add(new Article
                {
                    Source    = NewsSource.PriznajemHr,
                    Guid      = GetGuidFromUrl(schema.InternalID),
                    Title     = schema.Title,
                    Image     = GetArticleImage(document),
                    Text      = await httpHelper.GetArticleText(schema.Content),
                    Summary   = await httpHelper.GetFirstParagraph(schema.Content),
                    Keywords  = keywordHelper.GetKeywordsFromTitle(schema.Title),
                    SourceUrl = schema.InternalID
                });
            }

            return articles;
        }