예제 #1
0
        /// <summary>
        /// Downloads the RSS feed at <c>FeedUrl</c>, takes the ten entries with the most recent
        /// publish date and builds an <c>Article</c> for each of them whose categories
        /// contain "Vijesti".
        /// </summary>
        /// <returns>
        /// The articles built from the matching entries. Because the category filter is applied
        /// after the ten newest entries are selected, the list may hold fewer than ten items.
        /// </returns>
        /// <exception cref="NullReferenceException">The feed response was null or empty.</exception>
        private async Task <List <Article> > ScrapeArticles()
        {
            var feedContent = await httpHelper.Get(FeedUrl);

            if (string.IsNullOrEmpty(feedContent))
            {
                // NOTE(review): NullReferenceException is a reserved exception type (CA2201);
                // it is kept here because callers may already depend on it.
                throw new NullReferenceException($"Could not get feed from url {FeedUrl}");
            }

            var latestEntries = new RssParser()
                                .Parse(feedContent)
                                .OrderByDescending(entry => entry.PublishDate)
                                .Take(10);

            var result = new List <Article>();

            foreach (var entry in latestEntries)
            {
                // Only entries tagged "Vijesti" are turned into articles.
                if (!entry.Categories.Contains("Vijesti"))
                {
                    continue;
                }

                // The entry's InternalID is used as the article URL: it feeds the guid,
                // the article text lookup and the stored source URL.
                var article = new Article
                {
                    Source    = NewsSource.IndexHr,
                    Guid      = GetGuidFromUrl(entry.InternalID),
                    Title     = entry.Title,
                    Image     = entry.ImageUrl,
                    Text      = await GetArticleText(entry.InternalID),
                    Summary   = entry.Summary,
                    Keywords  = keywordHelper.GetKeywordsFromTitle(entry.Title),
                    SourceUrl = entry.InternalID
                };

                result.Add(article);
            }

            return result;
        }
예제 #2
0
        /// <summary>
        /// Downloads the RSS feed at <c>FeedUrl</c>, takes the ten entries with the most recent
        /// publish date and builds an <c>Article</c> for each of them.
        /// </summary>
        /// <returns>One article per selected feed entry, in newest-first order.</returns>
        /// <exception cref="InvalidOperationException">The feed response was null or empty.</exception>
        private async Task <List <Article> > ScrapeArticles()
        {
            var articles = new List <Article>();

            var feed = await httpHelper.Get(FeedUrl);

            if (string.IsNullOrEmpty(feed))
            {
                // Throw a specific exception type rather than the bare System.Exception (CA2201).
                // InvalidOperationException derives from Exception, so existing
                // catch (Exception) handlers keep working.
                throw new InvalidOperationException($"Could not get feed from url {FeedUrl}");
            }

            var parser = new RssParser();
            var rss    = parser.Parse(feed);
            var newestArticlesSchemas = rss.OrderByDescending(x => x.PublishDate).Take(10);

            foreach (var schema in newestArticlesSchemas)
            {
                // The entry's InternalID is used as the article URL; the downloaded document
                // is only needed to resolve the article image.
                var document = await httpHelper.GetDocumentFromUrl(schema.InternalID);

                articles.Add(new Article
                {
                    Source    = NewsSource.PriznajemHr,
                    Guid      = GetGuidFromUrl(schema.InternalID),
                    Title     = schema.Title,
                    Image     = GetArticleImage(document),
                    // Text and summary are both derived from the feed entry's content,
                    // not from the downloaded document.
                    Text      = await httpHelper.GetArticleText(schema.Content),
                    Summary   = await httpHelper.GetFirstParagraph(schema.Content),
                    Keywords  = keywordHelper.GetKeywordsFromTitle(schema.Title),
                    SourceUrl = schema.InternalID
                });
            }

            return articles;
        }
예제 #3
0
        /// <summary>
        /// Verifies that <c>KeywordHelper.GetKeywordsFromTitle</c> turns a title containing
        /// punctuation, non-ASCII letters, mixed alphanumeric tokens and "(FOTO)"/"(VIDEO)"
        /// markers into the expected upper-case, comma-separated keyword string.
        /// </summary>
        public void GetKeywordsFromTitle_ReturnsCorrectKeywords()
        {
            // Arrange
            const string input    = "This,; is: a## test title! čžćđ## 123as sdf-;:\"'+? (FOTO) (VIDEO)";
            const string expected = "THIS,TEST,TITLE,ČŽĆĐ";
            var sut = new KeywordHelper();

            // Act
            var actual = sut.GetKeywordsFromTitle(input);

            // Assert
            Assert.Equal(expected, actual);
        }