/// <summary>
/// Prepares the fixture state used by the tests: parses the sample page stored in
/// <c>./hackernewsHtml.html</c> into an AngleSharp document, stubs the page loader so that any
/// requested address yields that document, stubs the field parser with fixed answers, and
/// creates the document parser under test on top of the stubbed field parser.
/// </summary>
public async Task InitializeAsync()
{
    // Load the sample markup and turn it into a document.
    var html = await File.ReadAllTextAsync("./hackernewsHtml.html");
    document = await BrowsingContext
        .New(Configuration.Default)
        .OpenAsync(request => request.Content(html));

    // Whatever address is asked for, the loader hands back the sample document.
    pageLoader = Substitute.For<IAngleSharpPageLoader>();
    pageLoader.LoadPage(Arg.Any<string>()).Returns(document);

    hackerNewsFieldsParser = Substitute.For<INewsParser>();

    // Try* members: write the canned value into the out argument (index 1) and report success.
    hackerNewsFieldsParser
        .TryParseRank(Arg.Any<string>(), out Arg.Any<int>())
        .Returns(call =>
        {
            call[1] = 20;
            return true;
        });
    hackerNewsFieldsParser
        .TryParsePoints(Arg.Any<string>(), out Arg.Any<int>())
        .Returns(call =>
        {
            call[1] = 10;
            return true;
        });
    hackerNewsFieldsParser
        .TryParseComments(Arg.Any<string>(), out Arg.Any<int>())
        .Returns(call =>
        {
            call[1] = 30;
            return true;
        });
    hackerNewsFieldsParser
        .TryParseUrl(Arg.Any<string>(), out Arg.Any<Uri>())
        .Returns(call =>
        {
            call[1] = new Uri("http://test.com");
            return true;
        });

    // Plain string members return fixed text for any input.
    hackerNewsFieldsParser.ParseUser(Arg.Any<string>()).Returns("User");
    hackerNewsFieldsParser.ParseTitle(Arg.Any<string>()).Returns("Title");

    documentParser = new HackerNewsHtmlDocumentParser(hackerNewsFieldsParser);
}
/// <summary>
/// Builds the list of news items found in the rows matched by <c>table.itemlist tr</c>.
/// </summary>
/// <remarks>
/// A row whose class name is exactly <c>athing</c> starts a new item and supplies its title,
/// rank, URL and id. A following row that contains an element with id <c>score_{id}</c>
/// supplies the points, comments and user for that item. The item is added to the result
/// only when its <c>IsValid()</c> check passes.
/// </remarks>
/// <param name="document">Document whose rows are scanned.</param>
/// <returns>The valid items, in the order their score rows appear in the document.</returns>
public IList<NewsItem> ParseNews(IDocument document)
{
    var news = new List<NewsItem>();
    NewsItem currentNew = null;

    foreach (var row in document.QuerySelectorAll("table.itemlist tr"))
    {
        if (row.ClassName == "athing")
        {
            var titleAnchor = row.QuerySelector("a.storylink") as IHtmlAnchorElement;

            // Create the item before calling the helpers. Inside an object initializer the
            // variable still refers to the previous row's item (null on the first row),
            // because the assignment happens only after the initializer has been evaluated.
            currentNew = new NewsItem
            {
                Title = newsFieldsParser.ParseTitle(titleAnchor?.TextContent),
                Id = row.Id
            };
            currentNew.Rank = ParseRank(currentNew, row);
            currentNew.Url = ParseUrl(currentNew, titleAnchor);
        }

        if (currentNew == null)
        {
            continue;
        }

        // Only the row holding this item's score element carries the remaining fields.
        if (row.QuerySelector($"#score_{currentNew.Id}") == null)
        {
            continue;
        }

        currentNew.Points = ParsePoints(currentNew, row);
        currentNew.Comments = ParseComments(currentNew, row);

        // The user element may be missing; pass null on instead of throwing,
        // the same way a missing title anchor is handled above.
        currentNew.User = newsFieldsParser.ParseUser(row.QuerySelector(".hnuser")?.TextContent);

        if (currentNew.IsValid())
        {
            news.Add(currentNew);
        }
    }

    return news;
}