/// <summary>
/// Downloads the HTML for each article URL returned by <see cref="ArticleFinder"/>
/// and asserts that every downloaded page yields at least one parseable node.
/// Falls back to progressively broader XPath queries when the preferred
/// article layout is not present (SelectNodes returns null on no match,
/// which surfaces here as an ArgumentNullException from the query).
/// </summary>
public void EmptyContentTest()
{
    ArticleFinder af = new ArticleFinder();
    //10 pages = 150 results
    var records = af.GetArticles("arrested", pageCount);

    List<string> htmlList = new List<string>();
    using (WebClient client = new WebClient())
    {
        foreach (var item in records)
        {
            try
            {
                htmlList.Add(client.DownloadString(item.URL));
            }
            catch (WebException)
            {
                // Unreachable/failed pages are skipped; the remaining
                // downloads are still validated below.
            }
        }
    }

    foreach (var html in htmlList)
    {
        HtmlDocument htmlDoc = new HtmlDocument();
        // BUG FIX: the original parsed htmlString.ToString(), but htmlString
        // had already been cleared in the download loop's finally block, so
        // every document was empty and the assertion never saw real content.
        // Parse the downloaded page itself instead.
        htmlDoc.Load(new StringReader(html));

        IEnumerable<HtmlNode> nodes;
        try
        {
            nodes = from HtmlNode node in htmlDoc.DocumentNode.SelectNodes("//div/article/p") //get the everything in the article
                    select node;
        }
        catch (ArgumentNullException)
        {
            try
            {
                nodes = from HtmlNode node in htmlDoc.DocumentNode.SelectNodes("//div/p") // get every p tag
                        select node;
            }
            catch (ArgumentNullException)
            {
                nodes = from HtmlNode node in htmlDoc.DocumentNode.DescendantsAndSelf() // get everything
                        select node;
            }
        }
        Assert.IsNotEmpty(nodes);
    }
}
/// <summary>
/// Runs a search through <see cref="ArticleFinder"/> and converts each
/// finder record into a fully populated <see cref="Article"/>.
/// </summary>
/// <param name="searchTerm">Term passed to the article finder.</param>
/// <param name="pages">Number of result pages to request.</param>
/// <returns>One <see cref="Article"/> per finder record, in finder order.</returns>
public List<Article> GetArticleData(string searchTerm, int pages)
{
    var finder = new ArticleFinder();

    // Project every finder record through GenerateArticle in a single pass.
    return finder.GetArticles(searchTerm, pages)
                 .Select(record => GenerateArticle(record))
                 .ToList();
}
/// <summary>
/// Verifies that the finder never returns two records sharing the same
/// BingArticleID across a multi-page search.
/// </summary>
public void NoDuplicates()
{
    ArticleFinder af = new ArticleFinder();
    //10 pages = 150 results
    var records = af.GetArticles("arrested", pageCount);

    // Any ID that appears more than once in the result set is a duplicate.
    var duplicateIds = records
        .GroupBy(a => a.BingArticleID)
        .Where(group => group.Count() > 1)
        .Select(group => group.Key);

    Assert.Less(duplicateIds.Count(), 1);
}