// Passes a fixed set of news URLs through ArticleFinder.GetResults and asserts that
// none of the returned items has null, empty, or whitespace-only Content.
public void EmptyArticleContentTest()
{
    string[] urls =
    {
        "http://www.news.com.au/breaking-news/world/indon-says-2-arrested-for-myanmar-plot/story-e6frfkui-1226634619523",
        "http://www.elpasotimes.com/newupdated/ci_23175234/el-paso-man-arrested-charged-cocaine-possession",
        "http://www.twincities.com/crime/ci_23160767/st-paul-man-arrested-after-mother-shot-is",
        "http://www.news-record.com/news/1174493-91/three-arrested-in-home-invasion",
        "http://www.mercurynews.com/news/ci_23159399/teacher-arrested-allegedly-molesting-girls",
        "http://www.menafn.com/menafn/9c990483-5c0b-4bb9-87ff-8ade7ac1a9c2/BRIEF-Tulsa-man-arrested-on-drug-gun-complaints"
    };

    // Wrap every URL in a Bing.NewsResult, which is the element type handed to GetResults.
    List<Bing.NewsResult> newsResults = new List<Bing.NewsResult>();
    for (int i = 0; i < urls.Length; i++)
    {
        newsResults.Add(new Bing.NewsResult { Url = urls[i] });
    }

    ArticleFinder finder = new ArticleFinder();
    var articles = finder.GetResults(newsResults);

    int blankCount = 0;
    foreach (var article in articles)
    {
        // IsNullOrWhiteSpace is also true for null and "", so it covers the
        // IsNullOrEmpty case on its own.
        if (!string.IsNullOrWhiteSpace(article.Content))
        {
            continue;
        }

        blankCount += 1;
    }

    // Equivalent to requiring zero blank articles.
    Assert.Less(blankCount, 1);
}
// Downloads the page behind every article returned by ArticleFinder.GetArticles for the
// search term "arrested" and asserts that at least one HTML node can be selected from
// each page that was downloaded successfully.
public void EmptyContentTest()
{
    ArticleFinder af = new ArticleFinder();
    // Original note: 10 pages = 150 results (pageCount is defined outside this method).
    var records = af.GetArticles("arrested", pageCount);

    List<string> htmlList = new List<string>();
    using (WebClient client = new WebClient())
    {
        foreach (var item in records)
        {
            try
            {
                htmlList.Add(client.DownloadString(item.URL));
            }
            catch (WebException)
            {
                // Best effort: a page that cannot be downloaded is skipped instead of
                // failing the whole test.
            }
        }
    }

    foreach (var html in htmlList)
    {
        HtmlDocument htmlDoc = new HtmlDocument();
        // Parse the page belonging to this iteration. The previous code parsed a
        // StringBuilder that had already been cleared, so every document was empty
        // and the assertion below could never fail.
        htmlDoc.Load(new StringReader(html));

        HtmlNode root = htmlDoc.DocumentNode;

        // SelectNodes yields null when nothing matches, so fall back step by step
        // with null checks rather than catching ArgumentNullException.
        var matches = root.SelectNodes("//div/article/p"); // everything in the article
        if (matches == null)
        {
            matches = root.SelectNodes("//div/p"); // every p tag directly under a div
        }

        IEnumerable<HtmlNode> nodes = matches != null
            ? matches.Cast<HtmlNode>()
            : root.DescendantsAndSelf().Cast<HtmlNode>(); // last resort: every node

        Assert.IsNotEmpty(nodes);
    }
}
// Queries ArticleFinder.GetArticles with the given search term and page count, converts
// each finder result through GenerateArticle, and returns the converted articles in the
// order the finder produced them.
public List<Article> GetArticleData(string searchTerm, int pages)
{
    ArticleFinder finder = new ArticleFinder();
    List<Article> articles = new List<Article>();

    // Iterate the finder results directly; each one is turned into an Article.
    foreach (var result in finder.GetArticles(searchTerm, pages))
    {
        Article converted = GenerateArticle(result);
        articles.Add(converted);
    }

    return articles;
}
// Asserts that no BingArticleID value occurs more than once among the records returned
// by ArticleFinder.GetArticles for the search term "arrested".
public void NoDuplicates()
{
    ArticleFinder finder = new ArticleFinder();
    // Original note: 10 pages = 150 results (pageCount is defined outside this method).
    var articles = finder.GetArticles("arrested", pageCount);

    // Group by ID and keep only the IDs whose group holds more than one record.
    var repeatedIds = articles
        .GroupBy(article => article.BingArticleID)
        .Where(idGroup => idGroup.Count() > 1)
        .Select(idGroup => idGroup.Key);

    // Equivalent to requiring zero repeated IDs.
    Assert.Less(repeatedIds.Count(), 1);
}