/// <summary>
/// Wraps a fixed set of article URLs in <c>Bing.NewsResult</c> objects, passes them to
/// <c>ArticleFinder.GetResults</c> and asserts that none of the returned items has
/// null, empty or whitespace-only <c>Content</c>.
/// </summary>
public void EmptyArticleContentTest()
        {
            string[] articleUrls =
            {
                "http://www.news.com.au/breaking-news/world/indon-says-2-arrested-for-myanmar-plot/story-e6frfkui-1226634619523",
                "http://www.elpasotimes.com/newupdated/ci_23175234/el-paso-man-arrested-charged-cocaine-possession",
                "http://www.twincities.com/crime/ci_23160767/st-paul-man-arrested-after-mother-shot-is",
                "http://www.news-record.com/news/1174493-91/three-arrested-in-home-invasion",
                "http://www.mercurynews.com/news/ci_23159399/teacher-arrested-allegedly-molesting-girls",
                "http://www.menafn.com/menafn/9c990483-5c0b-4bb9-87ff-8ade7ac1a9c2/BRIEF-Tulsa-man-arrested-on-drug-gun-complaints"
            };

            // One search result per URL, in the same order as the array.
            var newsResults = new List<Bing.NewsResult>();
            for (int i = 0; i < articleUrls.Length; i++)
            {
                newsResults.Add(new Bing.NewsResult { Url = articleUrls[i] });
            }

            var finder = new ArticleFinder();
            int emptyCounter = 0;
            foreach (var result in finder.GetResults(newsResults))
            {
                // IsNullOrWhiteSpace is also true for null and for the empty string,
                // so it covers every "no content" case on its own.
                if (!string.IsNullOrWhiteSpace(result.Content))
                {
                    continue;
                }

                emptyCounter++;
            }

            // Passes only when no result came back without content.
            Assert.Less(emptyCounter, 1);
        }
        /// <summary>
        /// Downloads the page behind every article returned by
        /// <c>ArticleFinder.GetArticles("arrested", pageCount)</c>, parses each downloaded
        /// page and asserts that a non-empty set of nodes can be selected from it.
        /// </summary>
        /// <remarks>
        /// Pages whose download raises a <see cref="WebException"/> are skipped.
        /// Node selection tries <c>//div/article/p</c>, then <c>//div/p</c>, and finally
        /// falls back to every node of the document.
        /// </remarks>
        public void EmptyContentTest()
        {
            ArticleFinder af = new ArticleFinder();
            //10 pages = 150 results

            var records = af.GetArticles("arrested", pageCount);
            List<string> htmlList = new List<string>();
            using (WebClient client = new WebClient())
            {
                foreach (var item in records)
                {
                    try
                    {
                        htmlList.Add(client.DownloadString(item.URL));
                    }
                    catch (WebException)
                    {
                        // Best effort: a page that cannot be downloaded is left out of
                        // the check instead of failing the whole test.
                    }
                }
            }

            foreach (var html in htmlList)
            {
                HtmlDocument htmlDoc = new HtmlDocument();

                // Parse the page that was actually downloaded. The previous version read
                // from a StringBuilder that had already been cleared, so every document
                // was parsed from an empty string.
                using (var reader = new StringReader(html))
                {
                    htmlDoc.Load(reader);
                }

                // SelectNodes yields null when nothing matches, so fall through the
                // selectors with null checks rather than catching ArgumentNullException.
                var matched = htmlDoc.DocumentNode.SelectNodes("//div/article/p") // everything in the article
                              ?? htmlDoc.DocumentNode.SelectNodes("//div/p");     // every p tag directly under a div

                // NOTE(review): the last fallback appears to include the document node
                // itself, which would make this assertion impossible to fail - confirm
                // whether that fallback is intended.
                IEnumerable<HtmlNode> nodes = matched != null
                    ? matched.Cast<HtmlNode>()
                    : htmlDoc.DocumentNode.DescendantsAndSelf(); // everything

                Assert.IsNotEmpty(nodes);
            }
        }
        // Example #3
        // 0
        /// <summary>
        /// Looks up articles for a search term through <c>ArticleFinder.GetArticles</c> and
        /// converts every result with <c>GenerateArticle</c>.
        /// </summary>
        /// <param name="searchTerm">Term passed to the article finder.</param>
        /// <param name="pages">Page count passed to the article finder.</param>
        /// <returns>
        /// One <c>Article</c> per finder result, in the order the finder returned them.
        /// </returns>
        public List<Article> GetArticleData(string searchTerm, int pages)
        {
            // Fetch the raw results from the article finder.
            var foundItems = new ArticleFinder().GetArticles(searchTerm, pages);

            // Convert each raw result into an Article.
            var articles = new List<Article>();
            foreach (var found in foundItems)
            {
                Article converted = GenerateArticle(found);
                articles.Add(converted);
            }

            return articles;
        }
        /// <summary>
        /// Asserts that no <c>BingArticleID</c> occurs more than once among the articles
        /// returned by <c>ArticleFinder.GetArticles("arrested", pageCount)</c>.
        /// </summary>
        public void NoDuplicates()
        {
            // Original note: 10 pages = 150 results (pageCount is defined outside this method).
            var finder = new ArticleFinder();
            var articles = finder.GetArticles("arrested", pageCount);

            // Keys of every ID group that holds more than one article.
            var repeatedIds = articles
                .GroupBy(article => article.BingArticleID)
                .Where(idGroup => idGroup.Count() > 1)
                .Select(idGroup => idGroup.Key);

            Assert.Less(repeatedIds.Count(), 1);
        }