Example #1
0
        /// <summary>
        /// Crawls the Vice "tech" section: loads the aggregation page, collects the
        /// news entries found there, then visits each entry's URL to fill in its
        /// author and publication date.
        /// </summary>
        /// <remarks>
        /// This is an iterator: nothing runs until the result is enumerated, and
        /// items are produced one at a time. The browser session is shut down in a
        /// <c>finally</c> block, so it is released when enumeration completes, when
        /// the caller stops early (the enumerator is disposed), or when loading the
        /// aggregation page fails.
        /// </remarks>
        /// <returns>
        /// Every entry returned by the aggregation page. An entry whose detail page
        /// could not be processed is still yielded, with whatever fields were set
        /// before the failure.
        /// </returns>
        public IEnumerable<NewsDTO> Crawl()
        {
            string newsAggregationUrl = "https://www.vice.com/en/section/tech";

            IWebDriver driver = ChromeDriverInitializer.getChromeDriver(new ChromeOptions());

            try
            {
                Navigator.Navigate(driver, newsAggregationUrl, 20);

                var newsAggregationPage = new NewsAggregationPage(driver, By.CssSelector("a.vice-card-hed__link"));

                List<NewsDTO> news = newsAggregationPage.getNews();

                Console.WriteLine("News count {0}", news.Count);

                foreach (NewsDTO n in news)
                {
                    // Fixed pause before every article request so the site is not hit in a burst.
                    Thread.Sleep(TimeSpan.FromSeconds(5));

                    try
                    {
                        Navigator.Navigate(driver, n.Url, 10);
                        var newsPage = new NewsPage(driver);

                        n.Author = newsPage.getAuthor(By.CssSelector("div.contributor__meta > div > a "), "text");
                        n.DateOfPublication = newsPage.getDateOfPublication(
                            By.CssSelector("div.article__header__datebar__date--original"), "text");
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one broken article must not abort the crawl,
                        // but the failure is reported instead of being swallowed.
                        Console.Error.WriteLine("Failed to load news details from {0}: {1}", n.Url, ex);
                    }

                    // Must stay outside the try/catch above: C# forbids yield return
                    // inside a try block that has a catch clause.
                    yield return n;
                }
            }
            finally
            {
                // Quit (rather than Close) ends the whole WebDriver session, not just the current window.
                driver.Quit();
            }
        }
Example #2
0
        /// <summary>
        /// Crawls the itechua.com front page: loads the aggregation page, collects
        /// the news entries found there, then visits each entry's URL to fill in its
        /// author and publication date.
        /// </summary>
        /// <remarks>
        /// This is an iterator: nothing runs until the result is enumerated, and
        /// items are produced one at a time. The browser session is shut down in a
        /// <c>finally</c> block, so it is released when enumeration completes, when
        /// the caller stops early (the enumerator is disposed), or when loading the
        /// aggregation page fails.
        /// </remarks>
        /// <returns>
        /// Every entry returned by the aggregation page. An entry whose detail page
        /// could not be processed is still yielded, with whatever fields were set
        /// before the failure.
        /// </returns>
        public IEnumerable<NewsDTO> Crawl()
        {
            string newsAggregationUrl = "https://itechua.com/";

            IWebDriver driver = ChromeDriverInitializer.getChromeDriver(new ChromeOptions());

            try
            {
                Navigator.Navigate(driver, newsAggregationUrl, 20);

                var newsAggregationPage = new NewsAggregationPage(driver, By.CssSelector("h3.entry-title.td-module-title > a"));

                List<NewsDTO> news = newsAggregationPage.getNews();

                Console.WriteLine("News count {0}", news.Count);

                foreach (NewsDTO n in news)
                {
                    // Fixed pause before every article request so the site is not hit in a burst.
                    Thread.Sleep(TimeSpan.FromSeconds(5));

                    try
                    {
                        Navigator.Navigate(driver, n.Url, 5);
                        var newsPage = new NewsPage(driver);

                        n.Author = newsPage.getAuthor(By.CssSelector("div.td-author-by"), "text");
                        n.DateOfPublication = newsPage.getDateOfPublication(
                            By.CssSelector("span.td-post-date > time.entry-date.updated.td-module-date"), "text");
                    }
                    catch (Exception ex)
                    {
                        // Best effort: one broken article must not abort the crawl,
                        // but the failure is reported instead of being swallowed.
                        Console.Error.WriteLine("Failed to load news details from {0}: {1}", n.Url, ex);
                    }

                    // Must stay outside the try/catch above: C# forbids yield return
                    // inside a try block that has a catch clause.
                    yield return n;
                }
            }
            finally
            {
                // Quit (rather than Close) ends the whole WebDriver session, not just the current window.
                driver.Quit();
            }
        }