Example #1
0
        /// <summary>
        /// Scrapes article links from consecutive listing pages and stores those that are not
        /// already present in the Articles table as not-yet-downloaded entries.
        /// </summary>
        /// <remarks>
        /// Pages <paramref name="startPageNumber"/> .. <paramref name="startPageNumber"/> +
        /// <paramref name="numberOfPagesToParse"/> - 1 are visited. For a start page of 1 this is the
        /// same range the previous implementation visited; for any other start page the previous loop
        /// bound (<c>i &lt;= numberOfPagesToParse</c>) visited too few pages or none at all.
        /// Changes are saved after each page.
        /// </remarks>
        /// <param name="numberOfPagesToParse">How many listing pages to visit.</param>
        /// <param name="startPageNumber">Number of the first listing page to visit.</param>
        public static void GetArticlesUrlsAndSaveToDB(int numberOfPagesToParse, int startPageNumber)
        {
            const string articleLinksXPath = "//div[@class='news-all']//div[@class='results__entry ' or @class='results__entry results__entry--pudelekx']//h3[@class='entry__title']//a";
            const string requiredUrlPrefix = "https://www.pudelek.pl";

            using (var context = new ArticlesContext())
            {
                // Load the known URLs once instead of issuing one database query per scraped link.
                // The set also grows as links are accepted, so a link repeated within this run is stored once.
                // NOTE(review): membership is ordinal (case-sensitive); the previous per-link query used the database collation.
                var knownUrls = new System.Collections.Generic.HashSet<string>(context.Articles.Select(a => a.Url));
                var web       = new HtmlWeb();
                int lastPage  = startPageNumber + numberOfPagesToParse - 1;

                for (int page = startPageNumber; page <= lastPage; page++)
                {
                    var doc       = web.Load(base_url + page + "/");
                    var linkNodes = doc.DocumentNode.SelectNodes(articleLinksXPath);

                    // HtmlAgilityPack returns null (not an empty collection) when nothing matches
                    if (linkNodes == null)
                    {
                        continue;
                    }

                    var newArticles = linkNodes
                                      .Select(node => node.GetAttributeValue("href", null))
                                      .Where(href => href != null
                                             && href.StartsWith(requiredUrlPrefix, StringComparison.OrdinalIgnoreCase)
                                             && knownUrls.Add(href)) // Add returns false for an already known URL
                                      .Select(href => new Article()
                    {
                        Url          = href,
                        IsDownloaded = false,
                        InsertedAt   = DateTime.UtcNow
                    })
                                      .ToList();

                    context.Articles.AddRange(newArticles);
                    context.SaveChanges();
                }
            }
        }
        /// <summary>
        /// Downloads the content of up to <paramref name="numberOfArticlesToDownload"/> articles
        /// that are not yet marked as downloaded.
        /// </summary>
        /// <remarks>
        /// Only the "artykul" type is actually downloaded; "video" and "x" are recognised but skipped
        /// because their downloaders are not implemented.
        /// NOTE(review): get_articles_urls_and_save_to_db stores unclassified links with type "inny",
        /// which makes this method throw when it reaches one — confirm whether such rows should be skipped instead.
        /// </remarks>
        /// <param name="numberOfArticlesToDownload">Maximum number of pending articles to process.</param>
        /// <exception cref="InvalidOperationException">
        /// An article has a type other than "artykul", "video" or "x".
        /// </exception>
        public void get_articles_content_using_selenium_and_save_to_db(int numberOfArticlesToDownload)
        {
            using (var context = new ArticlesContext())
            {
                // Materialise the batch first: the per-article downloader opens its own context and
                // updates these same rows, which should not happen while this query is still being read.
                var articles_to_download = context.Articles
                                           .Where(x => x.article_is_downloaded == false)
                                           .Take(numberOfArticlesToDownload)
                                           .ToList();

                foreach (var article in articles_to_download)
                {
                    switch (article.article_type)
                    {
                    case "artykul":
                        get_single_article_content_basic_article_using_selenium_and_save_to_db(article.article_url, article.article_id);
                        break;

                    case "video":
                    case "x":
                        // downloaders for these types are not implemented yet
                        break;

                    default:
                        throw new InvalidOperationException("Incorrect article type: " + article.article_type);
                    }
                }
            }
        }
        /// <summary>
        /// Creates the window, uses a new <see cref="ArticlesContext"/> as its data context and applies
        /// the window state held in <c>Core.Temp.Current</c> unless that state is Minimized.
        /// </summary>
        public Articles()
        {
            DataContext = new ArticlesContext();
            InitializeComponent();

            var storedState = Core.Temp.Current;

            // A stored Minimized state is deliberately not applied
            if (storedState == System.Windows.WindowState.Minimized)
            {
                return;
            }

            this.WindowState = storedState;
        }
Example #4
0
        /// <summary>
        /// Entry point: reads the "Default" connection string from environment-variable configuration
        /// (falling back to <c>DefaultConnection</c>) and runs <c>Database.MigrateAsync()</c> on a new context.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static async Task Main(string[] args)
        {
            var settings = new ConfigurationBuilder()
                           .AddEnvironmentVariables()
                           .Build();

            // Use the built-in default when no "Default" connection string is configured
            var connectionString = settings.GetConnectionString("Default") ?? DefaultConnection;

            await using var db = new ArticlesContext(GetOptions(connectionString));
            await db.Database.MigrateAsync();
        }
Example #5
0
        /// <summary>
        /// Runs the single-article downloader for up to <paramref name="numberOfArticlesToDownload"/>
        /// articles whose IsDownloaded flag is false.
        /// </summary>
        /// <param name="numberOfArticlesToDownload">Maximum number of pending articles to process.</param>
        /// <param name="numberOfPagesForCommentsToDownload">Passed through unchanged to the single-article downloader.</param>
        public static void GetArticlesContentUsingSeleniumAndSaveToDB(int numberOfArticlesToDownload, int numberOfPagesForCommentsToDownload)
        {
            using (var db = new ArticlesContext())
            {
                var pending = db.Articles
                              .Where(entry => entry.IsDownloaded == false)
                              .Take(numberOfArticlesToDownload);

                foreach (var entry in pending)
                {
                    GetSingleArticleContentUsingSeleniumAndSaveToDB(entry.Url, numberOfPagesForCommentsToDownload);
                }
            }
        }
        /// <summary>
        /// Looks up an article by id, explicitly loads its Author and Reviews, and maps it to a view model.
        /// </summary>
        /// <param name="id">Identifier passed to <c>GetById</c>.</param>
        /// <returns>The mapped view model, or <c>null</c> when <c>GetById</c> returns null.</returns>
        public ArticleViewModel GetArticleViewModel(int id)
        {
            var found = this.GetById(id);

            if (found == null)
            {
                return(null);
            }

            // Explicit loading of the navigations on the entity
            var tracked = ArticlesContext.Entry(found);
            tracked.Reference(a => a.Author).Load();
            tracked.Collection(a => a.Reviews).Load();

            return(found.MapToViewModel());
        }
Example #7
0
        /// <summary>
        /// Looks up an author by id, explicitly loads the author's Articles and the Reviews of those
        /// articles, and maps the author to a view model.
        /// </summary>
        /// <param name="id">Identifier passed to <c>GetById</c>.</param>
        /// <returns>The mapped view model, or <c>null</c> when <c>GetById</c> returns null.</returns>
        public AuthorViewModel GetAuthorViewModel(int id)
        {
            Author found = this.GetById(id);

            if (found == null)
            {
                return(null);
            }

            var articlesNavigation = ArticlesContext.Entry(found).Collection(a => a.Articles);

            // First the articles themselves, then the reviews reachable through them
            articlesNavigation.Load();
            articlesNavigation.Query().Select(q => q.Reviews).Load();

            return(found.MapToViewModel());
        }
        /// <summary>
        /// Adding one generated article for an added publisher leaves exactly one publisher in the
        /// context and makes the controller return exactly one article.
        /// </summary>
        /// <remarks>
        /// Returns a Task instead of being <c>async void</c> so the test runner can await the test and
        /// observe assertion failures and exceptions reliably.
        /// </remarks>
        [Fact]
        public async System.Threading.Tasks.Task TestSuccesfullAddArticles()
        {
            using (var articleContext = new ArticlesContext(options))
            {
                // Arrange
                var controller       = new ArticlesController(articleContext);
                var articleFactory   = new ArticleFactory();
                var publisherFactory = new PublisherFactory();
                var pub = publisherFactory.GetDataObject(0, 0).Publisher;
                await articleContext.Publishers.AddAsync(pub);

                // Act
                await controller.AddArticles(articleFactory.GenerateArticle(pub.Id));
                var articles = await controller.GetArticles();

                // Assert — Assert.Equal reports expected/actual values on failure
                Assert.Equal(1, await articleContext.Publishers.CountAsync());
                Assert.Equal(1, articles.Value.Count);
            }
        }
Example #9
0
        /// <summary>
        /// Posts the search text as JSON to the web service address exposed by
        /// <c>ArticlesContext.SemanticNetworkWS</c> and deserializes the response body into a
        /// <see cref="SemanticNetwork"/>.
        /// </summary>
        /// <remarks>The HTTP call is made synchronously by blocking on the asynchronous client.</remarks>
        /// <param name="searchText">Text placed in the request's <c>search</c> field.</param>
        /// <returns>The deserialized response.</returns>
        public SemanticNetwork GetSemanticNetwork(string searchText)
        {
            SemanticNetwork network = new SemanticNetwork();

            Search request = new Search();
            request.search = searchText;

            // The context is resolved from the request's service provider
            ArticlesContext articlesContext = HttpContext.RequestServices.GetService(typeof(philsearch.Models.ArticlesContext)) as ArticlesContext;
            string          endpoint        = articlesContext.SemanticNetworkWS;

            using (HttpClient http = new HttpClient())
            {
                http.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

                string json    = JsonConvert.SerializeObject(request);
                var    payload = new StringContent(json, System.Text.Encoding.UTF8, "application/json");

                HttpResponseMessage reply = http.PostAsync(endpoint, payload).Result;
                string              body  = reply.Content.ReadAsStringAsync().Result;

                network = JsonConvert.DeserializeObject <SemanticNetwork>(body);
            }

            return(network);
        }
Example #10
0
        /// <summary>
        /// Searches for articles similar to the given text and assembles the full result:
        /// similar-article list, article details, categories, features, references and concept network.
        /// </summary>
        /// <remarks>
        /// Every character other than ASCII letters, digits and spaces is replaced with a space before
        /// the text is posted as JSON to the address exposed by <c>ArticlesContext.SearchArticlesWS</c>.
        /// Article details are then fetched one by one through <c>GetArticleInfo</c>.
        /// </remarks>
        /// <param name="searchText">Raw search text.</param>
        /// <returns>The populated <see cref="Articles"/> result.</returns>
        private Articles SearchArticles(string searchText)
        {
            Articles found = new Articles();

            // Keep only letters, digits and spaces
            String cleanText = new Regex("[^a-zA-Z0-9 ]").Replace(searchText, " ");

            Search request = new Search();
            request.search = cleanText;

            ArticlesContext articlesContext = HttpContext.RequestServices.GetService(typeof(philsearch.Models.ArticlesContext)) as ArticlesContext;
            string          endpoint        = articlesContext.SearchArticlesWS;

            IEnumerable <SimilarArticles> similar;

            using (HttpClient http = new HttpClient())
            {
                http.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

                string json    = JsonConvert.SerializeObject(request);
                var    payload = new StringContent(json, System.Text.Encoding.UTF8, "application/json");

                HttpResponseMessage reply = http.PostAsync(endpoint, payload).Result;
                string              body  = reply.Content.ReadAsStringAsync().Result;

                similar = JsonConvert.DeserializeObject <IEnumerable <SimilarArticles> >(body);
            }

            found.SimilarArticlesList = similar;

            // Resolve the details of every similar article individually
            List <Article> details = new List <Article>();

            foreach (SimilarArticles hit in similar)
            {
                Article info = articlesContext.GetArticleInfo(hit.Art_Id);
                details.Add(info);
            }

            found.ArticlesList = details;
            found.Categories   = GetCategories(found.ArticlesList);
            found.Features     = GetFeatures(found.ArticlesList);
            found.References   = GetReferences(found.ArticlesList);

            /*
             * Disabled alternative: build one quoted, comma-separated id filter and fetch all articles in a single call.
             *
             * string filter = "";
             * foreach (SimilarArticles a in artList)
             * {
             *  if (filter.Length > 0) { filter = filter + ","; }
             *  filter = filter + "'" + a.Art_Id + "'";
             *  Article test = context.GetArticleInfo(a.Art_Id);
             * }
             *
             * List<Article> articles = context.GetArticles(filter);
             *
             * result.ArticlesList = articles;
             *
             * result.Categories = GetCategories(result.ArticlesList);
             * result.Features = GetFeatures(result.ArticlesList);
             * result.References = GetReferences(result.ArticlesList);
             */

            // The concept network is built from the cleaned text combined with the extracted features
            String concepts = GetNetworkConcepts(cleanText, found.Features);
            found.ConceptsNetwork = GetSemanticNetwork(concepts);

            return(found);
        }
Example #11
0
 /// <summary>
 /// Creates the controller with a new <see cref="UnitOfWork"/> built on the supplied context.
 /// </summary>
 /// <param name="context">Context handed to the unit of work.</param>
 public ReviewsController(ArticlesContext context) => unitOfWork = new UnitOfWork(context);
Example #12
0
        // Only the constructor assigns this field.
        private readonly ArticlesContext context;

        /// <summary>
        /// Creates the service on top of the supplied context.
        /// </summary>
        /// <param name="context">Context stored for later use by the service.</param>
        public ItemService(ArticlesContext context) => this.context = context;
Example #13
0
 /// <summary>
 /// Creates the service on top of the supplied context.
 /// </summary>
 /// <param name="context">Context stored for later use by the service.</param>
 public CategoryService(ArticlesContext context) => this.context = context;
Example #14
0
 /// <summary>
 /// Creates the controller: stores the supplied context and starts with an empty
 /// <see cref="ArticlesPublishersDTO"/>.
 /// </summary>
 /// <param name="context">Context stored for later use by the controller.</param>
 public DataController(ArticlesContext context)
 {
     // The two assignments are independent of each other
     _articlesPublishersDto = new ArticlesPublishersDTO();
     _context               = context;
 }
        /// <summary>
        /// Opens an article page in the shared Selenium browser, scrapes its details (title, author,
        /// creation time, counters, text), tags, comments and in-text links, and stores everything on
        /// the Article row whose URL equals <paramref name="url"/>.
        /// </summary>
        /// <remarks>
        /// Every scraping step is best-effort: a failure is written to the console and the affected
        /// value keeps its default (null, 0, false or an empty list) so the remaining steps still run.
        /// A failure while saving is likewise only written to the console.
        /// </remarks>
        /// <param name="url">Address of the article page; also used to find the row to update.</param>
        /// <param name="current_article_id">Identifier stamped on the scraped tags, comments and related articles.</param>
        public void get_single_article_content_basic_article_using_selenium_and_save_to_db(string url, int current_article_id)
        {
            #region prepare variables
            string    article_title                 = null;
            DateTime? article_created_at            = null;
            string    article_author                = null;
            string    article_content               = null;
            int?      article_number_of_comments    = null;
            int?      article_number_of_likes       = null;
            int?      article_number_of_images      = null;
            int?      article_number_of_bolded_text = null;
            bool?     article_has_slideshow         = false;
            bool?     article_has_video             = null; // never detected here; saved as null
            var       tags            = new List <Tag>();
            var       relatedArticles = new List <RelatedArticle>();
            var       comments        = new List <Comment>();
            #endregion


            // go to the article url; assigning Url loads the page, so no separate Navigate() call is needed
            browser.Url = url;


            #region accept website policy
            try
            {
                // The consent button is found by its caption; First throws (and is logged) when it is absent
                var accept_site_policy = browser
                                         .FindElements(By.XPath("//button"))
                                         .First(x => x.Text.IndexOf("przechodzę do serwisu", StringComparison.OrdinalIgnoreCase) >= 0);

                accept_site_policy.Click();
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region get title
            try
            {
                var _article_title = browser.FindElement(By.XPath("//div[@id='item']//h1[@class='item-title']"));
                article_title = HelperMethods.CleanText(_article_title.GetAttribute("innerText").ToString());
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region get author
            try
            {
                var _article_author = browser.FindElement(By.XPath("//div[@class='item-details']//a"));
                article_author = HelperMethods.CleanText(_article_author.GetAttribute("innerText").ToString());
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region get creation datetime
            try
            {
                var _article_created_at = browser.FindElement(By.XPath("//head//meta[@property='og:published_at']"));
                article_created_at = Convert.ToDateTime(_article_created_at.GetAttribute("content").ToString());
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region get number of comments
            try
            {
                var _article_number_of_comments_candidate = browser.FindElement(By.XPath("//head//meta[@property='og:comments_count']"));
                var _article_number_of_comments           = _article_number_of_comments_candidate.GetAttribute("content");
                article_number_of_comments = Convert.ToInt32(HelperMethods.CleanText(_article_number_of_comments.ToString()));
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region get number of facebook likes
            try
            {
                // find iframe with given name
                var detailFrame = browser.FindElement(By.XPath("//div[@class='single-entry__header']//iframe[@title='fb:like Facebook Social Plugin']"));

                // open selected iframe
                browser.SwitchTo().Frame(detailFrame);

                try
                {
                    // the iframe code in this case is a bit "encrypted":
                    // read the text of every span, try to parse each one
                    // and take the first that is a number
                    article_number_of_likes = browser.FindElements(By.XPath("//span"))
                                              .Select(x =>
                    {
                        int value;
                        bool success = int.TryParse(x.Text, out value);
                        return(new { value, success });
                    })
                                              .Where(y => y.success)
                                              .Select(z => z.value)
                                              .First();
                }
                finally
                {
                    // Always return to the main document: if no span parses, First throws, and without
                    // this every later lookup in this method would run inside the iframe.
                    browser.SwitchTo().DefaultContent();
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region get number of bolded texts
            try
            {
                // NOTE(review): this stores the length of the element's ToString() text, which is not a
                // count of bolded fragments. Left unchanged because the intended selector is not evident
                // from this code — confirm what should be counted.
                var _article_number_of_bolded_text = browser.FindElement(By.XPath("//div[@class='single-entry-text bbtext' or @class='single-article-text' or @class='article']//p"));
                article_number_of_bolded_text = _article_number_of_bolded_text.ToString().Length;
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region get number of images

            #region get number of images (article content)
            int _number_of_images = 0;
            try
            {
                // Count the matching elements. Converting a single element's ToString() to a number
                // (as was done before) always failed, so the count was always 0.
                _number_of_images = browser.FindElements(By.XPath("//div[@class='image-container']//img")).Count;
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion

            #region get number of images (slideshow)
            int _images_in_slideshow = 0;
            try
            {
                // The counter text is split on "/" and the part after the slash is taken as the total
                var _slideshow = browser.FindElement(By.XPath("//div[@data-utm_campaign='slideshow']//span[@class='slideshow-current']"));
                _images_in_slideshow  = Convert.ToInt32(_slideshow.GetAttribute("innerText").ToString().Split("/")[1]);
                article_has_slideshow = true;
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion

            // total number of images in website
            article_number_of_images = _number_of_images + _images_in_slideshow;
            #endregion


            #region get article text
            try
            {
                var all_article_texts = browser.FindElements(By.XPath("//div[@class='item-content']//h2[@class='item-description']"));
                var _article_content  = "";

                foreach (var text in all_article_texts)
                {
                    var current_text = text.GetAttribute("innerText").ToString();
                    if (current_text.Length > 0)
                    {
                        _article_content += current_text + " ";
                    }
                }

                // content stays null when no text was found
                if (_article_content.Length > 0)
                {
                    article_content = HelperMethods.CleanText(_article_content);
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region get tags
            try
            {
                var _tags = browser.FindElements(By.XPath("//div[@class='item-content']//div[@class='tags']//a"));

                foreach (var _tag in _tags)
                {
                    var tag = new Tag()
                    {
                        article_id = current_article_id,
                        tag_text   = HelperMethods.CleanText(_tag.GetAttribute("innerText")),
                        tag_url    = HelperMethods.CleanText(_tag.GetAttribute("href"))
                    };
                    tags.Add(tag);
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region get comments
            try
            {
                var _comments = browser.FindElements(By.XPath("//div[@class='comments-top']//div[@class='comment']"));

                foreach (var _comment in _comments)
                {
                    #region get single comment
                    // One broken comment is logged and skipped; the others are still collected
                    try
                    {
                        #region get basic comment info
                        var _comment_author       = _comment.FindElement(By.XPath(".//span[@class='comment-author']"));
                        var _comment_created_at   = _comment.FindElement(By.XPath(".//span[@class='comment-date']"));
                        var comment_message       = HelperMethods.CleanText(_comment.GetAttribute("innerText"));
                        var _comment_thumbs_up    = _comment.FindElement(By.XPath(".//span[@class='yesCount']"));
                        var _comment_thumbs_down  = _comment.FindElement(By.XPath(".//span[@class='noCount']"));
                        var _comment_is_highlited = true;
                        #endregion


                        #region get comment message
                        // The message starts as the whole comment text; the info line, the options
                        // line and any quote are then removed from it, each only if present.

                        // remove the comment info from the message
                        try
                        {
                            var _comment_info = _comment.FindElement(By.XPath(".//div[@class='comment-info']"));
                            var comment_info  = HelperMethods.CleanText(_comment_info.GetAttribute("innerText").ToString());
                            comment_message = HelperMethods.CleanText(comment_message.Replace(comment_info, ""));
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }


                        // remove the comment options from the message
                        try
                        {
                            var _comment_options = _comment.FindElement(By.XPath(".//div[@class='comment-options']"));
                            var comment_options  = HelperMethods.CleanText(_comment_options.GetAttribute("innerText")).ToString();
                            comment_message = HelperMethods.CleanText(comment_message.Replace(comment_options, ""));
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }


                        // remove any quotes from comment message
                        try
                        {
                            var _comment_message_quote = _comment.FindElement(By.XPath(".//div[@class='quote']"));
                            var comment_message_quote  = HelperMethods.CleanText(_comment_message_quote.GetAttribute("innerText"));
                            comment_message = HelperMethods.CleanText(comment_message.Replace(comment_message_quote, ""));
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }


                        #endregion


                        #region add comment to list
                        var comment = new Comment()
                        {
                            article_id           = current_article_id,
                            comment_author       = HelperMethods.CleanText(_comment_author.GetAttribute("innerText")),
                            comment_created_at   = Convert.ToDateTime(HelperMethods.CleanText(_comment_created_at.GetAttribute("innerText"))),
                            comment_message      = comment_message,
                            comment_thumbs_up    = Convert.ToInt32(HelperMethods.CleanText(_comment_thumbs_up.GetAttribute("innerText"))),
                            comment_thumbs_down  = Convert.ToInt32(HelperMethods.CleanText(_comment_thumbs_down.GetAttribute("innerText"))),
                            comment_is_highlited = _comment_is_highlited
                        };

                        comments.Add(comment);
                        #endregion
                    }
                    catch (Exception exc)
                    {
                        Console.WriteLine(exc.Message);
                    }
                    #endregion
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }


            #endregion


            #region get related articles list
            try
            {
                // Every link inside the article text is recorded as a related-article candidate
                var _related_article_candidates = browser.FindElements(By.XPath("//div[@class='single-entry-text bbtext']//p//a"));
                foreach (var _related_article_candidate in _related_article_candidates)
                {
                    var relatedArticle = new RelatedArticle()
                    {
                        article_id           = current_article_id,
                        related_article_text = HelperMethods.CleanText(_related_article_candidate.GetAttribute("innerText")),
                        related_article_url  = _related_article_candidate.GetAttribute("href")
                    };
                    relatedArticles.Add(relatedArticle);
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion


            #region save article details to database
            try
            {
                // save basic article details to database
                using (var context = new ArticlesContext())
                {
                    var current_article = context.Articles.First(x => x.article_url == url);

                    current_article.article_title                 = article_title;
                    current_article.article_created_at            = article_created_at;
                    current_article.article_author                = article_author;
                    current_article.article_content               = article_content;
                    current_article.article_number_of_comments    = article_number_of_comments;
                    current_article.article_number_of_likes       = article_number_of_likes;
                    current_article.article_number_of_images      = article_number_of_images;
                    current_article.article_has_slideshow         = article_has_slideshow;
                    current_article.article_has_video             = article_has_video;
                    current_article.article_number_of_bolded_text = article_number_of_bolded_text;
                    current_article.article_is_downloaded         = true;
                    current_article.article_updated_at            = DateTime.Now;

                    // Only add children not already attached to the article. The filters are lazy, so
                    // an item added a moment earlier also suppresses a later duplicate in the same batch.
                    current_article.Tags.AddRange(tags.Where(x => !current_article.Tags.Any(y => y.tag_text == x.tag_text)));
                    current_article.RelatedArticles.AddRange(relatedArticles.Where(x => !current_article.RelatedArticles.Any(y => y.related_article_text == x.related_article_text)));
                    current_article.Comments.AddRange(comments.Where(x => !current_article.Comments.Any(y => y.comment_author == x.comment_author && y.comment_message == x.comment_message && x.comment_created_at == y.comment_created_at)));

                    context.SaveChanges();
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion
        }
 /// <summary>
 /// Creates the repository; the supplied context is passed straight to the base class.
 /// </summary>
 /// <param name="context">Context forwarded to the base constructor.</param>
 public ArticlesRepository(ArticlesContext context) : base(context) { }
Example #17
0
 /// <summary>
 /// Creates the repository on top of the supplied context.
 /// </summary>
 /// <param name="context">Context stored for later use by the repository.</param>
 public ArticlesRepository(ArticlesContext context)
 {
     _dbContext = context;
 }
 /// <summary>
 /// Creates the repository on top of the supplied context.
 /// </summary>
 /// <param name="context">Context stored for later use by the repository.</param>
 public ArticlesRepository(ArticlesContext context) => _context = context;
        /// <summary>
        /// Visits consecutive listing pages in the shared Selenium browser, classifies every article
        /// link found by its URL prefix and stores it as a not-yet-downloaded Article.
        /// </summary>
        /// <remarks>
        /// Pages <paramref name="al_start_page"/> .. <paramref name="al_start_page"/> +
        /// <paramref name="al_no_pages"/> - 1 are visited. Failures on a page or on a single article
        /// are written to the console and do not stop the run.
        /// The browser is disposed when the method finishes, even after an unexpected exception.
        /// </remarks>
        /// <param name="al_no_pages">Number of listing pages to visit; values below 1 visit nothing.</param>
        /// <param name="al_start_page">Number of the first listing page to visit.</param>
        public void get_articles_urls_and_save_to_db(int al_no_pages, int al_start_page)
        {
            try
            {
                // Enumerable.Range takes (start, count) — the count is the number of pages, not the last page number
                foreach (int i in Enumerable.Range(al_start_page, Math.Max(0, al_no_pages)))
                {
                    #region get article urls
                    try
                    {
                        string current_url = base_url + i.ToString() + "/";

                        // assigning Url loads the page, so no separate Navigate() call is needed
                        browser.Url = current_url;

                        var _articles = browser.FindElements(By.XPath("//div[@class='news-all']//div[@class='results__entry ' or @class='results__entry results__entry--pudelekx']//h3[@class='entry__title']//a"));
                        foreach (var article in _articles)
                        {
                            string current_article_url = article.GetAttribute("href");

                            // A link without an address cannot be stored; skip it instead of
                            // failing and losing the remaining links of this page
                            if (string.IsNullOrEmpty(current_article_url))
                            {
                                continue;
                            }

                            // "inny" is the type for links matching none of the known prefixes
                            string article_type = "inny";

                            if (current_article_url.StartsWith("https://www.pudelek.pl/artykul/", StringComparison.OrdinalIgnoreCase))
                            {
                                article_type = "artykul";
                            }
                            else if (current_article_url.StartsWith("https://tv.pudelek.pl/video/", StringComparison.OrdinalIgnoreCase))
                            {
                                article_type = "video";
                            }
                            else if (current_article_url.StartsWith("http://pudelekx.pl/", StringComparison.OrdinalIgnoreCase))
                            {
                                article_type = "x";
                            }


                            #region save article details to database
                            // One context per article: a failed save is logged and does not affect the next article
                            try
                            {
                                using (var context = new ArticlesContext())
                                {
                                    Article current_article = new Article();
                                    current_article.article_url           = current_article_url;
                                    current_article.article_type          = article_type;
                                    current_article.article_is_downloaded = false;
                                    current_article.article_updated_at    = DateTime.Now;
                                    context.Add(current_article);
                                    context.SaveChanges();
                                }
                            }
                            catch (Exception exc)
                            {
                                Console.WriteLine(exc.Message);
                            }
                            #endregion
                        }
                    }
                    catch (Exception exc)
                    {
                        Console.WriteLine(exc.Message);
                    }
                    #endregion
                }
            }
            finally
            {
                browser.Dispose();
            }
        }
Example #20
0
 /// <summary>
 /// Creates the data-access object on top of the supplied context.
 /// </summary>
 /// <param name="dependecy">Context stored for later use.</param>
 public ArticlesDAL(ArticlesContext dependecy) => this._dependency = dependecy;
 /// <summary>
 /// Creates the repository on top of the supplied context.
 /// </summary>
 /// <param name="context">Context stored for later use by the repository.</param>
 public PublisherRepository(ArticlesContext context)
 {
     _dbContext = context;
 }
 /// <summary>
 /// Creates the controller on top of the supplied context.
 /// </summary>
 /// <param name="context">Context stored for later use by the controller.</param>
 public ArticlesController(ArticlesContext context)
 {
     _context = context;
 }
Example #23
0
 /// <summary>
 /// Creates the service with its own new <see cref="ArticlesContext"/>.
 /// </summary>
 public ArticlesServices() => db = new ArticlesContext();
Example #24
0
        public static void GetSingleArticleContentUsingSeleniumAndSaveToDB(string url, int numberOfPagesForCommentsToDownload)
        {
            string                title            = null;
            DateTime?             createdAt        = null;
            string                author           = null;
            string                content          = null;
            int?                  numberOfComments = null;
            int?                  numberOfLikes    = null;
            List <Tag>            tags             = new List <Tag>();
            List <RelatedArticle> relatedArticles  = new List <RelatedArticle>();
            List <Comment>        comments         = new List <Comment>();



            // initialize
            var chromeOptions = new ChromeOptions();

            chromeOptions.AddArguments("headless");
            //chromeOptions.SetLoggingPreference(LogType.Browser, LogLevel.Severe);
            IWebDriver driver = new ChromeDriver(chrome_driver_path, chromeOptions);

            driver.Url = url;



            #region accept policy !!! (show)

            // accept policy
            try
            {
                var accept_site_policy = driver
                                         .FindElements(By.XPath("//button"))
                                         .Where(x => x.Text.ToLower().Contains("przechodzÄ™ do serwisu"))
                                         .First()
                ;

                accept_site_policy.Click();
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }

            #endregion



            #region title !!! (show)

            try
            {
                var _title = driver.FindElements(By.XPath("//div[@class='single-entry__header' or @class='single-article-header']//h1"))
                             .First()
                             .Text;

                title = HelperMethods.CleanText(_title);
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }

            #endregion



            #region created at

            try
            {
                var _createdAt = driver.FindElements(By.XPath("//div[@class='single-entry__header' or @class='single-article-header']//span[@class='time']"))
                                 .First()
                                 .GetAttribute("datetime");

                createdAt = Convert.ToDateTime(_createdAt);
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }

            #endregion



            #region author !!! (show)

            try
            {
                // see, sometimes text will not return text, and we need to use innertext
                var _author = driver.FindElements(By.XPath("//div[@class='single-entry__footer' or @class='slideshow__footer']//span[@class='author']"))
                              .First()
                              .GetAttribute("innerText");

                author = HelperMethods.CleanText(_author);
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }

            #endregion



            #region get facebook likes number !!! (show)

            try
            {
                // find iframe with given name
                var detailFrame = driver.FindElement(By.XPath("//div[@class='single-entry__header']//iframe[@title='fb:like Facebook Social Plugin']"));

                // open selected iframe
                driver.SwitchTo().Frame(detailFrame);

                // the iframe code in this case is a bit "encrypted"
                // get all texts from all spans
                // try to parse each element
                // get the first one that contain number
                numberOfLikes = driver.FindElements(By.XPath("//span"))
                                .Select(x =>
                {
                    int value;
                    string old_value = x.Text;
                    bool success     = int.TryParse(x.Text, out value);
                    return(new { value, old_value, success });
                })
                                .Where(y => y.success == true)
                                .Select(z => z.value)
                                .First()
                ;

                // switch to main window and main frame
                driver.SwitchTo().DefaultContent();
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion



            #region number of comments

            try
            {
                numberOfComments = driver.FindElements(By.XPath("//div[@class='single-entry__footer' or @class='slideshow__footer']//a[@class='comments-link']"))
                                   .First()
                                   .GetAttribute("innerText")
                                   .Split(' ')
                                   .Select(x =>
                {
                    int value;
                    string old_value = x;
                    bool success     = int.TryParse(x, out value);
                    return(new { value, old_value, success });
                })
                                   .Where(y => y.success == true)
                                   .Select(z => z.value)
                                   .First();
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }


            #endregion



            #region tags
            try
            {
                var _tags = driver.FindElements(By.XPath("//div[@class='single-entry__header']//span[@class='inline-tags']//a"));

                foreach (var _tag in _tags)
                {
                    Tag tag = new Tag();
                    tag.Text = HelperMethods.CleanText(_tag.Text);
                    tag.Url  = _tag.GetAttribute("href");
                    tags.Add(tag);
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            #endregion



            #region related articles

            try
            {
                var _related_article_candidates = driver.FindElements(By.XPath("//div[@class='single-entry-text bbtext']//p//a"));

                foreach (var _related_article_candidate in _related_article_candidates)
                {
                    RelatedArticle relatedArticle = new RelatedArticle();
                    relatedArticle.Text = HelperMethods.CleanText(_related_article_candidate.Text);
                    relatedArticle.Url  = _related_article_candidate.GetAttribute("href");
                    relatedArticles.Add(relatedArticle);
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }

            #endregion



            #region article text

            try
            {
                var all_article_texts = driver.FindElements(By.XPath("//div[@class='single-entry-text bbtext' or @class='single-article-text']//p"));

                string _content = "";

                foreach (var text in all_article_texts)
                {
                    string current_text = text.GetAttribute("innerText");

                    if (current_text.Length > 0)
                    {
                        _content += current_text + " ";
                    }
                }

                if (_content.Length > 0)
                {
                    content = HelperMethods.CleanText(_content);
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }

            #endregion



            #region comments

            // parse comments
            int            comments_website_subpage_number  = 1;
            List <Comment> current_website_popular_comments = new List <Comment>();
            while (comments_website_subpage_number <= numberOfPagesForCommentsToDownload)
            {
                string current_comments_website_subpage = url + "/" + comments_website_subpage_number.ToString() + "/#comments";
                driver.Url = current_comments_website_subpage;
                var current_subpage_comments = driver.FindElements(By.XPath("//div[@class='comment comment-odd' or @class='comment comment-even']"));

                // ger the popular comments only onces
                if (comments_website_subpage_number == 1)
                {
                    // get higlither / most popular comments
                    var current_subpage_popular_comments = driver.FindElements(By.XPath("//div[@class='comments-popular']//div[@class='comment']"));
                    foreach (var current_popular_comment in current_subpage_popular_comments)
                    {
                        string popularCommentAuthor  = null;
                        string popularCommentMessage = null;

                        try
                        {
                            popularCommentAuthor = HelperMethods.CleanText(current_popular_comment.FindElements(By.XPath(".//span[@class='comment-author']")).First().Text);
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }

                        try
                        {
                            popularCommentMessage = HelperMethods.CleanText(current_popular_comment.FindElements(By.XPath(".//div[@class='comment-text']")).First().Text);
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }


                        var popular_comment = new Comment();
                        popular_comment.Author  = popularCommentAuthor;
                        popular_comment.Message = popularCommentMessage;
                        current_website_popular_comments.Add(popular_comment);
                    }
                }

                // break if there is no comments
                if (current_subpage_comments.Count() == 0)
                {
                    break;
                }
                else
                {
                    foreach (var current_subpage_comment in current_subpage_comments)
                    {
                        string   commentAuthor      = null;
                        DateTime?commentCreatedAt   = null;
                        bool?    commentIsHighlited = null;
                        string   commentMessage     = null;
                        int?     commentThumbsDown  = null;
                        int?     commentThumbsUp    = null;

                        try
                        {
                            commentAuthor = HelperMethods.CleanText(current_subpage_comment.FindElements(By.XPath(".//span[@class='comment-author']")).First().Text);
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }

                        try
                        {
                            commentCreatedAt = Convert.ToDateTime(current_subpage_comment.FindElements(By.XPath(".//span[@class='comment-date']")).First().Text);
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }

                        try
                        {
                            commentThumbsUp = Convert.ToInt32(current_subpage_comment.FindElements(By.XPath(".//div[@class='comment-vote']//span[@class='plus']")).First().Text);
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }

                        try
                        {
                            commentThumbsDown = Convert.ToInt32(current_subpage_comment.FindElements(By.XPath(".//div[@class='comment-vote']//span[@class='minus']")).First().Text);
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }

                        try
                        {
                            commentMessage = HelperMethods.CleanText(current_subpage_comment.FindElements(By.XPath(".//div[@class='comment-text']")).First().Text);
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }

                        try
                        {
                            if (current_website_popular_comments.Where(x => x.Author == commentAuthor && x.Message == commentMessage).Count() > 0)
                            {
                                commentIsHighlited = true;
                            }
                            else
                            {
                                commentIsHighlited = false;
                            }
                        }
                        catch (Exception exc)
                        {
                            Console.WriteLine(exc.Message);
                        }



                        Comment comment = new Comment();
                        comment.Author      = commentAuthor;
                        comment.CreatedAt   = commentCreatedAt;
                        comment.IsHighlited = commentIsHighlited;
                        comment.Message     = commentMessage;
                        comment.ThumbsDown  = commentThumbsDown;
                        comment.ThumbsUp    = commentThumbsUp;
                        comments.Add(comment);
                    }
                }

                comments_website_subpage_number++;
            }


            #endregion



            #region extras !!!


            // wait
            //driver.Manage().Timeouts().ImplicitWait = TimeSpan.FromSeconds(5);
            //var wait = new WebDriverWait(driver, TimeSpan.FromSeconds(3));

            // wait for element
            //var wait = new WebDriverWait(driver, TimeSpan.FromSeconds(timeOut)).Until(ExpectedConditions.ElementExists((By.Id(login))));

            // wait rundom number of seconds...
            //var rnd = new Random();
            //var delay = rnd.Next(5000, 10001);
            //Thread.Sleep(delay);

            #endregion



            // clean up the driver
            driver.Quit();


            #region db operations

            // save results to database
            using (var context = new ArticlesContext())
            {
                var current_article = context.Articles.Where(x => x.Url == url).First();

                current_article.Title            = title;
                current_article.CreatedAt        = createdAt;
                current_article.Author           = author;
                current_article.Content          = content;
                current_article.NumberOfComments = numberOfComments;
                current_article.NumberOfLikes    = numberOfLikes;
                current_article.IsDownloaded     = true;
                current_article.UpdatedAt        = DateTime.UtcNow;

                current_article.Tags.AddRange(tags.Where(x => !current_article.Tags.Any(y => y.Text == x.Text)));
                current_article.RelatedArticles.AddRange(relatedArticles.Where(x => !current_article.RelatedArticles.Any(y => y.Text == x.Text)));
                current_article.Comments.AddRange(comments.Where(x => !current_article.Comments.Any(y => y.Author == x.Author && y.Message == x.Message && x.CreatedAt == y.CreatedAt)));

                context.SaveChanges();
            }

            #endregion
        }
Example #25
0
 /// <summary>
 /// Creates the controller and wraps the supplied context in a new <c>UnitOfWork</c>,
 /// which is kept in the <c>unitOfWork</c> member.
 /// </summary>
 /// <param name="context">Context handed to the <c>UnitOfWork</c> constructor; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// Thrown when <paramref name="context"/> is null.
 /// </exception>
 public AuthorsController(ArticlesContext context)
 {
     // Fail fast: without this check a null context is forwarded unchecked and
     // the failure only shows up later, away from the real cause.
     if (context == null)
     {
         throw new System.ArgumentNullException(nameof(context));
     }

     unitOfWork = new UnitOfWork(context);
 }
 /// <summary>
 /// Creates the repository and stores the supplied context in <c>_ctx</c>.
 /// </summary>
 /// <param name="ctx">Context the repository keeps a reference to; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// Thrown when <paramref name="ctx"/> is null.
 /// </exception>
 public CommentRepository(ArticlesContext ctx)
 {
     // Reject null up front instead of storing it and failing on first use.
     if (ctx == null)
     {
         throw new System.ArgumentNullException(nameof(ctx));
     }

     _ctx = ctx;
 }
 /// <summary>
 /// Creates the repository and stores the supplied context in <c>_articlesContext</c>.
 /// </summary>
 /// <param name="articlesContext">Context the repository keeps a reference to; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// Thrown when <paramref name="articlesContext"/> is null.
 /// </exception>
 public TagsRepository(ArticlesContext articlesContext)
 {
     // Reject null up front instead of storing it and failing on first use.
     if (articlesContext == null)
     {
         throw new System.ArgumentNullException(nameof(articlesContext));
     }

     _articlesContext = articlesContext;
 }
Example #28
0
 /// <summary>
 /// Creates the unit of work and stores the supplied context in the <c>context</c> field.
 /// </summary>
 /// <param name="context">Context this unit of work keeps a reference to; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// Thrown when <paramref name="context"/> is null.
 /// </exception>
 public UnitOfWork(ArticlesContext context)
 {
     // Reject null up front instead of storing it and failing on first use.
     if (context == null)
     {
         throw new System.ArgumentNullException(nameof(context));
     }

     // "this." is required: the parameter shadows the field of the same name.
     this.context = context;
 }
Example #29
0
 /// <summary>
 /// Creates the service and stores its two dependencies in <c>_context</c> and <c>_logger</c>.
 /// </summary>
 /// <param name="context">Context the service keeps a reference to; must not be null.</param>
 /// <param name="logger">Logger the service keeps a reference to; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// Thrown when <paramref name="context"/> or <paramref name="logger"/> is null.
 /// </exception>
 public ArticlesService(ArticlesContext context, ILogger <ArticlesService> logger)
 {
     // Validate both dependencies before assigning anything, so a failed
     // construction never leaves a half-initialised instance behind.
     if (context == null)
     {
         throw new System.ArgumentNullException(nameof(context));
     }

     if (logger == null)
     {
         throw new System.ArgumentNullException(nameof(logger));
     }

     _context = context;
     _logger  = logger;
 }
Example #30
0
 /// <summary>
 /// Creates the repository and stores the supplied context in <c>_ctx</c>.
 /// </summary>
 /// <param name="ctx">Context the repository keeps a reference to; must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// Thrown when <paramref name="ctx"/> is null.
 /// </exception>
 public TagRepository(ArticlesContext ctx)
 {
     // Reject null up front instead of storing it and failing on first use.
     if (ctx == null)
     {
         throw new System.ArgumentNullException(nameof(ctx));
     }

     _ctx = ctx;
 }