/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, content and lead image nodes.
/// </summary>
/// <param name="articleUrl">Address of the article page to load.</param>
/// <returns>
/// The populated article element. Author is always null; Image is only assigned
/// when the content node was found.
/// </returns>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//div[@class=\"chitiettin\"]", ".//h1");

    // excerpt
    articleElement.Excerpt = Utilities._GetNode(doc, "//div[@class=\"noidungchitiet\"]", ".//h2");

    // author extraction is disabled for this layout
    articleElement.Author = null;

    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@class=\"noidungchitiet\"]");

    if (articleElement.Content != null)
    {
        // The excerpt heading is stripped from the content by its class name.
        // The excerpt may be missing, or may carry no class attribute, so both
        // are checked before dereferencing; in that case the strip is skipped.
        if (articleElement.Excerpt != null)
        {
            var classAttribute = articleElement.Excerpt.Attributes["class"];
            if (classAttribute != null)
            {
                string className = classAttribute.Value;
                articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, "//h2[@class=\"" + className + "\"]");
            }
        }

        articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, "//div[@class=\"thongtingame\"]");

        // get large image as thumbnail
        articleElement.Image = Utilities._GetNode(doc, "//div[@class=\"chitiettin\"]", ".//img");
    }

    return articleElement;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// content, lead image and excerpt. The excerpt is taken from the first paragraph
/// of the content, or from the second one when the first has no text.
/// </summary>
/// <param name="articleUrl">Address of the article page to load.</param>
/// <returns>
/// The populated article element. Author is always null; Image and Excerpt are
/// only assigned when the content node was found.
/// </returns>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//span[@class=\"news_title\"]");

    // author is not extracted for this layout
    articleElement.Author = null;

    // content
    articleElement.Content = Utilities._GetNode(doc, "//td[@class=\"news_content\"]");

    if (articleElement.Content != null)
    {
        // get large image as thumbnail
        articleElement.Image = articleElement.Content.SelectSingleNode(".//img[1]");

        // A body without any paragraph used to crash on the InnerText access;
        // now the excerpt is simply left unset in that case.
        var firstParagraph = articleElement.Content.SelectSingleNode(".//p[1]");
        if (firstParagraph != null)
        {
            if (firstParagraph.InnerText.Trim() != string.Empty)
            {
                articleElement.Excerpt = firstParagraph;
                articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, "//*[@class=\"news_content\"]/p[1]");
            }
            else
            {
                // First paragraph is blank: use the second one as the excerpt.
                articleElement.Excerpt = articleElement.Content.SelectSingleNode(".//p[2]");

                // NOTE(review): if _RemoveNodeForNode mutates the tree in place, the
                // second call targets what was originally the third paragraph - confirm.
                articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, "//*[@class=\"news_content\"]/p[1]");
                articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, "//*[@class=\"news_content\"]/p[2]");
            }
        }
    }

    return articleElement;
}
/// <summary>
/// Fetches the page at <paramref name="articleUrl"/> and picks out the title,
/// excerpt, body and lead image nodes.
/// </summary>
/// <param name="articleUrl">Address of the article page to load.</param>
/// <returns>
/// The populated article element. Author is always null; Image is only assigned
/// when the body node was found.
/// </returns>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    var result = new ArticleElementInfo();
    var page = Utilities.GetHtmlDocument(articleUrl);

    result.Title = Utilities._GetNode(page, "//h1[@class=\"title\"]");
    result.Excerpt = Utilities._GetNode(page, "//h2[@class=\"sapo\"]");
    result.Author = null; // author extraction is switched off for this layout
    result.Content = Utilities._GetNode(page, "//div[@class=\"detail-content\"]");

    if (result.Content == null)
    {
        return result;
    }

    // Footer links plus the author and source paragraphs are stripped from the body, in this order.
    string[] unwantedXPaths =
    {
        "//div[@type=\"link-content-footer\"]",
        "//p[@class=\"p-author\"]",
        "//p[@class=\"p-source\"]"
    };
    foreach (string xpath in unwantedXPaths)
    {
        result.Content = Utilities._RemoveNodeForNode(result.Content, xpath);
    }

    // The large image doubles as the thumbnail.
    result.Image = Utilities._GetNode(page, "//img[@class=\"img\"]");

    return result;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, author, content, publish time, keyword and lead image nodes.
/// </summary>
/// <param name="articleUrl">Absolute address of the article page to load.</param>
/// <returns>
/// The populated article element. Image is only assigned when the content node was found.
/// </returns>
/// <exception cref="UriFormatException"><paramref name="articleUrl"/> is not a valid absolute URI.</exception>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    // Only the URI validation is kept from the former host-name computation,
    // whose result was never used; malformed URLs still fail here as before.
    new Uri(articleUrl);

    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//h1[@class=\"title_detail\"]", string.Empty);
    // excerpt
    articleElement.Excerpt = Utilities._GetNode(doc, "//div[@class=\"sapo_detail\"]", string.Empty);
    // author
    articleElement.Author = Utilities._GetNode(doc, "//p[@class=\"align-right\"]", string.Empty);
    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@id=\"content_detail_news\"]", string.Empty);
    // publish time
    articleElement.PublishedTime = Utilities._GetNode(doc, "//span[@class=\"time_index\"]", string.Empty);
    // keyword
    articleElement.Keyword = Utilities._GetNodes(doc, "//div[@class=\"tag_detail\"]//a//h3", string.Empty);

    if (articleElement.Content != null)
    {
        // The og:image meta tag is used as the thumbnail source.
        articleElement.Image = articleElement.Content.SelectSingleNode("//meta[@property=\"og:image\"]");

        // Elements stripped from the content, in this order.
        string[] unwantedXPaths =
        {
            "//div[@id=\"bs-inread-container-wrapper\"]",
            "//p[@class=\"align-right\"]",
            "//p[@style=\"text-align: right;\"]",
            "//div[@style=\"display: inline-block;width: 100%;overflow: hidden;\"]",
            "//div[@class=\"share_detail pkg\"]",
            "//div[@id=\"content_detail_news\"]//script",
            "//div[@id=\"content_detail_news\"]//div[@style=\"display: inline-block;width: 100%;overflow: hidden;float: left;\"]"
        };
        foreach (string xpath in unwantedXPaths)
        {
            articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, xpath);
        }
    }

    return articleElement;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, author, content, publish time, keyword and lead image nodes.
/// </summary>
/// <param name="articleUrl">Absolute address of the article page to load.</param>
/// <returns>
/// The populated article element. Image is only assigned when the content node was found.
/// </returns>
/// <exception cref="UriFormatException"><paramref name="articleUrl"/> is not a valid absolute URI.</exception>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    // Only the URI validation is kept from the former host-name computation,
    // whose result was never used; malformed URLs still fail here as before.
    new Uri(articleUrl);

    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//h1", string.Empty);
    // excerpt
    articleElement.Excerpt = Utilities._GetNode(doc, "//h2", string.Empty);
    // author
    articleElement.Author = Utilities._GetNode(doc, "//span[@itemprop=\"author\"]", string.Empty);
    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@class=\"post-content fs15content pb10 pt10\"]", string.Empty);
    // publish time
    articleElement.PublishedTime = Utilities._GetNode(doc, "//span[@class=\"time f-elle-futura-book hidden-sm hidden-xs\"]", string.Empty);
    // keyword
    articleElement.Keyword = Utilities._GetNodes(doc, "//div[@class=\"col-md-10\"]//a[@class=\"tarhome fs10\"]", string.Empty);

    if (articleElement.Content != null)
    {
        // get large image as thumbnail
        articleElement.Image = articleElement.Content.SelectSingleNode(".//img[1]");

        // Elements stripped from the content, in this order.
        // NOTE(review): ".//p//iframe" looks subsumed by ".//iframe"; both are kept
        // because it is not visible here whether the helper removes one match or all.
        string[] unwantedXPaths =
        {
            ".//p//small",
            ".//div[@class=\"mb10 mt10\"]",
            ".//div[@class=\"row related-post-detail hidden-sm hidden-xs\"]",
            ".//div[@style=\"height: 1px; width: 1px; display: none;\"]",
            ".//p//iframe",
            ".//iframe"
        };
        foreach (string xpath in unwantedXPaths)
        {
            articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, xpath);
        }
    }

    return articleElement;
}
/// <summary>
/// Fetches the page at <paramref name="articleUrl"/> and picks out the title,
/// excerpt, body and lead image nodes.
/// </summary>
/// <param name="articleUrl">Address of the article page to load.</param>
/// <returns>
/// The populated article element. Author is always null; Image is only assigned
/// when the body node was found.
/// </returns>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    var result = new ArticleElementInfo();
    var page = Utilities.GetHtmlDocument(articleUrl);

    result.Title = Utilities._GetNode(page, "//h1[@id=\"title\"]");
    result.Excerpt = Utilities._GetNode(page, "//div[@class=\"content-article\"]", ".//div[@class=\"lead\"]");
    result.Author = null; // author extraction is switched off for this layout
    result.Content = Utilities._GetNode(page, "//div[@id=\"content\"]");

    if (result.Content == null)
    {
        return result;
    }

    // The first image inside the body doubles as the thumbnail.
    result.Image = result.Content.SelectSingleNode(".//img[1]");

    return result;
}
/// <summary>
/// Fetches the page at <paramref name="articleUrl"/> and picks out the title,
/// excerpt, body and lead image nodes.
/// </summary>
/// <param name="articleUrl">Address of the article page to load.</param>
/// <returns>
/// The populated article element. Author is always null; Image is only assigned
/// when the body node was found.
/// </returns>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    var result = new ArticleElementInfo();
    var page = Utilities.GetHtmlDocument(articleUrl);

    result.Title = Utilities._GetNode(page, "//h1[@class=\"baiviet-title\"]");
    result.Excerpt = Utilities._GetNode(page, "//p[@class=\"baiviet-sapo\"]");
    result.Author = null; // author extraction is switched off for this layout
    result.Content = Utilities._GetNode(page, "//div[@class=\" text-conent\"]");

    if (result.Content == null)
    {
        return result;
    }

    // The large news image doubles as the thumbnail.
    result.Image = Utilities._GetNode(page, "//img[@class=\"news-image\"]");

    return result;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, content, publish time and lead image nodes.
/// </summary>
/// <remarks>
/// The publish time text is read from the element with id "date". When it matches
/// the "... d/M/yyyy, H:mm ..." shape it is rewritten as "yyyy-M-d H:mm:00";
/// otherwise the raw text is kept. Either way it is wrapped in a span node.
/// </remarks>
/// <param name="articleUrl">Address of the article page to load.</param>
/// <returns>
/// The populated article element. Author is always null; Image is only assigned
/// when the content node was found.
/// </returns>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//h1[@id=\"title\"]");
    // excerpt
    articleElement.Excerpt = Utilities._GetNode(doc, "//*[@id=\"content\"]/div[1]");
    // author is not extracted for this layout
    articleElement.Author = null;
    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@id=\"content\"]");

    // publish time
    // Guard against a missing date element: Regex.Match throws on a null input.
    // NOTE(review): whether _GetNodeInnerText can return null is not visible here.
    var pubdate = Utilities._GetNodeInnerText(doc, "//div[@id=\"date\"]") ?? string.Empty;
    var match = System.Text.RegularExpressions.Regex.Match(pubdate, @"(.+)\s(\d+)\/(\d+)\/(\d+),\s(\d+):(\d+)(.+)$");
    if (match.Success)
    {
        // Groups 2/3/4 are the three slash-separated numbers, 5/6 the time;
        // they are emitted in reverse order (4-3-2) followed by the time.
        pubdate = string.Format(
            "{0}-{1}-{2} {3}:{4}:00",
            match.Groups[4].Value,
            match.Groups[3].Value,
            match.Groups[2].Value,
            match.Groups[5].Value,
            match.Groups[6].Value);
    }
    articleElement.PublishedTime = HtmlNode.CreateNode("<span>" + pubdate + "</span>");

    if (articleElement.Content != null)
    {
        // get large image as thumbnail
        articleElement.Image = articleElement.Content.SelectSingleNode(".//img[1]");

        // Elements stripped from the content, in this order.
        // NOTE(review): if _RemoveNodeForNode mutates the tree in place, the p[2]/p[3]
        // positions shift after each removal - confirm the intended paragraphs are hit.
        string[] unwantedXPaths =
        {
            "//*[@id=\"content\"]/p[1]",
            "//*[@id=\"content\"]/p[2]",
            "//*[@id=\"content\"]/p[3]",
            "//*[@id=\"content\"]/div[1]",
            ".//table[@class=\"rl box leftside\"]",
            ".//table[@class=\"rl center\"]"
        };
        foreach (string xpath in unwantedXPaths)
        {
            articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, xpath);
        }
    }

    return articleElement;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, content and lead image nodes.
/// </summary>
/// <remarks>
/// The excerpt is built from the text of the lead heading; when that text contains
/// a '-', everything up to and including the first one is dropped and the rest trimmed.
/// </remarks>
/// <param name="articleUrl">Address of the article page to load.</param>
/// <returns>
/// The populated article element. Author is always null; Excerpt is only assigned
/// when the lead heading exists; Image is only assigned when the content node was found.
/// </returns>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//h1[@class=\"title\"]");

    // excerpt
    // A page without the lead heading used to crash on InnerText; the excerpt is
    // now left unset in that case.
    var leadNode = Utilities._GetNode(doc, "//h2[@class=\"lead\"]");
    if (leadNode != null)
    {
        string leadText = leadNode.InnerText;

        // Single ordinal search replaces the previous Contains + IndexOf pair.
        int dashIndex = leadText.IndexOf('-');
        if (dashIndex >= 0)
        {
            leadText = leadText.Substring(dashIndex + 1).Trim();
        }

        articleElement.Excerpt = Utilities._CreateNodeFromString(leadText);
    }

    // author is not extracted for this layout
    articleElement.Author = null;

    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@id=\"detail\"]");

    if (articleElement.Content != null)
    {
        // get large image as thumbnail
        articleElement.Image = articleElement.Content.SelectSingleNode(".//img[1]");

        // Elements stripped from the content, in this order.
        string[] unwantedXPaths =
        {
            "//*[@class=\"bar-left_th\"]",
            "//h1[@class=\"title\"]",
            "//h2[@class=\"lead\"]",
            "//ul[@class=\"ul_relate\"]",
            "//div[@id=\"AdAsia\"]",
            "//div[@id=\"itvcplayer\"]",
            "//ins",
            "//div[@style=\"height:30px;margin-right:10px;float:right\"]",
            "(//center)[last()]"
        };
        foreach (string xpath in unwantedXPaths)
        {
            articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, xpath);
        }
    }

    return articleElement;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, content and lead image nodes. The lead image is detached from the
/// content once it has been captured.
/// </summary>
/// <param name="articleUrl">Address of the article page to load.</param>
/// <returns>
/// The populated article element. Author is always null; Image is only assigned
/// when the content node was found, and is null when the content has no image.
/// </returns>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//h1[@id=\"title-h1\"]");

    // excerpt
    // NOTE(review): the inner XPath is "//h2" rather than ".//h2"; depending on how
    // _GetNode evaluates it, this may match the first h2 of the whole page - confirm.
    articleElement.Excerpt = Utilities._GetNode(doc, "//div[@class=\"sapo-news-detail\"]", "//h2");

    // author is not extracted for this layout
    articleElement.Author = null;

    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@id=\"content-id\"]");

    if (articleElement.Content != null)
    {
        // get large image as thumbnail, then take it out of the content;
        // a body without any image used to crash on the Remove() call.
        var leadImage = articleElement.Content.SelectSingleNode(".//img[1]");
        articleElement.Image = leadImage;
        if (leadImage != null)
        {
            leadImage.Remove();
        }

        articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, "//div[@class=\"sapo-news-detail\"]");
    }

    return articleElement;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, author, content, publish time, keyword and lead image nodes.
/// </summary>
/// <param name="articleUrl">Absolute address of the article page to load.</param>
/// <returns>
/// The populated article element. Image is only assigned when the content node was found.
/// </returns>
/// <exception cref="UriFormatException"><paramref name="articleUrl"/> is not a valid absolute URI.</exception>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    // Only the URI validation is kept from the former host-name computation,
    // whose result was never used; malformed URLs still fail here as before.
    new Uri(articleUrl);

    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//h1[@class=\"title\"]", string.Empty);
    // excerpt
    articleElement.Excerpt = Utilities._GetNode(doc, "//h2[@class=\"sapo\"]", string.Empty);
    // author
    articleElement.Author = Utilities._GetNode(doc, "//p[@class=\"dateandcat\"]", string.Empty);
    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@class=\"contentdetail\"]", string.Empty);
    // publish time
    articleElement.PublishedTime = Utilities._GetNode(doc, "//p[@class=\"dateandcat\"]//span", string.Empty);

    // keyword
    // NOTE(review): "undefined" looks like an unfilled placeholder rather than a real
    // XPath for this site's tags; left unchanged because the correct selector is unknown.
    articleElement.Keyword = Utilities._GetNodes(doc, "undefined", string.Empty);

    if (articleElement.Content != null)
    {
        // The og:image meta tag is used as the thumbnail source.
        articleElement.Image = articleElement.Content.SelectSingleNode("//meta[@property=\"og:image\"]");

        articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, "//p//strong//span[@class=\"entity _586o\"]");
    }

    return articleElement;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, author, content, publish time, keyword and lead image nodes.
/// </summary>
/// <param name="articleUrl">Absolute address of the article page to load.</param>
/// <returns>
/// The populated article element. Image is only assigned when the content node was found.
/// </returns>
/// <exception cref="UriFormatException"><paramref name="articleUrl"/> is not a valid absolute URI.</exception>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    // Only the URI validation is kept from the former host-name computation,
    // whose result was never used; malformed URLs still fail here as before.
    new Uri(articleUrl);

    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//h1", string.Empty);
    // excerpt
    articleElement.Excerpt = Utilities._GetNode(doc, "//p[@class=\"the-article-summary cms-desc\"]", string.Empty);
    // author
    articleElement.Author = Utilities._GetNode(doc, "//div[@class=\"the-article-credit\"]//p[@class=\"author\"]", string.Empty);
    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@class=\"the-article-body cms-body\"]", string.Empty);
    // publish time
    articleElement.PublishedTime = Utilities._GetNode(doc, "//ul[@class=\"the-article-meta\"]//li[@class=\"the-article-publish cms-date\"]", string.Empty);
    // keyword
    articleElement.Keyword = Utilities._GetNodes(doc, "//p[@class=\"the-article-tags\"]//a", string.Empty);

    if (articleElement.Content != null)
    {
        // The og:image meta tag is used as the thumbnail source.
        articleElement.Image = articleElement.Content.SelectSingleNode("//meta[@property=\"og:image\"]");
    }

    return articleElement;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, author, content, publish time, keyword and lead image nodes.
/// </summary>
/// <param name="articleUrl">Absolute address of the article page to load.</param>
/// <returns>
/// The populated article element. Image is only assigned when the content node was found.
/// </returns>
/// <exception cref="UriFormatException"><paramref name="articleUrl"/> is not a valid absolute URI.</exception>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    // Only the URI validation is kept from the former host-name computation,
    // whose result was never used; malformed URLs still fail here as before.
    new Uri(articleUrl);

    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//div[@class=\"details-wrap\"]//h1", string.Empty);
    // excerpt
    articleElement.Excerpt = Utilities._GetNode(doc, "//div[@class=\"sapo cms-desc\"]", string.Empty);
    // author
    articleElement.Author = Utilities._GetNode(doc, "//h4[@class=\"name cms-author\"]", string.Empty);
    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@id=\"main_detail\"]", string.Empty);
    // publish time
    articleElement.PublishedTime = Utilities._GetNode(doc, "//time[@class=\"cms-date\"]", string.Empty);
    // keyword
    articleElement.Keyword = Utilities._GetNodes(doc, "//ul[@class=\"tags clearfix\"]//li//a", string.Empty);

    if (articleElement.Content != null)
    {
        // The og:image meta tag is used as the thumbnail source.
        articleElement.Image = articleElement.Content.SelectSingleNode("//meta[@property=\"og:image\"]");

        articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, "//div[@id=\"LavaNetwork\"]");
    }

    return articleElement;
}
/// <summary>
/// Loads the article page at <paramref name="articleUrl"/> and collects its title,
/// excerpt, author, content, publish time, keyword and lead image nodes.
/// </summary>
/// <param name="articleUrl">Absolute address of the article page to load.</param>
/// <returns>
/// The populated article element. Image is only assigned when the content node was found.
/// </returns>
/// <exception cref="UriFormatException"><paramref name="articleUrl"/> is not a valid absolute URI.</exception>
public ArticleElementInfo GetArticleElement(string articleUrl)
{
    // Only the URI validation is kept from the former host-name computation,
    // whose result was never used; malformed URLs still fail here as before.
    new Uri(articleUrl);

    var articleElement = new ArticleElementInfo();
    var doc = Utilities.GetHtmlDocument(articleUrl);

    // title
    articleElement.Title = Utilities._GetNode(doc, "//article//h1", string.Empty);
    // excerpt
    articleElement.Excerpt = Utilities._GetNode(doc, "//div[@class=\"news_desc\"]//h2", string.Empty);
    // author
    articleElement.Author = Utilities._GetNode(doc, "//a[@class=\"author\"]", string.Empty);
    // content
    articleElement.Content = Utilities._GetNode(doc, "//div[@class=\"box_content_detail\"]", string.Empty);

    // publish time
    // NOTE(review): this is the only lookup using the two-argument _GetNode overload;
    // kept as-is because the difference between the overloads is not visible here.
    articleElement.PublishedTime = Utilities._GetNode(doc, "//span[@class=\"time_up\"]");

    // keyword
    articleElement.Keyword = Utilities._GetNodes(doc, "//div[@class=\"box_tags\"]//a", string.Empty);

    if (articleElement.Content != null)
    {
        // The og:image meta tag is used as the thumbnail source.
        articleElement.Image = articleElement.Content.SelectSingleNode("//meta[@property=\"og:image\"]");

        articleElement.Content = Utilities._RemoveNodeForNode(articleElement.Content, "//div[@class=\"box_retale_detail_delay\"]");
    }

    return articleElement;
}