/// <summary>
/// Downloads the article page referenced by <paramref name="obj"/> and builds a <c>New</c>
/// from the title, article body and author found at fixed XPath locations.
/// </summary>
/// <param name="obj">Link entry; its <c>Link</c> is fetched and its <c>Img</c> is passed through unchanged.</param>
/// <returns>
/// A <c>New</c> constructed from the title text, the article's inner HTML, an empty string,
/// the tag <c>"ndh.vn-" + Type + "-" + author</c>, <c>obj.Img</c> and <c>Categories</c>.
/// A fragment whose XPath matches nothing contributes an empty string instead of throwing.
/// </returns>
private async Task<New> GetContents(LinkObj obj)
{
    var doc = await GetDocuments(obj.Link);
    var root = doc.DocumentNode;

    // SelectSingleNode yields null when the XPath matches nothing (for example after a
    // layout change), so each lookup is null-guarded and falls back to an empty string.
    string title = root.SelectSingleNode("/html/body/section[4]/div[2]/h1")?.InnerText ?? string.Empty;
    string contents = root.SelectSingleNode("/html/body/section[4]/div[2]/article")?.InnerHtml ?? string.Empty;
    string author = root.SelectSingleNode("/html/body/section[4]/div[2]/div[1]/div[1]/div/strong")?.InnerText ?? string.Empty;

    return new New(title, contents, "", "ndh.vn" + "-" + Type + "-" + author, obj.Img, this.Categories);
}
/// <summary>
/// Downloads the article page referenced by <paramref name="obj"/> and builds a <c>New</c>
/// from the title, content box and author block located via CSS selectors.
/// </summary>
/// <param name="obj">Link entry; only its <c>Link</c> is read here.</param>
/// <returns>
/// A <c>New</c> constructed from the title text, the content box's inner HTML, an empty string,
/// the tag <c>"cafeland.vn-" + Type + "-" + author</c>, the result of <c>GetImage</c> applied to
/// the content HTML, and <c>Categories</c>. A fragment whose selector matches nothing
/// contributes an empty string instead of throwing.
/// </returns>
private async Task<New> GetContents(LinkObj obj)
{
    var doc = await GetDocuments(obj.Link);
    var root = doc.DocumentNode;

    // NOTE(review): QuerySelector presumably returns null when the selector matches nothing;
    // the null-conditional access below is harmless either way.
    string title = root.QuerySelector("body > div.wrap-main > div.container.wrap-main-page > div.left-col > div.block.pb15 > div.box-post-main-title.pb15 > h1")?.InnerText ?? string.Empty;
    string contents = root.QuerySelector("#sevenBoxNewContentInfo")?.InnerHtml ?? string.Empty;
    string author = root.QuerySelector("body > div.wrap-main > div.container.wrap-main-page > div.left-col > div:nth-child(2) > div.page-content > div.sevenPostWrap.pb10 > div.sevenPostAuthor")?.InnerText ?? string.Empty;

    // The image is derived from the article body itself rather than from obj.Img.
    var img = GetImage(contents);

    return new New(title, contents, "", "cafeland.vn" + "-" + Type + "-" + author, img, this.Categories);
}
/// <summary>
/// Downloads the listing page at <c>Url</c> and collects one <c>LinkObj</c> per article
/// anchor matched by the listing XPath.
/// </summary>
/// <returns>
/// The collected links, in document order. The list is empty when the XPath matches nothing.
/// Anchors without a usable <c>href</c> are skipped; <c>Img</c> is set only when the anchor
/// has a direct <c>img</c> child carrying a <c>src</c> attribute.
/// </returns>
private async Task<List<LinkObj>> GetLinks()
{
    HtmlDocument docs = await GetDocuments(this.Url);
    var links = new List<LinkObj>();

    // SelectNodes returns null (not an empty collection) when nothing matches,
    // so bail out before enumerating.
    var childArticles = docs.DocumentNode.SelectNodes("//*[@id=\"dnn_ctr571_ModuleContent\"]/div/div[1]/div/div[1]/a");
    if (childArticles == null)
    {
        return links;
    }

    foreach (var a in childArticles)
    {
        // An anchor with no target cannot be fetched later; skip it rather than throw.
        var href = a.Attributes["href"]?.Value;
        if (string.IsNullOrEmpty(href))
        {
            continue;
        }

        var link = new LinkObj();
        link.Link = href;

        // The src value is appended to the site origin to form the image URL.
        var src = a.ChildNodes.FirstOrDefault(e => e.Name == "img")?.Attributes["src"]?.Value;
        if (src != null)
        {
            link.Img = "http://tiasang.com.vn" + src;
        }

        links.Add(link);
    }

    return links;
}
/// <summary>
/// Downloads the article page for <paramref name="link"/> and assembles a <c>New</c> from its
/// title, lead block, body paragraphs/tables, byline and lead image.
/// </summary>
/// <param name="link">Link entry; its <c>Link</c> is fetched and its <c>Img</c> is the fallback image.</param>
/// <returns>
/// A <c>New</c> constructed from the title text, <c>RemoveLink</c> applied to the lead text
/// followed by the body HTML, an empty string, the tag <c>"vnexpress.net-" + byline</c>,
/// the normalised image URL and <c>Categories</c>. Page fragments that cannot be located
/// contribute empty strings instead of throwing.
/// </returns>
private async Task<New> GetContents(LinkObj link)
{
    const string articleRoot = "/html/body/section[2]/section[1]/section[1]";

    var doc = await GetDocuments(link.Link);
    var page = doc.DocumentNode;

    // SelectSingleNode yields null when nothing matches; evaluate each XPath once and
    // fall back to an empty string.
    string title = page.SelectSingleNode(articleRoot + "/h1")?.InnerText ?? string.Empty;
    string author = page.SelectSingleNode(articleRoot + "/article/p/strong")?.InnerText ?? string.Empty;
    string source = page.SelectSingleNode(articleRoot + "/article/p/em")?.InnerText ?? string.Empty;

    var rendered = page.SelectSingleNode(articleRoot);

    // The lead block and the article body are addressed by child position (text nodes
    // included). ElementAtOrDefault gives null when the position does not exist.
    var desc = rendered?.ChildNodes.ElementAtOrDefault(5);
    var arti = rendered?.ChildNodes.ElementAtOrDefault(7);

    // Lead text: inner texts of the lead block's children joined with "-".
    // string.Join yields "" for a childless node, where trimming a trailing "-" would throw.
    string realDesc = desc == null
        ? string.Empty
        : string.Join("-", desc.ChildNodes.Select(item => item.InnerText));

    // Body: outer HTML of every <p>/<table> child, skipping those that contain a <strong> tag.
    string contents = arti == null
        ? string.Empty
        : string.Concat(arti.ChildNodes
            .Where(e => (e.Name == "p" || e.Name == "table") && !e.InnerHtml.Contains("<strong>"))
            .Select(e => e.OuterHtml));

    // Second-to-last child of the article is the byline candidate; null when the article
    // is missing or has fewer than two children.
    var auth = arti?.ChildNodes.ElementAtOrDefault(arti.ChildNodes.Count - 2);

    // Prefer the in-article image; fall back to the listing image when the node or its
    // src attribute is absent.
    var img = page.SelectSingleNode(articleRoot + "/article/table/tbody/tr[1]/td/img");
    string url = img?.Attributes["src"]?.Value ?? link.Img;
    if (!string.IsNullOrEmpty(url))
    {
        // Keep the part before the first '_' and make sure the result carries ".jpg".
        url = url.Split('_')[0];
        if (!url.Contains(".jpg"))
        {
            url += ".jpg";
        }
    }

    if (auth != null)
    {
        // Detach the byline candidate from the parsed article, as the original flow did.
        arti.RemoveChild(auth);
    }

    // Use the candidate's text when it is a <p>; otherwise the <strong> author found earlier.
    string realAuth = (auth != null && auth.Name == "p" ? auth.InnerText : author).Trim();
    realAuth = realAuth != "" ? realAuth + "-" + source.Trim() : source;

    return new New(
        title,
        this.RemoveLink(realDesc + contents),
        "",
        "vnexpress.net-" + realAuth,
        url,
        this.Categories);
}