Пример #1
0
        /// <summary>
        /// Downloads the page that <paramref name="obj"/> links to and builds a <c>New</c>
        /// from its headline, article body and author line.
        /// </summary>
        /// <param name="obj">Link descriptor; its <c>Link</c> is fetched and its <c>Img</c> is passed through unchanged.</param>
        /// <returns>
        /// The constructed <c>New</c>. Any element that cannot be located contributes an empty
        /// string instead of raising a <c>NullReferenceException</c>.
        /// </returns>
        private async Task <New> GetContents(LinkObj obj)
        {
            // Absolute XPaths are tied to the current page layout, so every lookup may miss.
            const string articleRoot = "/html/body/section[4]/div[2]";

            var doc  = await GetDocuments(obj.Link);
            var root = doc.DocumentNode;

            // SelectSingleNode returns null when nothing matches; fall back to empty text.
            var title  = root.SelectSingleNode(articleRoot + "/h1")?.InnerText ?? string.Empty;
            var body   = root.SelectSingleNode(articleRoot + "/article")?.InnerHtml ?? string.Empty;
            var author = root.SelectSingleNode(articleRoot + "/div[1]/div[1]/div/strong")?.InnerText ?? string.Empty;

            return(new New(title, body, "", "ndh.vn" + "-" + Type + "-" + author, obj.Img, this.Categories));
        }
Пример #2
0
        /// <summary>
        /// Downloads the page that <paramref name="obj"/> links to and builds a <c>New</c>
        /// from its headline, article body, author line and the image derived from the body.
        /// </summary>
        /// <param name="obj">Link descriptor; only its <c>Link</c> is read here.</param>
        /// <returns>
        /// The constructed <c>New</c>. Any element that cannot be located contributes an empty
        /// string instead of raising a <c>NullReferenceException</c>.
        /// </returns>
        private async Task <New> GetContents(LinkObj obj)
        {
            var doc  = await GetDocuments(obj.Link);
            var root = doc.DocumentNode;

            // Each selector depends on the current page layout, so a null result is expected
            // whenever the markup changes; fall back to empty text in that case.
            var title  = root.QuerySelector("body > div.wrap-main > div.container.wrap-main-page > div.left-col > div.block.pb15 > div.box-post-main-title.pb15 > h1")?.InnerText ?? string.Empty;
            var body   = root.QuerySelector("#sevenBoxNewContentInfo")?.InnerHtml ?? string.Empty;
            var author = root.QuerySelector("body > div.wrap-main > div.container.wrap-main-page > div.left-col > div:nth-child(2) > div.page-content > div.sevenPostWrap.pb10 > div.sevenPostAuthor")?.InnerText ?? string.Empty;

            // NOTE(review): GetImage is defined elsewhere; it now receives "" when the body is
            // missing (previously this path crashed before the call) - confirm it tolerates that.
            var img = GetImage(body);

            return(new New(title, body, "", "cafeland.vn" + "-" + Type + "-" + author, img, this.Categories));
        }
Пример #3
0
        /// <summary>
        /// Loads the page at <c>this.Url</c> and collects one <c>LinkObj</c> per article anchor,
        /// attaching the anchor's thumbnail when it has one.
        /// </summary>
        /// <returns>
        /// The collected links; empty when the page contains no matching anchors. Anchors
        /// without an <c>href</c> value are skipped.
        /// </returns>
        private async Task <List <LinkObj> > GetLinks()
        {
            const string siteRoot = "http://tiasang.com.vn";

            HtmlDocument   docs  = await GetDocuments(this.Url);
            List <LinkObj> links = new List <LinkObj>();

            // SelectNodes returns null (not an empty collection) when the XPath matches nothing.
            var childArticles = docs.DocumentNode.SelectNodes("//*[@id=\"dnn_ctr571_ModuleContent\"]/div/div[1]/div/div[1]/a");

            if (childArticles == null)
            {
                return(links);
            }

            foreach (var a in childArticles)
            {
                // An anchor without a target cannot be followed later, so it is not recorded.
                var href = a.Attributes["href"]?.Value;
                if (string.IsNullOrEmpty(href))
                {
                    continue;
                }

                var link = new LinkObj();
                link.Link = href;

                // Only direct <img> children are considered, as before.
                var src = a.ChildNodes.FirstOrDefault(e => e.Name == "img")?.Attributes["src"]?.Value;
                if (!string.IsNullOrEmpty(src))
                {
                    // Prefix the site root only onto relative paths; an already absolute URL
                    // would otherwise turn into "http://tiasang.com.vnhttp://...".
                    bool isAbsolute = src.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) ||
                                      src.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
                    link.Img = isAbsolute ? src : siteRoot + src;
                }
                links.Add(link);
            }

            return(links);
        }
Пример #4
0
        /// <summary>
        /// Downloads the article that <paramref name="link"/> points to and builds a <c>New</c>
        /// from its headline, lead paragraph, body paragraphs/tables, author/source line and
        /// lead image.
        /// </summary>
        /// <param name="link">
        /// Link descriptor; its <c>Link</c> is fetched and its <c>Img</c> is the fallback image
        /// when the article has no usable image of its own.
        /// </param>
        /// <returns>
        /// The constructed <c>New</c>. Parts of the page that cannot be located contribute empty
        /// text instead of raising <c>NullReferenceException</c> or
        /// <c>ArgumentOutOfRangeException</c>.
        /// </returns>
        private async Task <New> GetContents(LinkObj link)
        {
            // All lookups hang off this container; the absolute path is tied to the page layout.
            const string articleRoot = "/html/body/section[2]/section[1]/section[1]";

            var doc = await GetDocuments(link.Link);

            // Evaluates an XPath once; a missing node yields an empty string.
            string TextAt(string xpath)
            {
                return(doc.DocumentNode.SelectSingleNode(xpath)?.InnerText ?? string.Empty);
            }

            string title  = TextAt(articleRoot + "/h1");
            string author = TextAt(articleRoot + "/article/p/strong");
            string source = TextAt(articleRoot + "/article/p/em");

            // The lead paragraph and the article body are addressed by position among the
            // container's child nodes; the indexer throws when out of range, so check the count.
            var rendered = doc.DocumentNode.SelectSingleNode(articleRoot);
            var children = rendered?.ChildNodes;
            var desc     = children != null && children.Count > 5 ? children[5] : null;
            var arti     = children != null && children.Count > 7 ? children[7] : null;

            // Child texts joined by "-" with no trailing separator (also safe for zero children).
            var realDesc = desc == null
                ? string.Empty
                : string.Join("-", desc.ChildNodes.Select(e => e.InnerText));

            // Body = every <p>/<table> child except those that contain a <strong> element.
            var contents = arti == null
                ? string.Empty
                : string.Concat(arti.ChildNodes
                                .Where(e => e.Name == "p" || e.Name == "table")
                                .Where(e => !e.InnerHtml.Contains("<strong>"))
                                .Select(e => e.OuterHtml));

            // The second-to-last child node of the body is treated as the author line.
            var auth = arti != null && arti.ChildNodes.Count >= 2
                ? arti.ChildNodes[arti.ChildNodes.Count - 2]
                : null;

            // Prefer the article's own image; otherwise use the thumbnail from the listing.
            var    img = doc.DocumentNode.SelectSingleNode(articleRoot + "/article/table/tbody/tr[1]/td/img");
            string url = img?.Attributes["src"]?.Value ?? link.Img;

            // Drop everything from the first "_" on and make sure the result names a .jpg.
            // An absent URL is left untouched rather than being turned into a bare ".jpg".
            if (!string.IsNullOrEmpty(url))
            {
                url = url.Split("_")[0];
                if (!url.Contains(".jpg"))
                {
                    url += ".jpg";
                }
            }

            // NOTE(review): the body HTML was captured above, so detaching the author node here
            // does not change the returned content - confirm whether it was meant to run earlier.
            if (auth != null)
            {
                arti.RemoveChild(auth);
            }

            // Use the trailing paragraph as the author when there is one, else the <strong> text.
            var realAuth      = auth != null && auth.Name == "p" ? auth.InnerText.Trim() : author.Trim();
            var trimmedSource = source.Trim();

            realAuth = realAuth != "" ? realAuth + "-" + trimmedSource : trimmedSource;

            var newRender = realDesc + contents;

            return(new New
                   (
                       title,
                       this.RemoveLink(newRender), "",
                       "vnexpress.net-" + realAuth,
                       url,
                       this.Categories
                   ));
        }