// Builds a RapideDdl_HeaderPage from a loaded listing page: the page identity, the url of
// the next page, and one RapideDdl_PostHeader per "base shortstory" block of the document.
protected override RapideDdl_HeaderPage GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string pageUrl = loadDataFromWeb.request.Url;

    RapideDdl_HeaderPage data = new RapideDdl_HeaderPage
    {
        sourceUrl = pageUrl,
        loadFromWebDate = loadDataFromWeb.loadFromWebDate,
        id = RapideDdl_LoadHeaderPagesManager.GetHeaderPageKey(pageUrl),
        urlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='basenavi']//span[@class='nnext']//a/@href"))
    };

    List<RapideDdl_PostHeader> posts = new List<RapideDdl_PostHeader>();
    foreach (XXElement postBlock in root.XPathElements("//div[@class='base shortstory']"))
    {
        RapideDdl_PostHeader post = new RapideDdl_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = loadDataFromWeb.loadFromWebDate
        };

        // Title link: detail url and raw title; infos embedded in the title are moved to post.infos.
        XXElement titleLink = postBlock.XPathElement(".//*[@class='shd']//a");
        post.urlDetail = zurl.GetUrl(pageUrl, titleLink.XPathValue("@href"));
        post.title = titleLink.XPathValue(".//text()").Trim(DownloadPrint.TrimChars);
        PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(post.title);
        if (titleInfos.foundInfo)
        {
            post.title = titleInfos.title;
            post.infos.SetValues(titleInfos.infos);
        }

        // Author and creation date; sample date text from the site: "Aujourd'hui, 17:13".
        XXElement infoBar = postBlock.XPathElement(".//div[@class='shdinfo']");
        post.postAuthor = infoBar.XPathValue(".//span[@class='arg']//a//text()");
        string dateText = infoBar.XPathValue(".//span[@class='date']//text()");
        post.creationDate = zdate.ParseDateTimeLikeToday(dateText, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
        if (post.creationDate == null)
        {
            pb.Trace.WriteLine("unknow date time \"{0}\"", dateText);
        }
        if (__trace)
        {
            pb.Trace.WriteLine("creationDate {0} - \"{1}\"", post.creationDate, dateText);
        }

        // Main content: images, then the text values (description, language, size, nbPages, infos).
        XXElement body = postBlock.XPathElement(".//div[@class='maincont']");
        post.images = body.DescendantNodes(node => XmlDescendant.ImageFilter(node))
            .Select(img => new WebImage(zurl.GetUrl(pageUrl, img.zAttribValue("src"))))
            .ToArray();

        // <a> elements get XNodeFilter.SkipNode, every other node XNodeFilter.SelectNode.
        PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
            body.DescendantTexts(node => node is XElement && ((XElement)node).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode),
            post.title);
        post.description = textValues.description;
        post.language = textValues.language;
        post.size = textValues.size;
        post.nbPages = textValues.nbPages;
        post.infos.SetValues(textValues.infos);

        // Category: link texts of the "morelink" bar, minus the ones starting with "Commentaires".
        XXElement moreLink = postBlock.XPathElement(".//div[@class='morelink']//span[@class='arg']");
        post.category = moreLink.XPathElements(".//a")
            .DescendantTexts()
            .Select(DownloadPrint.Trim)
            .Where(s => !s.StartsWith("Commentaires"))
            .zToStringValues("/");

        posts.Add(post);
    }

    data.postHeaders = posts.ToArray();
    return data;
}
// Builds an ExtremeDown_PostDetail_v2 from a loaded post page: title, category, author,
// creation date, images, description lines and the download links grouped by item.
private static ExtremeDown_PostDetail_v2 GetData(WebResult webResult)
{
    XXElement root = webResult.Http.zGetXDocument().zXXElement();

    ExtremeDown_PostDetail_v2 data = new ExtremeDown_PostDetail_v2();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetPostDetailKey(webResult.WebRequest.HttpRequest);

    XXElement post = root.XPathElement("//div[@id='dle-content']");

    // Title; when infos are found in it, the raw title is kept in OriginalTitle.
    data.Title = post.XPathValue(".//h2[@class='blocktitle']//text()").Trim(DownloadPrint.TrimChars);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // Header block: category, author, creation date.
    XXElement header = post.XPathElement(".//div[@class='blockheader']");
    data.Category = header.XPathValues(".//i[@class='icon-cats']/ancestor::span//a//text()")
        .Select(DownloadPrint.Trim)
        .zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);
    data.PostAuthor = header.XPathValue(".//span/i[@class='icon-user']/ancestor::span//a//text()");
    string dateText = header.XPathValue(".//span/i[@class='icon-date']/ancestor::span//a//text()");
    data.PostCreationDate = zdate.ParseDateTimeLikeToday(dateText, webResult.LoadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
    if (data.PostCreationDate == null)
    {
        pb.Trace.WriteLine("unknow date time \"{0}\"", dateText);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.PostCreationDate, dateText);
    }

    // Content block: release name, images, blockquote text, then the upload-infos table.
    XXElement content = post.XPathElement(".//div[@class='blockcontent']");
    List<string> descriptionLines = new List<string>();
    descriptionLines.AddRange(content.XPathValues(".//p[@class='release-name']//text()"));

    data.Images = content.XPathElement(".//table//td[@class='image-block']")
        .DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(img => new WebImage(zurl.GetUrl(data.SourceUrl, img.zAttribValue("src"))))
        .ToArray();
    // Force image loading to get image width and height.
    if (webResult.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    descriptionLines.AddRange(content.XPathValues(".//table//td//blockquote//text()"));
    XXElement uploadInfos = post.XPathElement(".//div[@class='upload-infos clearfix']");
    descriptionLines.AddRange(uploadInfos.XPathValues(".//table//text()"));
    data.Description = descriptionLines.ToArray();

    // Download links. The filter walks the siblings following the "prez_2" div:
    //   <h2>     : its text is remembered as the title of the next link group (not selected)
    //   <script> : XNodeFilter.Stop
    //   <div>    : XNodeFilter.SelectNode, handled as a link group by the loop below
    // The filter writes pendingTitle and the loop reads then resets it, so the result depends
    // on how zFilterElements interleaves the filter with the enumeration; keep this order.
    string pendingTitle = null;
    Func<XXElement, XNodeFilter> siblingFilter = element =>
    {
        XName name = element.XElement.Name;
        if (name == "h2")
        {
            pendingTitle = element.XPathValue(".//text()");
            return XNodeFilter.DontSelectNode;
        }
        if (name == "script")
        {
            return XNodeFilter.Stop;
        }
        return name == "div" ? XNodeFilter.SelectNode : XNodeFilter.DontSelectNode;
    };

    foreach (XXElement linkGroup in post.XPathElements(".//div[@class='prez_2']//following-sibling::*").zFilterElements(siblingFilter))
    {
        data.DownloadLinks_new.AddItem(pendingTitle);
        pendingTitle = null;

        foreach (XXElement anchor in linkGroup.XPathElements(".//a"))
        {
            // The server name is in <strong class="hebergeur">.
            string server = anchor.XPathValue(".//strong[@class='hebergeur']//text()");
            string link = anchor.XPathValue("@href");
            bool unprotect = __getLinksExtremeProtect && __extremeProtect.IsLinkProtected(link);
            if (!unprotect)
            {
                data.DownloadLinks_new.AddServer(server);
                data.DownloadLinks_new.AddLink(link);
            }
            else
            {
                data.DownloadLinks_new.AddServer(server, link);
                data.DownloadLinks_new.AddLinks(__extremeProtect.UnprotectLink(link));
            }
        }
    }

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
// Fills a Vosbooks_PostDetail_v6 from a loaded post page: creation date, title, category,
// print type, image, description/infos and download links.
//
// Expected page structure (abridged):
//   <table id="layout"> ... <div id="content"> ...
//     <div class="post" id="post-74299">
//       <table id="post-head">
//         <td id="head-date"><div class="date"><span>jan</span> 29</div></td>
//         <td><div class="title">
//           <h2><a href="http://www.vosbooks.net/74299-livre/..." rel="bookmark">Les imposteurs – François Cavanna</a></h2>
//           <div class="postdata"><span class="category">
//             <a href=".../category/livre/ebooks-epub" rel="category tag">Ebooks en Epub</a>,
//             <a href=".../category/livre" rel="category tag">Livre</a>
//           </span></div>
//         </div></td>
//       </table>
//       <div class="entry">
//         ...
//         <p style="text-align: center;"><img class="alignnone" src="http://imageshack.com/..." /></p>
//
// xeSource : root of the loaded document.
// data     : post detail to fill; data.SourceUrl is read to resolve the image url.
// Side effect: _lastPostDate is updated when a creation date is found.
protected void _GetDetailData(XXElement xeSource, Vosbooks_PostDetail_v6 data)
{
    XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

    // Creation date, built from the texts of the "head-date" cell and the last known post date.
    XXElement xe = xePost.XPathElement(".//table[@id='post-head']");
    string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
    data.PostCreationDate = GetDate(dates, _lastPostDate);
    if (data.PostCreationDate != null)
    {
        _lastPostDate = new Date(data.PostCreationDate.Value);
    }

    // Title; when infos are found in it, the raw title is kept in OriginalTitle.
    data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // Category, e.g. "Ebooks en Epub/Livre".
    data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    xe = xePost.XPathElement(".//div[@class='entry']");

    // Image: data.Images is only assigned when the page has one.
    string urlImage = xe.XPathValue("h3/following-sibling::p/img/@src");
    if (urlImage != null)
    {
        data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, urlImage)) };
    }

    // Description and infos from the <p> texts. The filter returns XNodeFilter.Stop on a text
    // starting with "lien " (case-insensitive) or on <p class="submeta">.
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xe.XPathElements(".//p").DescendantTexts(
            node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                    {
                        return XNodeFilter.Stop;
                    }
                }
                if (node is XElement)
                {
                    XElement xe2 = (XElement)node;
                    if (xe2.Name == "p" && xe2.zAttribValue("class") == "submeta")
                    {
                        return XNodeFilter.Stop;
                    }
                }
                return XNodeFilter.SelectNode;
            }
            ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // Download links: every <a> of the entry up to <p class="submeta">.
    // XElement.Attribute returns null for an <a> without href (e.g. a named anchor); such
    // anchors are skipped instead of throwing a NullReferenceException.
    data.DownloadLinks = xe.DescendantNodes(
        node =>
        {
            if (!(node is XElement))
            {
                return XNodeFilter.DontSelectNode;
            }
            XElement xe2 = (XElement)node;
            if (xe2.Name == "a")
            {
                return XNodeFilter.SelectNode;
            }
            if (xe2.Name != "p")
            {
                return XNodeFilter.DontSelectNode;
            }
            XAttribute xa = xe2.Attribute("class");
            if (xa == null)
            {
                return XNodeFilter.DontSelectNode;
            }
            if (xa.Value != "submeta")
            {
                return XNodeFilter.DontSelectNode;
            }
            return XNodeFilter.Stop;
        })
        .Select(node => ((XElement)node).Attribute("href"))
        .Where(href => href != null)
        .Select(href => href.Value)
        .Where(zurl.CheckUrl)
        .ToArray();
}
// Builds an Ebookdz_PostDetail from a loaded forum thread page: title, category (breadcrumb),
// creation date, author, images, text values and download links.
protected override IPost GetData(LoadDataFromWeb_v4 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);

    Ebookdz_PostDetail data = new Ebookdz_PostDetail();
    data.SourceUrl = loadDataFromWeb.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;
    data.Id = GetPostDetailKey(loadDataFromWeb.WebRequest.HttpRequest);

    // <div class="body_bd">
    XXElement xePost = xeSource.XPathElement("//div[@class='body_bd']");

    // Title, e.g. "Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015".
    data.Title = xePost.XPathValue(".//div[@id='pagetitle']//a//text()").Trim(DownloadPrint.TrimChars);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // Category from the breadcrumb, e.g.
    // "Forum / Journaux / Presse quotidienne / Le Monde / Journal Le Monde + Magazine + ...".
    // The "forum" crumb and the crumb ending with the title are dropped.
    string lowerTitle = null;
    if (data.Title != null)
    {
        lowerTitle = data.Title.ToLowerInvariant();
    }
    // Only apply the title exclusion when there is a title: string.EndsWith(null) throws
    // ArgumentNullException, and EndsWith("") is true for every crumb (empty category).
    bool hasTitle = !string.IsNullOrEmpty(lowerTitle);
    data.Category = xePost.XPathElements(".//div[@id='breadcrumb']//a").DescendantTexts()
        .Where(text =>
        {
            text = text.ToLowerInvariant();
            return text != "forum" && !(hasTitle && text.EndsWith(lowerTitle));
        })
        .Select(DownloadPrint.Trim)
        .zToStringValues("/");
    string category = data.Category.ToLowerInvariant();
    data.PrintType = GetPrintType(category);

    // <div id="postlist" class="postlist restrain">
    XXElement xe = xePost.XPathElement(".//div[@id='postlist']");

    // Creation date; sample texts from the site:
    // "Aujourd'hui, 07h32" - "Aujourd'hui, 10h51" - "Hier, 12h55" - "22/02/2014, 21h09".
    // Nodes named "a" get XNodeFilter.SkipNode; '\xA0' (no-break space) becomes a plain space.
    XXElement xe2 = xe.XPathElement(".//div[@class='posthead']");
    string date = xe2.DescendantTexts(node => node.zGetName() != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode).zToStringValues("");
    date = date.Replace('\xA0', ' ');
    data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.LoadFromWebDate, @"d/M/yyyy, HH\hmm", @"d-M-yyyy, HH\hmm");
    if (data.PostCreationDate == null)
    {
        pb.Trace.WriteLine("unknow post creation date \"{0}\"", date);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - \"{1}\"", data.PostCreationDate, date);
    }

    data.PostAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()").Trim(DownloadPrint.TrimChars);

    // <div class="postbody">
    xe = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote/div");

    data.Images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src")))).ToArray();
    // Force image loading to get image width and height.
    if (loadDataFromWeb.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    // Infos, description, language, size, nbPages from the post texts; <a> elements get
    // XNodeFilter.SkipNode, every other node XNodeFilter.SelectNode.
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.Title);
    data.Description = textValues.description;
    data.Language = textValues.language;
    data.Size = textValues.size;
    data.NbPages = textValues.nbPages;
    data.Infos.SetValues(textValues.infos);

    data.DownloadLinks = xe.XPathValues(".//a/@href").ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
// detail get data
// Builds a MagazinesGratuits_PostDetail from a loaded post page: creation date, title,
// category, print type, image, description/infos and download links.
// Side effect: _lastPostDate is updated when a creation date is found.
protected override MagazinesGratuits_PostDetail GetDetailData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    MagazinesGratuits_PostDetail data = new MagazinesGratuits_PostDetail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Key = _GetDetailKey(webResult.WebRequest.HttpRequest);

    XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

    // Creation date, built from the texts of the "head-date" cell and the last known post date.
    XXElement xe = xePost.XPathElement(".//table[@id='post-head']");
    string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
    data.PostCreationDate = GetDate(dates, _lastPostDate);
    if (data.PostCreationDate != null)
    {
        _lastPostDate = new Date(data.PostCreationDate.Value);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());
    }

    // Title; when infos are found in it, the raw title is kept in OriginalTitle.
    data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // Category, e.g. "Ebooks en Epub/Livre".
    data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    xe = xePost.XPathElement(".//div[@class='entry']");
    data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src"))) };
    // Force image loading to get image width and height.
    if (webResult.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    // Description and infos from the <p> texts. The filter returns XNodeFilter.Stop on a text
    // starting with "lien " (case-insensitive) or on <p class="submeta">.
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xe.XPathElements(".//p").DescendantTexts(
            node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                    {
                        return XNodeFilter.Stop;
                    }
                }
                if (node is XElement)
                {
                    XElement xe2 = (XElement)node;
                    if (xe2.Name == "p" && xe2.zAttribValue("class") == "submeta")
                    {
                        return XNodeFilter.Stop;
                    }
                }
                return XNodeFilter.SelectNode;
            }
            ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // Download links: every <a> of the entry up to <p class="submeta">.
    // XElement.Attribute returns null for an <a> without href (e.g. a named anchor); such
    // anchors are skipped instead of throwing a NullReferenceException.
    data.DownloadLinks = xe.DescendantNodes(
        node =>
        {
            if (!(node is XElement))
            {
                return XNodeFilter.DontSelectNode;
            }
            XElement xe2 = (XElement)node;
            if (xe2.Name == "a")
            {
                return XNodeFilter.SelectNode;
            }
            if (xe2.Name != "p")
            {
                return XNodeFilter.DontSelectNode;
            }
            XAttribute xa = xe2.Attribute("class");
            if (xa == null)
            {
                return XNodeFilter.DontSelectNode;
            }
            if (xa.Value != "submeta")
            {
                return XNodeFilter.DontSelectNode;
            }
            return XNodeFilter.Stop;
        })
        .Select(node => ((XElement)node).Attribute("href"))
        .Where(href => href != null)
        .Select(href => href.Value)
        .ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
// detail get data
// Builds a TelechargerMagazine_PostDetail from a loaded post page: category, print type,
// title, image, description/infos and download links.
//
// Expected page structure (abridged):
//   <div id='dle-content'>
//     ...
//     <div class="right-full">
//       <div class="cat_name">
//         Posted in: <a href="http://www.telecharger-magazine.com/journaux/">Journaux</a>
//       </div>
//       <h2 class="title">
//         <img src="/templates/MStarter/images/title.png" alt="" class="img" />
//         Journaux Français Du 17 Juillet 2015
//       </h2>
//       <div class="contenttext">
//
// No post creation date is read: the only date on the page is the current day, in
//   <div id="calendar-layer"> <table id="calendar"> ...
//     <td class="day-active-v day-current"><a href="http://www.telecharger-magazine.com/2015/07/17/">17</a></td>
protected override TelechargerMagazine_PostDetail GetDetailData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    TelechargerMagazine_PostDetail data = new TelechargerMagazine_PostDetail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetDetailKey(webResult.WebRequest.HttpRequest);

    XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']//div[@class='right-full']");

    // Category, e.g. "Journaux".
    data.Category = xePost.XPathValues(".//div[@class='cat_name']//a/text()").Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    // Title; when infos are found in it, the raw title is kept in OriginalTitle.
    data.Title = xePost.XPathValue(".//h2[@class='title']//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    XXElement xeContent = xePost.XPathElement(".//div[@class='contenttext']");

    data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xeContent.XPathValue(".//img/@src"))) };

    // Description and infos from the content texts. The filter returns
    // XNodeFilter.DontSelectNode for the text "description" (case-insensitive) and
    // XNodeFilter.Stop on the first <a> element.
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xeContent.DescendantTexts(
            node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    if (text.ToLowerInvariant() == "description")
                    {
                        return XNodeFilter.DontSelectNode;
                    }
                }
                if (node is XElement)
                {
                    XElement xe = (XElement)node;
                    if (xe.Name == "a")
                    {
                        return XNodeFilter.Stop;
                    }
                }
                return XNodeFilter.SelectNode;
            }
            ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title, extractValuesFromText: false);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // Download links: the href of every <a> of the content.
    // A former DescendantNodes-based extraction was assigned to DownloadLinks just before this
    // line and immediately overwritten; it was removed because its result was never used and
    // it threw a NullReferenceException on any <a> without href.
    data.DownloadLinks = xeContent.XPathValues(".//a/@href").ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
// Builds a RapideDdl_PostDetail from a loaded post page: category, print type, title,
// creation date, author, images, text values and download links.
protected override RapideDdl_PostDetail GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);

    RapideDdl_PostDetail data = new RapideDdl_PostDetail();
    data.sourceUrl = loadDataFromWeb.request.Url;
    data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.id = GetPostDetailKey(data.sourceUrl);

    XXElement post = root.XPathElement("//div[@class='lcolomn mainside']");

    // Category: link texts of the "spbar" bar, without empty texts and without the home link
    // (matched on "acceuil" or "accueil", case-insensitive).
    data.category = post.XPathElements(".//div[@class='spbar']//a")
        .DescendantTexts()
        .Select(DownloadPrint.Trim)
        .Where(s =>
        {
            string lower = s.ToLowerInvariant();
            return !(lower == "" || lower.Contains("acceuil") || lower.Contains("accueil"));
        })
        .zToStringValues("/");
    data.printType = GetPostType(data.category.ToLowerInvariant());

    // Title: last text of the "spbar" bar; when infos are found in it, the raw title is kept
    // in originalTitle.
    data.title = post.XPathElements(".//div[@class='spbar']")
        .DescendantTexts()
        .Select(DownloadPrint.Trim)
        .LastOrDefault();
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.title);
    if (titleInfos.foundInfo)
    {
        data.originalTitle = data.title;
        data.title = titleInfos.title;
        data.infos.SetValues(titleInfos.infos);
    }

    // Creation date and author.
    XXElement infoBar = post.XPathElement(".//div[@class='shdinfo']");
    string dateText = infoBar.XPathValue(".//span[@class='date']//text()");
    data.creationDate = zdate.ParseDateTimeLikeToday(dateText, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
    if (data.creationDate == null)
    {
        pb.Trace.WriteLine("unknow date time \"{0}\"", dateText);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.creationDate, dateText);
    }
    data.postAuthor = infoBar.XPathValue(".//span[@class='arg']//a//text()");

    // Main content: images, optionally loaded.
    XXElement body = post.XPathElement(".//div[@class='maincont']");
    data.images = body.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(img => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, img.zAttribValue("src"))))
        .ToArray();
    if (loadDataFromWeb.request.LoadImage)
    {
        data.images = DownloadPrint.LoadImages(data.images).ToArray();
    }

    // Text values; <a> elements get XNodeFilter.SkipNode, every other node XNodeFilter.SelectNode.
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        body.DescendantTexts(node => node is XElement && ((XElement)node).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode),
        data.title);
    data.description = textValues.description;
    data.language = textValues.language;
    data.size = textValues.size;
    data.nbPages = textValues.nbPages;
    data.infos.SetValues(textValues.infos);

    // Download links: hrefs found under "div/div", minus image hosts and image files. Examples
    // of rejected urls: http://prezup.eu http://pixhst.com/avaxhome/27/36/002e3627.jpeg
    // http://www.zupmage.eu/i/R1UgqdXn4F.jpg http://i.imgur.com/Gu7hagN.jpg
    // http://img11.hostingpics.net/pics/591623liens.png http://www.hapshack.com/images/jUfTZ.gif
    // http://pixhst.com/pictures/3029467
    data.downloadLinks = body.XPathElements("div/div")
        .SelectMany(div => div.XPathValues(".//a/@href"))
        .Where(href => !(href.StartsWith("http://prezup.eu")
            || href.StartsWith("http://pixhst.com")
            || href.EndsWith(".jpg")
            || href.EndsWith("jpeg")
            || href.EndsWith("png")
            || href.EndsWith("gif")))
        .ToArray();

    return data;
}
/// <summary>
/// Detail get data: builds a <see cref="Test_PostDetail"/> from the post page held by <paramref name="webResult"/>.
/// Reads the creation date from the "post-head" table, the title and category from the post block,
/// and the image, text values and download links from the "entry" block.
/// Also updates _lastPostDate when a creation date is found.
/// </summary>
/// <param name="webResult">Loaded page: supplies the xml document, the request and the load date.</param>
/// <returns>The populated post detail.</returns>
protected override Test_PostDetail GetDetailData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    Test_PostDetail data = new Test_PostDetail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = _GetDetailKey(webResult.WebRequest.HttpRequest);

    XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

    // creation date: texts of the "head-date" cell, _lastPostDate is handed to GetDate() and refreshed when a date is found
    XXElement xe = xePost.XPathElement(".//table[@id='post-head']");
    string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
    data.PostCreationDate = GetDate(dates, _lastPostDate);
    if (data.PostCreationDate != null)
    {
        _lastPostDate = new Date(data.PostCreationDate.Value);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());
    }

    data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        // keep the raw title, store the cleaned title and the infos extracted from it
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    xe = xePost.XPathElement(".//div[@class='entry']");

    // single image: <img> of the <p> that follows the <h3> that follows the "post-views" div
    // note: the image is not loaded here, so this method does not fetch its width and height
    data.Images = new WebImage[]
    {
        new WebImage(zurl.GetUrl(data.SourceUrl, xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src")))
    };

    // get infos, description, language, size, nbPages
    // the filter returns XNodeFilter.Stop on the first text starting with "lien " and on <p class="submeta">
    // NOTE(review): XNodeFilter.Stop presumably ends the traversal - confirm against XNodeFilter
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xe.XPathElements(".//p").DescendantTexts(
            node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                    {
                        return XNodeFilter.Stop;
                    }
                }
                if (node is XElement)
                {
                    XElement element = (XElement)node;
                    if (element.Name == "p" && element.zAttribValue("class") == "submeta")
                    {
                        return XNodeFilter.Stop;
                    }
                }
                return XNodeFilter.SelectNode;
            }
        ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon),
        data.Title);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // download links: href of every <a> selected by the filter, the filter returns XNodeFilter.Stop on <p class="submeta">
    data.DownloadLinks = xe.DescendantNodes(
        node =>
        {
            if (!(node is XElement))
            {
                return XNodeFilter.DontSelectNode;
            }
            XElement element = (XElement)node;
            if (element.Name == "a")
            {
                return XNodeFilter.SelectNode;
            }
            if (element.Name != "p")
            {
                return XNodeFilter.DontSelectNode;
            }
            XAttribute classAttribute = element.Attribute("class");
            if (classAttribute == null || classAttribute.Value != "submeta")
            {
                return XNodeFilter.DontSelectNode;
            }
            return XNodeFilter.Stop;
        })
        // XElement.Attribute() returns null when the attribute is missing : skip <a> without href
        // (for example a named anchor) instead of failing with a NullReferenceException
        .Select(node => ((XElement)node).Attribute("href"))
        .Where(href => href != null)
        .Select(href => href.Value)
        .ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
/// <summary>
/// Builds a <see cref="Telechargementz_PostDetail"/> from the post page held by <paramref name="loadDataFromWeb"/>.
/// Reads the author, date and title from the "dle-content" block, and the first image, the download links
/// and the text values from the "news-id-..." block. The print type is deduced from the extracted infos.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page: supplies the xml document, the request (Url, LoadImage) and the load date.</param>
/// <returns>The populated post detail.</returns>
protected override IPost GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);

    Telechargementz_PostDetail data = new Telechargementz_PostDetail();
    data.SourceUrl = loadDataFromWeb.request.Url;
    data.LoadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.Id = GetPostDetailKey(data.SourceUrl);

    XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");

    data.PostAuthor = xePost.XPathValue(".//div[@class='title-info']//a//text()");

    // the date is the text that follows the author link, example : ", 26.12.14"
    string date = xePost.XPathValue(".//div[@class='title-info']//a/following-sibling::text()");
    if (date != null)
    {
        data.PostCreationDate = zdate.ParseDateTimeLikeToday(date.Trim(' ', ','), loadDataFromWeb.loadFromWebDate, "dd.MM.yy");
        if (data.PostCreationDate == null)
        {
            pb.Trace.WriteLine("unknow date time \"{0}\"", date);
        }
        if (__trace)
        {
            pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.PostCreationDate, date);
        }
    }
    else
    {
        pb.Trace.WriteLine("creationDate not found \"{0}\"", data.SourceUrl);
    }

    // title: first text of the "post-title" block
    data.Title = xePost.XPathElement(".//div[@class='post-title']").DescendantTexts().Select(DownloadPrint.Trim).FirstOrDefault();
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        // keep the raw title, store the cleaned title and the infos extracted from it
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.infos.SetValues(titleInfos.infos);
    }

    XXElement xe = xePost.XPathElement(".//div[starts-with(@id, 'news-id-')]");
    if (xe.XElement == null)
    {
        pb.Trace.WriteLine("element not found \".//div[starts-with(@id, 'news-id-')]\"");
    }

    // only the first image of the post is kept
    WebImage image = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src"))))
        .FirstOrDefault();
    if (image != null)
    {
        data.Images = new WebImage[] { image };
    }

    // force load image to get image width and height
    // data.Images is not assigned above when the post has no image : dont hand a null to LoadImages()
    // NOTE(review): assumes Telechargementz_PostDetail.Images can still be null here - confirm its default value
    if (loadDataFromWeb.request.LoadImage && data.Images != null)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    data.DownloadLinks = xe.XPathValues(".//a/@href").ToArray();

    // get infos, description, language, size, nbPages : <a> elements get XNodeFilter.SkipNode, every other node XNodeFilter.SelectNode
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.Title);
    data.description = textValues.description;
    data.language = textValues.language;
    data.size = textValues.size;
    data.nbPages = textValues.nbPages;
    data.infos.SetValues(textValues.infos);

    // print type from the infos : "Bd" / "bd" / "BD" key, then "editeur" equal to "presse fr", then "isbn" key
    data.PrintType = PrintType.UnknowEBook;
    if (data.infos.ContainsKey("Bd") || data.infos.ContainsKey("bd") || data.infos.ContainsKey("BD"))
    {
        data.PrintType = PrintType.Comics;
    }
    // Editeur : Presse fr
    else if (data.infos.ContainsKey("editeur") && data.infos["editeur"] is ZString && ((string)data.infos["editeur"]).ToLowerInvariant() == "presse fr")
    {
        data.PrintType = PrintType.Print;
    }
    else if (data.infos.ContainsKey("isbn"))
    {
        data.PrintType = PrintType.Book;
    }

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
/// <summary>
/// Builds a <see cref="GoldenDdl_PostDetail"/> from the post page held by <paramref name="loadDataFromWeb"/>.
/// Reads the category, title, date and author from the "dle-content" block, and the images, text values
/// and download links from the "maincont" block.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page: supplies the xml document, the request (Url, LoadImage) and the load date.</param>
/// <returns>The populated post detail.</returns>
protected override IPost GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);

    GoldenDdl_PostDetail data = new GoldenDdl_PostDetail();
    data.sourceUrl = loadDataFromWeb.request.Url;
    data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.id = GetPostDetailKey(data.sourceUrl);

    XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");

    // category: texts of the "hdiin" links joined with "/"
    data.category = xePost.XPathElements(".//div[@class='hdiin']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
    string category = data.category.ToLowerInvariant();
    data.printType = GetPrintType(category);

    // title: the xpath value is null-checked before Trim() so a page without "bheading" text
    // does not fail with a NullReferenceException
    // NOTE(review): assumes XPathValue() returns null when nothing matches - confirm against XXElement
    string title = xePost.XPathValue(".//div[@class='bheading']//text()");
    if (title != null)
    {
        data.title = title.Trim(DownloadPrint.TrimChars);
    }
    else
    {
        data.title = null;
        pb.Trace.WriteLine("title not found \"{0}\"", data.sourceUrl);
    }
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.title);
    if (titleInfos.foundInfo)
    {
        // keep the raw title, store the cleaned title and the infos extracted from it
        data.originalTitle = data.title;
        data.title = titleInfos.title;
        data.infos.SetValues(titleInfos.infos);
    }

    string date = xePost.XPathValue(".//div[@class='datenews']//text()");
    data.creationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
    if (data.creationDate == null)
    {
        pb.Trace.WriteLine("unknow date time \"{0}\"", date);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.creationDate, date);
    }

    data.postAuthor = xePost.XPathValue(".//div[@class='argr']//a//text()");

    // images, text values and download links come from the "maincont" block
    XXElement xe = xePost.XPathElement(".//div[@class='maincont']");

    data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src"))))
        .ToArray();

    // force load image to get image width and height
    if (loadDataFromWeb.request.LoadImage)
    {
        data.images = DownloadPrint.LoadImages(data.images).ToArray();
    }

    // get infos, description, language, size, nbPages : <a> elements get XNodeFilter.SkipNode, every other node XNodeFilter.SelectNode
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.title);
    data.description = textValues.description;
    data.language = textValues.language;
    data.size = textValues.size;
    data.nbPages = textValues.nbPages;
    data.infos.SetValues(textValues.infos);

    // download links: hrefs found under "div/div", without the image links, examples of rejected links :
    //   http://prezup.eu  http://pixhst.com/avaxhome/27/36/002e3627.jpeg  http://www.zupmage.eu/i/R1UgqdXn4F.jpg
    //   http://i.imgur.com/Gu7hagN.jpg  http://img11.hostingpics.net/pics/591623liens.png  http://www.hapshack.com/images/jUfTZ.gif
    //   http://pixhst.com/pictures/3029467
    List<string> downloadLinks = new List<string>();
    foreach (XXElement xe2 in xe.XPathElements("div/div"))
    {
        downloadLinks.AddRange(xe2.XPathValues(".//a/@href").Where(
            url => !url.StartsWith("http://prezup.eu")
                && !url.StartsWith("http://pixhst.com")
                && !url.EndsWith(".jpg")
                && !url.EndsWith("jpeg")
                && !url.EndsWith("png")
                && !url.EndsWith("gif")));
    }
    data.downloadLinks = downloadLinks.ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}