/// <summary>
/// Builds a <see cref="RapideDdl_PostDetail"/> from the post page carried by <paramref name="request"/>.
/// </summary>
/// <remarks>
/// Everything is read from the <c>div[@class='lcolomn mainside']</c> element: the category links and the
/// title (both from the <c>spbar</c> div), the author (<c>shdinfo</c> div) and the images, text values and
/// download links (<c>maincont</c> div). The creation date is not parsed by this method.
/// Images are always passed to <c>pb.old.Http_v2.LoadImageFromWeb</c>, whatever the request settings are.
/// </remarks>
/// <param name="request">Web request providing the XML document and the source url.</param>
/// <returns>The populated post detail.</returns>
public static RapideDdl_PostDetail LoadPostDetailFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    XXElement xeSource = new XXElement(request.GetXmlDocument().Root);
    RapideDdl_PostDetail data = new RapideDdl_PostDetail();
    data.sourceUrl = request.Url;
    data.loadFromWebDate = DateTime.Now;

    XXElement xePost = xeSource.XPathElement("//div[@class='lcolomn mainside']");

    // category : texts of the links of the spbar div, without "Accueil" and empty texts
    data.category = xePost.XPathElements(".//div[@class='spbar']//a")
        .DescendantTexts()
        .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
        .Where(s => s != "Accueil" && s != "")
        .zToStringValues("/");

    // title : last trimmed text found in the spbar div
    data.title = xePost.XPathElements(".//div[@class='spbar']")
        .DescendantTexts()
        .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
        .LastOrDefault();

    XXElement xe = xePost.XPathElement(".//div[@class='shdinfo']");
    data.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

    xe = xePost.XPathElement(".//div[@class='maincont']");
    data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, request.Url))
        .ToList();

    // force load image to get image width and height (not conditioned by request.LoadImage)
    pb.old.Http_v2.LoadImageFromWeb(data.images);

    data.SetTextValues(xe.XPathElements(".//div").DescendantTexts());

    foreach (XXElement xe2 in xe.XPathElements("div/div"))
    {
        // links to skip, examples :
        //   http://prezup.eu  http://pixhst.com/avaxhome/27/36/002e3627.jpeg  http://www.zupmage.eu/i/R1UgqdXn4F.jpg
        //   http://i.imgur.com/Gu7hagN.jpg  http://img11.hostingpics.net/pics/591623liens.png  http://www.hapshack.com/images/jUfTZ.gif
        //   http://pixhst.com/pictures/3029467
        // Urls are not linguistic text : compare them ordinally instead of with the current culture.
        data.downloadLinks.AddRange(
            xe2.XPathValues(".//a/@href").Where(url =>
                !url.StartsWith("http://prezup.eu", StringComparison.Ordinal)
                && !url.StartsWith("http://pixhst.com", StringComparison.Ordinal)
                && !url.EndsWith(".jpg", StringComparison.Ordinal)
                && !url.EndsWith("jpeg", StringComparison.Ordinal)
                && !url.EndsWith("png", StringComparison.Ordinal)
                && !url.EndsWith("gif", StringComparison.Ordinal)));
    }

    return data;
}
/// <summary>
/// Builds a <see cref="RapideDdl_HeaderPage"/> from the list page carried by <paramref name="request"/>.
/// </summary>
/// <remarks>
/// One <see cref="RapideDdl_PostHeader"/> is created for each <c>div[@class='base shortstory']</c> element.
/// The header title and creation date are not set by this method.
/// </remarks>
/// <param name="request">Web request providing the XML document and the source url.</param>
/// <returns>The page with its next page url and its post headers.</returns>
public static RapideDdl_HeaderPage LoadHeaderPageFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    XXElement xeSource = new XXElement(request.GetXmlDocument().Root);
    string url = request.Url;
    RapideDdl_HeaderPage data = new RapideDdl_HeaderPage();

    data.urlNextPage = zurl.GetUrl(url, xeSource.XPathValue("//div[@class='basenavi']//span[@class='nnext']//a/@href"));

    IEnumerable<XXElement> xeHeaders = xeSource.XPathElements("//div[@class='base shortstory']");
    List<RapideDdl_PostHeader> headers = new List<RapideDdl_PostHeader>();
    foreach (XXElement xeHeader in xeHeaders)
    {
        RapideDdl_PostHeader header = new RapideDdl_PostHeader();
        header.sourceUrl = url;
        header.loadFromWebDate = DateTime.Now;

        // link to the post detail
        XXElement xe = xeHeader.XPathElement(".//*[@class='shd']//a");
        header.urlDetail = zurl.GetUrl(url, xe.XPathValue("@href"));

        xe = xeHeader.XPathElement(".//div[@class='shdinf']");
        header.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

        xe = xeHeader.XPathElement(".//div[@class='maincont']");
        header.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
            .Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, url))
            .ToList();
        header.SetTextValues(xe.DescendantTexts());

        // category : texts of the links, without the one starting with "Commentaires".
        // "Commentaires" is a fixed marker : compare it ordinally instead of with the current culture.
        xe = xeHeader.XPathElement(".//div[@class='morelink']//span[@class='arg']");
        header.category = xe.XPathElements(".//a")
            .DescendantTexts()
            .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
            .Where(s => !s.StartsWith("Commentaires", StringComparison.Ordinal))
            .zToStringValues("/");

        headers.Add(header);
    }

    data.postHeaders = headers.ToArray();
    return data;
}
/// <summary>
/// Disabled loader : always throws a <c>PBException</c> reporting a mismatch between
/// free-telechargement.org and golden-ddl.net.
/// </summary>
/// <remarks>
/// The parsing code placed after the throw is never executed; it is kept in the method with the
/// unreachable code warning (162) turned off.
/// </remarks>
/// <param name="loadDataFromWeb">Loaded page (only used by the unreachable code).</param>
/// <returns>Never returns normally.</returns>
protected override FreeTelechargement_HeaderPage GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    throw new PBException("attention mismatch between free-telechargement.org and golden-ddl.net");

#pragma warning disable 162
    XXElement root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string pageUrl = loadDataFromWeb.request.Url;
    FreeTelechargement_HeaderPage page = new FreeTelechargement_HeaderPage();

    // next page link is searched in <div class="pagination">
    string nextPageHref = root.XPathValue("//div[@class='pagination']//a[starts-with(text(), 'suiv ')]/@href");
    page.urlNextPage = zurl.GetUrl(pageUrl, nextPageHref);

    List<FreeTelechargement_PostHeader> postHeaders = new List<FreeTelechargement_PostHeader>();
    foreach (XXElement xeLink in root.XPathElements("//div[@id='content']//table//a"))
    {
        // one header for each link of the content table
        FreeTelechargement_PostHeader postHeader = new FreeTelechargement_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = loadDataFromWeb.loadFromWebDate,
            urlDetail = xeLink.XPathValue("@href"),
            images = xeLink.DescendantNodes(node => XmlDescendant.ImageFilter(node))
                .Select(xeImage => new WebImage(zurl.GetUrl(pageUrl, xeImage.zAttribValue("src"))))
                .ToList()
        };
        postHeaders.Add(postHeader);
    }

    page.postHeaders = postHeaders.ToArray();
    return page;
#pragma warning restore 162
}
/// <summary>
/// Builds a <see cref="RapideDdl_HeaderPage"/> from the list page held by <paramref name="loadDataFromWeb"/>.
/// </summary>
/// <remarks>
/// One <see cref="RapideDdl_PostHeader"/> is created for each <c>div[@class='base shortstory']</c> element,
/// with its detail url, title, author, creation date, images, text values and category.
/// A header without title text keeps a null title instead of stopping the whole page with a
/// <c>NullReferenceException</c>.
/// </remarks>
/// <param name="loadDataFromWeb">Loaded page : XML document, request url and load date.</param>
/// <returns>The page with its id, next page url and post headers.</returns>
protected override RapideDdl_HeaderPage GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string url = loadDataFromWeb.request.Url;

    RapideDdl_HeaderPage data = new RapideDdl_HeaderPage();
    data.sourceUrl = url;
    data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.id = RapideDdl_LoadHeaderPagesManager.GetHeaderPageKey(url);
    data.urlNextPage = zurl.GetUrl(url, xeSource.XPathValue("//div[@class='basenavi']//span[@class='nnext']//a/@href"));

    IEnumerable<XXElement> xeHeaders = xeSource.XPathElements("//div[@class='base shortstory']");
    List<RapideDdl_PostHeader> headers = new List<RapideDdl_PostHeader>();
    foreach (XXElement xeHeader in xeHeaders)
    {
        RapideDdl_PostHeader header = new RapideDdl_PostHeader();
        header.sourceUrl = url;
        header.loadFromWebDate = loadDataFromWeb.loadFromWebDate;

        XXElement xe = xeHeader.XPathElement(".//*[@class='shd']//a");
        header.urlDetail = zurl.GetUrl(url, xe.XPathValue("@href"));

        // NOTE(review): XPathValue presumably returns null when nothing matches; trim only a real value.
        string rawTitle = xe.XPathValue(".//text()");
        header.title = rawTitle != null ? rawTitle.Trim(DownloadPrint.TrimChars) : null;
        if (header.title != null)
        {
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(header.title);
            if (titleInfos.foundInfo)
            {
                header.title = titleInfos.title;
                header.infos.SetValues(titleInfos.infos);
            }
        }

        xe = xeHeader.XPathElement(".//div[@class='shdinfo']");
        header.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

        // date example : Aujourd'hui, 17:13
        string date = xe.XPathValue(".//span[@class='date']//text()");
        header.creationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
        if (header.creationDate == null)
        {
            pb.Trace.WriteLine("unknow date time \"{0}\"", date);
        }
        if (__trace)
        {
            pb.Trace.WriteLine("creationDate {0} - \"{1}\"", header.creationDate, date);
        }

        xe = xeHeader.XPathElement(".//div[@class='maincont']");
        header.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
            .Select(xeImg => new WebImage(zurl.GetUrl(url, xeImg.zAttribValue("src"))))
            .ToArray();

        // get infos, description, language, size, nbPages from the texts, "a" elements are skipped
        PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
            xe.DescendantTexts(node => (!(node is XElement) || ((XElement)node).Name != "a") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
            header.title);
        header.description = textValues.description;
        header.language = textValues.language;
        header.size = textValues.size;
        header.nbPages = textValues.nbPages;
        header.infos.SetValues(textValues.infos);

        // category : texts of the links, without the one starting with "Commentaires".
        // "Commentaires" is a fixed marker : compare it ordinally instead of with the current culture.
        xe = xeHeader.XPathElement(".//div[@class='morelink']//span[@class='arg']");
        header.category = xe.XPathElements(".//a")
            .DescendantTexts()
            .Select(DownloadPrint.Trim)
            .Where(s => !s.StartsWith("Commentaires", System.StringComparison.Ordinal))
            .zToStringValues("/");

        headers.Add(header);
    }

    data.postHeaders = headers.ToArray();
    return data;
}
/// <summary>
/// Builds an <see cref="ExtremeDown_PostDetail_v2"/> from the post page held by <paramref name="webResult"/>.
/// </summary>
/// <remarks>
/// Reads the title, category, author, creation date, images, description and download links from the
/// <c>div[@id='dle-content']</c> element. A page without title text keeps a null title instead of
/// failing with a <c>NullReferenceException</c>.
/// </remarks>
/// <param name="webResult">Loaded page : XML document, request and load date.</param>
/// <returns>The populated post detail.</returns>
private static ExtremeDown_PostDetail_v2 GetData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    ExtremeDown_PostDetail_v2 data = new ExtremeDown_PostDetail_v2();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetPostDetailKey(webResult.WebRequest.HttpRequest);

    XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");

    // NOTE(review): XPathValue presumably returns null when nothing matches; trim only a real value.
    string rawTitle = xePost.XPathValue(".//h2[@class='blocktitle']//text()");
    data.Title = rawTitle != null ? rawTitle.Trim(DownloadPrint.TrimChars) : null;
    if (data.Title != null)
    {
        PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
        if (titleInfos.foundInfo)
        {
            data.OriginalTitle = data.Title;
            data.Title = titleInfos.title;
            data.Infos.SetValues(titleInfos.infos);
        }
    }

    // header block : category, author, creation date
    XXElement xeDiv = xePost.XPathElement(".//div[@class='blockheader']");
    data.Category = xeDiv.XPathValues(".//i[@class='icon-cats']/ancestor::span//a//text()").Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);
    data.PostAuthor = xeDiv.XPathValue(".//span/i[@class='icon-user']/ancestor::span//a//text()");
    string date = xeDiv.XPathValue(".//span/i[@class='icon-date']/ancestor::span//a//text()");
    data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, webResult.LoadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
    if (data.PostCreationDate == null)
    {
        pb.Trace.WriteLine("unknow date time \"{0}\"", date);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.PostCreationDate, date);
    }

    // content block : release name, images, blockquote texts
    xeDiv = xePost.XPathElement(".//div[@class='blockcontent']");
    List<string> description = new List<string>();
    description.AddRange(xeDiv.XPathValues(".//p[@class='release-name']//text()"));

    data.Images = xeDiv.XPathElement(".//table//td[@class='image-block']")
        .DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new WebImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src"))))
        .ToArray();

    // force load image to get image width and height
    if (webResult.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    description.AddRange(xeDiv.XPathValues(".//table//td//blockquote//text()"));

    xeDiv = xePost.XPathElement(".//div[@class='upload-infos clearfix']");
    description.AddRange(xeDiv.XPathValues(".//table//text()"));
    data.Description = description.ToArray();

    // Download links. The filter remembers the text of the last h2 seen in "title", selects the div
    // elements and stops on the first script element. "title" is read and reset in the loop below,
    // so the order of the statements matters.
    string title = null;
    Func<XXElement, XNodeFilter> filter = xeNode =>
    {
        if (xeNode.XElement.Name == "h2")
        {
            title = xeNode.XPathValue(".//text()");
        }
        else if (xeNode.XElement.Name == "script")
        {
            return XNodeFilter.Stop;
        }
        else if (xeNode.XElement.Name == "div")
        {
            return XNodeFilter.SelectNode;
        }
        return XNodeFilter.DontSelectNode;
    };

    foreach (XXElement xe in xePost.XPathElements(".//div[@class='prez_2']//following-sibling::*").zFilterElements(filter))
    {
        data.DownloadLinks_new.AddItem(title);
        title = null;

        foreach (XXElement xe2 in xe.XPathElements(".//a"))
        {
            // server name is in <strong class="hebergeur">
            string server = xe2.XPathValue(".//strong[@class='hebergeur']//text()");
            string link = xe2.XPathValue("@href");
            if (__getLinksExtremeProtect && __extremeProtect.IsLinkProtected(link))
            {
                data.DownloadLinks_new.AddServer(server, link);
                data.DownloadLinks_new.AddLinks(__extremeProtect.UnprotectLink(link));
            }
            else
            {
                data.DownloadLinks_new.AddServer(server);
                data.DownloadLinks_new.AddLink(link);
            }
        }
    }

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}
/// <summary>
/// Builds an <see cref="Ebookdz_PostDetail"/> from the post page held by <paramref name="loadDataFromWeb"/>.
/// </summary>
/// <remarks>
/// Reads the title, category, creation date, author, images, text values and download links from the
/// <c>div[@class='body_bd']</c> element. A missing title or author text is kept as null instead of
/// failing with a <c>NullReferenceException</c>, and the category filter no longer receives a null title.
/// </remarks>
/// <param name="loadDataFromWeb">Loaded page : XML document, request and load date.</param>
/// <returns>The populated post detail.</returns>
protected override IPost GetData(LoadDataFromWeb_v4 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);

    Ebookdz_PostDetail data = new Ebookdz_PostDetail();
    data.SourceUrl = loadDataFromWeb.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;
    data.Id = GetPostDetailKey(loadDataFromWeb.WebRequest.HttpRequest);

    // <div class="body_bd">
    XXElement xePost = xeSource.XPathElement("//div[@class='body_bd']");

    // title example : Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
    // NOTE(review): XPathValue presumably returns null when nothing matches; trim only a real value.
    string rawTitle = xePost.XPathValue(".//div[@id='pagetitle']//a//text()");
    data.Title = rawTitle != null ? rawTitle.Trim(DownloadPrint.TrimChars) : null;
    if (data.Title != null)
    {
        PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
        if (titleInfos.foundInfo)
        {
            data.OriginalTitle = data.Title;
            data.Title = titleInfos.title;
            data.Infos.SetValues(titleInfos.infos);
        }
    }

    // breadcrumb example : Forum / Journaux / Presse quotidienne / Le Monde / Journal Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
    // category : breadcrumb links without "forum" and without the link ending with the post title
    string lowerTitle = null;
    if (data.Title != null)
    {
        lowerTitle = data.Title.ToLowerInvariant();
    }
    data.Category = xePost.XPathElements(".//div[@id='breadcrumb']//a")
        .DescendantTexts()
        .Where(text =>
        {
            string lowerText = text.ToLowerInvariant();
            // without title there is nothing to compare with, EndsWith(null) would throw
            return lowerText != "forum"
                && (lowerTitle == null || !lowerText.EndsWith(lowerTitle, System.StringComparison.Ordinal));
        })
        .Select(DownloadPrint.Trim)
        .zToStringValues("/");
    string category = data.Category.ToLowerInvariant();
    data.PrintType = GetPrintType(category);

    // <div id="postlist" class="postlist restrain">
    XXElement xe = xePost.XPathElement(".//div[@id='postlist']");

    // date examples : Aujourd'hui, 07h32 - Aujourd'hui, 10h51 - Hier, 12h55 - 22/02/2014, 21h09
    // the date is made of the texts of the posthead div, "a" elements are skipped
    XXElement xe2 = xe.XPathElement(".//div[@class='posthead']");
    string date = xe2.DescendantTexts(node => node.zGetName() != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode).zToStringValues("");
    // replace no-break spaces before parsing
    date = date.Replace('\xA0', ' ');
    data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.LoadFromWebDate, @"d/M/yyyy, HH\hmm", @"d-M-yyyy, HH\hmm");
    if (data.PostCreationDate == null)
    {
        pb.Trace.WriteLine("unknow post creation date \"{0}\"", date);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - \"{1}\"", data.PostCreationDate, date);
    }

    string rawAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()");
    data.PostAuthor = rawAuthor != null ? rawAuthor.Trim(DownloadPrint.TrimChars) : null;

    // <div class="postbody">
    xe = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote/div");
    data.Images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new WebImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src"))))
        .ToArray();

    // force load image to get image width and height
    if (loadDataFromWeb.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    // get infos, description, language, size, nbPages from the texts, "a" elements are skipped
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xe.DescendantTexts(node => (!(node is XElement) || ((XElement)node).Name != "a") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.Title);
    data.Description = textValues.description;
    data.Language = textValues.language;
    data.Size = textValues.size;
    data.NbPages = textValues.nbPages;
    data.Infos.SetValues(textValues.infos);

    data.DownloadLinks = xe.XPathValues(".//a/@href").ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}
/// <summary>
/// Builds a <see cref="RapideDdl_PostDetail"/> from the post page held by <paramref name="loadDataFromWeb"/>.
/// </summary>
/// <remarks>
/// Everything is read from the <c>div[@class='lcolomn mainside']</c> element: category and title
/// (<c>spbar</c> div), creation date and author (<c>shdinfo</c> div), images, text values and download
/// links (<c>maincont</c> div). Images are loaded only when the request asks for it.
/// </remarks>
/// <param name="loadDataFromWeb">Loaded page : XML document, request and load date.</param>
/// <returns>The populated post detail.</returns>
protected override RapideDdl_PostDetail GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);

    RapideDdl_PostDetail data = new RapideDdl_PostDetail();
    data.sourceUrl = loadDataFromWeb.request.Url;
    data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.id = GetPostDetailKey(data.sourceUrl);

    XXElement xePost = xeSource.XPathElement("//div[@class='lcolomn mainside']");

    // category : texts of the links of the spbar div, without empty texts and without the home link
    // (both spellings "acceuil" and "accueil" are excluded)
    data.category = xePost.XPathElements(".//div[@class='spbar']//a")
        .DescendantTexts()
        .Select(DownloadPrint.Trim)
        .Where(s =>
        {
            s = s.ToLowerInvariant();
            return s != "" && !s.Contains("acceuil") && !s.Contains("accueil");
        })
        .zToStringValues("/");
    string category = data.category.ToLowerInvariant();
    data.printType = GetPostType(category);

    // title : last trimmed text found in the spbar div
    data.title = xePost.XPathElements(".//div[@class='spbar']").DescendantTexts().Select(DownloadPrint.Trim).LastOrDefault();
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.title);
    if (titleInfos.foundInfo)
    {
        data.originalTitle = data.title;
        data.title = titleInfos.title;
        data.infos.SetValues(titleInfos.infos);
    }

    XXElement xe = xePost.XPathElement(".//div[@class='shdinfo']");
    string date = xe.XPathValue(".//span[@class='date']//text()");
    data.creationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
    if (data.creationDate == null)
    {
        pb.Trace.WriteLine("unknow date time \"{0}\"", date);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.creationDate, date);
    }
    data.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

    xe = xePost.XPathElement(".//div[@class='maincont']");
    data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src"))))
        .ToArray();
    if (loadDataFromWeb.request.LoadImage)
    {
        data.images = DownloadPrint.LoadImages(data.images).ToArray();
    }

    // get infos, description, language, size, nbPages from the texts, "a" elements are skipped
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xe.DescendantTexts(node => (!(node is XElement) || ((XElement)node).Name != "a") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.title);
    data.description = textValues.description;
    data.language = textValues.language;
    data.size = textValues.size;
    data.nbPages = textValues.nbPages;
    data.infos.SetValues(textValues.infos);

    List<string> downloadLinks = new List<string>();
    foreach (XXElement xe2 in xe.XPathElements("div/div"))
    {
        // links to skip, examples :
        //   http://prezup.eu  http://pixhst.com/avaxhome/27/36/002e3627.jpeg  http://www.zupmage.eu/i/R1UgqdXn4F.jpg
        //   http://i.imgur.com/Gu7hagN.jpg  http://img11.hostingpics.net/pics/591623liens.png  http://www.hapshack.com/images/jUfTZ.gif
        //   http://pixhst.com/pictures/3029467
        // Urls are not linguistic text : compare them ordinally instead of with the current culture.
        downloadLinks.AddRange(
            xe2.XPathValues(".//a/@href").Where(url =>
                !url.StartsWith("http://prezup.eu", System.StringComparison.Ordinal)
                && !url.StartsWith("http://pixhst.com", System.StringComparison.Ordinal)
                && !url.EndsWith(".jpg", System.StringComparison.Ordinal)
                && !url.EndsWith("jpeg", System.StringComparison.Ordinal)
                && !url.EndsWith("png", System.StringComparison.Ordinal)
                && !url.EndsWith("gif", System.StringComparison.Ordinal)));
    }
    data.downloadLinks = downloadLinks.ToArray();

    return data;
}
/// <summary>
/// Builds a <see cref="ZoneEbooks_HeaderPage"/> from the list page carried by <paramref name="request"/>.
/// </summary>
/// <remarks>
/// One <see cref="ZoneEbooks_PostHeader"/> is created for each div whose id starts with <c>post-</c>.
/// Images whose source is a key of <c>__imagesToSkip</c> are left out; the remaining images are loaded
/// when <c>request.LoadImage</c> is set.
/// </remarks>
/// <param name="request">Web request providing the XML document, the source url and the LoadImage flag.</param>
/// <returns>The page with its next page url and its post headers.</returns>
public static ZoneEbooks_HeaderPage LoadHeaderPageFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    XXElement root = new XXElement(request.GetXmlDocument().Root);
    ZoneEbooks_HeaderPage page = new ZoneEbooks_HeaderPage();

    // next page link, example : <a href='http://zone-ebooks.com/page/2' class='nextpostslink'>»</a>
    string nextPageHref = root.XPathValue("//a[@class='nextpostslink']/@href");
    page.urlNextPage = zurl.GetUrl(request.Url, nextPageHref);
    Trace.WriteLine("urlNextPage \"{0}\"", page.urlNextPage);

    // post list, example : <div id="post-1838" class="post-1838 post type-post status-publish ...">
    List<ZoneEbooks_PostHeader> postHeaders = new List<ZoneEbooks_PostHeader>();
    foreach (XXElement xePost in root.XPathElements("//div[starts-with(@id, 'post-')]"))
    {
        ZoneEbooks_PostHeader postHeader = new ZoneEbooks_PostHeader
        {
            sourceUrl = request.Url,
            loadFromWebDate = DateTime.Now
        };

        // <h2 class="title"><a href="http://zone-ebooks.com/journaux/....html" rel="bookmark" title="Lien permanent: ...">...</a></h2>
        XXElement xeTitleLink = xePost.XPathElement(".//*[@class='title']//a");
        postHeader.urlDetail = xeTitleLink.XPathValue("@href");
        postHeader.title = xeTitleLink.XPathValue(".//text()");

        // <div class="post-date"><span class="ext">Il y a 2 heures</span></div>
        // other values : Il y a 57 secondes, Il y a 3 minutes, Il y a 1 heure, Il y a 1 jour, Il y a 2 semaines, Il y a 2 mois
        // the raw text is stored in the infos, it is not parsed
        string postDateText = xePost.XPathValue(".//div[@class='post-date']//text()");
        if (postDateText != null)
        {
            postHeader.infos.Add("postDate", new ZString(postDateText));
        }

        // <div class="post-info">
        //   <span class="a">par <a href="http://zone-ebooks.com/author/admin" title="Articles par admin ">admin</a></span>
        //   dans <a href="http://zone-ebooks.com/category/journaux" rel="tag" title="Journaux (158 sujets)">Journaux</a>
        // </div>
        XXElement xePostInfo = xePost.XPathElement(".//div[@class='post-info']");
        postHeader.author = xePostInfo.XPathValue(".//a//text()");
        postHeader.category = xePostInfo.XPathValue("./a//text()");

        // <div class="post-content clear-block"> with images like
        // <img title="..." alt="..." src="http://i.imgur.com/f7aWDHF.jpg" width="362" height="446" />
        // images to skip, examples : "infos sur le livre" http://i.imgur.com/GTPfRoB.png, "description" http://i.imgur.com/Ruuh4CP.png
        XXElement xeContent = xePost.XPathElement(".//div[starts-with(@class, 'post-content')]");
        var imageNodes = xeContent.DescendantNodes(node => XmlDescendant.ImageFilter(node));
        var keptImages = imageNodes
            .Select(xeImage => new pb.old.ImageHtml((XElement)xeImage, request.Url))
            .Where(image => !__imagesToSkip.ContainsKey(image.Source));
        postHeader.images = keptImages.ToList();

        if (request.LoadImage)
        {
            pb.old.Http_v2.LoadImageFromWeb(postHeader.images);
        }

        postHeaders.Add(postHeader);
    }

    page.postHeaders = postHeaders.ToArray();
    return page;
}
/// <summary>
/// Builds the detail record of a telechargement-plus.com post from the loaded page.
/// </summary>
/// <remarks>
/// Page layout this method reads (abridged from a sample page):
/// <code>
/// div#dle-content
///   div.heading / div.binner
///     h1                     post title
///     div.storeinfo          date link ("Aujourd'hui, 11:59"), "| Catégorie:", category links
///   div.maincont / div.binner / ... / div.story-text
///     span#post-img (xN)     images, description text, download links (a href)
/// </code>
/// </remarks>
/// <returns>The populated post detail.</returns>
protected override TelechargementPlus_PostDetail GetData()
{
    // Every content fragment of the post body lives in a <span id="post-img">.
    const string postImgXPath = ".//span[@id='post-img']";

    XXElement root = new XXElement(GetXmlDocument().Root);

    TelechargementPlus_PostDetail detail = new TelechargementPlus_PostDetail();
    detail.sourceUrl = Url;
    detail.loadFromWebDate = DateTime.Now;

    XXElement post = root.XPathElement("//div[@id='dle-content']");

    // --- heading: title, creation date, category ---
    XXElement heading = post.XPathElement(".//div[@class='heading']//div[@class='binner']");

    // ExtractTextValues receives detail.infos alongside the trimmed title and returns the title to store.
    string rawTitle = TelechargementPlus.TrimFunc1(heading.XPathValue(".//text()"));
    detail.title = TelechargementPlus.ExtractTextValues(detail.infos, rawTitle);

    // The date is the text of the link found in the heading (first <a> of div.storeinfo in the sample page).
    detail.creationDate = TelechargementPlus.ParseDateTime(heading.XPathValue(".//a//text()"));

    // Category: texts of div.storeinfo, minus the first two, minus the fixed labels and empty strings.
    detail.category = heading.XPathElements(".//div[@class='storeinfo']")
        .DescendantTexts()
        .Skip(2)
        .Select(TelechargementPlus.TrimFunc1)
        .Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "")
        .zToStringValues("/");

    // --- body: images, text values, download links ---
    XXElement story = post.XPathElement(".//div[@class='maincont']//div[@class='binner']//div[@class='story-text']");

    // The inner filter answers SkipNode for <a> elements and SelectNode for everything else —
    // presumably so that images wrapped in download links are left out (confirm XNodeFilter semantics).
    detail.images = story.XPathElements(postImgXPath)
        .DescendantNodes(node => XmlDescendant.ImageFilter(
            node,
            child => !(child is XElement) || ((XElement)child).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode))
        .Select(imgNode => new pb.old.ImageHtml((XElement)imgNode, Url))
        .Where(img => !TelechargementPlus.ImagesToSkip.ContainsKey(img.Source))
        .ToList();

    if (_loadImage)
    {
        pb.old.Http_v2.LoadImageFromWeb(detail.images);
    }

    // Text values come from the same spans, using the same "<a> => SkipNode" filter.
    detail.SetTextValues(
        story.XPathElements(postImgXPath)
            .DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode));

    // Download links are the href of every <a> found inside the spans.
    detail.downloadLinks.AddRange(story.XPathValues(".//span[@id='post-img']//a/@href"));

    return detail;
}
/// <summary>
/// Advances <c>_xmlEnum</c> to the next article of the listing page and loads it into <c>_postHeader</c>.
/// </summary>
/// <returns>
/// <c>true</c> when an article was read into <c>_postHeader</c>;
/// <c>false</c> when <c>_xmlEnum</c> has no more elements.
/// </returns>
protected override bool _MoveNext()
{
    if (!_xmlEnum.MoveNext())
    {
        return false;
    }

    XXElement article = _xmlEnum.Current;

    _postHeader = new ZoneEbooksPostHeader();
    _postHeader.sourceUrl = _sourceUrl;

    // Title link, e.g.
    //   <h2 class="title">
    //     <a href="http://zone-ebooks.com/journaux/....html" rel="bookmark" title="Lien permanent: ...">title text</a>
    //   </h2>
    XXElement titleLink = article.XPathElement(".//*[@class='title']//a");
    _postHeader.url = titleLink.XPathValue("@href");
    _postHeader.title = titleLink.XPathValue(".//text()");

    // Post date, e.g. <div class="post-date"><span class="ext">Il y a 2 heures</span></div>
    // Sample values: "Il y a 57 secondes", "Il y a 3 minutes", "Il y a 1 heure", "Il y a 1 jour",
    // "Il y a 2 semaines", "Il y a 2 mois". The text is stored as-is under the "postDate" key.
    string postDate = article.XPathValue(".//div[@class='post-date']//text()");
    if (postDate != null)
    {
        _postHeader.infos.Add("postDate", new ZString(postDate));
    }

    // Author and category, e.g.
    //   <div class="post-info">
    //     <span class="a">par <a href="http://zone-ebooks.com/author/admin">admin</a></span>
    //     dans
    //     <a href="http://zone-ebooks.com/category/journaux" rel="tag">Journaux</a>
    //   </div>
    // The author is read from a link anywhere below the div, the category from a link that is a direct child.
    XXElement postInfo = article.XPathElement(".//div[@class='post-info']");
    _postHeader.postAuthor = postInfo.XPathValue(".//a//text()");
    _postHeader.category = postInfo.XPathValue("./a//text()");

    // Images of <div class="post-content clear-block">, minus those whose source is a key of _imagesToSkip.
    // NOTE(review): posts also embed decoration images ("infos sur le livre" http://i.imgur.com/GTPfRoB.png,
    // "description" http://i.imgur.com/Ruuh4CP.png) — presumably meant to be listed in _imagesToSkip; confirm.
    XXElement content = article.XPathElement(".//div[starts-with(@class, 'post-content')]");
    _postHeader.images = content
        .DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(imgNode => new pb.old.ImageHtml((XElement)imgNode, _url))
        .Where(img => !_imagesToSkip.ContainsKey(img.Source))
        .ToList();

    if (_loadImage)
    {
        pb.old.Http_v2.LoadImageFromWeb(_postHeader.images);
    }

    return true;
}
/// <summary>
/// Builds a <c>Telechargementz_PostDetail</c> from a loaded post page.
/// </summary>
/// <param name="loadDataFromWeb">
/// The loaded page; supplies the XML document, the originating request (url, <c>LoadImage</c> flag)
/// and the date the page was loaded.
/// </param>
/// <returns>The populated post detail.</returns>
protected override IPost GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);

    Telechargementz_PostDetail data = new Telechargementz_PostDetail();
    data.SourceUrl = loadDataFromWeb.request.Url;
    data.LoadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.Id = GetPostDetailKey(data.SourceUrl);

    XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");

    data.PostAuthor = xePost.XPathValue(".//div[@class='title-info']//a//text()");

    // The date is the text node that follows the author link, e.g. ", 26.12.14";
    // the surrounding spaces and comma are trimmed before parsing.
    string date = xePost.XPathValue(".//div[@class='title-info']//a/following-sibling::text()");
    if (date != null)
    {
        data.PostCreationDate = zdate.ParseDateTimeLikeToday(date.Trim(' ', ','), loadDataFromWeb.loadFromWebDate, "dd.MM.yy");
        if (data.PostCreationDate == null)
        {
            pb.Trace.WriteLine("unknow date time \"{0}\"", date);
        }
        if (__trace)
        {
            pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.PostCreationDate, date);
        }
    }
    else
    {
        pb.Trace.WriteLine("creationDate not found \"{0}\"", data.SourceUrl);
    }

    // Title: first trimmed text of div.post-title. When ExtractTitleInfos finds infos in it,
    // the raw title is kept in OriginalTitle and the cleaned title replaces it.
    data.Title = xePost.XPathElement(".//div[@class='post-title']").DescendantTexts().Select(DownloadPrint.Trim).FirstOrDefault();
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.infos.SetValues(titleInfos.infos);
    }

    // Post body. A missing body is only traced; xe is still used below.
    XXElement xe = xePost.XPathElement(".//div[starts-with(@id, 'news-id-')]");
    if (xe.XElement == null)
    {
        pb.Trace.WriteLine("element not found \".//div[starts-with(@id, 'news-id-')]\"");
    }

    // Only the first image of the body is kept.
    WebImage image = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).FirstOrDefault();
    if (image != null)
    {
        // Fix: the statement terminator belongs inside the block (it was placed after the closing brace).
        data.Images = new WebImage[] { image };
    }

    // force load image to get image width and height
    // NOTE(review): data.Images is assigned above only when an image was found; confirm that
    // DownloadPrint.LoadImages tolerates whatever value data.Images holds otherwise.
    if (loadDataFromWeb.request.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    data.DownloadLinks = xe.XPathValues(".//a/@href").ToArray();

    // get infos, description, language, size, nbPages
    // node filter: <a> elements answer SkipNode, every other node answers SelectNode
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.Title);
    data.description = textValues.description;
    data.language = textValues.language;
    data.size = textValues.size;
    data.nbPages = textValues.nbPages;
    data.infos.SetValues(textValues.infos);

    // Print type, decided from the collected infos (first matching rule wins):
    //   a "Bd" / "bd" / "BD" key          -> Comics
    //   "editeur" equal to "presse fr"    -> Print   (e.g. "Editeur : Presse fr")
    //   an "isbn" key                     -> Book
    //   otherwise                         -> UnknowEBook
    data.PrintType = PrintType.UnknowEBook;
    if (data.infos.ContainsKey("Bd") || data.infos.ContainsKey("bd") || data.infos.ContainsKey("BD"))
    {
        data.PrintType = PrintType.Comics;
    }
    else if (data.infos.ContainsKey("editeur") && data.infos["editeur"] is ZString && ((string)data.infos["editeur"]).ToLowerInvariant() == "presse fr")
    {
        data.PrintType = PrintType.Print;
    }
    else if (data.infos.ContainsKey("isbn"))
    {
        data.PrintType = PrintType.Book;
    }

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}
/// <summary>
/// Builds a <c>GoldenDdl_PostDetail</c> from a loaded post page.
/// </summary>
/// <param name="loadDataFromWeb">
/// The loaded page; supplies the XML document, the originating request (url, <c>LoadImage</c> flag)
/// and the date the page was loaded.
/// </param>
/// <returns>The populated post detail.</returns>
protected override IPost GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);

    GoldenDdl_PostDetail data = new GoldenDdl_PostDetail();
    data.sourceUrl = loadDataFromWeb.request.Url;
    data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.id = GetPostDetailKey(data.sourceUrl);

    XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");

    // Category: trimmed texts of the links of div.hdiin, combined by zToStringValues with "/".
    data.category = xePost.XPathElements(".//div[@class='hdiin']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
    string category = data.category.ToLowerInvariant();
    data.printType = GetPrintType(category);

    // Title. XPathValue results are null-checked elsewhere in this code base, so the value is
    // tested before Trim is called on it instead of risking a NullReferenceException.
    string title = xePost.XPathValue(".//div[@class='bheading']//text()");
    if (title != null)
    {
        data.title = title.Trim(DownloadPrint.TrimChars);
        // When ExtractTitleInfos finds infos in the title, the raw title is kept in originalTitle
        // and the cleaned title replaces it.
        PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.title);
        if (titleInfos.foundInfo)
        {
            data.originalTitle = data.title;
            data.title = titleInfos.title;
            data.infos.SetValues(titleInfos.infos);
        }
    }
    else
    {
        // NOTE(review): data.title keeps its initial value here and is still passed to
        // GetTextValues_v1 below — confirm that call accepts a missing title.
        pb.Trace.WriteLine("title not found \"{0}\"", data.sourceUrl);
    }

    string date = xePost.XPathValue(".//div[@class='datenews']//text()");
    data.creationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
    if (data.creationDate == null)
    {
        pb.Trace.WriteLine("unknow date time \"{0}\"", date);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.creationDate, date);
    }

    data.postAuthor = xePost.XPathValue(".//div[@class='argr']//a//text()");

    XXElement xe = xePost.XPathElement(".//div[@class='maincont']");

    data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).ToArray();

    // force load image to get image width and height
    if (loadDataFromWeb.request.LoadImage)
    {
        data.images = DownloadPrint.LoadImages(data.images).ToArray();
    }

    // get infos, description, language, size, nbPages
    // node filter: <a> elements answer SkipNode, every other node answers SelectNode
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.title);
    data.description = textValues.description;
    data.language = textValues.language;
    data.size = textValues.size;
    data.nbPages = textValues.nbPages;
    data.infos.SetValues(textValues.infos);

    // Download links: every href found under "div/div" of the body, except links to image hosts
    // and links ending like an image file. Examples of rejected urls seen in posts:
    //   http://prezup.eu  http://pixhst.com/avaxhome/27/36/002e3627.jpeg  http://www.zupmage.eu/i/R1UgqdXn4F.jpg
    //   http://i.imgur.com/Gu7hagN.jpg  http://img11.hostingpics.net/pics/591623liens.png
    //   http://www.hapshack.com/images/jUfTZ.gif  http://pixhst.com/pictures/3029467
    // Urls are compared ordinally: they are machine identifiers, not culture-dependent text.
    List<string> downloadLinks = new List<string>();
    foreach (XXElement xe2 in xe.XPathElements("div/div"))
    {
        downloadLinks.AddRange(xe2.XPathValues(".//a/@href").Where(url =>
            !url.StartsWith("http://prezup.eu", System.StringComparison.Ordinal)
            && !url.StartsWith("http://pixhst.com", System.StringComparison.Ordinal)
            && !url.EndsWith(".jpg", System.StringComparison.Ordinal)
            && !url.EndsWith("jpeg", System.StringComparison.Ordinal)
            && !url.EndsWith("png", System.StringComparison.Ordinal)
            && !url.EndsWith("gif", System.StringComparison.Ordinal)));
    }
    data.downloadLinks = downloadLinks.ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}
/// <summary>
/// Reads one listing page of telechargement-plus.com: the url of the next page and the header
/// of every post shown on the page.
/// </summary>
/// <remarks>
/// Page layout this method reads (abridged from a sample page):
/// <code>
/// div.navigation ... a[text()='Next']            link to the next page
/// div.base.shortstory (xN)                       one per post
///   *.shd / a                                    detail url + title
///   div.shdinf / div.shdinf
///     span.rcol / a                              author
///     span.date                                  date ("Aujourd'hui, 17:13")
///     span.lcol                                  "Catégorie:" + category links
///   span#post-img / div[id^='news-id']           images and description text
/// </code>
/// </remarks>
/// <param name="request">The web request; supplies the XML document, the page url and the <c>LoadImage</c> flag.</param>
/// <returns>The header page with its next-page url and post headers.</returns>
public static TelechargementPlus_HeaderPage LoadHeaderPageFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    XXElement root = new XXElement(request.GetXmlDocument().Root);
    string pageUrl = request.Url;
    TelechargementPlus_HeaderPage page = new TelechargementPlus_HeaderPage();

    // Next page: href of the navigation link whose text is "Next", passed through zurl.GetUrl with the page url.
    page.urlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='navigation']//a[text()='Next']/@href"));

    List<TelechargementPlus_PostHeader> postHeaders = new List<TelechargementPlus_PostHeader>();
    foreach (XXElement post in root.XPathElements("//div[@class='base shortstory']"))
    {
        TelechargementPlus_PostHeader postHeader = new TelechargementPlus_PostHeader();
        postHeader.sourceUrl = pageUrl;
        postHeader.loadFromWebDate = DateTime.Now;

        // <h1 class="shd"><a href="http://www.telechargement-plus.com/.../86236-....html">[Multi] Ici Paris ...</a></h1>
        XXElement titleLink = post.XPathElement(".//*[@class='shd']//a");
        postHeader.urlDetail = zurl.GetUrl(pageUrl, titleLink.XPathValue("@href"));

        // ExtractTextValues receives postHeader.infos alongside the trimmed title and returns the title to store.
        string rawTitle = TelechargementPlus.TrimFunc1(titleLink.XPathValue(".//text()"));
        postHeader.title = TelechargementPlus.ExtractTextValues(postHeader.infos, rawTitle);

        // Author, date and category all sit in the nested div.shdinf.
        XXElement postInfo = post.XPathElement(".//div[@class='shdinf']/div[@class='shdinf']");
        postHeader.postAuthor = postInfo.XPathValue(".//span[@class='rcol']//a//text()");
        postHeader.creationDate = TelechargementPlus.ParseDateTime(postInfo.XPathValue(".//span[@class='date']//text()"));

        // Category: trimmed texts of span.lcol, minus the fixed labels and empty strings.
        postHeader.category = postInfo.XPathElements(".//span[@class='lcol']")
            .DescendantTexts()
            .Select(TelechargementPlus.TrimFunc1)
            .Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "")
            .zToStringValues("/");

        // Post preview: images (minus those whose source is a key of ImagesToSkip) and description text.
        XXElement preview = post.XPathElement(".//span[@id='post-img']//div[starts-with(@id, 'news-id')]");
        postHeader.images = preview
            .DescendantNodes(node => XmlDescendant.ImageFilter(node))
            .Select(imgNode => new pb.old.ImageHtml((XElement)imgNode, pageUrl))
            .Where(img => !TelechargementPlus.ImagesToSkip.ContainsKey(img.Source))
            .ToList();

        if (request.LoadImage)
        {
            pb.old.Http_v2.LoadImageFromWeb(postHeader.images);
        }

        postHeader.SetTextValues(preview.DescendantTexts());

        postHeaders.Add(postHeader);
    }

    page.postHeaders = postHeaders.ToArray();
    return page;
}