// Loads webRequest through Load() and, when the load succeeded, converts the
// WebResult into a TData with _getData. When the load failed, Data is left at
// default(TData). The request and the raw WebResult are always returned alongside.
public WebDataResult<TData> LoadData(WebRequest webRequest)
{
    WebResult webResult = Load(webRequest);

    // Only a successful load is handed to _getData.
    TData loadedData = default(TData);
    if (webResult.LoadResult)
    {
        loadedData = _getData(webResult);
    }

    WebDataResult<TData> dataResult = new WebDataResult<TData>();
    dataResult.Request = webRequest;
    dataResult.Result = webResult;
    dataResult.Data = loadedData;
    return dataResult;
}
// Loads the http request carried by webRequest and returns a WebResult describing the outcome.
//
// Without a url cache the request is loaded directly and the load date is DateTime.Now.
// With a url cache the request is first saved to its cache file (when ReloadFromWeb is set
// or the file does not exist yet), then the cache file itself is loaded; the load date is
// the cache file's last write time.
// LoadResult and LoadFromWebDate are only set when the final load returned a non-null Http.
public WebResult Load(WebRequest webRequest)
{
    WebResult result = new WebResult { WebRequest = webRequest };
    HttpRequest request = webRequest.HttpRequest;
    DateTime loadDate;

    if (_urlCache == null)
    {
        loadDate = DateTime.Now;
    }
    else
    {
        result.UrlCachePathResult = _urlCache.GetUrlPathResult(request);
        string cacheFile = result.UrlCachePathResult.Path;

        if (webRequest.ReloadFromWeb || !zFile.Exists(cacheFile))
        {
            _InitLoadFromWeb(request);
            bool saved = HttpManager.CurrentHttpManager.LoadToFile(request, cacheFile, _urlCache.SaveRequest, _GetHttpRequestParameters());
            if (!saved)
            {
                // download failed : LoadResult stays unset
                return result;
            }
        }

        // from here on the cache file is what gets loaded
        request = new HttpRequest { Url = cacheFile };
        // use last write time, not creation time : creation time is modified when the file is copied
        loadDate = zFile.CreateFileInfo(cacheFile).LastWriteTime;
    }

    _InitLoadFromWeb(request);
    result.Http = HttpManager.CurrentHttpManager.Load(request, _GetHttpRequestParameters());
    if (result.Http == null)
    {
        return result;
    }

    result.LoadResult = true;
    result.LoadFromWebDate = loadDate;
    return result;
}
// Loads webRequest.HttpRequest, going through the url cache when one is configured.
//
// Returns a WebResult whose LoadResult / LoadFromWebDate are set only when the final
// HttpManager load produced a non-null Http. When the cache file has to be (re)written and
// that download fails, the WebResult is returned as is, without Http.
public WebResult Load(WebRequest webRequest)
{
    WebResult outcome = new WebResult { WebRequest = webRequest };
    DateTime dateOfLoad;
    HttpRequest currentRequest = webRequest.HttpRequest;

    if (_urlCache != null)
    {
        outcome.UrlCachePathResult = _urlCache.GetUrlPathResult(currentRequest);
        string cachedPath = outcome.UrlCachePathResult.Path;

        // download when a reload is requested, otherwise only when the cache file is missing
        bool download = webRequest.ReloadFromWeb;
        if (!download)
        {
            download = !zFile.Exists(cachedPath);
        }

        if (download)
        {
            _InitLoadFromWeb(currentRequest);
            if (!HttpManager.CurrentHttpManager.LoadToFile(currentRequest, cachedPath, _urlCache.SaveRequest, _GetHttpRequestParameters()))
            {
                return outcome;
            }
        }

        // the second load reads the cache file
        currentRequest = new HttpRequest { Url = cachedPath };
        // last write time is used because creation time is modified when the file is copied
        dateOfLoad = zFile.CreateFileInfo(cachedPath).LastWriteTime;
    }
    else
    {
        dateOfLoad = DateTime.Now;
    }

    _InitLoadFromWeb(currentRequest);
    outcome.Http = HttpManager.CurrentHttpManager.Load(currentRequest, _GetHttpRequestParameters());

    bool loaded = outcome.Http != null;
    if (loaded)
    {
        outcome.LoadResult = true;
        outcome.LoadFromWebDate = dateOfLoad;
    }

    return outcome;
}
// Builds the header page of a forum thread list from the loaded document.
//
// The next page url is taken from the rel='next' link of the thread page navigation :
//   <div class="threadpagenav">
//     <span class="prev_next">
//       <a rel="next" href="forumdisplay.php?f=157&page=2&s=..." title="Page suivante - ...">
//
// One Ebookdz_PostHeader is created per thread :
//   <div id="threadlist" class="threadlist">
//     <ol id="threads" class="threads">
//       <li class="threadbit " id="thread_111977">
//         <h3 class="threadtitle">
//           <a title="" class="title" href="showthread.php?t=111977&s=..." id="thread_title_111977">La Provence ...</a>
private static IEnumDataPages<IHeaderData> GetData(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    Ebookdz_HeaderPage_v2 page = new Ebookdz_HeaderPage_v2
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='threadpagenav']//span[@class='prev_next']//a[@rel='next']/@href"))
    };

    List<Ebookdz_PostHeader> postHeaders = new List<Ebookdz_PostHeader>();
    foreach (XXElement xeThread in root.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li"))
    {
        // title link of the thread : text is the title, href is the detail url
        XXElement xeTitleLink = xeThread.XPathElement(".//h3[@class='threadtitle']//a[@class='title']");

        postHeaders.Add(new Ebookdz_PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            Title = xeTitleLink.XPathValue(".//text()"),
            UrlDetail = Ebookdz_v1.GetUrl(zurl.GetUrl(pageUrl, xeTitleLink.XPathValue("@href")))
        });
    }

    page.PostHeaders = postHeaders.ToArray();
    return page;
}
// Builds the forum list page from the loaded document.
//
// One EbookdzForumData is created for each "//ol[@id='forums']/li" entry
// (<ol id="forums" class="floatcontainer">) whose link text is accepted by __forumFilter;
// when __forumFilter is null every entry is kept. UrlNextPage is always null.
// Example of forum names : Accueil de la Board, Forum de l'entraide, Journaux, MAGAZINES, Les Livres
// Example of forum url   : http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
protected override IEnumDataPages<IHeaderData> GetDataPage(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    PostHeaderDataPage_v1 page = new PostHeaderDataPage_v1
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = null
    };

    Predicate<string> acceptForum = __forumFilter;
    List<EbookdzForumData> forumList = new List<EbookdzForumData>();

    foreach (XXElement xeForum in root.XPathElements("//ol[@id='forums']/li"))
    {
        XXElement xeLink = xeForum.XPathElement(".//a");
        string forumName = xeLink.XPathValue(".//text()");

        // no filter means every forum is accepted
        if (acceptForum == null || acceptForum(forumName))
        {
            forumList.Add(new EbookdzForumData
            {
                SourceUrl = pageUrl,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Name = forumName,
                UrlDetail = GetUrl(zurl.GetUrl(pageUrl, xeLink.XPathValue("@href")))
            });
        }
    }

    page.Headers = forumList.ToArray();
    return page;
}
// Default implementation of the page parser : always throws PBException.
// Derived classes override this method to convert webResult into a page of header data.
protected virtual IEnumDataPages<IHeaderData> GetDataPage(WebResult webResult)
{
    // the message names this method (it previously named GetHeaderPageData(), which is not the member that throws)
    throw new PBException("GetDataPage() not implemented");
}
// Builds a MagazinesGratuits_PostDetail_v2 from a loaded post detail page.
//
// Reads, under "//table[@id='layout']//div[@id='content']//div[@class='post']" :
//   - the post date texts of td#head-date (converted with GetDate, which also receives __lastPostDate),
//   - the title (div.title link text), optionally split into title + infos by ExtractTitleInfos,
//   - the category (span.category links joined with "/") and the print type derived from it,
//   - one image, the text values (description, infos) and the download links of div.entry.
// Side effects : __lastPostDate is updated when a creation date was obtained; traces are
// written when __trace is set.
private static MagazinesGratuits_PostDetail_v2 GetData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    MagazinesGratuits_PostDetail_v2 data = new MagazinesGratuits_PostDetail_v2();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Key = _GetPostDetailKey(webResult.WebRequest.HttpRequest);

    XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

    // post date
    XXElement xe = xePost.XPathElement(".//table[@id='post-head']");
    string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
    data.PostCreationDate = GetDate(dates, __lastPostDate);
    if (data.PostCreationDate != null)
    {
        __lastPostDate = new Date(data.PostCreationDate.Value);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());
    }

    // title, with infos embedded in the title extracted when there are some
    data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // category, example : Ebooks en Epub / Livre
    data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    xe = xePost.XPathElement(".//div[@class='entry']");

    data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src"))) };
    // force load image to get image width and height
    if (webResult.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    // get infos, description, language, size, nbPages from the <p> texts;
    // the walk stops at the first text starting with "lien " or at <p class="submeta">
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xe.XPathElements(".//p").DescendantTexts(
            node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                    {
                        return XNodeFilter.Stop;
                    }
                }
                if (node is XElement)
                {
                    XElement xe2 = (XElement)node;
                    if (xe2.Name == "p" && xe2.zAttribValue("class") == "submeta")
                    {
                        return XNodeFilter.Stop;
                    }
                }
                return XNodeFilter.SelectNode;
            }
            ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // download links : href of every <a> of the entry located before <p class="submeta">
    data.DownloadLinks = xe.DescendantNodes(
        node =>
        {
            XElement element = node as XElement;
            if (element == null)
            {
                return XNodeFilter.DontSelectNode;
            }
            if (element.Name == "a")
            {
                return XNodeFilter.SelectNode;
            }
            if (element.Name == "p")
            {
                XAttribute classAttribute = element.Attribute("class");
                if (classAttribute != null && classAttribute.Value == "submeta")
                {
                    return XNodeFilter.Stop;
                }
            }
            return XNodeFilter.DontSelectNode;
        })
        .Select(node => ((XElement)node).Attribute("href"))
        // an <a> without href (a named anchor for instance) has no link to collect;
        // reading .Value on the missing attribute would throw NullReferenceException
        .Where(href => href != null)
        .Select(href => href.Value)
        .ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}
// Builds a page of PostHeader from the loaded document.
//
// One PostHeader is created per "//div[@id='vba_news4']//div[@class='collapse']" block;
// title and detail url come from the second <a> of the block's h2.blockhead.
// UrlNextPage is always null.
public static IEnumDataPages<PostHeader> GetHeaderPageData(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    PostHeaderDataPage<PostHeader> page = new PostHeaderDataPage<PostHeader>
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = null
    };

    // <div id="vba_news4">
    List<PostHeader> postHeaders = new List<PostHeader>();
    foreach (XXElement xeBlock in root.XPathElements("//div[@id='vba_news4']//div[@class='collapse']"))
    {
        XXElement xeLink = xeBlock.XPathElement(".//h2[@class='blockhead']//a[2]");

        postHeaders.Add(new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            Title = xeLink.XPathValue(".//text()"),
            UrlDetail = xeLink.XPathValue("./@href")
        });
    }

    page.Headers = postHeaders.ToArray();
    return page;
}
// Builds a Vosbooks_PostDetail_v2 from a loaded post detail page.
//
// Page structure used :
//   <table id="layout"> ... <div id="content"> ...
//     <div class="post" id="post-74299" style="margin-top: 0;">
//       <table id="post-head">
//         <tr>
//           <td id="head-date"><div class="date"><span>jan</span> 29</div></td>
//           <td>
//             <div class="title">
//               <h2><a href="http://www.vosbooks.net/74299-livre/les-imposteurs-francois-cavanna.html" rel="bookmark" title="...">Les imposteurs – François Cavanna </a></h2>
//               <div class="postdata">
//                 <span class="category">
//                   <a href="http://www.vosbooks.net/category/livre/ebooks-epub" rel="category tag">Ebooks en Epub</a>,
//                   <a href="http://www.vosbooks.net/category/livre" rel="category tag">Livre</a>
//                 </span>
//               </div>
//             </div>
//           </td>
//         </tr>
//       </table>
//       <div class="entry">
//         ...
//         <p style="text-align: center;">
//           <img class="alignnone" src="http://imageshack.com/a/img538/3859/6JXSxu.jpg" alt="..." title="..." height="540" width="420" />
//         </p>
//
// Side effects : __lastPostDate is updated when a creation date was obtained; traces are
// written when __trace is set.
private static Vosbooks_PostDetail_v2 GetData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    Vosbooks_PostDetail_v2 data = new Vosbooks_PostDetail_v2();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = _GetPostDetailKey(webResult.WebRequest.HttpRequest);

    XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

    // post date (GetDate also receives the date of the previously parsed post)
    XXElement xe = xePost.XPathElement(".//table[@id='post-head']");
    string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
    data.PostCreationDate = GetDate(dates, __lastPostDate);
    if (data.PostCreationDate != null)
    {
        __lastPostDate = new Date(data.PostCreationDate.Value);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());
    }

    // title, with infos embedded in the title extracted when there are some
    data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // category, example : Ebooks en Epub / Livre
    data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    xe = xePost.XPathElement(".//div[@class='entry']");

    data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src"))) };
    // force load image to get image width and height
    if (webResult.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    // get infos, description, language, size, nbPages from the <p> texts;
    // the walk stops at the first text starting with "lien " or at <p class="submeta">
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xe.XPathElements(".//p").DescendantTexts(
            node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                    {
                        return XNodeFilter.Stop;
                    }
                }
                if (node is XElement)
                {
                    XElement xe2 = (XElement)node;
                    if (xe2.Name == "p" && xe2.zAttribValue("class") == "submeta")
                    {
                        return XNodeFilter.Stop;
                    }
                }
                return XNodeFilter.SelectNode;
            }
            ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // download links : href of every <a> of the entry located before <p class="submeta">
    data.DownloadLinks = xe.DescendantNodes(
        node =>
        {
            XElement element = node as XElement;
            if (element == null)
            {
                return XNodeFilter.DontSelectNode;
            }
            if (element.Name == "a")
            {
                return XNodeFilter.SelectNode;
            }
            if (element.Name == "p")
            {
                XAttribute classAttribute = element.Attribute("class");
                if (classAttribute != null && classAttribute.Value == "submeta")
                {
                    return XNodeFilter.Stop;
                }
            }
            return XNodeFilter.DontSelectNode;
        })
        .Select(node => ((XElement)node).Attribute("href"))
        // an <a> without href (a named anchor for instance) has no link to collect;
        // reading .Value on the missing attribute would throw NullReferenceException
        .Where(href => href != null)
        .Select(href => href.Value)
        .ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}
// Builds the Vosbooks header page from the loaded document.
//
// Page structure used :
//   <div id="page">
//     <div id="wrapper">
//       <table id="layout">
//         ... <div id="left-col"> <div id="content-padding">
//           <div id="content">
//             <div style="height:264px;" class="cover_global" data-zt="divbyzt">...</div>
//             ...
//           </div>
//
// Each direct <div> child of div#content, except the one with class "page-nav", gives one header :
//   <div style="" data-zt="divbyzt">
//     <div data-zt="divbyzt">
//       <div data-zt="divbyzt">
//         <center>
//           <strong>
//             <a href="http://www.vosbooks.net/74231-journaux/pack-journaux-francais-du-28-janvier-2015.html" title="">
//               Pack Journaux Français Du 28 Janvier 2015
//               <span class="detail_release" data-zt="spanbyzt"></span>
//             </a>
//
// The next page url is the href of the '>' link of the last <li> of div.page-nav.
private static IEnumDataPages<IHeaderData> GetHeaderPageData(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    Vosbooks_HeaderPage_v2 page = new Vosbooks_HeaderPage_v2
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='page-nav']//li[last()]//a[text()='>']/@href"))
    };

    List<Vosbooks_PostHeader_v1> postHeaders = new List<Vosbooks_PostHeader_v1>();
    foreach (XXElement xeBlock in root.XPathElements("//table[@id='layout']//div[@id='content']/div"))
    {
        Vosbooks_PostHeader_v1 postHeader = new Vosbooks_PostHeader_v1
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        // the page navigation block is not a post
        if (xeBlock.XPathValue("@class") != "page-nav")
        {
            XXElement xeLink = xeBlock.XPathElement(".//div/div/div//a");
            postHeader.Title = xeLink.XPathValue(".//text()");
            postHeader.UrlDetail = xeLink.XPathValue("./@href");
            postHeaders.Add(postHeader);
        }
    }

    page.PostHeaders = postHeaders.ToArray();
    return page;
}
// Builds an Ebookdz_PostDetail_v2 from a loaded thread page.
//
// Reads, under "//div[@class='body_bd']" :
//   - the title (div#pagetitle link text), optionally split into title + infos by ExtractTitleInfos,
//   - the category : breadcrumb link texts joined with "/", without "forum" and without the
//     entry that ends with the title,
//   - the creation date and the author of div#postlist,
//   - images, text values (description, infos) and download links of the post body.
// A trace is written when the creation date could not be parsed, and more when __trace is set.
private static Ebookdz_PostDetail_v2 GetData(WebResult webResult)
{
    XXElement xeSource = new XXElement(webResult.Http.zGetXDocument().Root);

    Ebookdz_PostDetail_v2 data = new Ebookdz_PostDetail_v2();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = _GetPostDetailKey(webResult.WebRequest.HttpRequest);

    // <div class="body_bd">
    XXElement xePost = xeSource.XPathElement("//div[@class='body_bd']");

    // title, example : Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
    data.Title = xePost.XPathValue(".//div[@id='pagetitle']//a//text()").zNotNullFunc(s => s.Trim(DownloadPrint.TrimChars));
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // breadcrumb, example : Forum / Journaux / Presse quotidienne / Le Monde / Journal Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
    string lowerTitle = data.Title != null ? data.Title.ToLowerInvariant() : null;
    // the title test is skipped when there is no title : EndsWith(null) throws ArgumentNullException
    // and EndsWith("") is true for every text, which would remove the whole breadcrumb
    bool hasTitle = !string.IsNullOrEmpty(lowerTitle);
    data.Category = xePost.XPathElements(".//div[@id='breadcrumb']//a").DescendantTexts()
        .Where(text =>
        {
            string lowerText = text.ToLowerInvariant();
            if (lowerText == "forum")
            {
                return false;
            }
            return !hasTitle || !lowerText.EndsWith(lowerTitle);
        })
        .Select(DownloadPrint.Trim).zToStringValues("/");
    string category = data.Category.ToLowerInvariant();
    data.PrintType = GetPrintType(category);

    // <div id="postlist" class="postlist restrain">
    XXElement xe = xePost.XPathElement(".//div[@id='postlist']");

    // date examples : Aujourd'hui, 07h32 - Aujourd'hui, 10h51 - Hier, 12h55 - 22/02/2014, 21h09
    // texts of div.posthead, <a> elements skipped
    XXElement xe2 = xe.XPathElement(".//div[@class='posthead']");
    string date = xe2.DescendantTexts(node => node.zGetName() != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode).zToStringValues("");
    // replace non-breaking spaces with plain spaces before parsing
    date = date.Replace('\xA0', ' ');
    data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, webResult.LoadFromWebDate, @"d/M/yyyy, HH\hmm", @"d-M-yyyy, HH\hmm");
    if (data.PostCreationDate == null)
    {
        pb.Trace.WriteLine("unknow post creation date \"{0}\"", date);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - \"{1}\"", data.PostCreationDate, date);
    }

    data.PostAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()").zNotNullFunc(s => s.Trim(DownloadPrint.TrimChars));

    // <div class="postbody">
    xe = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote/div");

    data.Images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src")))).ToArray();
    // force load image to get image width and height
    if (webResult.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    // get infos, description, language, size, nbPages from the post texts, <a> elements skipped
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xe.DescendantTexts(node => (!(node is XElement) || ((XElement)node).Name != "a") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.Title);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // links are taken from the whole blockquote, not only blockquote/div,
    // to get the links of http://www.ebookdz.com/forum/showthread.php?t=113291
    data.DownloadLinks = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote").XPathValues(".//a/@href").ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}