/// <summary>
/// Requests the page at __urlForum through Load and lazily yields one Ebookdz_Forum
/// per list item found under the ol element whose id is 'forums'.
/// </summary>
/// <param name="filter">Optional predicate applied to the forum name; entries for which it returns false are skipped. When null, every entry is yielded.</param>
/// <param name="reload">Forwarded unchanged to the RequestFromWeb_v4 request.</param>
/// <returns>A sequence of forums carrying a name and a url; empty when LoadResult is false.</returns>
public IEnumerable<Ebookdz_Forum> LoadMainForum(Predicate<string> filter = null, bool reload = false)
{
    HttpRequest forumRequest = new HttpRequest { Url = __urlForum };
    LoadDataFromWeb_v4 loaded = Load(new RequestFromWeb_v4(forumRequest, reload: reload));
    if (!loaded.LoadResult)
    {
        yield break;
    }

    XXElement root = new XXElement(loaded.Http.zGetXDocument().Root);

    // Expected markup: <ol id="forums" class="floatcontainer"> with one <li> per top-level forum.
    // Sample link: http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
    foreach (XXElement item in root.XPathElements("//ol[@id='forums']/li"))
    {
        // The first anchor below the list item supplies both the forum name and its link.
        XXElement link = item.XPathElement(".//a");
        string forumName = link.XPathValue(".//text()");

        bool accepted = filter == null || filter(forumName);
        if (accepted)
        {
            // The href is resolved against the url of the request that was actually performed.
            // NOTE(review): Ebookdz.GetUrl presumably normalizes the link (session parameter?) - confirm.
            string forumUrl = Ebookdz.GetUrl(zurl.GetUrl(loaded.WebRequest.HttpRequest.Url, link.XPathValue("@href")));
            yield return new Ebookdz_Forum { Forum = forumName, Url = forumUrl };
        }
    }
}
/// <summary>
/// Builds a PostHeaderDataPage_v1 from a loaded forum index page: one EbookdzForumData
/// per list item under the ol element whose id is 'forums', optionally restricted by __forumFilter.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request url and the load date.</param>
/// <returns>The page object with its headers filled in; UrlNextPage is always null.</returns>
protected override IEnumDataPages<IHeaderData> GetDataPage(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    PostHeaderDataPage_v1 page = new PostHeaderDataPage_v1
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = null
    };

    // The filter is read once, before iterating.
    Predicate<string> filter = __forumFilter;
    List<EbookdzForumData> result = new List<EbookdzForumData>();

    // Expected markup: <ol id="forums" class="floatcontainer"> with one <li> per forum.
    // Sample link: http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
    foreach (XXElement item in root.XPathElements("//ol[@id='forums']/li"))
    {
        XXElement link = item.XPathElement(".//a");
        string forumName = link.XPathValue(".//text()");

        bool rejected = filter != null && !filter(forumName);
        if (rejected)
        {
            continue;
        }

        EbookdzForumData entry = new EbookdzForumData
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            Name = forumName,
            // href resolved against the page url, then passed through GetUrl.
            UrlDetail = GetUrl(zurl.GetUrl(pageUrl, link.XPathValue("@href")))
        };
        result.Add(entry);
    }

    page.Headers = result.ToArray();
    return page;
}
/// <summary>
/// Parses a post-list page into a TelechargementPlus_HeaderPage: the "Next" navigation link
/// and one TelechargementPlus_PostHeader per div whose class is 'base shortstory'.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page; supplies the parsed document (GetXmlDocument) and the page url (Url).</param>
/// <param name="loadImage">When true, pb.old.Http_v2.LoadImageFromWeb is called on the images of every header.</param>
/// <returns>The page object with urlNextPage and postHeaders filled in.</returns>
public static TelechargementPlus_HeaderPage LoadHeaderFromWeb_GetData(LoadDataFromWeb_v1<TelechargementPlus_HeaderPage> loadDataFromWeb, bool loadImage = false)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string url = loadDataFromWeb.Url;

    // One timestamp for the whole page. Reading DateTime.Now per header made the value drift
    // between headers of the same page, especially when loadImage downloads images in the loop.
    DateTime loadFromWebDate = DateTime.Now;

    TelechargementPlus_HeaderPage data = new TelechargementPlus_HeaderPage();

    // Next page link:
    //   <div class="navigation"><div align="center"> ...
    //     <a href="http://www.telechargement-plus.com/e-book-magazines/page/2/">Next</a>
    //   </div></div>
    data.urlNextPage = zurl.GetUrl(url, xeSource.XPathValue("//div[@class='navigation']//a[text()='Next']/@href"));

    List<TelechargementPlus_PostHeader> headers = new List<TelechargementPlus_PostHeader>();

    // Post list: one <div class="base shortstory"> per post.
    foreach (XXElement xeHeader in xeSource.XPathElements("//div[@class='base shortstory']"))
    {
        TelechargementPlus_PostHeader header = new TelechargementPlus_PostHeader();
        header.sourceUrl = url;
        header.loadFromWebDate = loadFromWebDate;

        // Title and detail link:
        //   <h1 class="shd"><a href="http://www.telechargement-plus.com/.../86236-....html">[Multi] ...</a></h1>
        XXElement xe = xeHeader.XPathElement(".//*[@class='shd']//a");
        header.urlDetail = zurl.GetUrl(url, xe.XPathValue("@href"));
        header.title = TelechargementPlus.ExtractTextValues(header.infos, TelechargementPlus.TrimFunc1(xe.XPathValue(".//text()")));

        // Author, date and category:
        //   <div class="shdinf"><div class="shdinf">
        //     <span class="rcol">Auteur: <a href=".../user/bakafa/">bakafa</a></span>
        //     <span class="date"><b><a href=".../2013/10/09/">Aujourd'hui, 17:13</a></b></span>
        //     <span class="lcol">Catégorie: <a ...>E-Book / Magazines</a> » <a ...>Magazines</a></span>
        //   </div></div>
        xe = xeHeader.XPathElement(".//div[@class='shdinf']/div[@class='shdinf']");
        header.postAuthor = xe.XPathValue(".//span[@class='rcol']//a//text()");
        header.creationDate = TelechargementPlus.ParseDateTime(xe.XPathValue(".//span[@class='date']//text()"));

        // Category: trimmed text nodes of the 'lcol' span, minus the label, the top-level
        // category and empty strings, joined with "/".
        header.category = xe.XPathElements(".//span[@class='lcol']")
            .DescendantTexts()
            .Select(TelechargementPlus.TrimFunc1)
            .Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "")
            .zToStringValues("/");

        // Post body (images and description text):
        //   <span id="post-img"><div id="news-id-86236" style="display: inline;"> ... <img src="..." /> ... </div></span>
        xe = xeHeader.XPathElement(".//span[@id='post-img']//div[starts-with(@id, 'news-id')]");
        header.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
            .Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, url))
            .Where(imageHtml => !TelechargementPlus.ImagesToSkip.ContainsKey(imageHtml.Source))
            .ToList();
        if (loadImage)
        {
            pb.old.Http_v2.LoadImageFromWeb(header.images);
        }

        header.SetTextValues(xe.DescendantTexts());

        headers.Add(header);
    }

    data.postHeaders = headers.ToArray();
    return data;
}
/// <summary>
/// Parses a thread-list page into an Ebookdz_HeaderPage_v2: the rel='next' pagination link
/// and one Ebookdz_PostHeader per list item of the 'threads' list.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request url and the load date.</param>
/// <returns>The page object with UrlNextPage and PostHeaders filled in.</returns>
private static IEnumDataPages<IHeaderData> GetData(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    // Pagination markup:
    //   <div class="threadpagenav"><span class="prev_next">
    //     <a rel="next" href="forumdisplay.php?f=157&page=2&s=fec27f3bac2b58debbb727ab8725c8a4" ...>
    Ebookdz_HeaderPage_v2 page = new Ebookdz_HeaderPage_v2
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='threadpagenav']//span[@class='prev_next']//a[@rel='next']/@href"))
    };

    List<Ebookdz_PostHeader> posts = new List<Ebookdz_PostHeader>();

    // Thread markup:
    //   <div id="threadlist" class="threadlist"><ol id="threads" class="threads">
    //     <li class="threadbit " id="thread_111977"><h3 class="threadtitle">
    //       <a class="title" href="showthread.php?t=111977&s=..." id="thread_title_111977">...</a>
    foreach (XXElement thread in root.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li"))
    {
        Ebookdz_PostHeader post = new Ebookdz_PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        XXElement titleLink = thread.XPathElement(".//h3[@class='threadtitle']//a[@class='title']");
        post.Title = titleLink.XPathValue(".//text()");
        // href resolved against the page url, then passed through Ebookdz_v1.GetUrl.
        post.UrlDetail = Ebookdz_v1.GetUrl(zurl.GetUrl(pageUrl, titleLink.XPathValue("@href")));

        posts.Add(post);
    }

    page.PostHeaders = posts.ToArray();
    return page;
}
/// <summary>
/// Parses a news page into a PostHeaderDataPage of PostHeader: one header per div of class
/// 'collapse' found below the div whose id is 'vba_news4'.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request url and the load date.</param>
/// <returns>The page object with its headers filled in; UrlNextPage is always null.</returns>
public static IEnumDataPages<PostHeader> GetHeaderPageData(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    PostHeaderDataPage<PostHeader> page = new PostHeaderDataPage<PostHeader>
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = null
    };

    // Container: <div id="vba_news4">
    IEnumerable<XXElement> blocks = root.XPathElements("//div[@id='vba_news4']//div[@class='collapse']");
    List<PostHeader> posts = new List<PostHeader>();

    foreach (XXElement block in blocks)
    {
        PostHeader post = new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        // The XPath selects the anchor at position 2 inside the 'blockhead' h2.
        // An earlier commented-out selector excluded the anchor of class 'mcbadge mcbadge_r' instead.
        XXElement titleLink = block.XPathElement(".//h2[@class='blockhead']//a[2]");
        post.Title = titleLink.XPathValue(".//text()");
        // The href is stored as found; it is not resolved against the page url here.
        post.UrlDetail = titleLink.XPathValue("./@href");

        posts.Add(post);
    }

    page.Headers = posts.ToArray();
    return page;
}
/// <summary>
/// Parses a forum thread-list page into an Ebookdz_HeaderPage: the rel='next' pagination link
/// and one Ebookdz_PostHeader per list item of the 'threads' list.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page; supplies the parsed document, the request url and the load date.</param>
/// <returns>The populated Ebookdz_HeaderPage, cast to IKeyData of int.</returns>
public static IKeyData<int> GetForumHeaderPageData(LoadDataFromWeb_v4 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);
    string url = loadDataFromWeb.WebRequest.HttpRequest.Url;

    Ebookdz_HeaderPage data = new Ebookdz_HeaderPage();
    data.SourceUrl = url;
    data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;

    // Pagination markup:
    //   <div id="above_threadlist" class="above_threadlist"><div class="threadpagenav"><span class="prev_next">
    //     <a rel="next" href="forumdisplay.php?f=74&page=2&s=4807e931448c05da34dd54fbd0308479" ...>
    data.UrlNextPage = GetUrl(zurl.GetUrl(url, xeSource.XPathValue("//div[@id='above_threadlist']//span[@class='prev_next']//a[@rel='next']/@href")));

    // The former lookup of //div[@class='body_bd'] was removed: its result was never read.

    // Thread list markup:
    //   <div id="threadlist" class="threadlist"><ol id="threads" class="threads"><li ...>
    List<Ebookdz_PostHeader> headers = new List<Ebookdz_PostHeader>();
    foreach (XXElement xeHeader in xeSource.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li"))
    {
        Ebookdz_PostHeader header = new Ebookdz_PostHeader();
        header.SourceUrl = url;
        header.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;

        // Title link markup:
        //   <div class="threadinfo" title=""><div class="inner">
        //     <a class="title" href="showthread.php?t=111210&s=..." id="thread_title_111210">...</a>
        XXElement xe = xeHeader.XPathElement(".//div[@class='threadinfo']//a[@class='title']");
        header.Title = xe.XPathValue(".//text()");
        // Resolved against the same page url used for SourceUrl and UrlNextPage.
        header.UrlDetail = GetUrl(zurl.GetUrl(url, xe.XPathValue("@href")));

        headers.Add(header);
    }

    data.PostHeaders = headers.ToArray();

    // Explicit cast kept from the original; whether Ebookdz_HeaderPage declares this
    // interface statically is not visible from this method.
    return (IKeyData<int>)data;
}
/// <summary>
/// Parses a listing page into a Vosbooks_HeaderPage_v2: the '>' pagination link and one
/// Vosbooks_PostHeader_v1 per direct child div of the 'content' div, skipping the div
/// whose class is 'page-nav'.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request url and the load date.</param>
/// <returns>The page object with Id, UrlNextPage and PostHeaders filled in.</returns>
private static IEnumDataPages<IHeaderData> GetHeaderPageData(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    // Page layout:
    //   <div id="page"><div id="wrapper"><table id="layout"> ... <div id="left-col"><div id="content-padding">
    //     <div id="content">
    //       <div style="height:264px;" class="cover_global" data-zt="divbyzt">...</div>
    //       ...
    //     </div>
    Vosbooks_HeaderPage_v2 page = new Vosbooks_HeaderPage_v2
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        // Next page: an anchor whose text is '>' inside the last li of the 'page-nav' div.
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='page-nav']//li[last()]//a[text()='>']/@href"))
    };

    IEnumerable<XXElement> blocks = root.XPathElements("//table[@id='layout']//div[@id='content']/div");
    List<Vosbooks_PostHeader_v1> posts = new List<Vosbooks_PostHeader_v1>();

    foreach (XXElement block in blocks)
    {
        Vosbooks_PostHeader_v1 post = new Vosbooks_PostHeader_v1
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        // The pagination block is a sibling of the post blocks; it is not a post.
        if (block.XPathValue("@class") == "page-nav")
        {
            continue;
        }

        // Post markup:
        //   <div style="" data-zt="divbyzt"><div data-zt="divbyzt"><div data-zt="divbyzt"><center><strong>
        //     <a href="http://www.vosbooks.net/74231-journaux/pack-journaux-francais-du-28-janvier-2015.html" title="">
        //       Pack Journaux Français Du 28 Janvier 2015
        //       <span class="detail_release" data-zt="spanbyzt"></span>
        //     </a>
        XXElement titleLink = block.XPathElement(".//div/div/div//a");
        post.Title = titleLink.XPathValue(".//text()");
        // The href is stored as found; it is not resolved against the page url here.
        post.UrlDetail = titleLink.XPathValue("./@href");

        posts.Add(post);
    }

    page.PostHeaders = posts.ToArray();
    return page;
}
/// <summary>
/// Requests a forum page through Load and lazily yields, for every list item of the
/// 'forumbits' list, one Ebookdz_Forum for the category itself followed by one
/// Ebookdz_Forum per child sub-forum link.
/// </summary>
/// <param name="url">Url of the forum page to request.</param>
/// <param name="forum">Value copied into the Forum member of every yielded entry.</param>
/// <param name="filter">Optional predicate applied to child sub-forum names only; category entries are yielded without consulting it.</param>
/// <param name="reload">Forwarded unchanged to the RequestFromWeb_v4 request.</param>
/// <returns>A sequence of category and sub-forum entries; empty when LoadResult is false.</returns>
public IEnumerable<Ebookdz_Forum> LoadSubForum(string url, string forum, Predicate<string> filter = null, bool reload = false)
{
    HttpRequest pageRequest = new HttpRequest { Url = url };
    LoadDataFromWeb_v4 loaded = Load(new RequestFromWeb_v4(pageRequest, reload: reload));
    if (!loaded.LoadResult)
    {
        yield break;
    }

    XXElement root = new XXElement(loaded.Http.zGetXDocument().Root);

    // Expected markup:
    //   <div class="body_bd"><div id="forumbits" class="forumbits"><ol>
    //     <li id="forum10" class="forumbit_post new L1">
    //       <div class="forumrow"> ...
    //       <ol id="childforum_for_161" class="childsubforum"> ... <div class="titleline"> ...
    foreach (XXElement item in root.XPathElements("//div[@id='forumbits']/ol/li"))
    {
        // Category: first anchor inside the 'forumrow' div.
        XXElement categoryLink = item.XPathElement(".//div[@class='forumrow']//a");
        string category = categoryLink.XPathValue(".//text()");
        string categoryUrl = Ebookdz.GetUrl(zurl.GetUrl(loaded.WebRequest.HttpRequest.Url, categoryLink.XPathValue("@href")));
        yield return new Ebookdz_Forum { Forum = forum, Category = category, Url = categoryUrl };

        // Children: title-line anchors of the nested 'childsubforum' list.
        foreach (XXElement childLink in item.XPathElements(".//ol[@class='childsubforum']/li//div[@class='titleline']//a"))
        {
            string childName = childLink.XPathValue(".//text()");

            bool accepted = filter == null || filter(childName);
            if (accepted)
            {
                string childUrl = Ebookdz.GetUrl(zurl.GetUrl(loaded.WebRequest.HttpRequest.Url, childLink.XPathValue("@href")));
                yield return new Ebookdz_Forum { Forum = forum, Category = category, Name = childName, Url = childUrl };
            }
        }
    }
}