/// <summary>
/// Header get data: converts one downloaded listing page into a header data page.
/// </summary>
/// <remarks>
/// The page records the request URL, the load date and the page key. The next-page URL is
/// the <c>href</c> of the link in the last <c>li</c> under <c>div#wp_page_numbers</c>,
/// passed through <c>zurl.GetUrl</c> together with the page URL. One
/// <c>MagazinesGratuits_PostHeader</c> is produced per direct child <c>div</c> of
/// <c>div#content</c>; only title, detail URL and category are populated.
/// </remarks>
/// <param name="webResult">Result of the web request whose document is parsed.</param>
/// <returns>The populated <c>PostHeaderDataPage_v1</c>.</returns>
protected override IEnumDataPages<IHeaderData> GetHeaderPageData(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage_v1
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@id='wp_page_numbers']//li[last()]//a/@href"))
    };

    var collected = new List<MagazinesGratuits_PostHeader>();
    foreach (XXElement entry in root.XPathElements("//div[@id='content']/div"))
    {
        var postHeader = new MagazinesGratuits_PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        // Parsing stops at the first div whose class is exactly "page-nav"; that div and
        // everything after it are ignored.
        // NOTE(review): presumably this is the pager that closes the post list - confirm.
        if (entry.XPathValue("@class") == "page-nav")
        {
            break;
        }

        // Title and detail URL both come from the anchor under center/strong.
        XXElement titleLink = entry.XPathElement(".//center/strong/a");
        postHeader.Title = titleLink.XPathValue(".//text()");
        postHeader.UrlDetail = titleLink.XPathValue("./@href");
        postHeader.Category = entry.XPathValue(".//div[@class='cover_infos_genre']/a//text()");

        collected.Add(postHeader);
    }

    page.Headers = collected.ToArray();
    return page;
}
/// <summary>
/// Header get data: converts one downloaded page into a header data page.
/// </summary>
/// <remarks>
/// The page records the request URL, the load date and the page key; the next-page URL is
/// always set to <c>null</c>. One <c>PostHeader</c> is produced for every
/// <c>div.collapse</c> found below <c>div#vba_news4</c>; only title and detail URL are
/// populated.
/// </remarks>
/// <param name="webResult">Result of the web request whose document is parsed.</param>
/// <returns>The populated <c>PostHeaderDataPage_v1</c>.</returns>
protected override IEnumDataPages<IHeaderData> GetHeaderPageData(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage_v1
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = null
    };

    // Entries live under <div id="vba_news4">.
    var collected = new List<PostHeader>();
    foreach (XXElement entry in root.XPathElements("//div[@id='vba_news4']//div[@class='collapse']"))
    {
        var postHeader = new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        // The second anchor inside h2.blockhead carries the title and the link.
        // NOTE(review): an earlier selector excluded anchors with class
        // 'mcbadge mcbadge_r'; presumably the first anchor is that badge - confirm.
        XXElement titleLink = entry.XPathElement(".//h2[@class='blockhead']//a[2]");
        postHeader.Title = titleLink.XPathValue(".//text()");
        postHeader.UrlDetail = titleLink.XPathValue("./@href");

        collected.Add(postHeader);
    }

    page.Headers = collected.ToArray();
    return page;
}
/// <summary>
/// Builds the data page that lists the forums found on the downloaded page.
/// </summary>
/// <remarks>
/// Every <c>li</c> directly under <c>ol#forums</c> is inspected. The text of the anchor
/// selected by <c>.//a</c> is the forum name; the forum is kept when
/// <c>__forumFilter</c> is <c>null</c> or returns <c>true</c> for that name. The next-page
/// URL is always set to <c>null</c>.
/// </remarks>
/// <param name="webResult">Result of the web request whose document is parsed.</param>
/// <returns>The populated <c>PostHeaderDataPage_v1</c> whose headers are the kept forums.</returns>
protected override IEnumDataPages<IHeaderData> GetDataPage(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage_v1
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = null
    };

    // The filter field is read once into a local and that local is used for every entry.
    Predicate<string> acceptForum = __forumFilter;
    var kept = new List<EbookdzForumData>();

    // Source markup: <ol id="forums" class="floatcontainer">
    // Forum names listed in the original notes: Accueil de la Board, Forum de l'entraide,
    // Journaux, MAGAZINES, Les Livres, Sujet supprimés ou à supprimer
    // Sample link: http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
    foreach (XXElement listItem in root.XPathElements("//ol[@id='forums']/li"))
    {
        XXElement link = listItem.XPathElement(".//a");
        string forumName = link.XPathValue(".//text()");

        if (acceptForum == null || acceptForum(forumName))
        {
            var forum = new EbookdzForumData
            {
                SourceUrl = pageUrl,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Name = forumName,
                UrlDetail = GetUrl(zurl.GetUrl(pageUrl, link.XPathValue("@href")))
            };
            kept.Add(forum);
        }
    }

    page.Headers = kept.ToArray();
    return page;
}
/// <summary>
/// Builds the data page that lists the forums found on the downloaded page.
/// </summary>
/// <remarks>
/// Every <c>li</c> directly under <c>ol#forums</c> is inspected. The text of the anchor
/// selected by <c>.//a</c> is the forum name; the forum is kept when
/// <c>__forumFilter</c> is <c>null</c> or returns <c>true</c> for that name. The next-page
/// URL is always set to <c>null</c>.
/// </remarks>
/// <param name="webResult">Result of the web request whose document is parsed.</param>
/// <returns>The populated <c>PostHeaderDataPage_v1</c> whose headers are the kept forums.</returns>
protected override IEnumDataPages<IHeaderData> GetDataPage(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage_v1
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = null
    };

    // The filter field is read once into a local and that local is used for every entry.
    Predicate<string> acceptForum = __forumFilter;
    var kept = new List<EbookdzForumData>();

    // Source markup: <ol id="forums" class="floatcontainer">
    // Forum names listed in the original notes: Accueil de la Board, Forum de l'entraide,
    // Journaux, MAGAZINES, Les Livres, Sujet supprimés ou à supprimer
    // Sample link: http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
    foreach (XXElement listItem in root.XPathElements("//ol[@id='forums']/li"))
    {
        XXElement link = listItem.XPathElement(".//a");
        string forumName = link.XPathValue(".//text()");

        if (acceptForum == null || acceptForum(forumName))
        {
            var forum = new EbookdzForumData
            {
                SourceUrl = pageUrl,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Name = forumName,
                UrlDetail = GetUrl(zurl.GetUrl(pageUrl, link.XPathValue("@href")))
            };
            kept.Add(forum);
        }
    }

    page.Headers = kept.ToArray();
    return page;
}
/// <summary>
/// Header get data: converts one downloaded listing page into a header data page.
/// </summary>
/// <remarks>
/// The page records the request URL, the load date and the page key. One
/// <c>PostHeader</c> is produced per direct child <c>div</c> of <c>div#content</c> inside
/// <c>table#layout</c>, except for divs whose class is exactly "page-nav", which are
/// skipped. Only title and detail URL are populated.
/// </remarks>
/// <param name="webResult">Result of the web request whose document is parsed.</param>
/// <returns>The populated <c>PostHeaderDataPage_v1</c>.</returns>
protected override IEnumDataPages<IHeaderData> GetHeaderPageData(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    // Page layout, as noted by the original author:
    //   <div id="page">
    //     <div id="wrapper">
    //       <table id="layout"> <tr> <td></td> <td>
    //         <div id="left-col">
    //           <div id="content-padding">
    //             <div id="content">
    //               <div style="height:264px;" class="cover_global" data-zt="divbyzt">...</div>
    //               ...
    //             </div>
    // The next-page link is the anchor whose text is ">" in the last li of div.page-nav.
    var page = new PostHeaderDataPage_v1
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='page-nav']//li[last()]//a[text()='>']/@href"))
    };

    var collected = new List<PostHeader>();
    foreach (XXElement entry in root.XPathElements("//table[@id='layout']//div[@id='content']/div"))
    {
        var postHeader = new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        // Divs with class "page-nav" are skipped; iteration continues with the next div.
        if (entry.XPathValue("@class") != "page-nav")
        {
            // Entry markup, as noted by the original author:
            //   <div style="" data-zt="divbyzt">
            //     <div data-zt="divbyzt">
            //       <div data-zt="divbyzt">
            //         <center> <strong>
            //           <a href="http://www.vosbooks.net/74231-journaux/pack-journaux-francais-du-28-janvier-2015.html" title="">
            //             Pack Journaux Français Du 28 Janvier 2015
            //             <span class="detail_release" data-zt="spanbyzt"></span>
            //           </a>
            //         </strong> </center>
            //       </div>
            //     </div>
            //   </div>
            XXElement titleLink = entry.XPathElement(".//div/div/div//a");
            postHeader.Title = titleLink.XPathValue(".//text()");
            postHeader.UrlDetail = titleLink.XPathValue("./@href");

            collected.Add(postHeader);
        }
    }

    page.Headers = collected.ToArray();
    return page;
}