/// <summary>
/// Parses one header listing page: fills the page metadata, resolves the
/// next-page url and builds one <c>PostHeader</c> per listed post.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated header page.</returns>
protected override IEnumDataPages<PostHeader> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage<PostHeader>();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = GetPageKey(webResult.WebRequest.HttpRequest);

    // Next page = the ">" link found inside the last item of the pager.
    string nextHref = root.XPathValue("//div[@class='page-nav']//li[last()]//a[text()='>']/@href");
    page.UrlNextPage = zurl.GetUrl(pageUrl, nextHref);

    var posts = new List<PostHeader>();
    foreach (XXElement block in root.XPathElements("//table[@id='layout']//div[@id='content']/div"))
    {
        // Title and detail url both come from the same anchor.
        XXElement link = block.XPathElement(".//div/div/div//a");
        posts.Add(new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            Title = link.XPathValue(".//text()"),
            UrlDetail = link.XPathValue("./@href")
        });
    }

    page.Headers = posts.ToArray();
    return page;
}
/// <summary>
/// Parses one blog listing page into a <c>BlogDemoorHeaderDataPages</c>:
/// page metadata, next-page url and one header (title, detail url, date) per item.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated header page.</returns>
protected override IEnumDataPages<BlogDemoorHeaderData> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new BlogDemoorHeaderDataPages();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = GetPageKey(webResult.WebRequest.HttpRequest);

    string nextHref = root.XPathValue("//a[@class='nextpage']/@href");
    page.UrlNextPage = zurl.GetUrl(pageUrl, nextHref);

    var items = new List<BlogDemoorHeaderData>();
    foreach (XXElement item in root.XPathElements("//div[@class='item_div']"))
    {
        XXElement link = item.XPathElement(".//h2/a");
        items.Add(new BlogDemoorHeaderData
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            Title = link.XPathValue(".//text()"),
            UrlDetail = link.XPathValue("./@href"),
            // Raw date text, e.g. <div class="dateheader">23 juillet 2016</div>
            Date = item.XPathValue(".//div[@class='dateheader']/text()")
        });
    }

    page.Data = items.ToArray();
    return page;
}
/// <summary>
/// Advances the article enumerator and, when an article is available,
/// builds <c>_postHeader</c> from it (url, title, info texts, optional image load).
/// </summary>
/// <returns>
/// <c>true</c> when a new header was built from the next article;
/// <c>false</c> when the enumerator is exhausted.
/// </returns>
protected override bool _MoveNext()
{
    // The original used a while loop whose body always returned on the first
    // pass; a guard clause states the same thing directly.
    if (!_xmlEnum.MoveNext())
    {
        return false;
    }

    XXElement xeArticle = _xmlEnum.Current;
    _postHeader = new Pdf4frPostHeader();
    _postHeader.sourceUrl = _sourceUrl;

    // The anchor under the article header carries both the url and the title.
    XXElement xe = xeArticle.XPathElement("./header//a");
    _postHeader.url = xe.XPathValue("@href");
    _postHeader.title = xe.XPathValue(".//text()");

    // Every descendant text of the "entry_top" div is handed to SetInfo.
    xe = xeArticle.XPathElement(".//div[@class='entry_top']");
    foreach (string s in xe.XElement.zDescendantTexts())
    {
        _postHeader.SetInfo(s);
    }

    if (_loadImage && _postHeader.image.Source != null)
    {
        Http_v2.LoadImageFromWeb(_postHeader.image.Source);
    }

    // The former trailing lookup of ".//footer" was assigned and never read,
    // so it has been dropped.
    return true;
}
/// <summary>
/// Advances the post enumerator and, when a post is available, builds
/// <c>_postHeader</c> (source url, post url, title) from its first anchor.
/// </summary>
/// <returns>
/// <c>true</c> when a new header was built; <c>false</c> when no post is left.
/// </returns>
protected override bool _MoveNext()
{
    if (!_xmlEnum.MoveNext())
    {
        return false;
    }

    XXElement post = _xmlEnum.Current;
    _postHeader = new Magazine3kPostHeader();
    _postHeader.sourceUrl = _sourceUrl;

    // Sample markup:
    // <h2 class="title">
    //   <a href="http://zone-ebooks.com/journaux/le-parisien-journal-de-paris-supp-economie-du-lundi-07-octobre-pdf.html"
    //      rel="bookmark" title="Lien permanent: Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre">
    //     Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre</a>
    // </h2>
    XXElement link = post.XPathElement(".//a");
    _postHeader.url = link.XPathValue("@href");
    _postHeader.title = link.XPathValue(".//text()");

    return true;
}
/// <summary>
/// Parses one listing page into a <c>PostHeaderDataPage_v1</c>: page metadata,
/// next-page url and one <c>MagazinesGratuits_PostHeader</c> (title, detail url,
/// category) per content div.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated header page.</returns>
protected override IEnumDataPages<IHeaderData> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage_v1();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = GetPageKey(webResult.WebRequest.HttpRequest);

    // Next page = the link inside the last item of the page-number list.
    string nextHref = root.XPathValue("//div[@id='wp_page_numbers']//li[last()]//a/@href");
    page.UrlNextPage = zurl.GetUrl(pageUrl, nextHref);

    var posts = new List<MagazinesGratuits_PostHeader>();
    foreach (XXElement block in root.XPathElements("//div[@id='content']/div"))
    {
        // Stop at the pager div (class "page-nav"); later siblings are not read.
        if (block.XPathValue("@class") == "page-nav")
        {
            break;
        }

        XXElement link = block.XPathElement(".//center/strong/a");
        posts.Add(new MagazinesGratuits_PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            Title = link.XPathValue(".//text()"),
            UrlDetail = link.XPathValue("./@href"),
            Category = block.XPathValue(".//div[@class='cover_infos_genre']/a//text()")
        });
    }

    page.Headers = posts.ToArray();
    return page;
}
/// <summary>
/// Parses one search-result page into an <c>OnisepInstitution_HeaderPage</c>:
/// page metadata, next-page url and one row (institution, detail url, city,
/// postal code) per result table line.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated header page.</returns>
private static OnisepInstitution_HeaderPage GetData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new OnisepInstitution_HeaderPage();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = GetPageKey(webResult.WebRequest.HttpRequest);

    // Sample markup:
    // <table class="oni_tableSearchResults" style="width: 463px; margin-left: 5px" summary="">
    //   <tbody>
    //     <tr class=oni_odd>
    //       <td class="oni_first"><a href="/Ressources/Univers-Postbac/Postbac/Aquitaine/Pyrenees-Atlantiques/Academie-Basque-du-Sport"> Académie Basque du Sport </a></td>
    //       <td>Biarritz</td>
    //       <td>64200</td>
    //     </tr>
    //   </tbody>
    // </table>
    // <div class="pagenavigator">
    //   <p>
    //     <span class="pages"> <span class="current">1</span> <span class="other"><a href="...">2</a></span> ... </span>
    //     <span class="next"><a href="/content/search/(offset)/10?&SubTreeArray=243418&etabRecherche=1&idFormation=&limit=10"><span class="text">Suivant »</span></a></span>
    //   </p>
    // </div>
    string nextHref = root.XPathValue("//div[@class='pagenavigator']//span[@class='next']//a/@href");
    page.UrlNextPage = zurl.GetUrl(pageUrl, nextHref);

    var rows = new List<OnisepInstitution_Header>();
    foreach (XXElement row in root.XPathElements("//table[@class='oni_tableSearchResults']//tbody//tr"))
    {
        var entry = new OnisepInstitution_Header();
        entry.SourceUrl = pageUrl;
        // NOTE(review): rows are stamped with the current time while the page
        // itself uses webResult.LoadFromWebDate; confirm this is intended.
        entry.LoadFromWebDate = DateTime.Now;

        // First cell: institution name and link; second: city; third: postal code.
        XXElement firstCell = row.XPathElement(".//td[1]");
        entry.Institution = OnisepInstitution.Trim(firstCell.XPathValue(".//text()"));
        entry.UrlDetail = zurl.GetUrl(pageUrl, firstCell.XPathValue(".//a/@href"));
        entry.City = OnisepInstitution.Trim(row.XPathValue(".//td[2]//text()"));
        entry.PostalCode = OnisepInstitution.Trim(row.XPathValue(".//td[3]//text()"));

        rows.Add(entry);
    }

    page.Headers = rows.ToArray();
    return page;
}
/// <summary>
/// Parses the news block (<c>div id="vba_news4"</c>) of a page into a header page:
/// page metadata plus one <c>PostHeader</c> (title, detail url) per collapsible entry.
/// The next-page url is always left null.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated header page.</returns>
protected override IEnumDataPages<PostHeader> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage<PostHeader>();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = GetPageKey(webResult.WebRequest.HttpRequest);
    page.UrlNextPage = null;

    var posts = new List<PostHeader>();
    foreach (XXElement entry in root.XPathElements("//div[@id='vba_news4']//div[@class='collapse']"))
    {
        // The second anchor of the block head is used; an earlier, commented-out
        // variant excluded anchors by class 'mcbadge mcbadge_r' instead.
        XXElement link = entry.XPathElement(".//h2[@class='blockhead']//a[2]");
        posts.Add(new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            Title = link.XPathValue(".//text()"),
            UrlDetail = link.XPathValue("./@href")
        });
    }

    page.Headers = posts.ToArray();
    return page;
}
/// <summary>
/// Parses one listing page into a <c>RapideDdl_HeaderPage</c>: next-page url and
/// one <c>RapideDdl_PostHeader</c> per "base shortstory" block (detail url, author,
/// images, text values, category). Title and creation date are not filled here:
/// the statements that set them are commented out in the original code.
/// </summary>
/// <param name="request">Request whose document and url are parsed.</param>
/// <returns>The populated header page.</returns>
public static RapideDdl_HeaderPage LoadHeaderPageFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    var root = new XXElement(request.GetXmlDocument().Root);
    string pageUrl = request.Url;

    var page = new RapideDdl_HeaderPage();
    string nextHref = root.XPathValue("//div[@class='basenavi']//span[@class='nnext']//a/@href");
    page.urlNextPage = zurl.GetUrl(pageUrl, nextHref);

    var posts = new List<RapideDdl_PostHeader>();
    foreach (XXElement block in root.XPathElements("//div[@class='base shortstory']"))
    {
        var post = new RapideDdl_PostHeader();
        post.sourceUrl = pageUrl;
        post.loadFromWebDate = DateTime.Now;

        XXElement titleLink = block.XPathElement(".//*[@class='shd']//a");
        post.urlDetail = zurl.GetUrl(pageUrl, titleLink.XPathValue("@href"));

        XXElement infoBar = block.XPathElement(".//div[@class='shdinf']");
        post.postAuthor = infoBar.XPathValue(".//span[@class='arg']//a//text()");

        // Main content: image nodes become ImageHtml entries, descendant texts
        // are handed to SetTextValues.
        XXElement content = block.XPathElement(".//div[@class='maincont']");
        post.images = content.DescendantNodes(candidate => XmlDescendant.ImageFilter(candidate))
                             .Select(imageNode => new pb.old.ImageHtml((XElement)imageNode, pageUrl))
                             .ToList();
        post.SetTextValues(content.DescendantTexts());

        // Category = trimmed link texts of the "more" bar joined with "/",
        // leaving out those starting with "Commentaires".
        XXElement moreBar = block.XPathElement(".//div[@class='morelink']//span[@class='arg']");
        post.category = moreBar.XPathElements(".//a")
                               .DescendantTexts()
                               .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
                               .Where(text => !text.StartsWith("Commentaires"))
                               .zToStringValues("/");

        posts.Add(post);
    }

    page.postHeaders = posts.ToArray();
    return page;
}
/// <summary>
/// Parses the forum index into an <c>Ebookdz_ForumPage_v1</c>: one
/// <c>Ebookdz_Forum_v1</c> per sub-forum link, tagged with the name of the
/// top-level forum it is listed under. The next-page url is always null.
/// </summary>
/// <remarks>
/// Compared with the previous version, the category url (computed but never
/// used) and the name filter (a local that was always null, so its branch
/// could not run) have been removed; the returned data is unchanged.
/// </remarks>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated forum page.</returns>
private static Ebookdz_ForumPage_v1 GetData(WebResult webResult)
{
    XXElement xeSource = new XXElement(webResult.Http.zGetXDocument().Root);
    string url = webResult.WebRequest.HttpRequest.Url;

    Ebookdz_ForumPage_v1 data = new Ebookdz_ForumPage_v1();
    data.SourceUrl = url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.UrlNextPage = null;

    List<Ebookdz_Forum_v1> forums = new List<Ebookdz_Forum_v1>();

    // Sample markup:
    // <div class="body_bd">
    //   <div id="forumbits" class="forumbits">
    //     <ol>
    //       <li id="forum10" class="forumbit_post new L1">
    //         <div class="forumrow">
    //         <ol id="childforum_for_161" class="childsubforum">
    //           <div class="titleline">
    foreach (XXElement xeForum in xeSource.XPathElements("//div[@id='forumbits']/ol/li"))
    {
        // Text of the first link in the forum row is the category name.
        string category = xeForum.XPathElement(".//div[@class='forumrow']//a").XPathValue(".//text()");

        foreach (XXElement xeSubForum in xeForum.XPathElements(".//ol[@class='childsubforum']/li//div[@class='titleline']//a"))
        {
            string name = xeSubForum.XPathValue(".//text()");
            string urlSubForum = Ebookdz_v1.GetUrl(zurl.GetUrl(url, xeSubForum.XPathValue("@href")));
            forums.Add(new Ebookdz_Forum_v1
            {
                SourceUrl = url,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Category = category,
                Name = name,
                Url = urlSubForum
            });
        }
    }

    data.Forums = forums.ToArray();
    return data;
}
/// <summary>
/// Parses the forum index into a data page: one <c>EbookdzForumData</c> per
/// sub-forum link, tagged with the name of the top-level forum it is listed
/// under. The next-page url is always null.
/// </summary>
/// <remarks>
/// Compared with the previous version, the category url (computed but never
/// used) and the name filter (a local that was always null, so its branch
/// could not run) have been removed; the returned data is unchanged.
/// </remarks>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated data page.</returns>
protected override IEnumDataPages<EbookdzForumData> GetDataPage(WebResult webResult)
{
    XXElement xeSource = new XXElement(webResult.Http.zGetXDocument().Root);
    string url = webResult.WebRequest.HttpRequest.Url;

    PostHeaderDataPage<EbookdzForumData> data = new PostHeaderDataPage<EbookdzForumData>();
    data.SourceUrl = url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = _GetKey(webResult.WebRequest.HttpRequest);
    data.UrlNextPage = null;

    List<EbookdzForumData> forums = new List<EbookdzForumData>();

    // Sample markup:
    // <div class="body_bd">
    //   <div id="forumbits" class="forumbits">
    //     <ol>
    //       <li id="forum10" class="forumbit_post new L1">
    //         <div class="forumrow">
    //         <ol id="childforum_for_161" class="childsubforum">
    //           <div class="titleline">
    foreach (XXElement xeForum in xeSource.XPathElements("//div[@id='forumbits']/ol/li"))
    {
        // Text of the first link in the forum row is the category name.
        string category = xeForum.XPathElement(".//div[@class='forumrow']//a").XPathValue(".//text()");

        foreach (XXElement xeSubForum in xeForum.XPathElements(".//ol[@class='childsubforum']/li//div[@class='titleline']//a"))
        {
            string name = xeSubForum.XPathValue(".//text()");
            string urlSubForum = Ebookdz_MainForum.GetUrl(zurl.GetUrl(url, xeSubForum.XPathValue("@href")));
            forums.Add(new EbookdzForumData
            {
                SourceUrl = url,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Category = category,
                Name = name,
                UrlDetail = urlSubForum
            });
        }
    }

    data.Headers = forums.ToArray();
    return data;
}
/// <summary>
/// Parses one listing page into a header page: page metadata, next-page url and
/// one <c>PostHeader</c> carrying only the detail url per "blockbox" div.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated header page.</returns>
protected override IEnumDataPages<PostHeader> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage<PostHeader>();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = GetPageKey(webResult.WebRequest.HttpRequest);

    // Next page = the navigation link whose text starts with "Suivant".
    string nextHref = root.XPathValue("//div[@class='navigation ignore-select']//a[starts-with(text(), 'Suivant')]/@href");
    page.UrlNextPage = zurl.GetUrl(pageUrl, nextHref);

    var posts = new List<PostHeader>();
    foreach (XXElement block in root.XPathElements("//div[@id='dle-content']//div[@class='blockbox']"))
    {
        posts.Add(new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            UrlDetail = block.XPathValue(".//h2[@class='blocktitle']//a/@href")
        });
    }

    page.Headers = posts.ToArray();
    return page;
}
/// <summary>
/// Parses one listing page into a <c>Telechargementz_HeaderPage</c>: page
/// metadata, next-page url and one post header carrying only the detail url
/// per "custom-post" div.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page; supplies the parsed document, the request url and the load date.</param>
/// <returns>The populated header page, cast to the interface type.</returns>
protected override IEnumDataPages_v1<int, IHeaderData_v1> GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    var root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string pageUrl = loadDataFromWeb.request.Url;

    var page = new Telechargementz_HeaderPage();
    page.sourceUrl = pageUrl;
    page.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    page.id = Telechargementz_LoadHeaderPagesManager.GetHeaderPageKey(pageUrl);

    // Next page = the navigation link whose text is exactly "vers l'avant".
    string nextHref = root.XPathValue("//div[@class='navigation']//a[text()=\"vers l'avant\"]/@href");
    page.urlNextPage = zurl.GetUrl(pageUrl, nextHref);

    var posts = new List<Telechargementz_PostHeader>();
    foreach (XXElement block in root.XPathElements("//div[@id='dle-content']//div[@class='custom-post']"))
    {
        posts.Add(new Telechargementz_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = loadDataFromWeb.loadFromWebDate,
            urlDetail = block.XPathValue(".//div[@class='custom-poster']//a/@href")
        });
    }

    page.postHeaders = posts.ToArray();
    return (IEnumDataPages_v1<int, IHeaderData_v1>)page;
}
/// <summary>
/// Parses one listing page into an <c>ExtremeDown_HeaderPage</c>: page metadata,
/// next-page url and one post header carrying only the detail url per
/// "blockbox" div.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page; supplies the parsed document, the request url and the load date.</param>
/// <returns>The populated header page, cast to the interface type.</returns>
protected override IEnumDataPages_v1<int, IHeaderData_v1> GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    var root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string pageUrl = loadDataFromWeb.request.Url;

    var page = new ExtremeDown_HeaderPage();
    page.sourceUrl = pageUrl;
    page.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    page.id = ExtremeDown_LoadHeaderPagesManager.GetHeaderPageKey(pageUrl);

    // Next page = the navigation link whose text starts with "Suivant".
    string nextHref = root.XPathValue("//div[@class='navigation ignore-select']//a[starts-with(text(), 'Suivant')]/@href");
    page.urlNextPage = zurl.GetUrl(pageUrl, nextHref);

    var posts = new List<ExtremeDown_PostHeader>();
    foreach (XXElement block in root.XPathElements("//div[@id='dle-content']//div[@class='blockbox']"))
    {
        posts.Add(new ExtremeDown_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = loadDataFromWeb.loadFromWebDate,
            urlDetail = block.XPathValue(".//h2[@class='blocktitle']//a/@href")
        });
    }

    page.postHeaders = posts.ToArray();
    return (IEnumDataPages_v1<int, IHeaderData_v1>)page;
}
/// <summary>
/// Reads the login name shown in the "welcome" link of the top links bar.
/// </summary>
/// <param name="xeSource">Root element of the parsed page.</param>
/// <returns>The value of the XPath lookup on the welcome link text.</returns>
public static string GetLogin(XXElement xeSource)
{
    // Sample (ebookdz.com_forum_showthread.php_t_109595_01_02.html):
    // <div id="toplinks" class="toplinks">
    //   <li class="welcomelink">Bienvenue, <a href="member.php?u=49369"><b>la_beuze</b></a></li>
    string login = xeSource.XPathValue("//div[@id='toplinks']//li[@class='welcomelink']//a//text()");
    return login;
}
/// <summary>
/// Initialises the paging state from the current document: stores the
/// next-page url and an enumerator over the establishment divs.
/// </summary>
protected void InitXml()
{
    // Pager sample:
    // <div class="PAGENAVIGLIST">
    //   <a href="/Gesat/EtablissementList-10-10.html" title="page suivante">></a>
    var nextHref = _xelement.XPathValue("//div[@class='PAGENAVIGLIST']//a[@title='page suivante']/@href");
    _urlNextPage = GetUrl(nextHref);

    // Entry samples: <div class="ETABLISSEMENT STAR-1 ODD">, <div class="ETABLISSEMENT STAR-0 ODD">,
    // <div class="ETABLISSEMENT STAR-1 EVEN">
    var establishments = _xelement.XPathElements("//div[starts-with(@class, 'ETABLISSEMENT STAR-')]");
    _xmlEnum = establishments.GetEnumerator();
}
/// <summary>
/// Parses one supplier search page into a <c>Handeco_HeaderPage</c>: page
/// metadata, next-page url (fragment removed) and one <c>Handeco_Header</c>
/// per table row after the first.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated header page.</returns>
private static Handeco_HeaderPage GetData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new Handeco_HeaderPage();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = GetPageKey(webResult.WebRequest.HttpRequest);

    // Pager sample: the element before the last one is a link on page n and a
    // disabled span (no href) on the last page.
    // <div class="paginationControl">
    //   page n    : <a href="/fournisseurs/rechercher/page/2#resultats">></a> |
    //   last page : <span class="disabled">></span> |
    string nextHref = root.XPathValue("//div[@class='paginationControl']//*[position()=last()-1]/@href");
    page.UrlNextPage = zurl.RemoveFragment(zurl.GetUrl(pageUrl, nextHref));

    var rows = new List<Handeco_Header>();
    // position() > 1 leaves out the first row of each table.
    foreach (XXElement row in root.XPathElements("//table//tr[position() > 1]"))
    {
        rows.Add(new Handeco_Header
        {
            SourceUrl = pageUrl,
            // NOTE(review): rows are stamped with the current time while the page
            // itself uses webResult.LoadFromWebDate; confirm this is intended.
            LoadFromWebDate = DateTime.Now,
            Name = Handeco.Trim(row.XPathValue(".//td[1]//text()")),
            UrlDetail = zurl.RemoveFragment(zurl.GetUrl(pageUrl, row.XPathValue(".//td[1]//a/@href"))),
            Type = Handeco.Trim(row.XPathValue(".//td[2]//text()")),
            Groupes = row.XPathValues(".//td[3]//text()").Select(Handeco.Trim).ToArray(),
            Activités = row.XPathValues(".//td[4]//text()").Select(Handeco.Trim).ToArray(),
            PostalCode = Handeco.Trim(row.XPathValue(".//td[5]//text()"))
        });
    }

    page.Headers = rows.ToArray();
    return page;
}
/// <summary>
/// Header page parser (from WebHeaderDetailMongoManagerBase_v2&lt;THeaderData, TDetailData&gt;).
/// Fills the page metadata, resolves the next-page url and builds one
/// <c>PostHeader</c> (title, detail url) per "leftpane" div inside the
/// <c>dle-content</c> container.
/// </summary>
/// <remarks>
/// The previous version skipped elements whose class was "page-nav". That test
/// could never succeed because the elements are selected with
/// <c>@class='leftpane'</c>, so it has been removed; the result is unchanged.
/// </remarks>
/// <param name="httpResult">Loaded page; supplies the parsed document, the request and the request time.</param>
/// <returns>The populated header page.</returns>
protected override IEnumDataPages<PostHeader> GetHeaderPageData(HttpResult<string> httpResult)
{
    XXElement xeSource = httpResult.zGetXDocument().zXXElement();
    string url = httpResult.Http.HttpRequest.Url;

    PostHeaderHeaderDataPages_v2 data = new PostHeaderHeaderDataPages_v2();
    data.SourceUrl = url;
    data.LoadFromWebDate = httpResult.Http.RequestTime;
    data.Id = GetPageKey(httpResult.Http.HttpRequest);

    // Sample markup:
    // <div id='dle-content'>
    //   <div class="leftpane">
    //     <div class="movieposter" title="Watch Movie Pachamama : Cuisine des premières nations">
    //       <a href="http://www.telecharger-magazine.com/livres/3833-pachamama-cuisine-des-premires-nations.html">
    //       <a href="http://www.telecharger-magazine.com/livres/3833-pachamama-cuisine-des-premires-nations.html" title="Pachamama : Cuisine des premières nations">
    //         <img src="http://pxhst.co/avaxhome/cd/2a/00152acd.jpeg" width="110" height="150" alt="..." title="..." />
    //       </a>
    //     </div>
    //   </div>
    //   ...
    //   <div class="navigation" align="center">
    //     <span>← Previous</span> <span>1</span>
    //     <a href="http://www.telecharger-magazine.com/page/2/">2</a>
    //     ...
    //     <a href="http://www.telecharger-magazine.com/page/2/">Next →</a>
    //   </div>
    XXElement xeContent = xeSource.XPathElement("//div[@id='dle-content']");
    data.UrlNextPage = zurl.GetUrl(url, xeContent.XPathValue(".//a[starts-with(text(), 'Next')]/@href"));

    List<PostHeader> headers = new List<PostHeader>();
    foreach (XXElement xeHeader in xeContent.XPathElements(".//div[@class='leftpane']"))
    {
        // The inner anchor of the nested <a><a> pair carries both title and href
        // (see the sample above).
        XXElement xeLink = xeHeader.XPathElement(".//a/a");

        PostHeader header = new PostHeader();
        header.SourceUrl = url;
        header.LoadFromWebDate = httpResult.Http.RequestTime;
        header.Title = xeLink.AttribValue("title");
        header.UrlDetail = xeLink.AttribValue("href");
        headers.Add(header);
    }

    data.Data = headers.ToArray();
    return data;
}
/// <summary>
/// Parses one thread-list page into a data page: page metadata, next-page url
/// and one <c>PostHeader</c> (title, detail url) per thread.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated data page.</returns>
protected override IEnumDataPages<PostHeader> GetDataPage(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage<PostHeader>();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = _GetKey(webResult.WebRequest.HttpRequest);

    // Pager sample:
    // <div class="threadpagenav">
    //   <span class="prev_next">
    //     <a rel="next" href="forumdisplay.php?f=157&page=2&s=fec27f3bac2b58debbb727ab8725c8a4" title="Page suivante - Résultats de 21 à 40 sur 61">
    string nextHref = root.XPathValue("//div[@class='threadpagenav']//span[@class='prev_next']//a[@rel='next']/@href");
    page.UrlNextPage = zurl.GetUrl(pageUrl, nextHref);

    // Thread sample:
    // <div class="body_bd">
    //   <div id="threadlist" class="threadlist">
    //     <ol id="threads" class="threads">
    //       <li class="threadbit " id="thread_111977">
    //         <h3 class="threadtitle">
    //           <a title="" class="title" href="showthread.php?t=111977&s=fec27f3bac2b58debbb727ab8725c8a4" id="thread_title_111977">La Provence Marseille du lundi 26 janvier 2015</a>
    var threads = new List<PostHeader>();
    foreach (XXElement thread in root.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li"))
    {
        XXElement titleLink = thread.XPathElement(".//h3[@class='threadtitle']//a[@class='title']");
        threads.Add(new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            Title = titleLink.XPathValue(".//text()"),
            UrlDetail = Ebookdz_MainForum.GetUrl(zurl.GetUrl(pageUrl, titleLink.XPathValue("@href")))
        });
    }

    page.Headers = threads.ToArray();
    return page;
}
/// <summary>
/// Parses the top-level forum list (<c>ol id="forums"</c>) into an
/// <c>Ebookdz_ForumPage_v1</c>: one <c>Ebookdz_Forum_v1</c> (name, url) per list
/// item whose name passes <c>__forumFilter</c> (when that filter is set).
/// The next-page url is always null.
/// </summary>
/// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
/// <returns>The populated forum page.</returns>
private static Ebookdz_ForumPage_v1 GetData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new Ebookdz_ForumPage_v1();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = _GetPageKey(webResult.WebRequest.HttpRequest);
    page.UrlNextPage = null;

    Predicate<string> accept = __forumFilter;
    var forums = new List<Ebookdz_Forum_v1>();

    // <ol id="forums" class="floatcontainer">
    // Sample names: Accueil de la Board, Forum de l'entraide, Journaux, MAGAZINES, Les Livres, Sujet supprimés ou à supprimer
    // Sample url: http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
    foreach (XXElement item in root.XPathElements("//ol[@id='forums']/li"))
    {
        XXElement link = item.XPathElement(".//a");
        string forumName = link.XPathValue(".//text()");

        // No filter means every forum is kept.
        bool keep = accept == null || accept(forumName);
        if (keep)
        {
            forums.Add(new Ebookdz_Forum_v1
            {
                SourceUrl = pageUrl,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Name = forumName,
                Url = Ebookdz_v1.GetUrl(zurl.GetUrl(pageUrl, link.XPathValue("@href")))
            });
        }
    }

    page.Forums = forums.ToArray();
    return page;
}
/// <summary>
/// Initialises the paging state from the current document: stores the
/// next-page url and an enumerator over the post divs.
/// </summary>
protected void InitXml()
{
    // Next page sample:
    // <a href='http://zone-ebooks.com/page/2' class='nextpostslink'>»</a>
    var nextHref = _xelement.XPathValue("//a[@class='nextpostslink']/@href");
    _urlNextPage = nextHref;

    // Post sample (class list shortened):
    // <div id="post-1838" class="post-1838 post type-post status-publish format-standard hentry category-journaux ... clear-block count-1 odd author-admin first">
    var posts = _xelement.XPathElements("//div[starts-with(@id, 'post-')]");
    _xmlEnum = posts.GetEnumerator();
}
// Parses a post detail page into a RapideDdl_PostDetail.
// Reads, from <div class='lcolomn mainside'>: the category path and title (div.spbar),
// the author (div.shdinfo), the images, text values and download links (div.maincont).
// Images are always loaded through pb.old.Http_v2.LoadImageFromWeb (the request.LoadImage test is disabled).
// loadFromWebDate is the time of parsing (DateTime.Now).
public static RapideDdl_PostDetail LoadPostDetailFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    XXElement xeSource = new XXElement(request.GetXmlDocument().Root);

    RapideDdl_PostDetail data = new RapideDdl_PostDetail();
    data.sourceUrl = request.Url;
    data.loadFromWebDate = DateTime.Now;

    XXElement xePost = xeSource.XPathElement("//div[@class='lcolomn mainside']");

    // category : texts of the breadcrumb links, without "Accueil" and empty entries, joined with "/"
    data.category = xePost.XPathElements(".//div[@class='spbar']//a")
        .DescendantTexts()
        .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
        .Where(s => s != "Accueil" && s != "")
        .zToStringValues("/");

    // title : last text found in the breadcrumb bar
    data.title = xePost.XPathElements(".//div[@class='spbar']")
        .DescendantTexts()
        .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
        .LastOrDefault();

    XXElement xe = xePost.XPathElement(".//div[@class='shdinfo']");
    data.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

    xe = xePost.XPathElement(".//div[@class='maincont']");
    data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, request.Url))
        .ToList();

    // force load image to get image width and height
    pb.old.Http_v2.LoadImageFromWeb(data.images);

    data.SetTextValues(xe.XPathElements(".//div").DescendantTexts());

    foreach (XXElement xe2 in xe.XPathElements("div/div"))
    {
        data.downloadLinks.AddRange(xe2.XPathValues(".//a/@href").Where(IsRapideDdlDownloadLink));
    }

    return data;
}

// Returns true when url is kept as a download link: it must not point to a known image host
// (prezup.eu, pixhst.com) and must not end with an image suffix.
// Examples of rejected links : http://pixhst.com/avaxhome/27/36/002e3627.jpeg, http://i.imgur.com/Gu7hagN.jpg,
// http://img11.hostingpics.net/pics/591623liens.png, http://www.hapshack.com/images/jUfTZ.gif
// Comparisons are ordinal: urls are identifiers, not linguistic text, so the result must not depend
// on the current culture. The suffix literals are the ones the filter has always used.
private static bool IsRapideDdlDownloadLink(string url)
{
    if (url.StartsWith("http://prezup.eu", StringComparison.Ordinal)
        || url.StartsWith("http://pixhst.com", StringComparison.Ordinal))
    {
        return false;
    }

    return !url.EndsWith(".jpg", StringComparison.Ordinal)
        && !url.EndsWith("jpeg", StringComparison.Ordinal)
        && !url.EndsWith("png", StringComparison.Ordinal)
        && !url.EndsWith("gif", StringComparison.Ordinal);
}
// Parses one listing page into a BlogDemoorDataPages<BlogDemoorData>.
// Each direct <div> child of div#content (inside table#layout) yields one BlogDemoorData with its title,
// except the <div class="page-nav"> block which is skipped.
// UrlNextPage comes from the ">" link of the last <li> of div.page-nav, resolved against the page url.
protected override IEnumDataPages<BlogDemoorData> GetDataPage(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    BlogDemoorDataPages<BlogDemoorData> page = new BlogDemoorDataPages<BlogDemoorData>();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = webResult.LoadFromWebDate;
    page.Id = GetPageKey(webResult.WebRequest.HttpRequest);
    page.UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='page-nav']//li[last()]//a[text()='>']/@href"));

    IEnumerable<XXElement> blocks = root.XPathElements("//table[@id='layout']//div[@id='content']/div");
    List<BlogDemoorData> items = new List<BlogDemoorData>();
    foreach (XXElement block in blocks)
    {
        BlogDemoorData item = new BlogDemoorData
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        // the pagination block is a sibling of the post blocks : ignore it
        if (block.XPathValue("@class") != "page-nav")
        {
            // expected structure : <div><div><div><center><strong><a href="..." title="">title<span class="detail_release"></span></a>
            XXElement link = block.XPathElement(".//div/div/div//a");
            item.Title = link.XPathValue(".//text()");
            items.Add(item);
        }
    }

    page.Data = items.ToArray();
    return page;
}
// Fills data (Title, Date, Content, Images) from the article found in <div id="content"> / <div class="item_div">.
// Date is parsed with the "d MMMM yyyy" format and __cultureInfo; when parsing fails data.Date is left
// untouched and a trace line is written. Content is only set when the article body element exists.
// Image urls are resolved against data.SourceUrl, so data.SourceUrl is read by this method.
protected void _GetDetailData(XXElement xeSource, BlogDemoorDetailData data)
{
    XXElement article = xeSource.XPathElement("//div[@id='content']//div[@class='item_div']");
    data.Title = article.XPathValue(".//h2//text()");

    string dateText = article.XPathValue(".//div[@class='dateheader']/text()");
    Date parsedDate;
    bool dateOk = Date.TryParseExact(dateText, "d MMMM yyyy", __cultureInfo, DateTimeStyles.None, out parsedDate);
    if (!dateOk)
    {
        Trace.WriteLine($"date not found \"{dateText}\"");
    }
    else
    {
        data.Date = parsedDate;
    }

    // <div class="articlebody" itemprop="articleBody">
    XXElement body = article.XPathElement(".//div[@class='articlebody']");
    if (body.XElement != null)
    {
        data.Content = body.XElement.ToString();
    }

    IEnumerable<XXElement> imgElements = body.XPathElements(".//img");
    data.Images = GetImageUrls(imgElements)
        .Select(src => new WebImage(zurl.GetUrl(data.SourceUrl, src)))
        .ToArray();
}
// Currently disabled : always throws a PBException ("attention mismatch between free-telechargement.org and golden-ddl.net").
// The parsing code after the throw is unreachable; it is kept as is, with warning 162 (unreachable code) disabled.
// That code would read the next-page link from <div class="pagination"> and build one
// FreeTelechargement_PostHeader per link found in the tables of div#content.
protected override FreeTelechargement_HeaderPage GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    throw new PBException("attention mismatch between free-telechargement.org and golden-ddl.net");

#pragma warning disable 162
    XXElement root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string pageUrl = loadDataFromWeb.request.Url;

    FreeTelechargement_HeaderPage page = new FreeTelechargement_HeaderPage();

    // <div class="pagination"> : link whose text starts with "suiv "
    page.urlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='pagination']//a[starts-with(text(), 'suiv ')]/@href"));

    IEnumerable<XXElement> links = root.XPathElements("//div[@id='content']//table//a");
    List<FreeTelechargement_PostHeader> postHeaders = new List<FreeTelechargement_PostHeader>();
    foreach (XXElement link in links)
    {
        FreeTelechargement_PostHeader postHeader = new FreeTelechargement_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = loadDataFromWeb.loadFromWebDate,
            urlDetail = link.XPathValue("@href"),
            images = link.DescendantNodes(node => XmlDescendant.ImageFilter(node))
                .Select(xeImg => new WebImage(zurl.GetUrl(pageUrl, xeImg.zAttribValue("src"))))
                .ToList()
        };
        postHeaders.Add(postHeader);
    }

    page.postHeaders = postHeaders.ToArray();
    return page;
#pragma warning restore 162
}
// Parses a golden-ddl header page (previous signature returned GoldenDdl_HeaderPage directly).
// Builds a GoldenDdl_HeaderPage : page key from GoldenDdl_LoadHeaderPagesManager.GetHeaderPageKey(url),
// next-page url from div.basenavi / span.nnext, and one GoldenDdl_PostHeader per <div class='base'> of div#dle-content.
// Only sourceUrl, loadFromWebDate and urlDetail are filled for each post header; urlDetail is stored as
// found in the page (it is not resolved against the page url).
protected override IEnumDataPages_v1<int, IHeaderData_v1> GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string pageUrl = loadDataFromWeb.request.Url;

    GoldenDdl_HeaderPage page = new GoldenDdl_HeaderPage();
    page.sourceUrl = pageUrl;
    page.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    page.id = GoldenDdl_LoadHeaderPagesManager.GetHeaderPageKey(pageUrl);
    page.urlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='basenavi']//span[@class='nnext']//a/@href"));

    IEnumerable<XXElement> postBlocks = root.XPathElements("//div[@id='dle-content']//div[@class='base']");
    List<GoldenDdl_PostHeader> postHeaders = new List<GoldenDdl_PostHeader>();
    foreach (XXElement postBlock in postBlocks)
    {
        postHeaders.Add(new GoldenDdl_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = loadDataFromWeb.loadFromWebDate,
            urlDetail = postBlock.XPathValue(".//div[@class='bheading']//a/@href")
        });
    }

    page.postHeaders = postHeaders.ToArray();
    return (IEnumDataPages_v1<int, IHeaderData_v1>)page;
}
// Loads the forum page at url and enumerates its categories and sub-forums (lazy iterator).
// For each <li> of div#forumbits/ol : yields the category itself (Forum, Category, Url), then one entry
// (Forum, Category, Name, Url) per child sub-forum link whose name is accepted by filter.
// filter is applied to sub-forum names only, never to categories; a null filter accepts everything.
// Nothing is yielded when the load fails (LoadResult is false).
public IEnumerable<Ebookdz_Forum> LoadSubForum(string url, string forum, Predicate<string> filter = null, bool reload = false)
{
    LoadDataFromWeb_v4 loaded = Load(new RequestFromWeb_v4(new HttpRequest { Url = url }, reload: reload));
    if (!loaded.LoadResult)
    {
        yield break;
    }

    XXElement root = new XXElement(loaded.Http.zGetXDocument().Root);

    // <div id="forumbits" class="forumbits"> <ol> <li id="forum10" class="forumbit_post new L1">
    //   <div class="forumrow"> ... <ol id="childforum_for_161" class="childsubforum"> ... <div class="titleline">
    foreach (XXElement row in root.XPathElements("//div[@id='forumbits']/ol/li"))
    {
        XXElement categoryLink = row.XPathElement(".//div[@class='forumrow']//a");
        string category = categoryLink.XPathValue(".//text()");
        string categoryUrl = Ebookdz.GetUrl(zurl.GetUrl(loaded.WebRequest.HttpRequest.Url, categoryLink.XPathValue("@href")));
        yield return new Ebookdz_Forum { Forum = forum, Category = category, Url = categoryUrl };

        foreach (XXElement subForumLink in row.XPathElements(".//ol[@class='childsubforum']/li//div[@class='titleline']//a"))
        {
            string name = subForumLink.XPathValue(".//text()");
            if (filter == null || filter(name))
            {
                string subForumUrl = Ebookdz.GetUrl(zurl.GetUrl(loaded.WebRequest.HttpRequest.Url, subForumLink.XPathValue("@href")));
                yield return new Ebookdz_Forum { Forum = forum, Category = category, Name = name, Url = subForumUrl };
            }
        }
    }
}
// Parses a supplier search result page into a Handeco_HeaderPage.
// Every table row after the first one gives a Handeco_HeaderCompany; columns are read in this order :
// 1 name (and detail link), 2 siret, 3 type, 4 groupes, 5 activités, 6 postal code.
// Single-value texts go through _trimFunc1; loadFromWebDate of each row is DateTime.Now at parsing time.
protected override Handeco_HeaderPage GetData()
{
    XXElement root = new XXElement(GetXmlDocument().Root);
    string pageUrl = Url;

    Handeco_HeaderPage page = new Handeco_HeaderPage();

    // <div class="paginationControl">
    //   page n    : <a href="/fournisseurs/rechercher/page/2#resultats">></a> |
    //   last page : <span class="disabled">></span> |
    // the next-page element is the one before the last; it has no href on the last page
    page.urlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='paginationControl']//*[position()=last()-1]/@href"));

    List<Handeco_HeaderCompany> companies = new List<Handeco_HeaderCompany>();
    foreach (XXElement row in root.XPathElements("//table//tr[position() > 1]"))
    {
        Handeco_HeaderCompany company = new Handeco_HeaderCompany();
        company.sourceUrl = pageUrl;
        company.loadFromWebDate = DateTime.Now;

        string rawName = row.XPathValue(".//td[1]//text()");
        company.name = _trimFunc1(rawName);
        company.urlDetail = zurl.GetUrl(pageUrl, row.XPathValue(".//td[1]//a/@href"));

        string rawSiret = row.XPathValue(".//td[2]//text()");
        company.siret = _trimFunc1(rawSiret);

        string rawType = row.XPathValue(".//td[3]//text()");
        company.type = _trimFunc1(rawType);

        company.groupes = row.XPathValues(".//td[4]//text()").Select(_trimFunc1).ToArray();
        company.activités = row.XPathValues(".//td[5]//text()").Select(_trimFunc1).ToArray();

        string rawPostalCode = row.XPathValue(".//td[6]//text()");
        company.postalCode = _trimFunc1(rawPostalCode);

        companies.Add(company);
    }

    page.headerCompanies = companies.ToArray();
    return page;
}
// Loads the forum index (__urlForum) and enumerates the top-level forums (lazy iterator).
// Yields one Ebookdz_Forum (Forum = link text, Url) per <li> of <ol id="forums"> whose name is accepted
// by filter; a null filter accepts everything. Nothing is yielded when the load fails.
public IEnumerable<Ebookdz_Forum> LoadMainForum(Predicate<string> filter = null, bool reload = false)
{
    LoadDataFromWeb_v4 loaded = Load(new RequestFromWeb_v4(new HttpRequest { Url = __urlForum }, reload: reload));
    if (!loaded.LoadResult)
    {
        yield break;
    }

    XXElement root = new XXElement(loaded.Http.zGetXDocument().Root);

    // <ol id="forums" class="floatcontainer">
    // link example : http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
    foreach (XXElement item in root.XPathElements("//ol[@id='forums']/li"))
    {
        XXElement link = item.XPathElement(".//a");
        string forumName = link.XPathValue(".//text()");
        if (filter == null || filter(forumName))
        {
            string forumUrl = Ebookdz.GetUrl(zurl.GetUrl(loaded.WebRequest.HttpRequest.Url, link.XPathValue("@href")));
            yield return new Ebookdz_Forum { Forum = forumName, Url = forumUrl };
        }
    }
}
// header get data, from WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>
// Parses a supplier search result page into a Handeco_HeaderDataPages.
// Every table row after the first one gives a Handeco_Header_v2; columns are read in this order :
// 1 name (and detail link), 2 type, 3 groupes, 4 activités, 5 postal code (no siret column is read).
// Url fragments (#resultats) are removed from the next-page and detail urls.
// NOTE(review): the page uses httpResult.Http.RequestTime as LoadFromWebDate while each row uses DateTime.Now
// - confirm whether rows should carry the request time too.
protected override IEnumDataPages<Handeco_Header_v2> GetHeaderPageData(HttpResult<string> httpResult)
{
    XXElement root = httpResult.zGetXDocument().zXXElement();
    string pageUrl = httpResult.Http.HttpRequest.Url;

    Handeco_HeaderDataPages page = new Handeco_HeaderDataPages();
    page.SourceUrl = pageUrl;
    page.LoadFromWebDate = httpResult.Http.RequestTime;
    page.Id = GetPageKey(httpResult.Http.HttpRequest);

    // <div class="paginationControl">
    //   page n    : <a href="/fournisseurs/rechercher/page/2#resultats">></a> |
    //   last page : <span class="disabled">></span> |
    string nextPageHref = root.XPathValue("//div[@class='paginationControl']//*[position()=last()-1]/@href");
    page.UrlNextPage = zurl.RemoveFragment(zurl.GetUrl(pageUrl, nextPageHref));

    List<Handeco_Header_v2> rows = new List<Handeco_Header_v2>();
    foreach (XXElement row in root.XPathElements("//table//tr[position() > 1]"))
    {
        Handeco_Header_v2 header = new Handeco_Header_v2();
        header.SourceUrl = pageUrl;
        header.LoadFromWebDate = DateTime.Now;
        header.Name = Handeco.Trim(row.XPathValue(".//td[1]//text()"));

        string detailHref = row.XPathValue(".//td[1]//a/@href");
        header.UrlDetail = zurl.RemoveFragment(zurl.GetUrl(pageUrl, detailHref));

        header.Type = Handeco.Trim(row.XPathValue(".//td[2]//text()"));
        header.Groupes = row.XPathValues(".//td[3]//text()").Select(Handeco.Trim).ToArray();
        header.Activités = row.XPathValues(".//td[4]//text()").Select(Handeco.Trim).ToArray();
        header.PostalCode = Handeco.Trim(row.XPathValue(".//td[5]//text()"));
        rows.Add(header);
    }

    page.Data = rows.ToArray();
    return page;
}
// Parses an institution detail page into an OnisepInstitution_Detail.
// All values are read from <div id='oni_zoom-block'> :
//   - Institution : text of <h1>
//   - UAICode     : text after "Code UAI :" in <span class="oni_span-title">, ex : "Code UAI : 0062080D"
//   - from <div class='oni_fiche-info-1'> :
//       Address    = text in <span class="street-address">
//       PostalCode = text in <span class="postal-code">
//       City       = text in <span class="locality">
//       Tel        = text in <span class="tel">
//       Fax        = vcard text starting with "Fax :"
//       Mail       = @href of <a class="email">, without the "mailto:" prefix
//       WebSite    = @href of the first <a> following the text "site :"
//   - from <div class='oni_fiche-info-2'> : InstitutionStatus, Lodging, Ulis (label / value pairs of each <li>)
//   - StudyLevels : concatenated texts of each <li> of the oni_nav-in-ul list; BacLevel is derived with GetBacLevel
// When __trace is set the result is written to the trace as json.
private static OnisepInstitution_Detail GetData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    OnisepInstitution_Detail data = new OnisepInstitution_Detail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetKey(webResult.WebRequest.HttpRequest);

    XXElement xeData = xeSource.XPathElement("//div[@id='oni_content-page']//div[@class='oni_innerContent']//div[@id='oni_zoom-block']");

    data.Institution = OnisepInstitution.Trim(xeData.XPathValue(".//h1/text()"));

    // <span class="oni_span-title">Code UAI : 0062080D</span>
    string s = OnisepInstitution.Trim(xeData.XPathValue(".//span[@class='oni_span-title']/text()"));
    if (s != null && s.StartsWith("Code UAI :", StringComparison.InvariantCultureIgnoreCase))
    {
        // 10 = length of "Code UAI :"
        data.UAICode = OnisepInstitution.Trim(s.Substring(10));
    }

    XXElement xe = xeData.XPathElement(".//div[@class='oni_fiche-info-1']");
    data.Address = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='street-address']/text()"));
    data.PostalCode = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='postal-code']/text()"));
    data.City = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='locality']/text()"));
    data.Tel = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='tel']/text()"));

    // the trimmed text is null-checked before StartsWith, as is done for the other trimmed values of this method
    s = xe.XPathValues(".//p[@class='vcard']//text()")
        .Select(OnisepInstitution.Trim)
        .Where(t => t != null && t.StartsWith("Fax :", StringComparison.InvariantCultureIgnoreCase))
        .FirstOrDefault();
    if (s != null)
    {
        // 5 = length of "Fax :"
        data.Fax = OnisepInstitution.Trim(s.Substring(5));
    }

    s = xe.XPathValue(".//a[@class='email']/@href");
    if (s != null && s.StartsWith("mailto:", StringComparison.InvariantCultureIgnoreCase))
    {
        // 7 = length of "mailto:"
        s = s.Substring(7);
    }
    data.Mail = s;

    // the site link is the first <a> after the text node "site :" (following axis, not only siblings)
    data.WebSite = xe.DescendantTextNodes()
        .Where(xt => string.Equals(OnisepInstitution.Trim(xt.Value), "site :", StringComparison.InvariantCultureIgnoreCase))
        .FirstOrDefault()
        .zXPathValue(".//following::a/@href");

    foreach (XXElement xe2 in xeData.XPathElements(".//div[@class='oni_fiche-info-2']//li"))
    {
        // each <li> is expected to hold a label text followed by a value text
        string[] values = xe2.DescendantTexts().Take(2).ToArray();
        if (values.Length != 2)
        {
            continue;
        }

        string label = OnisepInstitution.Trim(values[0]);
        if (label == null)
        {
            continue;
        }

        // labels are fixed page texts : lower-case them with the invariant culture so that matching
        // does not depend on the culture of the current thread
        switch (label.ToLowerInvariant())
        {
            case "statut de l'établissement :":
                data.InstitutionStatus = OnisepInstitution.Trim(values[1]);
                break;
            case "hébergement :":
                data.Lodging = OnisepInstitution.Trim(values[1]);
                break;
            case "présence d'une ulis":
                data.Ulis = OnisepInstitution.Trim(values[1]);
                break;
        }
    }

    data.StudyLevels = xeData.XPathElements(".//div[@class='oni_nav-in']//ul[@class='oni_nav-in-ul']//li")
        .Select(li => li.DescendantTexts().zConcatStrings())
        .Where(txt => txt != null)
        .ToArray();
    data.BacLevel = GetBacLevel(data.StudyLevels);

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}