// Builds the ExtremeDown header page from the loaded document: the page key, the url of the
// "Suivant" (next page) link and, for every 'blockbox' div under 'dle-content', a post header
// holding the detail url read from its 'blocktitle' link.
protected override IEnumDataPages_v1<int, IHeaderData_v1> GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    var root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string pageUrl = loadDataFromWeb.request.Url;

    var page = new ExtremeDown_HeaderPage
    {
        sourceUrl = pageUrl,
        loadFromWebDate = loadDataFromWeb.loadFromWebDate,
        id = ExtremeDown_LoadHeaderPagesManager.GetHeaderPageKey(pageUrl),
        urlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='navigation ignore-select']//a[starts-with(text(), 'Suivant')]/@href"))
    };

    var posts = new List<ExtremeDown_PostHeader>();
    foreach (XXElement block in root.XPathElements("//div[@id='dle-content']//div[@class='blockbox']"))
    {
        posts.Add(new ExtremeDown_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = loadDataFromWeb.loadFromWebDate,
            urlDetail = block.XPathValue(".//h2[@class='blocktitle']//a/@href")
        });
    }

    page.postHeaders = posts.ToArray();
    return (IEnumDataPages_v1<int, IHeaderData_v1>)page;
}
// Parses a listing page: the page key, the url of the 'nextpage' link and, for every 'item_div',
// the title and detail url of its h2 link plus the raw text of its 'dateheader' div.
protected override IEnumDataPages<BlogDemoorHeaderData> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new BlogDemoorHeaderDataPages
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//a[@class='nextpage']/@href"))
    };

    var items = new List<BlogDemoorHeaderData>();
    foreach (XXElement item in root.XPathElements("//div[@class='item_div']"))
    {
        var entry = new BlogDemoorHeaderData
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        XXElement link = item.XPathElement(".//h2/a");
        entry.Title = link.XPathValue(".//text()");
        entry.UrlDetail = link.XPathValue("./@href");

        // sample: <div class="dateheader">23 juillet 2016</div>
        entry.Date = item.XPathValue(".//div[@class='dateheader']/text()");

        items.Add(entry);
    }

    page.Data = items.ToArray();
    return page;
}
// Manual test: loads a rapide-ddl detail page from a hard-coded local path and traces every
// descendant text of the 'maincont' div located inside the 'lcolomn mainside' div.
public static void Test_XXElement_DescendantTextList_01()
{
    string pagePath = @"c:\pib\dev_data\exe\runsource\download\sites\rapide-ddl\cache\detail\39000\ebooks_magazine_39023-multi-lautomobile-no821-octobre-2014.html";
    pb.old.Http_v2.LoadUrl(pagePath);

    XXElement mainContent = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root)
        .XPathElement("//div[@class='lcolomn mainside']")
        .XPathElement(".//div[@class='maincont']");

    foreach (string text in mainContent.DescendantTexts())
    {
        Trace.WriteLine(text);
    }
}
// Moves to the next article element and fills _postHeader with the source url plus the url and
// title of the first <a> found in the article. Returns false when there is no more element.
protected override bool _MoveNext()
{
    if (!_xmlEnum.MoveNext())
    {
        return false;
    }

    XXElement article = _xmlEnum.Current;
    _postHeader = new Magazine3kPostHeader();
    _postHeader.sourceUrl = _sourceUrl;

    // sample markup:
    // <h2 class="title">
    //   <a href="http://zone-ebooks.com/journaux/le-parisien-journal-de-paris-supp-economie-du-lundi-07-octobre-pdf.html"
    //      rel="bookmark" title="Lien permanent: Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre">
    //      Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre</a>
    // </h2>
    XXElement link = article.XPathElement(".//a");
    _postHeader.url = link.XPathValue("@href");
    _postHeader.title = link.XPathValue(".//text()");

    return true;
}
// Loads the page and prepares the xml enumerator; the outcome is reported through _loadUrlResult.
// When _urlFile is set, the page is first saved to that file (only if _reload is set or the file
// does not exist yet) and then loaded from the file instead of from _url.
protected void LoadUrl()
{
    _loadUrlResult = false;

    string source = _url;
    if (_urlFile != null)
    {
        bool mustDownload = _reload || !zFile.Exists(_urlFile);
        if (mustDownload && !Http_v3.LoadToFile(_url, _urlFile))
        {
            // download failed : _loadUrlResult stays false
            return;
        }
        source = _urlFile;
    }

    if (!Http_v3.LoadUrl(source))
    {
        return;
    }

    _xelement = new XXElement(Http_v3.Http.zGetXDocument().Root);
    _xmlEnum = GetXmlEnumerator();
    _loadUrlResult = true;
}
// Returns the text of the link found in the 'welcomelink' item of the 'toplinks' div.
// sample (ebookdz.com_forum_showthread.php_t_109595_01_02.html) :
//   <div id="toplinks" class="toplinks">
//     <li class="welcomelink">Bienvenue, <a href="member.php?u=49369"><b>la_beuze</b></a></li>
public static string GetLogin(XXElement xeSource)
{
    const string loginXPath = "//div[@id='toplinks']//li[@class='welcomelink']//a//text()";
    return xeSource.XPathValue(loginXPath);
}
// Stores the text value of xe into the Activity member that matches valueName, a label of the
// "NOTRE OFFRE" html section. The label is matched case-insensitively.
// Returns true when the label is known, false otherwise (activity is then left unchanged).
private static bool SetActivityValue(Activity activity, string valueName, XXElement xe)
{
    // The labels are fixed keys, so lower-casing must not depend on the current culture:
    // with ToLower() under a Turkish culture "DESCRIPTION" becomes "descrıptıon" and no case matches.
    switch (valueName.ToLowerInvariant())
    {
        // NOTRE OFFRE (html)
        case "description":
            activity.Description = GetTextValue(xe);
            return true;
        case "moyens techniques disponibles":
            activity.MoyensTechniquesDisponibles = GetTextValue(xe);
            return true;
        case "effectif total mobilisable (etp)":
            activity.EffectifTotalMobilisable = GetTextValue(xe);
            return true;
        case "modalités pratiques":
            activity.ModalitésPratiques = GetTextValue(xe);
            return true;
        case "couverture géographique":
            activity.CouvertureGéographique = GetTextValue(xe);
            return true;
        default:
            return false;
    }
}
// header get data
// Parses a listing page: the page key, the url of the "Suivant" (next page) link and, for every
// 'blockbox' div under 'dle-content', a post header holding the detail url of its 'blocktitle' link.
protected override IEnumDataPages<PostHeader> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage<PostHeader>
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='navigation ignore-select']//a[starts-with(text(), 'Suivant')]/@href"))
    };

    var posts = new List<PostHeader>();
    foreach (XXElement block in root.XPathElements("//div[@id='dle-content']//div[@class='blockbox']"))
    {
        posts.Add(new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate,
            UrlDetail = block.XPathValue(".//h2[@class='blocktitle']//a/@href")
        });
    }

    page.Headers = posts.ToArray();
    return page;
}
// Moves to the next article element and fills _postHeader: url and title come from the link of
// the article <header>, infos come from every descendant text of the 'entry_top' div.
// When _loadImage is set and the header image has a source, the image is loaded from the web.
// Returns false when there is no more element.
protected override bool _MoveNext()
{
    if (!_xmlEnum.MoveNext())
    {
        return false;
    }

    XXElement article = _xmlEnum.Current;
    _postHeader = new Pdf4frPostHeader();
    _postHeader.sourceUrl = _sourceUrl;

    XXElement node = article.XPathElement("./header//a");
    _postHeader.url = node.XPathValue("@href");
    _postHeader.title = node.XPathValue(".//text()");

    node = article.XPathElement(".//div[@class='entry_top']");
    foreach (string text in node.XElement.zDescendantTexts())
    {
        _postHeader.SetInfo(text);
    }

    bool imageWanted = _loadImage && _postHeader.image.Source != null;
    if (imageWanted)
    {
        Http_v2.LoadImageFromWeb(_postHeader.image.Source);
    }

    // the footer element is looked up but its result is not used yet
    node = article.XPathElement(".//footer");

    return true;
}
// header get data
// Parses a listing page: the page key, the url of the '>' link in the last 'page-nav' item and,
// for every div directly under 'content' (inside the 'layout' table), the title and detail url
// of the link selected by ".//div/div/div//a".
protected override IEnumDataPages<PostHeader> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage<PostHeader>
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='page-nav']//li[last()]//a[text()='>']/@href"))
    };

    var posts = new List<PostHeader>();
    foreach (XXElement block in root.XPathElements("//table[@id='layout']//div[@id='content']/div"))
    {
        var post = new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        XXElement link = block.XPathElement(".//div/div/div//a");
        post.Title = link.XPathValue(".//text()");
        post.UrlDetail = link.XPathValue("./@href");

        posts.Add(post);
    }

    page.Headers = posts.ToArray();
    return page;
}
// Builds the Telechargementz header page from the loaded document: the page key, the url of the
// "vers l'avant" navigation link and, for every 'custom-post' div under 'dle-content', a post
// header holding the detail url of its 'custom-poster' link.
protected override IEnumDataPages_v1<int, IHeaderData_v1> GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    var root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string pageUrl = loadDataFromWeb.request.Url;

    var page = new Telechargementz_HeaderPage
    {
        sourceUrl = pageUrl,
        loadFromWebDate = loadDataFromWeb.loadFromWebDate,
        id = Telechargementz_LoadHeaderPagesManager.GetHeaderPageKey(pageUrl),
        urlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='navigation']//a[text()=\"vers l'avant\"]/@href"))
    };

    var posts = new List<Telechargementz_PostHeader>();
    foreach (XXElement block in root.XPathElements("//div[@id='dle-content']//div[@class='custom-post']"))
    {
        posts.Add(new Telechargementz_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = loadDataFromWeb.loadFromWebDate,
            urlDetail = block.XPathValue(".//div[@class='custom-poster']//a/@href")
        });
    }

    page.postHeaders = posts.ToArray();
    return (IEnumDataPages_v1<int, IHeaderData_v1>)page;
}
// Stores the text value of xe into the Contact member that matches valueName, a label of the
// "CONTACTS" html section. The label is matched case-insensitively; the "e-mail" value goes
// through GetEmail before being stored.
// Returns true when the label is known, false otherwise (contact is then left unchanged).
private static bool SetContactValue(Contact contact, string valueName, XXElement xe)
{
    // The labels are fixed keys, so lower-casing must not depend on the current culture:
    // with ToLower() under a Turkish culture "MOBILE" becomes "mobıle" and no case matches.
    switch (valueName.ToLowerInvariant())
    {
        // CONTACTS (html)
        case "prénom et nom":
            contact.Nom = GetTextValue(xe);
            return true;
        case "fonction":
            contact.Fonction = GetTextValue(xe);
            return true;
        case "téléphone":
            contact.Tel = GetTextValue(xe);
            return true;
        case "mobile":
            contact.Mobile = GetTextValue(xe);
            return true;
        case "e-mail":
            contact.Email = GetEmail(GetTextValue(xe));
            return true;
        default:
            return false;
    }
}
// Parses a Handeco result page: the page key, the next page url (fragment removed) and one header
// per table row after the first one (name, detail url, type, groups, activities, postal code).
private static Handeco_HeaderPage GetData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new Handeco_HeaderPage
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest)
    };

    // <div class="paginationControl">
    //   page n    : <a href="/fournisseurs/rechercher/page/2#resultats">></a> |
    //   last page : <span class="disabled">></span> |
    // the @href of the element before the last one is read
    string nextHref = root.XPathValue("//div[@class='paginationControl']//*[position()=last()-1]/@href");
    page.UrlNextPage = zurl.RemoveFragment(zurl.GetUrl(pageUrl, nextHref));

    page.Headers = root.XPathElements("//table//tr[position() > 1]")
        .Select(row => new Handeco_Header
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = DateTime.Now,
            Name = Handeco.Trim(row.XPathValue(".//td[1]//text()")),
            UrlDetail = zurl.RemoveFragment(zurl.GetUrl(pageUrl, row.XPathValue(".//td[1]//a/@href"))),
            Type = Handeco.Trim(row.XPathValue(".//td[2]//text()")),
            Groupes = row.XPathValues(".//td[3]//text()").Select(Handeco.Trim).ToArray(),
            Activités = row.XPathValues(".//td[4]//text()").Select(Handeco.Trim).ToArray(),
            PostalCode = Handeco.Trim(row.XPathValue(".//td[5]//text()"))
        })
        .ToArray();

    return page;
}
// header get data, from WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>
// Parses a listing page: the page key, the url of the "Next" link and, for every 'leftpane' div
// under 'dle-content', the title and detail url read from the attributes of its nested link.
protected override IEnumDataPages<PostHeader> GetHeaderPageData(HttpResult<string> httpResult)
{
    XXElement xeSource = httpResult.zGetXDocument().zXXElement();
    string url = httpResult.Http.HttpRequest.Url;

    PostHeaderHeaderDataPages_v2 data = new PostHeaderHeaderDataPages_v2();
    data.SourceUrl = url;
    data.LoadFromWebDate = httpResult.Http.RequestTime;
    data.Id = GetPageKey(httpResult.Http.HttpRequest);

    // <div id='dle-content'>
    //   <div class="leftpane">
    //     <div class="movieposter" title="Watch Movie Pachamama : Cuisine des premières nations">
    //       <a href="http://www.telecharger-magazine.com/livres/3833-pachamama-cuisine-des-premires-nations.html">
    //         <a href="http://www.telecharger-magazine.com/livres/3833-pachamama-cuisine-des-premires-nations.html" title="Pachamama : Cuisine des premières nations">
    //           <img src="http://pxhst.co/avaxhome/cd/2a/00152acd.jpeg" width="110" height="150" alt="télécharger Pachamama : Cuisine des premières nations" title="télécharger Pachamama : Cuisine des premières nations" />
    //         </a>
    //     </div>
    //   </div>
    //   ...
    //   <div class="navigation" align="center">
    //     <div class="clear"></div>
    //     <span>← Previous</span> <span>1</span>
    //     <a href="http://www.telecharger-magazine.com/page/2/">2</a>
    //     ...
    //     <a href="http://www.telecharger-magazine.com/page/2/">Next →</a>
    //     <div class="clear"></div>
    //   </div>
    XXElement xe = xeSource.XPathElement("//div[@id='dle-content']");
    data.UrlNextPage = zurl.GetUrl(url, xe.XPathValue(".//a[starts-with(text(), 'Next')]/@href"));

    List<PostHeader> headers = new List<PostHeader>();
    // Every element selected by this XPath has a class equal to 'leftpane', so no filtering of
    // 'page-nav' elements is required inside the loop.
    foreach (XXElement xeHeader in xe.XPathElements(".//div[@class='leftpane']"))
    {
        PostHeader header = new PostHeader();
        header.SourceUrl = url;
        header.LoadFromWebDate = httpResult.Http.RequestTime;

        // the link is nested inside another link (see the sample above), hence ".//a/a"
        XXElement xe2 = xeHeader.XPathElement(".//a/a");
        header.Title = xe2.AttribValue("title");
        header.UrlDetail = xe2.AttribValue("href");

        headers.Add(header);
    }

    data.Data = headers.ToArray();
    return data;
}
// Wraps the given element as the new source, then reads the next page url and creates the
// enumerator over the 'ETABLISSEMENT STAR-...' divs.
protected override void SetXml(XElement xelement)
{
    _xeSource = new XXElement(xelement);

    // <div class="PAGENAVIGLIST">
    //   <a href="/Gesat/EtablissementList-10-10.html" title="page suivante">></a>
    var nextPageHref = _xeSource.XPathValue("//div[@class='PAGENAVIGLIST']//a[@title='page suivante']/@href");
    _urlNextPage = GetUrl(nextPageHref);

    // <div class="ETABLISSEMENT STAR-1 ODD"> <div class="ETABLISSEMENT STAR-0 ODD"> <div class="ETABLISSEMENT STAR-1 EVEN">
    var establishments = _xeSource.XPathElements("//div[starts-with(@class, 'ETABLISSEMENT STAR-')]");
    _xmlEnum = establishments.GetEnumerator();
}
// header get data
// Parses a listing page: the page key, the url of the link in the last 'wp_page_numbers' item and
// one post header (title, detail url, category) per div directly under 'content'. Reading stops
// at the first div whose class is 'page-nav'.
protected override IEnumDataPages<IHeaderData> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage_v1
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@id='wp_page_numbers']//li[last()]//a/@href"))
    };

    var posts = new List<MagazinesGratuits_PostHeader>();
    foreach (XXElement entry in root.XPathElements("//div[@id='content']/div"))
    {
        var post = new MagazinesGratuits_PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        // the 'page-nav' div ends the list of posts
        if (entry.XPathValue("@class") == "page-nav")
        {
            break;
        }

        XXElement link = entry.XPathElement(".//center/strong/a");
        post.Title = link.XPathValue(".//text()");
        post.UrlDetail = link.XPathValue("./@href");
        post.Category = entry.XPathValue(".//div[@class='cover_infos_genre']/a//text()");

        posts.Add(post);
    }

    page.Headers = posts.ToArray();
    return page;
}
// Parses an Onisep search result page: the page key, the url of the 'next' link of the page
// navigator and one header (institution, detail url, city, postal code) per result table row.
private static OnisepInstitution_HeaderPage GetData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new OnisepInstitution_HeaderPage
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest)
    };

    // <table class="oni_tableSearchResults" style="width: 463px; margin-left: 5px" summary="">
    //   ...
    //   <tbody>
    //     <tr class=oni_odd>
    //       <td class="oni_first"><a href="/Ressources/Univers-Postbac/Postbac/Aquitaine/Pyrenees-Atlantiques/Academie-Basque-du-Sport"> Académie Basque du Sport </a></td>
    //       <td>Biarritz</td>
    //       <td>64200</td>
    //     </tr>
    //     ...
    //   </tbody>
    // </table>
    // <div class="pagenavigator">
    //   <p>
    //     <span class="pages">
    //       <span class="current">1</span>
    //       <span class="other"><a href="/content/search/(offset)/10?&SubTreeArray=243418&etabRecherche=1&idFormation=&limit=10">2</a></span>
    //       ...
    //     </span>
    //     <span class="next"><a href="/content/search/(offset)/10?&SubTreeArray=243418&etabRecherche=1&idFormation=&limit=10"><span class="text">Suivant »</span></a></span>
    //   </p>
    //   <div class="break"></div>
    // </div>
    page.UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='pagenavigator']//span[@class='next']//a/@href"));

    var institutions = new List<OnisepInstitution_Header>();
    foreach (XXElement row in root.XPathElements("//table[@class='oni_tableSearchResults']//tbody//tr"))
    {
        var institution = new OnisepInstitution_Header
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = DateTime.Now
        };

        XXElement firstCell = row.XPathElement(".//td[1]");
        institution.Institution = OnisepInstitution.Trim(firstCell.XPathValue(".//text()"));
        institution.UrlDetail = zurl.GetUrl(pageUrl, firstCell.XPathValue(".//a/@href"));
        institution.City = OnisepInstitution.Trim(row.XPathValue(".//td[2]//text()"));
        institution.PostalCode = OnisepInstitution.Trim(row.XPathValue(".//td[3]//text()"));

        institutions.Add(institution);
    }

    page.Headers = institutions.ToArray();
    return page;
}
// header get data
// Parses the page into a single header page (no next page url): one post header per 'collapse'
// div under 'vba_news4', with the title and detail url of the second link of its 'blockhead' h2.
protected override IEnumDataPages<PostHeader> GetHeaderPageData(WebResult webResult)
{
    var root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    var page = new PostHeaderDataPage<PostHeader>
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        UrlNextPage = null
    };

    // <div id="vba_news4">
    var posts = new List<PostHeader>();
    foreach (XXElement block in root.XPathElements("//div[@id='vba_news4']//div[@class='collapse']"))
    {
        var post = new PostHeader
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        XXElement link = block.XPathElement(".//h2[@class='blockhead']//a[2]");
        post.Title = link.XPathValue(".//text()");
        post.UrlDetail = link.XPathValue("./@href");

        posts.Add(post);
    }

    page.Headers = posts.ToArray();
    return page;
}
// Moves to the next company element and fills _header from it: url and name ('NOM' link),
// type and location (first and second text of the 'VILLE' span), phone ('TELEPHONE' span) and
// infos (info_bulle attribute of every img). A missing type or location is only traced.
// Returns false when there is no more element.
public bool MoveNext()
{
    if (!_xmlEnum.MoveNext())
    {
        return false;
    }

    XXElement xeHeader = _xmlEnum.Current;
    _header = new Gesat_HeaderCompany();
    _header.sourceUrl = _url;
    _header.loadFromWebDate = DateTime.Now;

    // <span class="NOM"><a title="ESAT BETTY LAUNAY-MOULIN VERT" href="/Gesat/Hauts-de-Seine,92/Bois-Colombes,35494/esat-betty-launay-moulin-vert-competences-et-handicap-92,e1837/">ESAT BETTY LAUNAY-MOULIN VERT</a></span>
    XXElement xe = xeHeader.XPathElement(".//span[@class='NOM']//a");
    if (xe != null)
    {
        _header.url = GetUrl(xe.ExplicitXPathValue("@href"));
        _header.name = _trimFunc1(xe.ExplicitXPathValue(".//text()"));
    }

    // <span class="VILLE">E.S.A.T.<br />Bois-Colombes (92)</span>
    xe = xeHeader.XPathElement(".//span[@class='VILLE']");
    if (xe != null)
    {
        // the enumerator is IDisposable : release it once both texts have been read
        using (IEnumerator<string> texts = xe.DescendantTexts().GetEnumerator())
        {
            if (texts.MoveNext())
            {
                _header.type = texts.Current.Trim();
            }
            else
            {
                Trace.CurrentTrace.WriteLine("error companyType not found");
            }

            if (texts.MoveNext())
            {
                _header.location = texts.Current.Trim();
            }
            else
            {
                Trace.CurrentTrace.WriteLine("error companyLocation not found");
            }
        }
    }

    // <span class="TELEPHONE">01 47 86 11 48</span>
    _header.phone = _trimFunc1(xeHeader.ExplicitXPathValue(".//span[@class='TELEPHONE']//text()"));

    // <img info_bulle="Signataire de la charte Ethique et Valeurs" border="0" alt="/images/bullesGesat/pictoCharte.png" src="/images/bullesGesat/pictoCharte.png" style=" border: 0;" />
    // <img info_bulle="Lauréat des trophées HandiResponsables 2013" border="0" alt="/images/bullesGesat/LAURIERS-OR-2013.png" src="/images/bullesGesat/LAURIERS-OR-2013.png" style=" border: 0;" />
    _header.infos = xeHeader.XPathValues(".//img/@info_bulle").Select(_trimFunc1).ToArray();

    return true;
}
// Parses a RapideDdl listing page: the url of the 'nnext' navigation link and one post header per
// 'base shortstory' div (detail url, author, images, text values and category).
// The title and the creation date of the posts are not set here.
public static RapideDdl_HeaderPage LoadHeaderPageFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    var root = new XXElement(request.GetXmlDocument().Root);
    string pageUrl = request.Url;

    var page = new RapideDdl_HeaderPage();
    page.urlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='basenavi']//span[@class='nnext']//a/@href"));

    var posts = new List<RapideDdl_PostHeader>();
    foreach (XXElement story in root.XPathElements("//div[@class='base shortstory']"))
    {
        var post = new RapideDdl_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = DateTime.Now
        };

        XXElement part = story.XPathElement(".//*[@class='shd']//a");
        post.urlDetail = zurl.GetUrl(pageUrl, part.XPathValue("@href"));

        part = story.XPathElement(".//div[@class='shdinf']");
        post.postAuthor = part.XPathValue(".//span[@class='arg']//a//text()");

        part = story.XPathElement(".//div[@class='maincont']");
        post.images = part
            .DescendantNodes(candidate => XmlDescendant.ImageFilter(candidate))
            .Select(image => new pb.old.ImageHtml((XElement)image, pageUrl))
            .ToList();
        post.SetTextValues(part.DescendantTexts());

        // category : texts of the links of the 'morelink' block, without the "Commentaires..." one
        part = story.XPathElement(".//div[@class='morelink']//span[@class='arg']");
        post.category = part.XPathElements(".//a")
            .DescendantTexts()
            .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
            .Where(text => !text.StartsWith("Commentaires"))
            .zToStringValues("/");

        posts.Add(post);
    }

    page.postHeaders = posts.ToArray();
    return page;
}
// Parses a RapideDdl post page found in the 'lcolomn mainside' div: category and title (from the
// 'spbar' div), author, images (always loaded from the web to get their width and height),
// text values and download links. The creation date is not set here.
public static RapideDdl_PostDetail LoadPostDetailFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    var root = new XXElement(request.GetXmlDocument().Root);

    var detail = new RapideDdl_PostDetail
    {
        sourceUrl = request.Url,
        loadFromWebDate = DateTime.Now
    };

    XXElement post = root.XPathElement("//div[@class='lcolomn mainside']");

    // category : texts of the 'spbar' links except "Accueil" and empty texts
    detail.category = post.XPathElements(".//div[@class='spbar']//a")
        .DescendantTexts()
        .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
        .Where(text => !(text == "Accueil" || text == ""))
        .zToStringValues("/");

    // title : last text of the 'spbar' div
    detail.title = post.XPathElements(".//div[@class='spbar']")
        .DescendantTexts()
        .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
        .LastOrDefault();

    XXElement part = post.XPathElement(".//div[@class='shdinfo']");
    detail.postAuthor = part.XPathValue(".//span[@class='arg']//a//text()");

    part = post.XPathElement(".//div[@class='maincont']");
    detail.images = part
        .DescendantNodes(candidate => XmlDescendant.ImageFilter(candidate))
        .Select(image => new pb.old.ImageHtml((XElement)image, request.Url))
        .ToList();

    // force load image to get image width and height
    pb.old.Http_v2.LoadImageFromWeb(detail.images);

    detail.SetTextValues(part.XPathElements(".//div").DescendantTexts());

    // download links : every href except the image hosts / image files, for instance
    // http://prezup.eu http://pixhst.com/avaxhome/27/36/002e3627.jpeg http://www.zupmage.eu/i/R1UgqdXn4F.jpg
    // http://i.imgur.com/Gu7hagN.jpg http://img11.hostingpics.net/pics/591623liens.png http://www.hapshack.com/images/jUfTZ.gif
    // http://pixhst.com/pictures/3029467
    foreach (XXElement box in part.XPathElements("div/div"))
    {
        detail.downloadLinks.AddRange(
            box.XPathValues(".//a/@href")
               .Where(href => !(href.StartsWith("http://prezup.eu")
                                || href.StartsWith("http://pixhst.com")
                                || href.EndsWith(".jpg")
                                || href.EndsWith("jpeg")
                                || href.EndsWith("png")
                                || href.EndsWith("gif"))));
    }

    return detail;
}
// detail get data, from WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>
// Creates the post detail with its source url, request time and detail key, then lets
// _GetDetailData fill it from the loaded document.
protected override Vosbooks_PostDetail_v6 GetDetailData(HttpResult<string> httpResult)
{
    XXElement root = httpResult.zGetXDocument().zXXElement();

    var detail = new Vosbooks_PostDetail_v6
    {
        SourceUrl = httpResult.Http.HttpRequest.Url,
        LoadFromWebDate = httpResult.Http.RequestTime,
        Id = _GetDetailKey(httpResult.Http.HttpRequest)
    };

    _GetDetailData(root, detail);
    return detail;
}
// Test : loads url using the cookies of the file configured at "Ebookdz/CookiesFile",
// then traces the results of Test_GetLogin_01() and Test_IsLoggedIn_01() for the loaded document.
public static void Test_LoadWithCookies_01(string url)
{
    string cookiesFile = XmlConfig.CurrentConfig.GetExplicit("Ebookdz/CookiesFile");
    HttpRequestParameters_v1 requestParameters = new HttpRequestParameters_v1
    {
        cookies = zcookies.LoadCookies(cookiesFile)
    };
    Trace.WriteLine("load cookies from \"{0}\"", cookiesFile);

    pb.old.Http_v2.LoadUrl(url, requestParameters);

    // the loaded document is read from the shared html reader of Http_v2
    XXElement root = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root);
    Trace.WriteLine("Login : \"{0}\"", Test_GetLogin_01(root));
    Trace.WriteLine("Is logged in : \"{0}\"", Test_IsLoggedIn_01(root));
}
// Builds the Ebookdz_ForumPage_v1 of a forum index page.
//   webResult : result whose document, request url and load date are read
//   returns   : the page with one Ebookdz_Forum_v1 per sub-forum link; Id is not set and UrlNextPage is null
private static Ebookdz_ForumPage_v1 GetData(WebResult webResult)
{
    XXElement xeSource = new XXElement(webResult.Http.zGetXDocument().Root);
    string url = webResult.WebRequest.HttpRequest.Url;

    Ebookdz_ForumPage_v1 data = new Ebookdz_ForumPage_v1();
    data.SourceUrl = url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.UrlNextPage = null;

    List<Ebookdz_Forum_v1> forums = new List<Ebookdz_Forum_v1>();

    // optional filter on the sub-forum name, null : no sub-forum is skipped
    Predicate<string> filter = null;

    // <div class="body_bd">
    //   <div id="forumbits" class="forumbits">
    //     <ol>
    //       <li id="forum10" class="forumbit_post new L1">
    //         <div class="forumrow">
    //         <ol id="childforum_for_161" class="childsubforum">
    //           <div class="titleline">
    foreach (XXElement xe in xeSource.XPathElements("//div[@id='forumbits']/ol/li"))
    {
        // category : text of the link of the 'forumrow' div
        // (the url of the category was computed here but never used, it is no longer computed)
        XXElement xe2 = xe.XPathElement(".//div[@class='forumrow']//a");
        string category = xe2.XPathValue(".//text()");

        foreach (XXElement xe3 in xe.XPathElements(".//ol[@class='childsubforum']/li//div[@class='titleline']//a"))
        {
            string name = xe3.XPathValue(".//text()");
            if (filter != null && !filter(name))
            {
                continue;
            }

            string urlSubForum = Ebookdz_v1.GetUrl(zurl.GetUrl(url, xe3.XPathValue("@href")));
            forums.Add(new Ebookdz_Forum_v1 { SourceUrl = url, LoadFromWebDate = webResult.LoadFromWebDate, Category = category, Name = name, Url = urlSubForum });
        }
    }

    data.Forums = forums.ToArray();
    return(data);
}
// Creates a BlogDemoorDetailData from an http result : sets SourceUrl, LoadFromWebDate and Id
// from the result, then passes the document element and the detail to _GetDetailData().
protected override BlogDemoorDetailData GetDetailData_v2(HttpResult<string> httpResult)
{
    XXElement root = httpResult.zGetXDocument().zXXElement();

    BlogDemoorDetailData detail = new BlogDemoorDetailData
    {
        SourceUrl = httpResult.Http.HttpRequest.Url,
        LoadFromWebDate = httpResult.Http.RequestTime,
        Id = _GetDetailKey(httpResult.Http.HttpRequest)
    };

    _GetDetailData(root, detail);
    return detail;
}
// Creates a BlogDemoorDetailData from a web result : sets SourceUrl, LoadFromWebDate and Id
// from the result, then passes the document element and the detail to _GetDetailData().
protected override BlogDemoorDetailData GetDetailData(WebResult webResult)
{
    XXElement root = webResult.Http.zGetXDocument().zXXElement();

    BlogDemoorDetailData detail = new BlogDemoorDetailData
    {
        SourceUrl = webResult.WebRequest.HttpRequest.Url,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = _GetDetailKey(webResult.WebRequest.HttpRequest)
    };

    _GetDetailData(root, detail);
    return detail;
}
// Detail get data, from WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>.
// Creates a TelechargerMagazine_PostDetail_v3, sets SourceUrl, LoadFromWebDate and Id from the http result,
// then passes the document element and the detail to _GetDetailData().
protected override TelechargerMagazine_PostDetail_v3 GetDetailData(HttpResult<string> httpResult)
{
    XXElement root = httpResult.zGetXDocument().zXXElement();

    TelechargerMagazine_PostDetail_v3 detail = new TelechargerMagazine_PostDetail_v3
    {
        SourceUrl = httpResult.Http.HttpRequest.Url,
        LoadFromWebDate = httpResult.Http.RequestTime,
        Id = _GetDetailKey(httpResult.Http.HttpRequest)
    };

    _GetDetailData(root, detail);
    return detail;
}
// detail image cache get sub-directory, from WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>
//protected override string GetDetailImageCacheUrlSubDirectory(WebData<Handeco_Detail_v2> data)
//{
//    string subPath = null;
//    subPath = data.Result_v2.Http.HttpRequest.UrlCachePath.SubPath;
//    return zpath.PathSetExtension(subPath, null);
//}

// Detail get data, from WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>.
// Creates a Handeco_Detail_v2, sets SourceUrl, LoadFromWebDate and Id from the http result,
// then passes the document element and the detail to _GetDetailData().
protected override Handeco_Detail_v2 GetDetailData(HttpResult<string> httpResult)
{
    XXElement root = httpResult.zGetXDocument().zXXElement();

    Handeco_Detail_v2 detail = new Handeco_Detail_v2
    {
        SourceUrl = httpResult.Http.HttpRequest.Url,
        LoadFromWebDate = httpResult.Http.RequestTime,
        Id = _GetDetailKey(httpResult.Http.HttpRequest)
    };

    _GetDetailData(root, detail);
    return detail;
}
// Builds the page of EbookdzForumData of a forum index page.
//   webResult : result whose document, request and load date are read
//   returns   : the page with one EbookdzForumData per sub-forum link; UrlNextPage is null
protected override IEnumDataPages<EbookdzForumData> GetDataPage(WebResult webResult)
{
    XXElement xeSource = new XXElement(webResult.Http.zGetXDocument().Root);
    string url = webResult.WebRequest.HttpRequest.Url;

    PostHeaderDataPage<EbookdzForumData> data = new PostHeaderDataPage<EbookdzForumData>();
    data.SourceUrl = url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = _GetKey(webResult.WebRequest.HttpRequest);
    data.UrlNextPage = null;

    List<EbookdzForumData> forums = new List<EbookdzForumData>();

    // optional filter on the sub-forum name, null : no sub-forum is skipped
    Predicate<string> filter = null;

    // <div class="body_bd">
    //   <div id="forumbits" class="forumbits">
    //     <ol>
    //       <li id="forum10" class="forumbit_post new L1">
    //         <div class="forumrow">
    //         <ol id="childforum_for_161" class="childsubforum">
    //           <div class="titleline">
    foreach (XXElement xe in xeSource.XPathElements("//div[@id='forumbits']/ol/li"))
    {
        // category : text of the link of the 'forumrow' div
        // (the url of the category was computed here but never used, it is no longer computed)
        XXElement xe2 = xe.XPathElement(".//div[@class='forumrow']//a");
        string category = xe2.XPathValue(".//text()");

        foreach (XXElement xe3 in xe.XPathElements(".//ol[@class='childsubforum']/li//div[@class='titleline']//a"))
        {
            string name = xe3.XPathValue(".//text()");
            if (filter != null && !filter(name))
            {
                continue;
            }

            string urlSubForum = Ebookdz_MainForum.GetUrl(zurl.GetUrl(url, xe3.XPathValue("@href")));
            forums.Add(new EbookdzForumData { SourceUrl = url, LoadFromWebDate = webResult.LoadFromWebDate, Category = category, Name = name, UrlDetail = urlSubForum });
        }
    }

    data.Headers = forums.ToArray();
    return(data);
}
// Builds the page of BlogDemoorData of a listing page.
//   webResult : result whose document, request and load date are read
//   returns   : the page with one BlogDemoorData (title only) per content div, and the url of the next page
protected override IEnumDataPages<BlogDemoorData> GetDataPage(WebResult webResult)
{
    XXElement root = new XXElement(webResult.Http.zGetXDocument().Root);
    string pageUrl = webResult.WebRequest.HttpRequest.Url;

    BlogDemoorDataPages<BlogDemoorData> dataPage = new BlogDemoorDataPages<BlogDemoorData>
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = webResult.LoadFromWebDate,
        Id = GetPageKey(webResult.WebRequest.HttpRequest),
        // next page : link '>' of the last item of the 'page-nav' div
        UrlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='page-nav']//li[last()]//a[text()='>']/@href"))
    };

    IEnumerable<XXElement> xeItems = root.XPathElements("//table[@id='layout']//div[@id='content']/div");
    List<BlogDemoorData> items = new List<BlogDemoorData>();
    foreach (XXElement xeItem in xeItems)
    {
        BlogDemoorData item = new BlogDemoorData
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = webResult.LoadFromWebDate
        };

        // the 'page-nav' div is a child of the content div too, it is not an item
        if (xeItem.XPathValue("@class") != "page-nav")
        {
            // <div style="" data-zt="divbyzt">
            //   <div data-zt="divbyzt">
            //     <div data-zt="divbyzt">
            //       <center>
            //         <strong>
            //           <a href="http://www.vosbooks.net/74231-journaux/pack-journaux-francais-du-28-janvier-2015.html" title="">
            //             Pack Journaux Français Du 28 Janvier 2015
            //             <span class="detail_release" data-zt="spanbyzt"></span>
            //           </a>
            //         </strong>
            //       </center>
            //     </div>
            //   </div>
            // </div>
            XXElement xeLink = xeItem.XPathElement(".//div/div/div//a");
            item.Title = xeLink.XPathValue(".//text()");
            items.Add(item);
        }
    }

    dataPage.Data = items.ToArray();
    return dataPage;
}