/// <summary>
/// Advances to the next article element and fills <c>_postHeader</c> from it.
/// </summary>
/// <returns>
/// <c>true</c> when a header was read; <c>false</c> when <c>_xmlEnum</c> has no more elements.
/// </returns>
protected override bool _MoveNext()
{
    // One element is consumed per call, so a guard clause is enough (no loop needed).
    if (!_xmlEnum.MoveNext())
    {
        return false;
    }

    XXElement article = _xmlEnum.Current;

    _postHeader = new Pdf4frPostHeader();
    _postHeader.sourceUrl = _sourceUrl;

    // The link under <header> supplies both the post url (@href) and the title (its text).
    XXElement headerLink = article.XPathElement("./header//a");
    _postHeader.url = headerLink.XPathValue("@href");
    _postHeader.title = headerLink.XPathValue(".//text()");

    // Every text found under <div class='entry_top'> is handed to SetInfo.
    XXElement entryTop = article.XPathElement(".//div[@class='entry_top']");
    foreach (string text in entryTop.XElement.zDescendantTexts())
    {
        _postHeader.SetInfo(text);
    }

    // NOTE(review): nothing in this method assigns _postHeader.image; this assumes
    // Pdf4frPostHeader (or SetInfo) initialises it - confirm before enabling _loadImage.
    if (_loadImage && _postHeader.image.Source != null)
    {
        Http_v2.LoadImageFromWeb(_postHeader.image.Source);
    }

    // Footer lookup is still performed; its result is not used yet.
    article.XPathElement(".//footer");

    return true;
}
/// <summary>
/// Advances to the next company header element and fills <c>_header</c> from it.
/// </summary>
/// <returns>
/// <c>true</c> when a header was read; <c>false</c> when <c>_xmlEnum</c> has no more elements.
/// </returns>
public bool MoveNext()
{
    // One element is consumed per call, so a guard clause replaces the former
    // "while" loop whose body always returned on its first pass.
    if (!_xmlEnum.MoveNext())
    {
        return false;
    }

    XXElement xeHeader = _xmlEnum.Current;

    _header = new Gesat_HeaderCompany();
    _header.sourceUrl = _url;
    _header.loadFromWebDate = DateTime.Now;

    // <span class="NOM"><a title="ESAT BETTY LAUNAY-MOULIN VERT" href="/Gesat/Hauts-de-Seine,92/...">ESAT BETTY LAUNAY-MOULIN VERT</a></span>
    XXElement xeName = xeHeader.XPathElement(".//span[@class='NOM']//a");
    if (xeName != null)
    {
        _header.url = GetUrl(xeName.ExplicitXPathValue("@href"));
        _header.name = _trimFunc1(xeName.ExplicitXPathValue(".//text()"));
    }

    // <span class="VILLE">E.S.A.T.<br />Bois-Colombes (92)</span>
    // First text = company type, second text = location.
    XXElement xeTown = xeHeader.XPathElement(".//span[@class='VILLE']");
    if (xeTown != null)
    {
        // IEnumerator<T> is IDisposable: dispose it deterministically instead of leaking it.
        using (IEnumerator<string> texts = xeTown.DescendantTexts().GetEnumerator())
        {
            if (texts.MoveNext())
            {
                _header.type = texts.Current.Trim();
            }
            else
            {
                Trace.CurrentTrace.WriteLine("error companyType not found");
            }

            if (texts.MoveNext())
            {
                _header.location = texts.Current.Trim();
            }
            else
            {
                Trace.CurrentTrace.WriteLine("error companyLocation not found");
            }
        }
    }

    // <span class="TELEPHONE">01 47 86 11 48</span>
    _header.phone = _trimFunc1(xeHeader.ExplicitXPathValue(".//span[@class='TELEPHONE']//text()"));

    // <img info_bulle="Signataire de la charte Ethique et Valeurs" ... src="/images/bullesGesat/pictoCharte.png" />
    // Each image tooltip (info_bulle attribute) becomes one info entry.
    _header.infos = xeHeader.XPathValues(".//img/@info_bulle").Select(_trimFunc1).ToArray();

    return true;
}
/// <summary>
/// Builds a <c>RapideDdl_PostDetail</c> from the post page carried by <paramref name="request"/>.
/// </summary>
/// <param name="request">Request providing the page url and its XML document.</param>
/// <returns>The post detail filled with category, title, author, images, text values and download links.</returns>
public static RapideDdl_PostDetail LoadPostDetailFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    XXElement xeSource = new XXElement(request.GetXmlDocument().Root);

    RapideDdl_PostDetail data = new RapideDdl_PostDetail();
    data.sourceUrl = request.Url;
    data.loadFromWebDate = DateTime.Now;

    XXElement xePost = xeSource.XPathElement("//div[@class='lcolomn mainside']");

    // Category: texts of the links inside the "spbar" div, minus "Accueil" and empty entries.
    data.category = xePost.XPathElements(".//div[@class='spbar']//a")
        .DescendantTexts()
        .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
        .Where(s => s != "Accueil" && s != "")
        .zToStringValues("/");

    // Title: last text found in the "spbar" div.
    data.title = xePost.XPathElements(".//div[@class='spbar']")
        .DescendantTexts()
        .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
        .LastOrDefault();

    XXElement xe = xePost.XPathElement(".//div[@class='shdinfo']");
    data.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

    xe = xePost.XPathElement(".//div[@class='maincont']");
    data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, request.Url))
        .ToList();

    // Images are always loaded here (the request.LoadImage condition is disabled):
    // force load image to get image width and height.
    pb.old.Http_v2.LoadImageFromWeb(data.images);

    data.SetTextValues(xe.XPathElements(".//div").DescendantTexts());

    foreach (XXElement xe2 in xe.XPathElements("div/div"))
    {
        // Links to image hosts / image files are not download links, e.g.
        //   http://prezup.eu  http://pixhst.com/avaxhome/27/36/002e3627.jpeg  http://www.zupmage.eu/i/R1UgqdXn4F.jpg
        //   http://i.imgur.com/Gu7hagN.jpg  http://img11.hostingpics.net/pics/591623liens.png
        //   http://www.hapshack.com/images/jUfTZ.gif  http://pixhst.com/pictures/3029467
        // URLs are not linguistic text: compare ordinally instead of with the current culture.
        data.downloadLinks.AddRange(
            xe2.XPathValues(".//a/@href").Where(url =>
                !url.StartsWith("http://prezup.eu", StringComparison.Ordinal)
                && !url.StartsWith("http://pixhst.com", StringComparison.Ordinal)
                && !url.EndsWith(".jpg", StringComparison.Ordinal)
                && !url.EndsWith("jpeg", StringComparison.Ordinal)
                && !url.EndsWith("png", StringComparison.Ordinal)
                && !url.EndsWith("gif", StringComparison.Ordinal)));
    }

    return data;
}
/// <summary>
/// Advances to the next article element and fills <c>_postHeader</c> (source url, url, title) from it.
/// </summary>
/// <returns>
/// <c>true</c> when a header was read; <c>false</c> when <c>_xmlEnum</c> has no more elements.
/// </returns>
protected override bool _MoveNext()
{
    // One element is consumed per call, so a guard clause is enough (no loop needed).
    if (!_xmlEnum.MoveNext())
    {
        return false;
    }

    XXElement article = _xmlEnum.Current;

    _postHeader = new Magazine3kPostHeader();
    _postHeader.sourceUrl = _sourceUrl;

    // <h2 class="title">
    //   <a href="http://zone-ebooks.com/journaux/le-parisien-journal-de-paris-supp-economie-du-lundi-07-octobre-pdf.html"
    //      rel="bookmark" title="Lien permanent: Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre">
    //      Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre</a>
    // </h2>
    // The first link of the article supplies the post url (@href) and the title (its text).
    XXElement titleLink = article.XPathElement(".//a");
    _postHeader.url = titleLink.XPathValue("@href");
    _postHeader.title = titleLink.XPathValue(".//text()");

    // Image, info texts and footer are not read for this site.
    return true;
}
// header get data, from WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>
/// <summary>
/// Reads one listing page: the url of the next page and one <c>PostHeader</c>
/// (title + detail url) per "leftpane" div.
/// </summary>
/// <param name="httpResult">Result of the http request for the listing page.</param>
/// <returns>The page data holding the headers found on the page.</returns>
protected override IEnumDataPages<PostHeader> GetHeaderPageData(HttpResult<string> httpResult)
{
    XXElement root = httpResult.zGetXDocument().zXXElement();
    string url = httpResult.Http.HttpRequest.Url;

    PostHeaderHeaderDataPages_v2 page = new PostHeaderHeaderDataPages_v2
    {
        SourceUrl = url,
        LoadFromWebDate = httpResult.Http.RequestTime,
        Id = GetPageKey(httpResult.Http.HttpRequest)
    };

    // Page layout:
    // <div id='dle-content'>
    //   <div class="leftpane">
    //     <div class="movieposter" title="Watch Movie Pachamama : Cuisine des premières nations">
    //       <a href="http://www.telecharger-magazine.com/livres/3833-pachamama-cuisine-des-premires-nations.html">
    //         <a href="http://www.telecharger-magazine.com/livres/3833-pachamama-cuisine-des-premires-nations.html" title="Pachamama : Cuisine des premières nations">
    //           <img src="http://pxhst.co/avaxhome/cd/2a/00152acd.jpeg" width="110" height="150" alt="..." title="..." />
    //         </a>
    //     </div>
    //   </div>
    //   ...
    //   <div class="navigation" align="center">
    //     <div class="clear"></div>
    //     <span>← Previous</span> <span>1</span>
    //     <a href="http://www.telecharger-magazine.com/page/2/">2</a>
    //     ...
    //     <a href="http://www.telecharger-magazine.com/page/2/">Next →</a>
    //     <div class="clear"></div>
    //   </div>
    XXElement content = root.XPathElement("//div[@id='dle-content']");

    // The "Next" navigation link gives the following page.
    string nextHref = content.XPathValue(".//a[starts-with(text(), 'Next')]/@href");
    page.UrlNextPage = zurl.GetUrl(url, nextHref);

    List<PostHeader> headers = new List<PostHeader>();
    foreach (XXElement pane in content.XPathElements(".//div[@class='leftpane']"))
    {
        PostHeader header = new PostHeader
        {
            SourceUrl = url,
            LoadFromWebDate = httpResult.Http.RequestTime
        };

        // NOTE(review): panes are selected with @class='leftpane', so this "page-nav"
        // exclusion looks unreachable - confirm before removing it.
        if (pane.XPathValue("@class") != "page-nav")
        {
            // The inner of the two nested anchors carries both title and href.
            XXElement innerLink = pane.XPathElement(".//a/a");
            header.Title = innerLink.AttribValue("title");
            header.UrlDetail = innerLink.AttribValue("href");
            headers.Add(header);
        }
    }

    page.Data = headers.ToArray();
    return page;
}
/// <summary>
/// Builds a <c>Unea_DetailCompany1</c> from the current XML document: stamps the source url
/// and load date, then lets <c>GetNewDataFicheBloc</c> read the "fiche" div.
/// </summary>
/// <returns>The company detail that was filled.</returns>
protected override Unea_DetailCompany1 GetData()
{
    XXElement root = new XXElement(GetXmlDocument().Root);

    Unea_DetailCompany1 company = new Unea_DetailCompany1
    {
        sourceUrl = Url,
        loadFromWebDate = DateTime.Now
    };

    // <div class="fiche">
    // The former per-"fiche_bloc" parsing (blocs no 1, 2 and 3) is disabled;
    // the whole fiche element is passed to GetNewDataFicheBloc instead.
    GetNewDataFicheBloc(company, root.XPathElement(".//div[@class='fiche']"));

    return company;
}
/// <summary>
/// Fills <paramref name="data"/> (title, date, content, images) from the article
/// found in <paramref name="xeSource"/>.
/// </summary>
/// <param name="xeSource">Root element of the loaded page.</param>
/// <param name="data">Detail object to fill; its <c>SourceUrl</c> is used to resolve image urls.</param>
protected void _GetDetailData(XXElement xeSource, BlogDemoorDetailData data)
{
    // <div id="content"> ... <div class="item_div">
    XXElement article = xeSource.XPathElement("//div[@id='content']//div[@class='item_div']");

    data.Title = article.XPathValue(".//h2//text()");

    // The date header is expected in "d MMMM yyyy" form, parsed with __cultureInfo.
    string dateText = article.XPathValue(".//div[@class='dateheader']/text()");
    Date parsedDate;
    bool dateParsed = Date.TryParseExact(dateText, "d MMMM yyyy", __cultureInfo, DateTimeStyles.None, out parsedDate);
    if (!dateParsed)
    {
        Trace.WriteLine($"date not found \"{dateText}\"");
    }
    else
    {
        data.Date = parsedDate;
    }

    // <div class="articlebody" itemprop="articleBody">
    XXElement body = article.XPathElement(".//div[@class='articlebody']");
    if (body.XElement != null)
    {
        data.Content = body.XElement.ToString();
    }

    // Image urls come from the <img> elements of the body and are resolved against the source url.
    // Images themselves are not loaded here.
    data.Images = GetImageUrls(body.XPathElements(".//img"))
        .Select(imageUrl => new WebImage(zurl.GetUrl(data.SourceUrl, imageUrl)))
        .ToArray();
}
/// <summary>
/// Manual test of the debrid-link.fr connection sequence: requests a new API token, loads the
/// token validation page, fills its login form with the configured credentials, posts the form
/// and traces the message of the resulting page.
/// </summary>
/// <remarks>
/// Login, password, public key and session time are read from the "LocalConfig" configuration.
/// The password is never written to the trace (it is shown as "xxx"), and form names/values
/// are percent-encoded before being joined into the request content.
/// The method returns early (after tracing why) when the expected script is not in the page.
/// </remarks>
public static void Test_DebridLink_01()
{
    string urlBase = "https://api.debrid-link.fr/rest/";
    string login = XmlConfig.CurrentConfig.GetConfig("LocalConfig").GetExplicit("DownloadAutomateManager/DebridLink/Login");
    string password = XmlConfig.CurrentConfig.GetConfig("LocalConfig").GetExplicit("DownloadAutomateManager/DebridLink/Password");
    string publickey = XmlConfig.CurrentConfig.GetConfig("LocalConfig").GetExplicit("DownloadAutomateManager/DebridLink/PublicKey");
    string sessidTime = XmlConfig.CurrentConfig.GetConfig("LocalConfig").GetExplicit("DownloadAutomateManager/DebridLink/SessidTime");

    // Step 1: ask for a new token.
    string url = urlBase + string.Format("token/{0}/new", publickey);
    HttpRequestParameters requestParameters = new HttpRequestParameters { Encoding = Encoding.UTF8 };
    Http http = HttpManager.CurrentHttpManager.Load(new HttpRequest { Url = url }, requestParameters);
    DateTime dt = DateTime.Now;
    http.ResultText.zTraceJson();

    BsonDocument doc = BsonSerializer.Deserialize<BsonDocument>(http.ResultText);
    string token = doc.zGet("value.token").zAsString();
    string validTokenUrl = doc.zGet("value.validTokenUrl").zAsString();
    string key = doc.zGet("value.key").zAsString();
    int ts = doc.zGet("ts").zAsInt();

    Trace.WriteLine("request time : \"{0:dd/MM/yyyy HH:mm:ss}\"", dt);
    Trace.WriteLine("result : \"{0}\"", doc.zGet("result").zAsString());
    Trace.WriteLine("token : \"{0}\"", token);
    Trace.WriteLine("validTokenUrl : \"{0}\"", validTokenUrl);
    Trace.WriteLine("key : \"{0}\"", key);
    Trace.WriteLine("ts : \"{0}\"", ts);
    Trace.WriteLine("ts : \"{0:dd/MM/yyyy HH:mm:ss}\"", zdate.UnixTimeStampToDateTime(ts));
    // Gap between the server timestamp and the local request time.
    Trace.WriteLine("ts : \"{0}\"", zdate.UnixTimeStampToDateTime(ts) - dt);

    // Step 2: load the token validation page, e.g.
    //   validTokenUrl : "https://secure.debrid-link.fr/user/<token>/login"
    http = HttpManager.CurrentHttpManager.Load(new HttpRequest { Url = validTokenUrl }, requestParameters);

    // Expected page content:
    // <script>if (window!=window.top) { top.location.href='https://secure.debrid-link.fr/login'; }</script>
    // <form action='' method='POST' class='form-horizontal'>
    //   <input type='text' class='form-control' name='user'>
    //   <input type='password' class='form-control' name='password'>
    //   <input type='hidden' value='<token>' name='token'>
    //   <input type='submit' style='display:none'>
    //   <button type='submit' name='authorizedToken' value='1' class='btn btn-dl'>Envoyer</button>
    XXElement xeSource = http.zGetXDocument().zXXElement();

    // The default login url is taken from the script:
    //   if (window!=window.top) { top.location.href='https://secure.debrid-link.fr/login'; }
    string script = xeSource.XPathValue("//head//script//text()");
    if (script == null)
    {
        Trace.WriteLine("//head//script not found");
        return;
    }
    Trace.WriteLine("script : \"{0}\"", script);

    Regex rg = new Regex("top\\.location\\.href=[\"'](.*)[\"']", RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);
    Match match = rg.Match(script);
    if (!match.Success)
    {
        Trace.WriteLine("top.location.href='...' not found in script");
        return;
    }
    url = match.Groups[1].Value;
    Trace.WriteLine("login url : \"{0}\"", url);

    // A non-empty form action overrides the url found in the script.
    XXElement xeForm = xeSource.XPathElement("//form");
    string action = xeForm.AttribValue("action");
    Trace.WriteLine("form action : \"{0}\"", action);
    if (!string.IsNullOrEmpty(action))
    {
        url = action;
    }

    HttpRequestMethod method = Http.GetHttpRequestMethod(xeForm.AttribValue("method"));
    Trace.WriteLine("form method : {0}", method);

    // Step 3: build the form content. "content" is what is sent; "tracedContent" holds the
    // same pairs with the password masked and is the only copy written to the trace.
    StringBuilder content = new StringBuilder();
    StringBuilder tracedContent = new StringBuilder();
    foreach (XXElement xe in xeForm.DescendantFormItems())
    {
        string name = xe.AttribValue("name");
        if (name == null)
        {
            // Items without a name (e.g. the hidden submit input) are not sent.
            continue;
        }

        string value;
        if (name == "user")
        {
            value = login;
        }
        else if (name == "password")
        {
            value = password;
        }
        else if (name == "sessidTime")
        {
            value = sessidTime;
        }
        else
        {
            value = xe.AttribValue("value");
        }

        string tracedValue = name == "password" ? "xxx" : value;

        // Every pair appends at least "=", so a non-empty builder means "not the first pair".
        if (content.Length > 0)
        {
            content.Append('&');
            tracedContent.Append('&');
        }

        // Percent-encode so that '&', '=', '+', '%' ... inside a value cannot corrupt the body.
        // A missing value is sent as an empty string (EscapeDataString rejects null).
        content.Append(System.Uri.EscapeDataString(name))
               .Append('=')
               .Append(System.Uri.EscapeDataString(value ?? ""));

        tracedContent.AppendFormat("{0}={1}", name, tracedValue);
        Trace.WriteLine("{0}={1}", name, tracedValue);
    }

    // Resulting content looks like:
    //   user=<login>&password=<password>&sessidTime=all&token=<token>&authorizedToken=1
    Trace.WriteLine("content : \"{0}\"", tracedContent.ToString());

    // Step 4: post the form and trace the message of the resulting page.
    http = HttpManager.CurrentHttpManager.Load(new HttpRequest { Url = url, Method = method, Content = content.ToString() }, requestParameters);

    // <div class='panel-body'>
    //   <div class='alert alert-success'>
    //     La session a bien été activée. Vous pouvez utiliser l'application API Test
    //   </div>
    // </div>
    xeSource = http.zGetXDocument().zXXElement();
    string loginMessage = xeSource.XPathValue("//div[@class='panel-body']//text()");
    if (loginMessage != null)
    {
        // XPathValue can return null (see the script lookup above); only trim a real value.
        loginMessage = loginMessage.Trim();
    }
    Trace.WriteLine("login message : \"{0}\"", loginMessage);
}
/// <summary>
/// Loads the login page at <paramref name="url"/>, fills its form with <c>_login</c> and
/// <c>_password</c>, submits it and checks that the response contains the success alert.
/// </summary>
/// <param name="url">
/// Url of the page holding the login form, e.g. https://debrid-link.fr/user/&lt;token&gt;/login
/// </param>
/// <exception cref="PBException">
/// Thrown when the response has no <c>div</c> with class "alert alert-success".
/// </exception>
private void _Login(string url)
{
    Http.Http http = HttpManager.CurrentHttpManager.Load(new HttpRequest { Url = url }, _requestParameters);
    XXElement xeSource = HttpManager.CurrentHttpManager.GetXDocument(http).zXXElement();

    XXElement xeForm = xeSource.XPathElement("//form");

    // A non-empty form action overrides the default login url.
    string loginUrl = __loginUrl;
    string action = xeForm.AttribValue("action");
    if (!string.IsNullOrEmpty(action))
    {
        loginUrl = action;
    }

    // Without a method attribute the form is submitted with GET.
    HttpRequestMethod httpMethod = HttpRequestMethod.Get;
    string method = xeForm.AttribValue("method");
    if (!string.IsNullOrEmpty(method))
    {
        httpMethod = HttpTools.GetHttpRequestMethod(method);
    }

    StringBuilder content = new StringBuilder();
    foreach (XXElement xe in xeForm.DescendantFormItems())
    {
        string name = xe.AttribValue("name");
        if (name == null)
        {
            // Items without a name are not sent.
            continue;
        }

        string value;
        if (name == "user")
        {
            value = _login;
        }
        else if (name == "password")
        {
            value = _password;
        }
        else
        {
            value = xe.AttribValue("value");
        }

        // Every pair appends at least "=", so a non-empty builder means "not the first pair".
        if (content.Length > 0)
        {
            content.Append('&');
        }

        // Percent-encode so that '&', '=', '+', '%' ... inside the login or password
        // cannot corrupt the form body. A missing value is sent as an empty string
        // (EscapeDataString rejects null).
        content.Append(System.Uri.EscapeDataString(name))
               .Append('=')
               .Append(System.Uri.EscapeDataString(value ?? ""));

        if (__trace)
        {
            // The password is never written to the trace.
            if (name != "password")
            {
                pb.Trace.WriteLine(" {0}={1}", name, value);
            }
            else
            {
                pb.Trace.WriteLine(" {0}=xxx", name);
            }
        }
    }

    if (__trace)
    {
        pb.Trace.WriteLine(" form login url : \"{0}\"", loginUrl);
        pb.Trace.WriteLine(" form action : \"{0}\"", action);
        pb.Trace.WriteLine(" form method : {0}", httpMethod);
        // The form content is deliberately not traced: it contains the password.
    }

    http = HttpManager.CurrentHttpManager.Load(new HttpRequest { Url = loginUrl, Method = httpMethod, Content = content.ToString() }, _requestParameters);
    xeSource = http.zGetXDocument().zXXElement();

    // <div class="alert alert-success"> is only present after a successful login.
    XXElement xeLogin = xeSource.XPathElement("//div[@class='alert alert-success']");
    if (xeLogin.XElement == null)
    {
        throw new PBException("can't login to debrid-link.fr");
    }
}
/// <summary>
/// Builds a <c>GoldenDdl_PostDetail</c> from the post page carried by <paramref name="loadDataFromWeb"/>:
/// category, print type, title, creation date, author, images, text values and download links.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page: XML document, request (url, LoadImage) and load date.</param>
/// <returns>The post detail that was filled.</returns>
protected override IPost GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);

    GoldenDdl_PostDetail data = new GoldenDdl_PostDetail();
    data.sourceUrl = loadDataFromWeb.request.Url;
    data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.id = GetPostDetailKey(data.sourceUrl);

    XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");

    // Category: texts of the links inside the "hdiin" div; the print type is derived from its lower-case form.
    data.category = xePost.XPathElements(".//div[@class='hdiin']//a")
        .DescendantTexts()
        .Select(DownloadPrint.Trim)
        .zToStringValues("/");
    string category = data.category.ToLowerInvariant();
    data.printType = GetPrintType(category);

    // Title: when ExtractTitleInfos finds infos in it, the raw title is kept in originalTitle.
    data.title = xePost.XPathValue(".//div[@class='bheading']//text()").Trim(DownloadPrint.TrimChars);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.title);
    if (titleInfos.foundInfo)
    {
        data.originalTitle = data.title;
        data.title = titleInfos.title;
        data.infos.SetValues(titleInfos.infos);
    }

    string date = xePost.XPathValue(".//div[@class='datenews']//text()");
    data.creationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
    if (data.creationDate == null)
    {
        pb.Trace.WriteLine("unknow date time \"{0}\"", date);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.creationDate, date);
    }

    data.postAuthor = xePost.XPathValue(".//div[@class='argr']//a//text()");

    XXElement xe = xePost.XPathElement(".//div[@class='maincont']");

    // Image urls (src attributes) are resolved against the request url.
    data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src"))))
        .ToArray();

    // force load image to get image width and height
    if (loadDataFromWeb.request.LoadImage)
    {
        data.images = DownloadPrint.LoadImages(data.images).ToArray();
    }

    // get infos, description, language, size, nbPages
    // Texts located inside <a> elements are skipped.
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xe.DescendantTexts(node => (!(node is XElement) || ((XElement)node).Name != "a") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.title);
    data.description = textValues.description;
    data.language = textValues.language;
    data.size = textValues.size;
    data.nbPages = textValues.nbPages;
    data.infos.SetValues(textValues.infos);

    List<string> downloadLinks = new List<string>();
    foreach (XXElement xe2 in xe.XPathElements("div/div"))
    {
        // Links to image hosts / image files are not download links, e.g.
        //   http://prezup.eu  http://pixhst.com/avaxhome/27/36/002e3627.jpeg  http://www.zupmage.eu/i/R1UgqdXn4F.jpg
        //   http://i.imgur.com/Gu7hagN.jpg  http://img11.hostingpics.net/pics/591623liens.png
        //   http://www.hapshack.com/images/jUfTZ.gif  http://pixhst.com/pictures/3029467
        // URLs are not linguistic text: compare ordinally instead of with the current culture.
        downloadLinks.AddRange(
            xe2.XPathValues(".//a/@href").Where(url =>
                !url.StartsWith("http://prezup.eu", System.StringComparison.Ordinal)
                && !url.StartsWith("http://pixhst.com", System.StringComparison.Ordinal)
                && !url.EndsWith(".jpg", System.StringComparison.Ordinal)
                && !url.EndsWith("jpeg", System.StringComparison.Ordinal)
                && !url.EndsWith("png", System.StringComparison.Ordinal)
                && !url.EndsWith("gif", System.StringComparison.Ordinal)));
    }
    data.downloadLinks = downloadLinks.ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}
/// <summary>
/// Builds an <c>Ebookdz_PostDetail</c> from the post page carried by <paramref name="loadDataFromWeb"/>:
/// title, category, print type, post creation date, author, images, text values and download links.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page: http result, web request (url, LoadImage) and load date.</param>
/// <returns>The post detail that was filled.</returns>
protected override IPost GetData(LoadDataFromWeb_v4 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);

    Ebookdz_PostDetail data = new Ebookdz_PostDetail();
    data.SourceUrl = loadDataFromWeb.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;
    data.Id = GetPostDetailKey(loadDataFromWeb.WebRequest.HttpRequest);

    // <div class="body_bd">
    XXElement xePost = xeSource.XPathElement("//div[@class='body_bd']");

    // Title, e.g. "Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015".
    // When ExtractTitleInfos finds infos in it, the raw title is kept in OriginalTitle.
    data.Title = xePost.XPathValue(".//div[@id='pagetitle']//a//text()").Trim(DownloadPrint.TrimChars);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // Breadcrumb, e.g.
    //   Forum / Journaux / Presse quotidienne / Le Monde / Journal Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
    // The category keeps the breadcrumb entries except "forum" and the entry ending with the title.
    string lowerTitle = null;
    if (data.Title != null)
    {
        lowerTitle = data.Title.ToLowerInvariant();
    }
    // Without a usable title there is nothing to exclude by suffix: EndsWith(null) would throw
    // and EndsWith("") is always true, which would drop every breadcrumb entry.
    bool hasTitle = !string.IsNullOrEmpty(lowerTitle);
    data.Category = xePost.XPathElements(".//div[@id='breadcrumb']//a")
        .DescendantTexts()
        .Where(text =>
        {
            string lowerText = text.ToLowerInvariant();
            if (lowerText == "forum")
            {
                return false;
            }
            // Both sides are already invariant lower-case, so an ordinal comparison is the right one.
            return !(hasTitle && lowerText.EndsWith(lowerTitle, System.StringComparison.Ordinal));
        })
        .Select(DownloadPrint.Trim)
        .zToStringValues("/");
    string category = data.Category.ToLowerInvariant();
    data.PrintType = GetPrintType(category);

    // <div id="postlist" class="postlist restrain">
    XXElement xe = xePost.XPathElement(".//div[@id='postlist']");

    // Post date samples: "Aujourd'hui, 07h32" - "Aujourd'hui, 10h51" - "Hier, 12h55" - "22/02/2014, 21h09"
    // Texts of the post head are concatenated, skipping <a> elements; non-breaking spaces become spaces.
    XXElement xe2 = xe.XPathElement(".//div[@class='posthead']");
    string date = xe2.DescendantTexts(node => node.zGetName() != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode).zToStringValues("");
    date = date.Replace('\xA0', ' ');
    data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.LoadFromWebDate, @"d/M/yyyy, HH\hmm", @"d-M-yyyy, HH\hmm");
    if (data.PostCreationDate == null)
    {
        pb.Trace.WriteLine("unknow post creation date \"{0}\"", date);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - \"{1}\"", data.PostCreationDate, date);
    }

    data.PostAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()").Trim(DownloadPrint.TrimChars);

    // <div class="postbody">
    xe = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote/div");

    // Image urls (src attributes) are resolved against the source url.
    data.Images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(xeImg => new WebImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src"))))
        .ToArray();

    // force load image to get image width and height
    if (loadDataFromWeb.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    // get infos, description, language, size, nbPages
    // Texts located inside <a> elements are skipped.
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xe.DescendantTexts(node => (!(node is XElement) || ((XElement)node).Name != "a") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
        data.Title);
    data.Description = textValues.description;
    data.Language = textValues.language;
    data.Size = textValues.size;
    data.NbPages = textValues.nbPages;
    data.Infos.SetValues(textValues.infos);

    // Every link of the post body is kept as a download link.
    data.DownloadLinks = xe.XPathValues(".//a/@href").ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}
/// <summary>
/// Opens a new session on debrid-link.fr and keeps it in <c>_connexion</c>.
/// </summary>
/// <remarks>
/// Steps: request a new token, load the token validation page, post its login form filled
/// with <c>_login</c> / <c>_password</c>, check the success message, then save the connexion
/// to <c>_connexionFile</c>.
/// </remarks>
/// <exception cref="PBException">
/// Thrown when <c>_connexionFile</c> is null, or when the page returned after the login post
/// does not carry the expected success message.
/// </exception>
public void Connexion()
{
    if (_connexionFile == null)
    {
        throw new PBException("DebriderDebridLink connexion file is null");
    }

    pb.Trace.WriteLine("{0:dd-MM-yyyy HH:mm:ss} - new connexion to debrid-link.fr", DateTime.Now);
    if (__trace)
    {
        pb.Trace.WriteLine("DebriderDebridLink.Connexion() :");
    }

    // step 1 : ask the api for a new token
    string url = __url + string.Format("/token/{0}/new", _publicKey);
    HttpRequestParameters requestParameters = new HttpRequestParameters { Encoding = Encoding.UTF8 };
    // client time of the request, used below to compute the gap with the server time
    DateTime dt = DateTime.Now;
    Http.Http http = HttpManager.CurrentHttpManager.Load(new HttpRequest { Url = url }, requestParameters);
    BsonDocument result = BsonSerializer.Deserialize<BsonDocument>(http.ResultText);
    if (__trace)
    {
        pb.Trace.WriteLine(" result :");
        pb.Trace.WriteLine(result.zToJson());
    }

    DebridLinkConnexion connexion = new DebridLinkConnexion();
    connexion.ConnexionTime = dt;
    connexion.Token = result.zGet("value.token").zAsString();
    string validTokenUrl = result.zGet("value.validTokenUrl").zAsString();
    connexion.Key = result.zGet("value.key").zAsString();
    int ts = result.zGet("ts").zAsInt();
    connexion.ClientTime = dt;
    connexion.ServerTime = zdate.UnixTimeStampToDateTime(ts);
    connexion.ServerTimeGap = connexion.ServerTime - dt;
    connexion.ConnexionLifetime = _connexionLifetime;
    // the connexion is considered ended 5 minutes before its lifetime elapses
    connexion.EndConnexionTime = connexion.ConnexionTime + GetConnexionTimespan(connexion.ConnexionLifetime) - TimeSpan.FromMinutes(5);
    if (__trace)
    {
        pb.Trace.WriteLine(" request time : \"{0:dd/MM/yyyy HH:mm:ss}\"", dt);
        pb.Trace.WriteLine(" result : \"{0}\"", result.zGet("result").zAsString());
        pb.Trace.WriteLine(" token : \"{0}\"", connexion.Token);
        pb.Trace.WriteLine(" validTokenUrl : \"{0}\"", validTokenUrl);
        pb.Trace.WriteLine(" key : \"{0}\"", connexion.Key);
        pb.Trace.WriteLine(" server time : {0} - {1:dd/MM/yyyy HH:mm:ss}", ts, connexion.ServerTime);
        pb.Trace.WriteLine(" server time gap : {0}", connexion.ServerTimeGap);
    }

    // step 2 : load the token validation page, it contains the login form
    // validTokenUrl : "https://secure.debrid-link.fr/user/2_2d481d8991e4db60f43d24d9d387b75699db7a0157182967/login"
    // <form action='' method='POST' class='form-horizontal'>
    //   <input type='text' class='form-control' name='user'>
    //   <input type='password' class='form-control' name='password'>
    //   <select name='sessidTime' class='form-control'>
    //     <option value='all' selected='selected'> Toujours</option>
    //     ...
    //   </select>
    //   <input type='hidden' value='10_a3a206c4398f195283a4843d44f017f3211275e443747173' name='token'>
    //   <button type='submit' name='authorizedToken' value='1' class='btn btn-dl'>Envoyer</button>
    //   <input type='submit' style='display:none'>
    http = HttpManager.CurrentHttpManager.Load(new HttpRequest { Url = validTokenUrl }, requestParameters);
    XXElement xeSource = http.zGetXDocument().zXXElement();

    // the redirect script that used to give the login url is no longer in the html page
    // (24/03/2015), so the login url is fixed and only overridden by a non-empty form action
    url = "https://secure.debrid-link.fr/login";
    if (__trace)
    {
        pb.Trace.WriteLine(" login url : \"{0}\"", url);
    }

    XXElement xeForm = xeSource.XPathElement("//form");
    string action = xeForm.AttribValue("action");
    if (__trace)
    {
        pb.Trace.WriteLine(" form action : \"{0}\"", action);
    }
    if (!string.IsNullOrEmpty(action))
    {
        url = action;
    }

    HttpRequestMethod method = HttpTools.GetHttpRequestMethod(xeForm.AttribValue("method"));
    if (__trace)
    {
        pb.Trace.WriteLine(" form method : {0}", method);
    }

    // step 3 : build the form content, for example
    // "user=xxx&password=xxx&sessidTime=all&token=10_56b5...&authorizedToken=1"
    StringBuilder content = new StringBuilder();
    bool first = true;
    foreach (XXElement xe in xeForm.DescendantFormItems())
    {
        string name = xe.AttribValue("name");
        if (name == null)
        {
            continue;
        }

        string value;
        if (name == "user")
        {
            value = _login;
        }
        else if (name == "password")
        {
            value = _password;
        }
        else if (name == "sessidTime")
        {
            value = GetConnexionLifetime(_connexionLifetime);
        }
        else
        {
            value = xe.AttribValue("value");
        }

        if (!first)
        {
            content.Append('&');
        }
        // percent-encode name and value, otherwise a login or password containing
        // '&', '=', '+', '%' or non-ascii characters corrupts the posted form;
        // a null value is sent as an empty string
        content.Append(Uri.EscapeDataString(name));
        content.Append('=');
        content.Append(Uri.EscapeDataString(value ?? ""));

        if (__trace)
        {
            // never write the password to the trace
            if (name != "password")
            {
                pb.Trace.WriteLine(" {0}={1}", name, value);
            }
            else
            {
                pb.Trace.WriteLine(" {0}=xxx", name);
            }
        }
        first = false;
    }

    http = HttpManager.CurrentHttpManager.Load(new HttpRequest { Url = url, Method = method, Content = content.ToString() }, requestParameters);

    // step 4 : check the login result
    // <div class='panel-body'>
    //   <div class='alert alert-success'>...</div>
    // </div>
    xeSource = http.zGetXDocument().zXXElement();
    string loginMessage = xeSource.ExplicitXPathValue("//div[@class='alert alert-success']//text()");
    if (__trace)
    {
        pb.Trace.WriteLine(" login message : \"{0}\"", loginMessage);
    }
    if (loginMessage == null || !loginMessage.Trim().StartsWith("Vous avez été connecté avec succès", StringComparison.InvariantCultureIgnoreCase))
    {
        throw new PBException("DebriderDebridLink.Connect() : wrong login message \"{0}\"", loginMessage);
    }

    // step 5 : the login succeeded, save the connexion and keep it
    connexion.zSave(_connexionFile);
    _connexion = connexion;
}
/// <summary>
/// Reads the next article element of <c>_xmlEnum</c> and fills <c>_postHeader</c> from it.
/// </summary>
/// <returns>
/// <c>true</c> when an article was read, <c>false</c> when <c>_xmlEnum</c> has no more element.
/// </returns>
protected override bool _MoveNext()
{
    // exactly one article is consumed per call
    if (!_xmlEnum.MoveNext())
    {
        return(false);
    }

    XXElement article = _xmlEnum.Current;
    _postHeader = new ZoneEbooksPostHeader();
    _postHeader.sourceUrl = _sourceUrl;

    // title link :
    //<h2 class="title">
    //  <a href="http://zone-ebooks.com/journaux/le-parisien-journal-de-paris-supp-economie-du-lundi-07-octobre-pdf.html"
    //     rel="bookmark" title="Lien permanent: Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre">
    //     Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre</a>
    //</h2>
    XXElement titleLink = article.XPathElement(".//*[@class='title']//a");
    _postHeader.url = titleLink.XPathValue("@href");
    _postHeader.title = titleLink.XPathValue(".//text()");

    // relative post date, kept as raw text in infos :
    //<div class="post-date">
    //  <span class="ext">Il y a 2 heures</span>
    //</div>
    // seen values : "Il y a 57 secondes", "Il y a 3 minutes", "Il y a 1 heure", "Il y a 1 jour",
    //               "Il y a 2 semaines", "Il y a 2 mois"
    string relativeDate = article.XPathValue(".//div[@class='post-date']//text()");
    if (relativeDate != null)
    {
        _postHeader.infos.Add("postDate", new ZString(relativeDate));
    }

    // author and category :
    //<div class="post-info">
    //  <span class="a">par
    //    <a href="http://zone-ebooks.com/author/admin" title="Articles par admin ">admin</a>
    //  </span>
    //  dans
    //  <a href="http://zone-ebooks.com/category/journaux" rel="tag" title="Journaux (158 sujets)">Journaux</a>
    //</div>
    XXElement postInfo = article.XPathElement(".//div[@class='post-info']");
    _postHeader.postAuthor = postInfo.XPathValue(".//a//text()");
    _postHeader.category = postInfo.XPathValue("./a//text()");

    // images of the post content, without the images listed in _imagesToSkip
    //<div class="post-content clear-block">
    //  <img title="Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre PDF"
    //       alt="..." src="http://i.imgur.com/f7aWDHF.jpg" width="362" height="446" />
    // known decoration images : "infos sur le livre" http://i.imgur.com/GTPfRoB.png
    //                           "description"        http://i.imgur.com/Ruuh4CP.png
    XXElement postContent = article.XPathElement(".//div[starts-with(@class, 'post-content')]");
    _postHeader.images =
        (from imageNode in postContent.DescendantNodes(n => XmlDescendant.ImageFilter(n))
         select new pb.old.ImageHtml((XElement)imageNode, _url) into image
         where !_imagesToSkip.ContainsKey(image.Source)
         select image).ToList();

    if (_loadImage)
    {
        pb.old.Http_v2.LoadImageFromWeb(_postHeader.images);
    }

    return(true);
}
/// <summary>
/// Manual test of the debrid-link.fr connexion sequence : new token, token validation page,
/// login post, then a signed "/account/infos" api request. Each response is exported to a file
/// of the export directory and every step is traced.
/// </summary>
/// <remarks>
/// <c>DebridLinkFr_v2.Trace</c> is switched off while the test runs and restored at the end,
/// including when a step throws.
/// </remarks>
public static void Test_Connexion_01()
{
    string exportDirectory = @"c:\pib\drive\google\dev_data\exe\runsource\download\sites\debrid-link.fr\model\login\new";
    bool trace = DebridLinkFr_v2.Trace;
    DebridLinkFr_v2.Trace = false;
    try
    {
        Trace.WriteLine("test connexion to debrid-link.fr");
        Trace.WriteLine(" export directory : \"{0}\"", exportDirectory);
        XmlConfig localConfig = GetLocalConfig();
        string publicKey = localConfig.GetExplicit("DownloadAutomateManager/DebridLink/PublicKey");
        Trace.WriteLine(" publicKey : \"{0}\"", publicKey);
        Trace.WriteLine();

        // step 1 : new token
        string newTokenUrl = string.Format("https://debrid-link.fr/api/token/{0}/new", publicKey);
        string exportFile = "01_debrid-link.fr_api_new_token.txt";
        Trace.WriteLine(" get new token key : \"{0}\"", newTokenUrl);
        Trace.WriteLine(" export to file : \"{0}\"", exportFile);
        DateTime requestTime = DateTime.Now;
        Http.Http http = Http.Http.LoadAsText(new HttpRequest { Url = newTokenUrl }, exportFile: zPath.Combine(exportDirectory, exportFile));
        BsonDocument result = BsonDocument.Parse(http.ResultText);
        int serverTs = result.zGet("ts").zAsInt();
        DateTime serverTime = zdate.UnixTimeStampToDateTime(serverTs);
        // gap between server and client clocks, applied later to the signature timestamp
        TimeSpan serverTimeGap = serverTime - requestTime;
        Trace.WriteLine(" server time : request time {0} server timestamp {1} server time {2} gap {3}", requestTime, serverTs, serverTime, serverTimeGap);
        Trace.WriteLine(" result :");
        Trace.WriteLine(result.zToJson());
        Trace.WriteLine();

        // step 2 : token validation page
        string validTokenUrl = result.zGet("value.validTokenUrl").zAsString();
        exportFile = "02_debrid-link.fr_api_valid_token.html";
        Trace.WriteLine(" load valid token url : \"{0}\"", validTokenUrl);
        Trace.WriteLine(" export to file : \"{0}\"", exportFile);
        HttpRequestParameters httpRequestParameters = new HttpRequestParameters { Encoding = Encoding.UTF8 };
        http = Http.Http.LoadAsText(new HttpRequest { Url = validTokenUrl }, httpRequestParameters, exportFile: zPath.Combine(exportDirectory, exportFile));
        Trace.WriteLine();

        // step 3 : login post
        string loginUrl = "https://debrid-link.fr/login";
        exportFile = "03_debrid-link.fr_login.html";
        Trace.WriteLine(" send login info : \"{0}\"", loginUrl);
        Trace.WriteLine(" export to file : \"{0}\"", exportFile);
        // percent-encode the credentials so that '&', '=', '+', '%' or non-ascii characters
        // do not corrupt the posted form
        string content = string.Format("user={0}&password={1}&understand=true",
            Uri.EscapeDataString(localConfig.GetExplicit("DownloadAutomateManager/DebridLink/Login") ?? ""),
            Uri.EscapeDataString(localConfig.GetExplicit("DownloadAutomateManager/DebridLink/Password") ?? ""));
        // the trace shows the same content with masked credentials
        string traceContent = string.Format("user={0}&password={1}&understand=true", "xxxxxx", "xxxxxx");
        Trace.WriteLine(" content : \"{0}\"", traceContent);
        http = Http.Http.LoadAsText(new HttpRequest { Url = loginUrl, Method = HttpRequestMethod.Post, Content = content }, httpRequestParameters, exportFile: zPath.Combine(exportDirectory, exportFile));
        Trace.WriteLine();

        // the login is considered ok when the page contains <div class="alert alert-success">
        XXElement xe = http.zGetXDocument().zXXElement();
        xe = xe.XPathElement("//div[@class='alert alert-success']");
        Trace.WriteLine(" verify login : {0}", xe.XElement != null ? "login ok" : "login error");
        Trace.WriteLine();

        // step 4 : signed api request
        string request = "/account/infos";
        string urlRequest = "https://debrid-link.fr/api" + request;
        exportFile = "04_debrid-link.fr_account_infos.txt";
        Trace.WriteLine(" get account infos : \"{0}\"", urlRequest);
        Trace.WriteLine(" export to file : \"{0}\"", exportFile);
        string key = result.zGet("value.key").zAsString();
        DateTime time = DateTime.Now + serverTimeGap;
        int timestamp = zdate.DateTimeToUnixTimeStamp(time);
        string signature = DebridLinkFr_v2.GetSignature(timestamp, request, key);
        Trace.WriteLine(" signature : timestamp {0} request \"{1}\" key \"{2}\" signature \"{3}\"", timestamp, request, key, signature);
        string token = result.zGet("value.token").zAsString();
        httpRequestParameters = new HttpRequestParameters { Encoding = Encoding.UTF8 };
        httpRequestParameters.Headers["x-dl-token"] = token;
        httpRequestParameters.Headers["x-dl-sign"] = signature;
        httpRequestParameters.Headers["x-dl-ts"] = timestamp.ToString();
        Trace.WriteLine(" set header : \"{0}\" = \"{1}\"", "x-dl-token", token);
        Trace.WriteLine(" set header : \"{0}\" = \"{1}\"", "x-dl-sign", signature);
        Trace.WriteLine(" set header : \"{0}\" = \"{1}\"", "x-dl-ts", timestamp);
        DateTime dt = DateTime.Now;
        http = Http.Http.LoadAsText(new HttpRequest { Url = urlRequest }, httpRequestParameters, exportFile: zPath.Combine(exportDirectory, exportFile));
        result = BsonDocument.Parse(http.ResultText);

        // control server time
        int newTimestamp = result.zGet("ts").zAsInt();
        DateTime newServerTime = zdate.UnixTimeStampToDateTime(newTimestamp);
        TimeSpan newServerTimeGap = newServerTime - dt;
        Trace.WriteLine(" new server time : {0} gap {1} timestamp {2} timestamp gap {3}", newServerTime, newServerTimeGap, newTimestamp, timestamp - newTimestamp);
        Trace.WriteLine(" result :");
        Trace.WriteLine(result.zToJson());
        Trace.WriteLine();
    }
    finally
    {
        // restore the trace flag even when a request or a parse failed
        DebridLinkFr_v2.Trace = trace;
    }
}
/// <summary>
/// Builds a <c>Telechargementz_PostDetail</c> from the post page loaded by
/// <paramref name="loadDataFromWeb"/> : author, creation date, title, first image,
/// download links, text values and print type.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page; gives the xml document, the request and the load date.</param>
/// <returns>The filled post detail.</returns>
protected override IPost GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);

    Telechargementz_PostDetail data = new Telechargementz_PostDetail();
    data.SourceUrl = loadDataFromWeb.request.Url;
    data.LoadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.Id = GetPostDetailKey(data.SourceUrl);

    XXElement post = root.XPathElement("//div[@id='dle-content']");
    data.PostAuthor = post.XPathValue(".//div[@class='title-info']//a//text()");

    // the date is the text following the author link, for example ", 26.12.14"
    string rawDate = post.XPathValue(".//div[@class='title-info']//a/following-sibling::text()");
    if (rawDate == null)
    {
        pb.Trace.WriteLine("creationDate not found \"{0}\"", data.SourceUrl);
    }
    else
    {
        char[] dateSeparators = { ' ', ',' };
        data.PostCreationDate = zdate.ParseDateTimeLikeToday(rawDate.Trim(dateSeparators), loadDataFromWeb.loadFromWebDate, "dd.MM.yy");
        if (data.PostCreationDate == null)
        {
            pb.Trace.WriteLine("unknow date time \"{0}\"", rawDate);
        }
        if (__trace)
        {
            pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.PostCreationDate, rawDate);
        }
    }

    // title : first trimmed text of the post-title div, then split into title + infos
    XXElement titleElement = post.XPathElement(".//div[@class='post-title']");
    data.Title = titleElement.DescendantTexts().Select(DownloadPrint.Trim).FirstOrDefault();
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.infos.SetValues(titleInfos.infos);
    }

    XXElement news = post.XPathElement(".//div[starts-with(@id, 'news-id-')]");
    if (news.XElement == null)
    {
        pb.Trace.WriteLine("element not found \".//div[starts-with(@id, 'news-id-')]\"");
    }

    // only the first image of the post is kept
    WebImage firstImage = news
        .DescendantNodes(n => XmlDescendant.ImageFilter(n))
        .Select(imageNode => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, imageNode.zAttribValue("src"))))
        .FirstOrDefault();
    if (firstImage != null)
    {
        data.Images = new WebImage[] { firstImage };
    }

    // force load image to get image width and height
    if (loadDataFromWeb.request.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    data.DownloadLinks = news.XPathValues(".//a/@href").ToArray();

    // get infos, description, language, size, nbPages from every text not located in an <a>
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        news.DescendantTexts(
            node =>
            {
                XElement element = node as XElement;
                if (element != null && element.Name == "a")
                {
                    return(XNodeFilter.SkipNode);
                }
                return(XNodeFilter.SelectNode);
            }),
        data.Title);
    data.description = textValues.description;
    data.language = textValues.language;
    data.size = textValues.size;
    data.nbPages = textValues.nbPages;
    data.infos.SetValues(textValues.infos);

    // print type, first match wins : comics ("bd" key), print ("editeur" = "presse fr"), book ("isbn" key)
    data.PrintType = PrintType.UnknowEBook;
    bool hasComicsKey = data.infos.ContainsKey("Bd") || data.infos.ContainsKey("bd") || data.infos.ContainsKey("BD");
    if (hasComicsKey)
    {
        data.PrintType = PrintType.Comics;
    }
    else if (data.infos.ContainsKey("editeur")
             && data.infos["editeur"] is ZString
             && ((string)data.infos["editeur"]).ToLowerInvariant() == "presse fr")
    {
        // Editeur : Presse fr
        data.PrintType = PrintType.Print;
    }
    else if (data.infos.ContainsKey("isbn"))
    {
        data.PrintType = PrintType.Book;
    }

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return(data);
}
/// <summary>
/// Builds a <c>Cdefi_Detail</c> from a loaded institution page : name, address and the
/// "label : value" lines of the second <c>wPage</c> div.
/// </summary>
/// <param name="webResult">Loaded page; gives the document, the request and the load date.</param>
/// <returns>The filled detail.</returns>
private static Cdefi_Detail GetData(WebResult webResult)
{
    XXElement root = webResult.Http.zGetXDocument().zXXElement();

    Cdefi_Detail data = new Cdefi_Detail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetKey(webResult.WebRequest.HttpRequest);

    XXElement bloc = root.XPathElement("//div[@id='body']//div[@class='wBloc']");
    data.Institution = Cdefi.Trim(bloc.XPathValue(".//div[@class='schoolContentInfo']//div[@class='schoolContentInfo_infos verticalCenterToContainer']//h2/text()"));
    data.Address = Cdefi.Trim(bloc.XPathValue(".//div[@class='wPage'][1]//span[@class='editable']//text()"));

    const StringComparison ignoreCase = StringComparison.InvariantCultureIgnoreCase;
    XXElement infoPage = bloc.XPathElement(".//div[@class='wPage'][2]");
    foreach (string line in infoPage.DescendantTexts().Select(text => Cdefi.Trim(text)))
    {
        // every handled line is "label : value"; a line without ':' sets nothing
        int colon = line.IndexOf(':');
        if (colon == -1)
        {
            continue;
        }
        string rawValue = line.Substring(colon + 1);

        if (line.StartsWith("Nom du directeur", ignoreCase))
        {
            data.Director = Cdefi.Trim(rawValue);
        }
        else if (line.StartsWith("Département", ignoreCase))
        {
            data.Department = Cdefi.Trim(rawValue);
        }
        else if (line.StartsWith("Numéro de téléphone", ignoreCase))
        {
            data.Tel = Cdefi.Trim(rawValue);
        }
        else if (line.StartsWith("Adresse email de contact", ignoreCase))
        {
            data.Mail = Cdefi.Trim(rawValue);
        }
        else if (line.StartsWith("Adresse du site internet", ignoreCase))
        {
            // the value itself contains ':' (http://...), only the first ':' separates the label
            data.WebSite = Cdefi.Trim(rawValue);
        }
        else if (line.StartsWith("Nature de l'établissement", ignoreCase))
        {
            data.InstitutionType = Cdefi.Trim(rawValue);
        }
    }

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return(data);
}
/// <summary>
/// Posts the <c>navbar_loginform</c> form of an ebookdz page with the login and the md5 hashed
/// password read from the local config.
/// </summary>
/// <param name="xeSource">Root element of a loaded page containing the login form.</param>
/// <returns>The <c>Http</c> result of the login request; the caller checks whether the login succeeded.</returns>
/// <exception cref="PBException">Thrown when the form <c>navbar_loginform</c> is not found in the page.</exception>
public static Http Login(XXElement xeSource)
{
    XmlConfig localConfig = new XmlConfig(XmlConfig.CurrentConfig.GetExplicit("LocalConfig"));
    string login = localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Login");
    // the form sends the password as a lowercase hexadecimal md5 hash
    string hashPassword = Crypt.ComputeMD5Hash(localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Password")).zToHex(lowercase: true);

    // <base href="http://www.ebookdz.com/forum/" />
    string urlBase = xeSource.XPathValue("//head//base/@href");

    XXElement xeForm = xeSource.XPathElement("//form[@id='navbar_loginform']");
    if (xeForm.XElement == null)
    {
        throw new PBException("element form not found \"//form[@id='navbar_loginform']\"");
    }

    // the form action is resolved against the <base> url of the page
    string urlForm = zurl.GetUrl(urlBase, xeForm.XPathValue("@action"));
    string method = xeForm.XPathValue("@method");

    StringBuilder sb = new StringBuilder();
    bool first = true;
    foreach (XXElement xeInput in xeForm.XPathElements(".//input"))
    {
        string name = xeInput.XPathValue("@name");
        if (name == null)
        {
            continue;
        }

        string value;
        if (name == "vb_login_username")
        {
            value = login;
        }
        else if (name == "vb_login_password")
        {
            // the clear password is never sent, only its md5 hash
            value = null;
        }
        else if (name == "vb_login_md5password" || name == "vb_login_md5password_utf")
        {
            value = hashPassword;
        }
        else
        {
            value = xeInput.XPathValue("@value");
        }

        if (!first)
        {
            sb.Append("&");
        }
        // percent-encode name and value, otherwise a login or a hidden value containing
        // '&', '=', '+', '%' or non-ascii characters corrupts the posted form;
        // a null value is sent as an empty string
        sb.Append(Uri.EscapeDataString(name));
        sb.Append('=');
        sb.Append(Uri.EscapeDataString(value ?? ""));
        first = false;
    }

    HttpRequest httpRequest = new HttpRequest { Url = urlForm, Content = sb.ToString(), Method = Http.GetHttpRequestMethod(method) };
    HttpRequestParameters httpRequestParameters = new HttpRequestParameters();
    return(HttpManager.CurrentHttpManager.Load(httpRequest, httpRequestParameters));
}
/// <summary>
/// Creates <c>_data</c> and fills it from <c>_header</c> (when available) and from the
/// <c>content</c> div of <c>_xeSource</c>.
/// </summary>
/// <remarks>
/// When the name or the phone found in the page differs from the header value, the header
/// value is kept in <c>headerName</c> / <c>headerPhone</c> and replaced by the page value.
/// A value missing from the page no longer raises a <c>NullReferenceException</c> in this
/// method : the header value is kept and the text stays null.
/// </remarks>
protected void InitXml()
{
    _data = new Gesat_Company();
    _data.url = _url;
    _data.loadFromWebDate = DateTime.Now;
    if (_header != null)
    {
        _data.name = _header.name;
        _data.type = _header.type;
        _data.location = _header.location;
        _data.phone = _header.phone;
        _data.infos = _header.infos;
    }

    // <div class="PAGES" id="content">
    XXElement xe = _xeSource.XPathElement(".//div[@id='content']");

    // <h1><span>ESAT BETTY LAUNAY-MOULIN VERT >></span><br />Coordonnées & activités</h1>
    string name = _trimFunc2(xe.XPathValue(".//h1//text()"));
    // name is null when the page has no h1 text : keep the header name in that case
    if (name != null && !name.Equals(_data.name, StringComparison.InvariantCultureIgnoreCase))
    {
        _data.headerName = _data.name;
        _data.name = name;
    }

    // <div class="BLOC B100 ACCROCHE">
    //   <div class="CONTENU-BLOC">Cet E.S.A.T. est ouvert depuis 1989 et accueille 55 personnes reconnues travailleurs handicapés. Il est situé dans la ville de
    //     <a href="/Gesat/Hauts-de-Seine,92/Bois-Colombes,35494/" title="Bois-Colombes // Les ESAT et EA de la ville">Bois-Colombes</a> (
    //     <a href="/Gesat/Hauts-de-Seine,92/" title="Hauts-de-Seine // Les ESAT et EA du département">Hauts-de-Seine</a>)
    //   </div></div>
    string description = xe.XPathConcatText(".//div[@class='BLOC B100 ACCROCHE']//text()", resultFunc: _trimFunc1);
    if (description != null)
    {
        // remove line breaks and tabs of the html source
        description = description.Replace("\r", "").Replace("\n", "").Replace("\t", "");
    }
    _data.descryption = description;
    _data.city = _trimFunc1(xe.XPathValue(".//div[@class='BLOC B100 ACCROCHE']//a[1]//text()"));
    _data.department = _trimFunc1(xe.XPathValue(".//div[@class='BLOC B100 ACCROCHE']//a[2]//text()"));

    // <div class="ADRESSE">78, RUE RASPAIL<br />92270 Bois-Colombes</div>
    string address = xe.XPathConcatText(".//div[@class='ADRESSE']//text()", " ", itemFunc: _trimFunc1);
    if (address != null)
    {
        address = address.Replace("\r", "").Replace("\n", "").Replace("\t", "");
    }
    _data.address = address;

    // <div class="TEL">01 47 86 11 48</div>
    string phone = _trimFunc1(xe.XPathValue(".//div[@class='TEL']//text()"));
    // phone is null when the page has no TEL text : keep the header phone in that case
    if (phone != null && !phone.Equals(_data.phone, StringComparison.InvariantCultureIgnoreCase))
    {
        _data.headerPhone = _data.phone;
        _data.phone = phone;
    }

    // <div class="FAX">01 47 82 42 64</div>
    _data.fax = _trimFunc1(xe.XPathValue(".//div[@class='FAX']//text()"));

    // the '@' of the email is an image, the texts around it are joined with "@"
    // <div class="EMAIL">production.launay<img border="0" alt="arobase.png" src="/images/bulles/arobase.png" style=" border: 0;" />lemoulinvert.org</div>
    _data.email = xe.XPathConcatText(".//div[@class='EMAIL']//text()", "@", itemFunc: _trimFunc1);

    // <div class="WWW"><a href="http://www.esat-b-launay.com" target="_blank">www.esat-b-launay.com</a></div>
    _data.webSite = _trimFunc1(xe.XPathValue(".//div[@class='WWW']//a/@href"));

    // <div class="BLOC-FICHE BLOC-ACTIVITES">
    //   <dl><dt>Conditionnement, travaux à façon</dt></dl>
    //   <dl><dt>Assemblage, montage</dt></dl>
    //   ...
    // </div>
    _data.activities = xe.XPathValues(".//div[@class='BLOC-FICHE BLOC-ACTIVITES']//dl//text()").Select(_trimFunc1).ToArray();
}
// detail get data
/// <summary>
/// Builds a <c>Test_PostDetail</c> from a loaded post page : creation date, title, category,
/// print type, image, description, infos and download links.
/// </summary>
/// <param name="webResult">Loaded page; gives the document, the request and the load date.</param>
/// <returns>The filled post detail.</returns>
protected override Test_PostDetail GetDetailData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    Test_PostDetail data = new Test_PostDetail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = _GetDetailKey(webResult.WebRequest.HttpRequest);

    XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

    // creation date; the last found date is kept in _lastPostDate and given to the next GetDate() call
    XXElement xe = xePost.XPathElement(".//table[@id='post-head']");
    string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
    data.PostCreationDate = GetDate(dates, _lastPostDate);
    if (data.PostCreationDate != null)
    {
        _lastPostDate = new Date(data.PostCreationDate.Value);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());
    }

    // title, then split into title + infos
    data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // category links joined with "/"
    data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    xe = xePost.XPathElement(".//div[@class='entry']");
    data.Images = new WebImage[]
    {
        new WebImage(zurl.GetUrl(data.SourceUrl, xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src")))
    };

    // get infos, description, language, size, nbPages from the texts of the <p> elements;
    // the filter returns XNodeFilter.Stop on a text starting with "lien " or on <p class="submeta">
    // (presumably ending the traversal there - confirm against DescendantTexts)
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xe.XPathElements(".//p").DescendantTexts(
            node =>
            {
                XText textNode = node as XText;
                if (textNode != null && textNode.Value.Trim().StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                {
                    return(XNodeFilter.Stop);
                }
                XElement element = node as XElement;
                if (element != null && element.Name == "p" && element.zAttribValue("class") == "submeta")
                {
                    return(XNodeFilter.Stop);
                }
                return(XNodeFilter.SelectNode);
            })
        .Select(DownloadPrint.ReplaceChars)
        .Select(DownloadPrint.TrimWithoutColon),
        data.Title);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // download links : href of the <a> elements selected by the filter, which returns
    // XNodeFilter.Stop on <p class="submeta">
    data.DownloadLinks = xe.DescendantNodes(
        node =>
        {
            XElement element = node as XElement;
            if (element == null)
            {
                return(XNodeFilter.DontSelectNode);
            }
            if (element.Name == "a")
            {
                return(XNodeFilter.SelectNode);
            }
            if (element.Name != "p")
            {
                return(XNodeFilter.DontSelectNode);
            }
            XAttribute cssClass = element.Attribute("class");
            if (cssClass == null || cssClass.Value != "submeta")
            {
                return(XNodeFilter.DontSelectNode);
            }
            return(XNodeFilter.Stop);
        })
        .Select(node => ((XElement)node).Attribute("href"))
        // an <a> without href (named anchor) is not a link; XElement.Attribute() returns null for it
        .Where(href => href != null)
        .Select(href => href.Value)
        .ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return(data);
}
/// <summary>
/// Parses a forum thread-list page into an <c>Ebookdz_HeaderPage</c>:
/// the URL of the next page plus one <c>Ebookdz_PostHeader</c> (title and detail URL) per thread.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page: parsed document, request and load date.</param>
/// <returns>The header page, returned through its <c>IKeyData</c> interface.</returns>
public static IKeyData<int> GetForumHeaderPageData(LoadDataFromWeb_v4 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);
    string url = loadDataFromWeb.WebRequest.HttpRequest.Url;

    Ebookdz_HeaderPage data = new Ebookdz_HeaderPage();
    data.SourceUrl = url;
    data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;

    // Next page link, sample markup:
    //   <div id="above_threadlist" class="above_threadlist">
    //     <div class="threadpagenav">
    //       <span class="prev_next">
    //         <a rel="next" href="forumdisplay.php?f=74&page=2&s=..." title="Page suivante - ...">
    string nextPageHref = xeSource.XPathValue("//div[@id='above_threadlist']//span[@class='prev_next']//a[@rel='next']/@href");
    data.UrlNextPage = GetUrl(zurl.GetUrl(url, nextPageHref));

    // Thread list, sample markup:
    //   <div id="threadlist" class="threadlist">
    //     <ol id="threads" class="threads">
    //       <li> ... <div class="threadinfo"> <div class="inner">
    //         <a class="title" href="showthread.php?t=111210&s=..." id="thread_title_111210">L'OPINION du mardi 20 janvier 2015</a>
    List<Ebookdz_PostHeader> headers = new List<Ebookdz_PostHeader>();
    foreach (XXElement xeHeader in xeSource.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li"))
    {
        XXElement xeTitleLink = xeHeader.XPathElement(".//div[@class='threadinfo']//a[@class='title']");

        Ebookdz_PostHeader header = new Ebookdz_PostHeader();
        header.SourceUrl = url;
        header.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;
        header.Title = xeTitleLink.XPathValue(".//text()");
        // the thread link is resolved against the page url before being passed to GetUrl
        header.UrlDetail = GetUrl(zurl.GetUrl(url, xeTitleLink.XPathValue("@href")));
        headers.Add(header);
    }
    data.PostHeaders = headers.ToArray();

    return (IKeyData<int>)data;
}
/// <summary>
/// Builds a <c>TelechargementPlus_PostDetail</c> from the current document:
/// title and infos, creation date, category, images, text values and download links.
/// </summary>
/// <returns>The populated post detail.</returns>
protected override TelechargementPlus_PostDetail GetData()
{
    // Page layout this method reads (abridged sample):
    //   <div id="dle-content">
    //     <div class="heading"><div class="binner">
    //       <h1>Advanced Creation Photoshop ... [Lien Direct] Gratuit</h1>
    //       <div class="storeinfo">
    //         <a href=".../2013/10/14/">Aujourd'hui, 11:59</a> | Catégorie:
    //         <a href=".../e-book-magazines/">E-Book / Magazines</a>, <a ...>Journaux</a>, <a ...>Magazines</a>
    //       </div>
    //     </div></div>
    //     <div class="maincont"><div class="binner"> ... <div class="story-text">
    //       <span id="post-img"> cover image, "Editeur :", "Date de sortie :", description lines </span>
    //       <span id="post-img"> <div id="news-id-86887"> hoster names, <a href="http://clz.to/..."><img ... /></a> ... </div> </span>

    // spans holding the post body: images, descriptive text and download links
    const string postBodyXPath = ".//span[@id='post-img']";

    XXElement root = new XXElement(GetXmlDocument().Root);

    TelechargementPlus_PostDetail data = new TelechargementPlus_PostDetail();
    data.sourceUrl = Url;
    data.loadFromWebDate = DateTime.Now;

    XXElement post = root.XPathElement("//div[@id='dle-content']");

    // Heading: title (infos extracted from it go to data.infos), creation date and category.
    XXElement heading = post.XPathElement(".//div[@class='heading']//div[@class='binner']");
    data.title = TelechargementPlus.ExtractTextValues(
        data.infos,
        TelechargementPlus.TrimFunc1(heading.XPathValue(".//text()")));
    data.creationDate = TelechargementPlus.ParseDateTime(heading.XPathValue(".//a//text()"));
    // The first two texts of "storeinfo" are skipped, then the root category,
    // the "Catégorie:" label and empty strings are dropped.
    data.category = heading.XPathElements(".//div[@class='storeinfo']")
        .DescendantTexts()
        .Skip(2)
        .Select(TelechargementPlus.TrimFunc1)
        .Where(text => !(text == "E-Book / Magazines" || text == "Catégorie:" || text == ""))
        .zToStringValues("/");

    XXElement story = post.XPathElement(".//div[@class='maincont']//div[@class='binner']//div[@class='story-text']");

    // Images: <a> elements are skipped by the filter, and images listed in ImagesToSkip are removed.
    data.images = story.XPathElements(postBodyXPath)
        .DescendantNodes(node => XmlDescendant.ImageFilter(
            node,
            inner => inner is XElement && ((XElement)inner).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode))
        .Select(imageNode => new pb.old.ImageHtml((XElement)imageNode, Url))
        .Where(image => !TelechargementPlus.ImagesToSkip.ContainsKey(image.Source))
        .ToList();
    if (_loadImage)
    {
        pb.old.Http_v2.LoadImageFromWeb(data.images);
    }

    // Text values: every text of the post body, <a> elements being skipped.
    data.SetTextValues(
        story.XPathElements(postBodyXPath)
            .DescendantTexts(node => node is XElement && ((XElement)node).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode));

    // Download links: href of every link of the post body.
    data.downloadLinks.AddRange(story.XPathValues(postBodyXPath + "//a/@href"));

    return data;
}
/// <summary>
/// Builds a <c>MagazinesGratuits_PostDetail</c> from a downloaded post detail page:
/// key, post date, title, category, print type, cover image, description/infos and download links.
/// </summary>
/// <param name="webResult">Result of the web request (parsed document, request and load date).</param>
/// <returns>The populated post detail.</returns>
protected override MagazinesGratuits_PostDetail GetDetailData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    MagazinesGratuits_PostDetail data = new MagazinesGratuits_PostDetail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Key = _GetDetailKey(webResult.WebRequest.HttpRequest);

    XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

    // Post date: the text fragments of the "head-date" cell are handed to GetDate
    // together with the last post date seen by this instance.
    XXElement xeHead = xePost.XPathElement(".//table[@id='post-head']");
    string[] dates = xeHead.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
    data.PostCreationDate = GetDate(dates, _lastPostDate);
    if (data.PostCreationDate != null)
    {
        // remembered so the next call can pass it to GetDate
        _lastPostDate = new Date(data.PostCreationDate.Value);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());
    }

    // Title: when extra infos are found in the title, keep the raw title in OriginalTitle.
    data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // Category: texts of the category links joined with "/", e.g. "Ebooks en Epub/Livre".
    data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a")
        .DescendantTexts()
        .Select(DownloadPrint.Trim)
        .zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    XXElement xeEntry = xePost.XPathElement(".//div[@class='entry']");

    // Cover image, optionally loaded to get its width and height.
    string imageSrc = xeEntry.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src");
    data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, imageSrc)) };
    if (webResult.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    // Infos and description: texts of the <p> elements, stopping at the first text
    // starting with "lien " or at <p class="submeta">.
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xeEntry.XPathElements(".//p").DescendantTexts(
            node =>
            {
                XText textNode = node as XText;
                if (textNode != null
                    && textNode.Value.Trim().StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                {
                    return XNodeFilter.Stop;
                }

                XElement element = node as XElement;
                if (element != null && element.Name == "p" && element.zAttribValue("class") == "submeta")
                {
                    return XNodeFilter.Stop;
                }

                return XNodeFilter.SelectNode;
            })
            .Select(DownloadPrint.ReplaceChars)
            .Select(DownloadPrint.TrimWithoutColon),
        data.Title);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // Download links: href of every <a> in the entry, stopping at <p class="submeta">.
    // Anchors without an href attribute are skipped; dereferencing the missing
    // attribute used to throw a NullReferenceException.
    data.DownloadLinks = xeEntry.DescendantNodes(
        node =>
        {
            XElement element = node as XElement;
            if (element == null)
            {
                return XNodeFilter.DontSelectNode;
            }
            if (element.Name == "a")
            {
                return XNodeFilter.SelectNode;
            }

            bool isSubmeta = element.Name == "p" && (string)element.Attribute("class") == "submeta";
            return isSubmeta ? XNodeFilter.Stop : XNodeFilter.DontSelectNode;
        })
        .Select(node => ((XElement)node).Attribute("href"))
        .Where(href => href != null)
        .Select(href => href.Value)
        .ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
/// <summary>
/// Builds a <c>TelechargerMagazine_PostDetail</c> from a downloaded post detail page:
/// key, category, print type, title, first image, description/infos and download links.
/// </summary>
/// <param name="webResult">Result of the web request (parsed document, request and load date).</param>
/// <returns>The populated post detail.</returns>
protected override TelechargerMagazine_PostDetail GetDetailData(WebResult webResult)
{
    // Page layout this method reads (abridged sample):
    //   <div id='dle-content'>
    //     <div class="right-full">
    //       <div class="cat_name">Posted in: <a href=".../journaux/">Journaux</a></div>
    //       <h2 class="title"><img src="/templates/MStarter/images/title.png" alt="" class="img" /> Journaux Français Du 17 Juillet 2015</h2>
    //       <div class="contenttext"> ...
    // The only date on the page (calendar widget, td class "day-active-v day-current")
    // is just the current day, so no post creation date is read.
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    TelechargerMagazine_PostDetail data = new TelechargerMagazine_PostDetail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetDetailKey(webResult.WebRequest.HttpRequest);

    XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']//div[@class='right-full']");

    // Category: texts of the "cat_name" links joined with "/".
    data.Category = xePost.XPathValues(".//div[@class='cat_name']//a/text()").Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    // Title: when extra infos are found in the title, keep the raw title in OriginalTitle.
    data.Title = xePost.XPathValue(".//h2[@class='title']//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    XXElement xeContent = xePost.XPathElement(".//div[@class='contenttext']");

    // First image of the content (images are not preloaded by this method).
    data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xeContent.XPathValue(".//img/@src"))) };

    // Infos and description: content texts up to the first <a>; a text that is
    // exactly "description" (case-insensitive) is not selected.
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xeContent.DescendantTexts(
            node =>
            {
                XText textNode = node as XText;
                if (textNode != null && textNode.Value.Trim().ToLowerInvariant() == "description")
                {
                    return XNodeFilter.DontSelectNode;
                }

                XElement element = node as XElement;
                if (element != null && element.Name == "a")
                {
                    return XNodeFilter.Stop;
                }

                return XNodeFilter.SelectNode;
            })
            .Select(DownloadPrint.ReplaceChars)
            .Select(DownloadPrint.TrimWithoutColon),
        data.Title,
        extractValuesFromText: false);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // Download links: href of every link of the content.
    // A previous DescendantNodes-based assignment was removed: its result was overwritten
    // by this one, and it threw a NullReferenceException on any <a> without an href.
    data.DownloadLinks = xeContent.XPathValues(".//a/@href").ToArray();

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
/// <summary>
/// Builds an <c>ExtremeDown_PostDetail_v2</c> from a downloaded post page: key, title, category,
/// print type, author, creation date, images, description and grouped download links.
/// </summary>
/// <param name="webResult">Result of the web request (parsed document, request and load date).</param>
/// <returns>The populated post detail.</returns>
private static ExtremeDown_PostDetail_v2 GetData(WebResult webResult)
{
    XXElement root = webResult.Http.zGetXDocument().zXXElement();

    ExtremeDown_PostDetail_v2 data = new ExtremeDown_PostDetail_v2();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetPostDetailKey(webResult.WebRequest.HttpRequest);

    XXElement post = root.XPathElement("//div[@id='dle-content']");

    // Title: when extra infos are found in the title, keep the raw title in OriginalTitle.
    data.Title = post.XPathValue(".//h2[@class='blocktitle']//text()").Trim(DownloadPrint.TrimChars);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // Header block: category, author and date are each read from the span holding the matching icon.
    XXElement header = post.XPathElement(".//div[@class='blockheader']");
    data.Category = header.XPathValues(".//i[@class='icon-cats']/ancestor::span//a//text()")
        .Select(DownloadPrint.Trim)
        .zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);
    data.PostAuthor = header.XPathValue(".//span/i[@class='icon-user']/ancestor::span//a//text()");

    string dateText = header.XPathValue(".//span/i[@class='icon-date']/ancestor::span//a//text()");
    data.PostCreationDate = zdate.ParseDateTimeLikeToday(dateText, webResult.LoadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
    if (data.PostCreationDate == null)
    {
        pb.Trace.WriteLine("unknow date time \"{0}\"", dateText);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.PostCreationDate, dateText);
    }

    // Content block: images of the "image-block" cell, optionally loaded to get width and height.
    XXElement content = post.XPathElement(".//div[@class='blockcontent']");
    data.Images = content.XPathElement(".//table//td[@class='image-block']")
        .DescendantNodes(node => XmlDescendant.ImageFilter(node))
        .Select(imageNode => new WebImage(zurl.GetUrl(data.SourceUrl, imageNode.zAttribValue("src"))))
        .ToArray();
    if (webResult.WebRequest.LoadImage)
    {
        data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
    }

    // Description: release name, then blockquote texts, then the texts of the upload infos table.
    XXElement uploadInfos = post.XPathElement(".//div[@class='upload-infos clearfix']");
    data.Description = content.XPathValues(".//p[@class='release-name']//text()")
        .Concat(content.XPathValues(".//table//td//blockquote//text()"))
        .Concat(uploadInfos.XPathValues(".//table//text()"))
        .ToArray();

    // Download links. The filter runs while the siblings following "prez_2" are enumerated:
    // an <h2> only records its text as the pending title, a <div> is selected as a link group,
    // and a <script> ends the enumeration. The loop below must therefore consume the
    // enumeration directly so that each selected <div> sees the title recorded just before it.
    string pendingTitle = null;
    Func<XXElement, XNodeFilter> filter = element =>
    {
        var name = element.XElement.Name;
        if (name == "script")
        {
            return XNodeFilter.Stop;
        }
        if (name == "div")
        {
            return XNodeFilter.SelectNode;
        }
        if (name == "h2")
        {
            pendingTitle = element.XPathValue(".//text()");
        }
        return XNodeFilter.DontSelectNode;
    };

    foreach (XXElement linkGroup in post.XPathElements(".//div[@class='prez_2']//following-sibling::*").zFilterElements(filter))
    {
        data.DownloadLinks_new.AddItem(pendingTitle);
        pendingTitle = null;

        foreach (XXElement anchor in linkGroup.XPathElements(".//a"))
        {
            // hoster name: <strong class="hebergeur">
            string server = anchor.XPathValue(".//strong[@class='hebergeur']//text()");
            string link = anchor.XPathValue("@href");

            bool isProtected = __getLinksExtremeProtect && __extremeProtect.IsLinkProtected(link);
            if (!isProtected)
            {
                data.DownloadLinks_new.AddServer(server);
                data.DownloadLinks_new.AddLink(link);
                continue;
            }

            // protected link: register it on the server entry, then add the unprotected links
            data.DownloadLinks_new.AddServer(server, link);
            data.DownloadLinks_new.AddLinks(__extremeProtect.UnprotectLink(link));
        }
    }

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
/// <summary>
/// Manual login test against ebookdz: loads the site home page, fills and submits the
/// "navbar_loginform" form, reloads <paramref name="url"/> to check the login state,
/// then saves the session cookies to "cookies.txt".
/// </summary>
/// <param name="url">Page loaded after the login to verify that the session is logged in.</param>
public static void Test_Login_01(string url)
{
    XmlConfig localConfig = new XmlConfig(XmlConfig.CurrentConfig.GetExplicit("LocalConfig"));
    string login = localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Login");
    // the form carries the MD5 hash (lowercase hex) of the password, not the password itself
    string hashPassword = Crypt.ComputeMD5Hash(localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Password")).zToHex(lowercase: true);

    string urlSite = "http://www.ebookdz.com/";
    HttpRequestParameters_v1 requestParameters = new HttpRequestParameters_v1();
    pb.old.Http_v2.LoadUrl(urlSite, requestParameters);
    XXElement xeSource = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root);
    Trace.WriteLine("Login : \"{0}\"", Test_GetLogin_01(xeSource));
    Trace.WriteLine("Is logged in : \"{0}\"", Test_IsLoggedIn_01(xeSource));

    // the form action is relative to the page base, e.g. <base href="http://www.ebookdz.com/forum/" />
    string urlBase = xeSource.XPathValue("//head//base/@href");
    Trace.WriteLine("urlBase : \"{0}\"", urlBase);

    XXElement xeForm = xeSource.XPathElement("//form[@id='navbar_loginform']");
    if (xeForm.XElement == null)
    {
        Trace.WriteLine("element not found \"//form[@id='navbar_loginform']\"");
        return;
    }
    Trace.WriteLine("form action : \"{0}\"", xeForm.XPathValue("@action"));
    string urlForm = zurl.GetUrl(urlBase, xeForm.XPathValue("@action"));
    string method = xeForm.XPathValue("@method");
    Trace.WriteLine("urlForm : \"{0}\" method {1}", urlForm, method);

    // Build the form body from every named <input>: the login and the password hash are
    // injected, the clear password is sent empty, other inputs keep their page value.
    StringBuilder sb = new StringBuilder();
    foreach (XXElement xeInput in xeForm.XPathElements(".//input"))
    {
        string name = xeInput.XPathValue("@name");
        if (name == null)
        {
            continue;
        }

        string value;
        if (name == "vb_login_username")
        {
            value = login;
        }
        else if (name == "vb_login_password")
        {
            value = null;
        }
        else if (name == "vb_login_md5password" || name == "vb_login_md5password_utf")
        {
            value = hashPassword;
        }
        else
        {
            value = xeInput.XPathValue("@value");
        }

        if (sb.Length > 0)
        {
            sb.Append("&");
        }
        // Names and values are URL-encoded: a raw '&', '=', '+' or space in the login or in a
        // hidden field would otherwise corrupt the form body. A missing value is sent as empty.
        sb.AppendFormat("{0}={1}", Uri.EscapeDataString(name), Uri.EscapeDataString(value ?? ""));
    }
    string content = sb.ToString();
    // NOTE(review): this trace line contains the password hash
    Trace.WriteLine("content : \"{0}\"", content);

    // submit the login form
    requestParameters.content = content;
    requestParameters.method = Http.GetHttpRequestMethod(method);
    pb.old.Http_v2.LoadUrl(urlForm, requestParameters);

    // reload the requested page with a plain GET and report the login state
    requestParameters.method = HttpRequestMethod.Get;
    requestParameters.content = null;
    pb.old.Http_v2.LoadUrl(url, requestParameters);
    xeSource = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root);
    Trace.WriteLine("Login : \"{0}\"", Test_GetLogin_01(xeSource));
    Trace.WriteLine("Is logged in : \"{0}\"", Test_IsLoggedIn_01(xeSource));

    // save the cookies of the request parameters for the site
    string cookiesFile = Path.Combine(XmlConfig.CurrentConfig.GetExplicit("Ebookdz/CookiesDir"), "cookies.txt");
    Trace.WriteLine("save cookies to \"{0}\"", cookiesFile);
    zcookies.SaveCookies(requestParameters.cookies, urlSite, cookiesFile);
}
/// <summary>
/// Fills <paramref name="data"/> from a vosbooks.net post page: creation date, title (and
/// infos extracted from the title), category, print type, image, description, infos and
/// download links.
/// </summary>
/// <param name="xeSource">Root element of the loaded post page.</param>
/// <param name="data">Post detail to fill; <c>data.SourceUrl</c> is used to resolve the image url.</param>
protected void _GetDetailData(XXElement xeSource, Vosbooks_PostDetail_v6 data)
{
    // Expected page structure:
    // <div id="page">
    //   <div id="wrapper">
    //     <table id="layout">
    //       <tr>...</tr>
    //       <tr>
    //         <td class="sidebars">...</td>
    //         <td>
    //           <div id="left-col">
    //             <div id="content-padding">
    //               <div id="content">
    //                 ...
    //                 <div class="post" id="post-74299" style="margin-top: 0;">
    //
    //                   <table id="post-head">
    //                     <tr>
    //                       <td id="head-date">
    //                         <div class="date"><span>jan</span> 29</div>
    //                       </td>
    //                       <td>
    //                         <div class="title">
    //                           <h2><a href="http://www.vosbooks.net/74299-livre/les-imposteurs-francois-cavanna.html" rel="bookmark" title="Les imposteurs – François Cavanna" >Les imposteurs – François Cavanna </a></h2>
    //                           <div class="postdata">
    //                             <span class="category">
    //                               <a href="http://www.vosbooks.net/category/livre/ebooks-epub" rel="category tag">Ebooks en Epub</a>,
    //                               <a href="http://www.vosbooks.net/category/livre" rel="category tag">Livre</a>
    //                             </span>
    //                           </div>
    //                         </div>
    //                       </td>
    //                     </tr>
    //                   </table>
    //
    //                   <div class="entry">
    //                     ...
    //                     <p style="text-align: center;">
    //                       <img class="alignnone" src="http://imageshack.com/a/img538/3859/6JXSxu.jpg" alt="Les imposteurs – François Cavanna" title="Les imposteurs – François Cavanna" height="540" width="420" />
    //                     </p>
    XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

    // Creation date: texts of the head-date cell; the last parsed date is remembered in _lastPostDate.
    XXElement xe = xePost.XPathElement(".//table[@id='post-head']");
    string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
    data.PostCreationDate = GetDate(dates, _lastPostDate);
    if (data.PostCreationDate != null)
    {
        _lastPostDate = new Date(data.PostCreationDate.Value);
    }

    // Title: when infos are found inside the title, keep the raw title in OriginalTitle.
    data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
    if (titleInfos.foundInfo)
    {
        data.OriginalTitle = data.Title;
        data.Title = titleInfos.title;
        data.Infos.SetValues(titleInfos.infos);
    }

    // Category: link texts joined with "/", e.g. Ebooks en Epub / Livre
    data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
    data.PrintType = GetPrintType(data.Category);

    xe = xePost.XPathElement(".//div[@class='entry']");

    // Image: first <img> of a <p> following the <h3> of the entry.
    string urlImage = xe.XPathValue("h3/following-sibling::p/img/@src");
    if (urlImage != null)
    {
        data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, urlImage)) };
    }

    // Description and infos: texts of the <p> elements, stopping at the first text
    // starting with "lien " or at the <p class="submeta"> element.
    PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
        xe.XPathElements(".//p").DescendantTexts(
            node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                    {
                        return XNodeFilter.Stop;
                    }
                }
                if (node is XElement)
                {
                    XElement xe2 = (XElement)node;
                    if (xe2.Name == "p" && xe2.zAttribValue("class") == "submeta")
                    {
                        return XNodeFilter.Stop;
                    }
                }
                return XNodeFilter.SelectNode;
            }
        ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon),
        data.Title);
    data.Description = textValues.description;
    data.Infos.SetValues(textValues.infos);

    // Download links: href of every <a> of the entry located before <p class="submeta">.
    data.DownloadLinks = xe.DescendantNodes(
        node =>
        {
            if (!(node is XElement))
            {
                return XNodeFilter.DontSelectNode;
            }
            XElement xe2 = (XElement)node;
            if (xe2.Name == "a")
            {
                return XNodeFilter.SelectNode;
            }
            if (xe2.Name != "p")
            {
                return XNodeFilter.DontSelectNode;
            }
            XAttribute xa = xe2.Attribute("class");
            if (xa == null)
            {
                return XNodeFilter.DontSelectNode;
            }
            if (xa.Value != "submeta")
            {
                return XNodeFilter.DontSelectNode;
            }
            return XNodeFilter.Stop;
        })
        // an <a> without href (e.g. a named anchor) has no link to collect: skip it
        // instead of failing with a NullReferenceException
        .Select(node => ((XElement)node).Attribute("href"))
        .Where(href => href != null)
        .Select(href => href.Value)
        .Where(zurl.CheckUrl)
        .ToArray();
}
/// <summary>
/// Builds a <see cref="RapideDdl_PostDetail"/> from the loaded post page: category, print type,
/// title (and infos found in the title), creation date, author, images, description, language,
/// size, number of pages, infos and download links.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page; gives the xml document, the request and the load date.</param>
/// <returns>The post detail read from the page.</returns>
protected override RapideDdl_PostDetail GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);

    RapideDdl_PostDetail data = new RapideDdl_PostDetail();
    data.sourceUrl = loadDataFromWeb.request.Url;
    data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.id = GetPostDetailKey(data.sourceUrl);

    XXElement xeMain = xeSource.XPathElement("//div[@class='lcolomn mainside']");

    // category : link texts of the "spbar" div, without empty texts and without the home link
    var categoryParts = xeMain.XPathElements(".//div[@class='spbar']//a").DescendantTexts().Select(DownloadPrint.Trim).Where(
        part =>
        {
            string lower = part.ToLowerInvariant();
            return lower != "" && !lower.Contains("acceuil") && !lower.Contains("accueil");
        });
    data.category = categoryParts.zToStringValues("/");
    data.printType = GetPostType(data.category.ToLowerInvariant());

    // title : last text of the "spbar" div
    data.title = xeMain.XPathElements(".//div[@class='spbar']").DescendantTexts().Select(DownloadPrint.Trim).LastOrDefault();
    PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.title);
    if (titleInfos.foundInfo)
    {
        data.originalTitle = data.title;
        data.title = titleInfos.title;
        data.infos.SetValues(titleInfos.infos);
    }

    // creation date and author
    XXElement xeInfo = xeMain.XPathElement(".//div[@class='shdinfo']");
    string dateText = xeInfo.XPathValue(".//span[@class='date']//text()");
    data.creationDate = zdate.ParseDateTimeLikeToday(dateText, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
    if (data.creationDate == null)
    {
        pb.Trace.WriteLine("unknow date time \"{0}\"", dateText);
    }
    if (__trace)
    {
        pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.creationDate, dateText);
    }
    data.postAuthor = xeInfo.XPathValue(".//span[@class='arg']//a//text()");

    // images
    XXElement xeContent = xeMain.XPathElement(".//div[@class='maincont']");
    var imageNodes = xeContent.DescendantNodes(node => XmlDescendant.ImageFilter(node));
    data.images = imageNodes.Select(img => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, img.zAttribValue("src")))).ToArray();
    if (loadDataFromWeb.request.LoadImage)
    {
        data.images = DownloadPrint.LoadImages(data.images).ToArray();
    }

    // description, language, size, nbPages, infos : all texts except those inside <a> elements
    PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
        xeContent.DescendantTexts(node => node is XElement && ((XElement)node).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode),
        data.title);
    data.description = textValues.description;
    data.language = textValues.language;
    data.size = textValues.size;
    data.nbPages = textValues.nbPages;
    data.infos.SetValues(textValues.infos);

    // download links : href of the links of each "div/div", without image hosts and image files, e.g.
    //   http://prezup.eu  http://pixhst.com/avaxhome/27/36/002e3627.jpeg  http://www.zupmage.eu/i/R1UgqdXn4F.jpg
    //   http://i.imgur.com/Gu7hagN.jpg  http://img11.hostingpics.net/pics/591623liens.png  http://www.hapshack.com/images/jUfTZ.gif
    //   http://pixhst.com/pictures/3029467
    Func<string, bool> isDownloadLink = link =>
        !(link.StartsWith("http://prezup.eu")
          || link.StartsWith("http://pixhst.com")
          || link.EndsWith(".jpg")
          || link.EndsWith("jpeg")
          || link.EndsWith("png")
          || link.EndsWith("gif"));
    List<string> links = new List<string>();
    foreach (XXElement xeBlock in xeContent.XPathElements("div/div"))
    {
        links.AddRange(xeBlock.XPathValues(".//a/@href").Where(isDownloadLink));
    }
    data.downloadLinks = links.ToArray();

    return data;
}
/// <summary>
/// Builds an <see cref="OnisepInstitution_Detail"/> from a loaded Onisep institution page:
/// name, UAI code, address, postal code, city, phone, fax, mail, web site, status, lodging,
/// ulis, study levels and bac level.
/// </summary>
/// <param name="webResult">Loaded page; gives the document, the request and the load date.</param>
/// <returns>The institution detail read from the page.</returns>
private static OnisepInstitution_Detail GetData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    OnisepInstitution_Detail data = new OnisepInstitution_Detail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetKey(webResult.WebRequest.HttpRequest);

    XXElement xeData = xeSource.XPathElement("//div[@id='oni_content-page']//div[@class='oni_innerContent']//div[@id='oni_zoom-block']");

    data.Institution = OnisepInstitution.Trim(xeData.XPathValue(".//h1/text()"));

    // <span class="oni_span-title">Code UAI : 0062080D</span>
    string s = OnisepInstitution.Trim(xeData.XPathValue(".//span[@class='oni_span-title']/text()"));
    if (s != null && s.StartsWith("Code UAI :", StringComparison.InvariantCultureIgnoreCase))
    {
        // 10 = length of "Code UAI :"
        data.UAICode = OnisepInstitution.Trim(s.Substring(10));
    }

    // Address     = text in <span class="street-address">
    // PostalCode  = text in <span class="postal-code">
    // City        = text in <span class="locality">
    // Tel         = text in <span class="tel">
    // Fax         = text starting with "Fax :"
    // Mail        = @href starting with "mailto:" in <a class="email">
    XXElement xe = xeData.XPathElement(".//div[@class='oni_fiche-info-1']");
    data.Address = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='street-address']/text()"));
    data.PostalCode = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='postal-code']/text()"));
    data.City = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='locality']/text()"));
    data.Tel = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='tel']/text()"));

    // the null test protects StartsWith in case the trim function gives back null
    s = xe.XPathValues(".//p[@class='vcard']//text()")
        .Select(OnisepInstitution.Trim)
        .Where(t => t != null && t.StartsWith("Fax :", StringComparison.InvariantCultureIgnoreCase))
        .FirstOrDefault();
    if (s != null)
    {
        // 5 = length of "Fax :"
        data.Fax = OnisepInstitution.Trim(s.Substring(5));
    }

    s = xe.XPathValue(".//a[@class='email']/@href");
    if (s != null && s.StartsWith("mailto:", StringComparison.InvariantCultureIgnoreCase))
    {
        // 7 = length of "mailto:"
        s = s.Substring(7);
    }
    data.Mail = s;

    // Web site: href of the first <a> following the text node "site :"
    data.WebSite = xe.DescendantTextNodes()
        .Where(xt => string.Equals(OnisepInstitution.Trim(xt.Value), "site :", StringComparison.InvariantCultureIgnoreCase))
        .FirstOrDefault()
        .zXPathValue(".//following::a/@href");

    // Each <li> of the second info block is a "label" text followed by a "value" text.
    foreach (XXElement xe2 in xeData.XPathElements(".//div[@class='oni_fiche-info-2']//li"))
    {
        string[] values = xe2.DescendantTexts().Take(2).ToArray();
        if (values.Length != 2)
        {
            continue;
        }

        string label = OnisepInstitution.Trim(values[0]);
        if (label == null)
        {
            continue;
        }

        // ToLowerInvariant: the labels are fixed keys, matching must not depend on the
        // current culture (e.g. "ULIS" lower-cased with a Turkish culture is not "ulis").
        switch (label.ToLowerInvariant())
        {
            case "statut de l'établissement :":
                data.InstitutionStatus = OnisepInstitution.Trim(values[1]);
                break;
            case "hébergement :":
                data.Lodging = OnisepInstitution.Trim(values[1]);
                break;
            case "présence d'une ulis":
                data.Ulis = OnisepInstitution.Trim(values[1]);
                break;
        }
    }

    data.StudyLevels = xeData.XPathElements(".//div[@class='oni_nav-in']//ul[@class='oni_nav-in-ul']//li")
        .Select(li => li.DescendantTexts().zConcatStrings())
        .Where(txt => txt != null)
        .ToArray();
    data.BacLevel = GetBacLevel(data.StudyLevels);

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }
    return data;
}
/// <summary>
/// Builds a <see cref="Unea_DetailCompany2"/> from the loaded company page: name, presentation,
/// then the values found in the tables (activities, sectors, address, phone, fax, email, web site,
/// leader, employee number, revenue, siret, certification, clients, download documents) and photos.
/// </summary>
/// <remarks>
/// The table texts are read in document order: a text equal to a known header selects the kind
/// of the following texts, any other text is stored according to the current kind.
/// </remarks>
/// <returns>The company detail read from the page.</returns>
protected override Unea_DetailCompany2 GetData()
{
    XXElement xeSource = new XXElement(GetXmlDocument().Root);

    Unea_DetailCompany2 data = new Unea_DetailCompany2();
    data.sourceUrl = Url;
    data.loadFromWebDate = DateTime.Now;

    // <div class='ctn_content-article'>
    XXElement xeContent = xeSource.XPathElement(".//div[@class='ctn_content-article']");

    // Texts of the content outside <script> and <table> elements.
    // The enumerator is disposed as soon as name and presentation are read.
    using (IEnumerator<string> texts = xeContent.DescendantTexts(
        node => !(node is XElement) || (((XElement)node).Name != "script" && ((XElement)node).Name != "table") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode)
        .Select(__trimFunc2).GetEnumerator())
    {
        // <h1>
        //   <img src="http://unea.griotte.biz/BaseDocumentaire/Docs/Public/4017/LOGOAmpouleC.JPG" style='border-width:2px;border-color:#5593C9;' height='60px' />
        //   <span>Entreprise Adaptée</span><br />
        //   ALSACE ENTREPRISE ADAPTEE
        // </h1>
        // the name is the second text
        if (texts.MoveNext() && texts.MoveNext())
        {
            data.name = texts.Current;
        }

        // <h2>ALSACE ENTREPRISE ADAPTEE est implantée sur les sites de Colmar et Mulhouse ...</h2>
        // the presentation is the next text
        if (texts.MoveNext())
        {
            data.presentation = texts.Current;
        }
    }

    Unea_TextType textType = Unea_TextType.unknow;
    foreach (XText xtext in xeContent.XPathElements(".//table").DescendantTextNodes())
    {
        string text = __trimFunc2(xtext.Value);
        if (text == "")
        {
            continue;
        }

        if (text.Equals("NOS ACTIVITES", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.activity;
        }
        else if (text.Equals("FILIERES METIER UNEA", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.sector;
        }
        else if (text.Equals("DOCUMENTS TÉLÉCHARGEABLES", StringComparison.InvariantCultureIgnoreCase))
        {
            // the documents are the links of the <ul> following the header element
            foreach (XXElement xe2 in new XXElement(xtext.Parent).XPathElements("following-sibling::ul//a"))
            {
                string url = xe2.XPathValue("@href");
                string name = __trimFunc2(xe2.XPathValue(".//text()"));
                if (url == null)
                {
                    // a null key would make ContainsKey() throw: skip links without href
                    Trace.CurrentTrace.WriteLine("warning download document without url \"{0}\"", name);
                    continue;
                }
                if (!data.downloadDocuments.ContainsKey(url))
                {
                    data.downloadDocuments.Add(url, new Unea_Document() { name = name, url = url });
                }
                else
                {
                    Trace.CurrentTrace.WriteLine("warning download document already exists \"{0}\" \"{1}\"", name, url);
                }
            }
            // novalues: ignore the following texts so that the document names
            // (e.g. Plaquette_AEA.pdf) do not end up in unknowInfos
            textType = Unea_TextType.novalues;
        }
        else if (text.Equals("NOUS CONTACTER", StringComparison.InvariantCultureIgnoreCase))
        {
            // NOTE(review): novalue (singular) has no case in the switch below, so the texts
            // following this header are added to unknowInfos - confirm this is intended.
            textType = Unea_TextType.novalue;
        }
        else if (text.Equals("ADRESSE", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.address;
        }
        else if (text.Equals("TELEPHONE", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.phone;
        }
        else if (text.Equals("FAX", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.fax;
        }
        else if (text.Equals("EMAIL", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.email;
        }
        else if (text.Equals("SITE", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.webSite;
        }
        else if (text.Equals("QUI SOMMES NOUS", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.novalue;
        }
        else if (text.Equals("DIRIGEANT", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.leader;
        }
        else if (text.Equals("NOMBRE DE SALARIÉS", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.employeNumber;
        }
        else if (text.Equals("CHIFFRE D'AFFAIRE DE L'ANNÉE ÉCOULÉE", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.lastYearRevenue;
        }
        else if (text.Equals("NUMÉRO SIRET", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.siret;
        }
        else if (text.Equals("CERTIFICATION", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.certification;
        }
        else if (text.Equals("PRINCIPAUX CLIENTS", StringComparison.InvariantCultureIgnoreCase))
        {
            textType = Unea_TextType.client;
        }
        else
        {
            // Not a header: store the text according to the last header.
            // Single-value kinds go back to unknow once their value is read.
            switch (textType)
            {
                case Unea_TextType.activity:
                    if (!data.activities.ContainsKey(text))
                    {
                        data.activities.Add(text, null);
                    }
                    else
                    {
                        Trace.CurrentTrace.WriteLine("warning activity already exists \"{0}\"", text);
                    }
                    break;
                case Unea_TextType.sector:
                    if (!data.sectors.ContainsKey(text))
                    {
                        data.sectors.Add(text, null);
                    }
                    else
                    {
                        Trace.CurrentTrace.WriteLine("warning sector already exists \"{0}\"", text);
                    }
                    break;
                case Unea_TextType.address:
                    // the address may be split in several texts: join them with a space
                    if (data.address == null)
                    {
                        data.address = text;
                    }
                    else
                    {
                        data.address += " " + text;
                    }
                    break;
                case Unea_TextType.phone:
                    data.phone = text;
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.fax:
                    data.fax = text;
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.email:
                    data.email = text;
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.webSite:
                    data.webSite = text;
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.leader:
                    data.leader = text;
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.employeNumber:
                    int employeNumber;
                    if (int.TryParse(text, out employeNumber))
                    {
                        data.employeNumber = employeNumber;
                    }
                    else
                    {
                        Trace.CurrentTrace.WriteLine("error unknow employe number \"{0}\"", text);
                    }
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.lastYearRevenue:
                    // a lone currency sign means no revenue value
                    if (text != "€")
                    {
                        data.lastYearRevenue = text;
                    }
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.siret:
                    data.siret = text;
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.certification:
                    data.certification = text;
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.client:
                    data.clients = text;
                    textType = Unea_TextType.unknow;
                    break;
                case Unea_TextType.novalues:
                    break;
                default:
                    data.unknowInfos.Add(text);
                    break;
            }
        }
    }

    // Photos: images inside a link of a table cell.
    foreach (XXElement xe in xeContent.XPathElements(".//table//td/a/img"))
    {
        string url = xe.XPathValue("@src");
        if (url == null)
        {
            // a null key would make ContainsKey() throw: skip images without src
            Trace.CurrentTrace.WriteLine("warning photo without src");
            continue;
        }
        if (!data.photos.ContainsKey(url))
        {
            data.photos.Add(url, null);
        }
        else
        {
            Trace.CurrentTrace.WriteLine("warning photo already exists \"{0}\"", url);
        }
    }

    return data;
}