/// <summary>
/// Reads the loader settings from a configuration element into the static fields of this class
/// and creates the static post-detail loader (<c>_load</c>).
/// </summary>
/// <param name="xe">
/// Configuration element queried for the values UseUrlCache, CacheDirectory, UseXml, XmlNodeName,
/// UseMongo, MongoServer, MongoDatabase, MongoCollection and MongoDocumentItemName.
/// </param>
public static void ClassInit(XElement xe)
{
    // Url cache settings (read before GetUrlCache() is called below).
    __useUrlCache = xe.zXPathValue("UseUrlCache").zTryParseAs(false);
    __cacheDirectory = xe.zXPathValue("CacheDirectory");

    // Xml settings. The current field value is handed to zTryParseAs
    // (presumably as the fallback when parsing fails - confirm against zTryParseAs).
    __useXml = xe.zXPathValue("UseXml").zTryParseAs(__useXml);
    __xmlNodeName = xe.zXPathValue("XmlNodeName");

    // Mongo settings.
    __useMongo = xe.zXPathValue("UseMongo").zTryParseAs(__useMongo);
    __mongoServer = xe.zXPathValue("MongoServer", __mongoServer);
    __mongoDatabase = xe.zXPathValue("MongoDatabase");
    __mongoCollectionName = xe.zXPathValue("MongoCollection");
    __mongoDocumentItemName = xe.zXPathValue("MongoDocumentItemName");

    // The document store stays null unless Mongo is enabled.
    IDocumentStore_v1<TelechargementPlus_PostDetail> store = null;
    if (__useMongo)
    {
        store = new MongoDocumentStore_v1<TelechargementPlus_PostDetail>(
            __mongoServer,
            __mongoDatabase,
            __mongoCollectionName,
            __mongoDocumentItemName);
        TelechargementPlus.InitMongoClassMap();
    }

    // Web loader first, then the combined loader that wraps it together with the (optional) store.
    pb.Web.v1.LoadDataFromWeb_v2<TelechargementPlus_PostDetail> webLoader =
        new pb.Web.v1.LoadDataFromWeb_v2<TelechargementPlus_PostDetail>(LoadPostDetailFromWeb, GetUrlCache());
    _load = new pb.Web.v1.LoadWebData_v2<TelechargementPlus_PostDetail>(webLoader, store);
}
/// <summary>
/// Consumes the text fragments of a post and distributes them between <c>infos</c> (named values),
/// <c>description</c> (unnamed paragraphs) and the <c>language</c>, <c>size</c> and <c>nbPages</c> members.
/// </summary>
/// <remarks>
/// <para>A fragment equal to "\r\n" closes the paragraph being accumulated and resets the current name.</para>
/// <para>A fragment ending with ':' whose remaining text is not blank becomes the name under which the
/// following fragments are stored in <c>infos</c>.</para>
/// <para>Any other non-empty fragment different from <c>title</c> is appended (space separated) to the
/// paragraph being accumulated.</para>
/// </remarks>
/// <param name="texts">Text fragments, in document order.</param>
public void SetTextValues(IEnumerable<string> texts)
{
    string name = null;
    string text = null;

    foreach (string s in texts)
    {
        if (s == "\r\n")
        {
            // Paragraph break: store what was accumulated and start over.
            StoreTextValue(name, text);
            text = null;
            name = null;
            continue;
        }

        string s2 = TelechargementPlus.TrimFunc1(TelechargementPlus.ExtractTextValues(infos, s));

        // One fragment can carry several of these values at once, e.g.
        //   PDF | 116 pages | 53 Mb | French
        // so every key is checked independently. The previous else-if chain moved only one key per
        // fragment and could leave the others behind in infos when no further fragment followed.
        if (infos.ContainsKey("language"))
        {
            language = (string)infos["language"];
            infos.Remove("language");
        }

        if (infos.ContainsKey("size"))
        {
            size = (string)infos["size"];
            infos.Remove("size");
        }

        if (infos.ContainsKey("page_nb"))
        {
            nbPages = int.Parse((string)infos["page_nb"]);
            infos.Remove("page_nb");
        }

        // Ordinal comparison: the Substring below assumes that the last UTF-16 unit is the ':' itself,
        // which a culture-sensitive EndsWith does not guarantee.
        if (s2.EndsWith(":", StringComparison.Ordinal))
        {
            string label = s2.Substring(0, s2.Length - 1).Trim();
            if (label != "")
            {
                // A label only names the value; it is never part of the text.
                name = label;
                continue;
            }
        }

        if (s2 != "" && s2 != title)
        {
            text = text == null ? s2 : text + " " + s2;
        }
    }

    // Store the paragraph still pending when the input ends without a trailing break.
    StoreTextValue(name, text);
}

/// <summary>
/// Stores an accumulated paragraph: under <paramref name="name"/> in <c>infos</c> when a name is set,
/// otherwise as a new <c>description</c> entry. Does nothing when <paramref name="text"/> is null.
/// </summary>
private void StoreTextValue(string name, string text)
{
    if (text == null)
    {
        return;
    }

    if (name != null)
    {
        infos.SetValue(name, new ZString(text));
    }
    else
    {
        description.Add(text);
    }
}
/// <summary>
/// Builds a <see cref="TelechargementPlus_PostDetail"/> from the loaded post page: title, creation date
/// and category come from the heading block; images, text values and download links come from the
/// <c>post-img</c> spans of the story block.
/// </summary>
/// <returns>The populated post detail.</returns>
protected override TelechargementPlus_PostDetail GetData()
{
    XXElement root = new XXElement(GetXmlDocument().Root);

    TelechargementPlus_PostDetail detail = new TelechargementPlus_PostDetail
    {
        sourceUrl = Url,
        loadFromWebDate = DateTime.Now
    };

    XXElement post = root.XPathElement("//div[@id='dle-content']");

    // Sample of the heading markup this section reads:
    //   <div class="base">
    //     <div class="heading">
    //       <div class="binner">
    //         <h1>Advanced Création Photoshop H-Série N°19 - Novembre 2013 [Lien Direct] Gratuit</h1>
    //         <div class="storeinfo">
    //           <a href="http://www.telechargement-plus.com/2013/10/14/">Aujourd'hui, 11:59</a>
    //           | Catégorie:
    //           <a href="http://www.telechargement-plus.com/e-book-magazines/">E-Book / Magazines</a>,
    //           <a href="http://www.telechargement-plus.com/e-book-magazines/journaux/">Journaux</a>,
    //           <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/">Magazines</a>
    //         </div>
    //       </div>
    //     </div>
    XXElement heading = post.XPathElement(".//div[@class='heading']//div[@class='binner']");

    detail.title = TelechargementPlus.ExtractTextValues(
        detail.infos,
        TelechargementPlus.TrimFunc1(heading.XPathValue(".//text()")));

    string postedOn = heading.XPathValue(".//a//text()");
    detail.creationDate = TelechargementPlus.ParseDateTime(postedOn);

    // Category path: the first two storeinfo texts are skipped, then the root category,
    // the "Catégorie:" label and empty strings are dropped; the rest is joined with "/".
    detail.category = heading.XPathElements(".//div[@class='storeinfo']")
        .DescendantTexts()
        .Skip(2)
        .Select(TelechargementPlus.TrimFunc1)
        .Where(label => !(label == "E-Book / Magazines" || label == "Catégorie:" || label == ""))
        .zToStringValues("/");

    // Sample of the story markup this section reads:
    //   <div class="maincont">
    //     <div class="binner">
    //       <div class="shortstory">
    //         <div class="story-text">
    //           <span id="post-img">
    //             <div style="text-align: center;">
    //               <img src="http://www.hapshack.com/images/TX72Y.jpg" alt="..." title="..." />
    //               <b>Editeur :</b> Presse Fr<br />
    //               <b>Date de sortie :</b> 2013<br />
    //               <b>Hébergeur : </b>Multi / <b>[Link Direct]</b><br />
    //               PDF | French | 186 pages | 100 MB<br />
    //             </div>
    //           </span>
    //           <span id="post-img">
    //             <div id="news-id-86887" style="display: inline;">
    //               <a href="http://clz.to/q83zrwga" target="_blank">
    //                 <img src="http://www.hapshack.com/images/0THnp.gif" alt="..." title="..." />
    //               </a>
    //               <b>Le CD &+ : </b><br />
    //               http://clz.to/o58urag6<br />
    //             </div>
    //           </span>
    XXElement story = post.XPathElement(".//div[@class='maincont']//div[@class='binner']//div[@class='story-text']");

    // Images of the post-img spans. The inner filter answers SkipNode for <a> elements
    // (presumably so that images wrapped in links are left out - confirm against XNodeFilter).
    detail.images = story.XPathElements(".//span[@id='post-img']")
        .DescendantNodes(node => XmlDescendant.ImageFilter(
            node,
            inner => inner is XElement && ((XElement)inner).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode))
        .Select(img => new pb.old.ImageHtml((XElement)img, Url))
        .Where(image => TelechargementPlus.ImagesToSkip.ContainsKey(image.Source) == false)
        .ToList();

    if (_loadImage)
    {
        pb.old.Http_v2.LoadImageFromWeb(detail.images);
    }

    // Text values of the same spans, using the same <a> filter as for the images.
    detail.SetTextValues(
        story.XPathElements(".//span[@id='post-img']")
            .DescendantTexts(node => node is XElement && ((XElement)node).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode));

    // Every link target found inside the post-img spans is a download link.
    detail.downloadLinks.AddRange(story.XPathValues(".//span[@id='post-img']//a/@href"));

    return detail;
}
/// <summary>
/// Parses a post-list page: reads the url of the next page and builds one
/// <see cref="TelechargementPlus_PostHeader"/> per <c>div class="base shortstory"</c> element.
/// </summary>
/// <param name="request">Request giving access to the loaded document, its url and the LoadImage flag.</param>
/// <returns>The header page with its next-page url and its post headers.</returns>
public static TelechargementPlus_HeaderPage LoadHeaderPageFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
{
    XXElement root = new XXElement(request.GetXmlDocument().Root);
    string pageUrl = request.Url;
    TelechargementPlus_HeaderPage page = new TelechargementPlus_HeaderPage();

    // Next page link:
    //   <div class="navigation">
    //     <div align="center">
    //       <span>Prev.</span>
    //       <span>1</span>
    //       <a href="http://www.telechargement-plus.com/e-book-magazines/page/2/">2</a>
    //       ...
    //       <a href="http://www.telechargement-plus.com/e-book-magazines/page/2/">Next</a>
    //     </div>
    //   </div>
    page.urlNextPage = zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='navigation']//a[text()='Next']/@href"));

    List<TelechargementPlus_PostHeader> posts = new List<TelechargementPlus_PostHeader>();

    // Post list: one <div class="base shortstory"> per post.
    foreach (XXElement item in root.XPathElements("//div[@class='base shortstory']"))
    {
        TelechargementPlus_PostHeader post = new TelechargementPlus_PostHeader
        {
            sourceUrl = pageUrl,
            loadFromWebDate = DateTime.Now
        };

        // Title link:
        //   <h1 class="shd">
        //     <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/86236-multi-ici-paris-n3562-9-au-15-octobre-2013.html">
        //       [Multi] Ici Paris N°3562 - 9 au 15 Octobre 2013
        //     </a>
        //   </h1>
        XXElement titleLink = item.XPathElement(".//*[@class='shd']//a");
        post.urlDetail = zurl.GetUrl(pageUrl, titleLink.XPathValue("@href"));
        post.title = TelechargementPlus.ExtractTextValues(
            post.infos,
            TelechargementPlus.TrimFunc1(titleLink.XPathValue(".//text()")));

        // Author, date and category:
        //   <div class="shdinf">
        //     <div class="shdinf">
        //       <span class="rcol">Auteur:
        //         <a href="http://www.telechargement-plus.com/user/bakafa/">bakafa</a>
        //       </span>
        //       <span class="date">
        //         <b><a href="http://www.telechargement-plus.com/2013/10/09/">Aujourd'hui, 17:13</a></b>
        //       </span>
        //       <span class="lcol">Catégorie:
        //         <a href="http://www.telechargement-plus.com/e-book-magazines/">E-Book / Magazines</a> »
        //         <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/">Magazines</a>
        //       </span>
        //     </div>
        //   </div>
        XXElement meta = item.XPathElement(".//div[@class='shdinf']/div[@class='shdinf']");
        post.postAuthor = meta.XPathValue(".//span[@class='rcol']//a//text()");

        string postedOn = meta.XPathValue(".//span[@class='date']//text()");
        post.creationDate = TelechargementPlus.ParseDateTime(postedOn);

        // The root category, the "Catégorie:" label and empty strings are dropped; the rest is joined with "/".
        post.category = meta.XPathElements(".//span[@class='lcol']")
            .DescendantTexts()
            .Select(TelechargementPlus.TrimFunc1)
            .Where(label => !(label == "E-Book / Magazines" || label == "Catégorie:" || label == ""))
            .zToStringValues("/");

        // Post body:
        //   <span id="post-img">
        //     <div id="news-id-86236" style="display: inline;">
        //       <div style="text-align: center;">
        //         <img src="http://zupimages.net/up/3/1515486591.jpeg" alt="..." title="..." />
        //         <b>
        //           Ici Paris N°3562 - 9 au 15 Octobre 2013<br />
        //           French | 52 pages | HQ PDF | 101 MB
        //         </b>
        //         Ici Paris vous fait partager la vie publique et privée de celles et ceux qui font l'actualité ...
        //       </div>
        //     </div>
        //   </span>
        XXElement body = item.XPathElement(".//span[@id='post-img']//div[starts-with(@id, 'news-id')]");

        post.images = body.DescendantNodes(node => XmlDescendant.ImageFilter(node))
            .Select(img => new pb.old.ImageHtml((XElement)img, pageUrl))
            .Where(image => TelechargementPlus.ImagesToSkip.ContainsKey(image.Source) == false)
            .ToList();

        if (request.LoadImage)
        {
            pb.old.Http_v2.LoadImageFromWeb(post.images);
        }

        post.SetTextValues(body.DescendantTexts());
        posts.Add(post);
    }

    page.postHeaders = posts.ToArray();
    return page;
}