Пример #1
0
 /// <summary>
 /// Looks up the text of the member link inside the "welcomelink" item of the page's top links.
 /// </summary>
 /// <param name="xeSource">Root element of the parsed forum page.</param>
 /// <returns>Whatever the XPath lookup yields for the welcome link text.</returns>
 public static string GetLogin(XXElement xeSource)
 {
     // Sample markup (ebookdz.com_forum_showthread.php_t_109595_01_02.html):
     //   <div id="toplinks" class="toplinks">
     //   <li class="welcomelink">Bienvenue, <a href="member.php?u=49369"><b>la_beuze</b></a></li>
     const string welcomeLinkText = "//div[@id='toplinks']//li[@class='welcomelink']//a//text()";
     string login = xeSource.XPathValue(welcomeLinkText);
     return login;
 }
Пример #2
0
        /// <summary>
        /// Lazily enumerates the forums listed in the "forums" list of the main forum page.
        /// </summary>
        /// <param name="filter">Optional predicate on the forum name; entries it rejects are skipped. Null keeps every entry.</param>
        /// <param name="reload">Passed through to the web request as its reload flag.</param>
        /// <returns>One <c>Ebookdz_Forum</c> (name and url) per accepted list item; nothing when the load result is false.</returns>
        public IEnumerable<Ebookdz_Forum> LoadMainForum(Predicate<string> filter = null, bool reload = false)
        {
            HttpRequest mainForumRequest = new HttpRequest { Url = __urlForum };
            LoadDataFromWeb_v4 loadDataFromWeb = Load(new RequestFromWeb_v4(mainForumRequest, reload: reload));
            if (!loadDataFromWeb.LoadResult)
                yield break;

            XXElement page = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);

            // <ol id="forums" class="floatcontainer">
            // Entries seen there: Accueil de la Board, Forum de l'entraide, Journaux, MAGAZINES, Les Livres, Sujet supprimés ou à supprimer
            foreach (XXElement forumItem in page.XPathElements("//ol[@id='forums']/li"))
            {
                // The first link of the item carries both the forum name and its address, e.g.
                // http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
                XXElement link = forumItem.XPathElement(".//a");
                string forumName = link.XPathValue(".//text()");

                bool accepted = filter == null || filter(forumName);
                if (accepted)
                {
                    string absoluteUrl = zurl.GetUrl(loadDataFromWeb.WebRequest.HttpRequest.Url, link.XPathValue("@href"));
                    string forumUrl = Ebookdz.GetUrl(absoluteUrl);
                    yield return new Ebookdz_Forum { Forum = forumName, Url = forumUrl };
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Submits the navbar login form found in <paramref name="xeSource"/>, filling it with the
        /// login and MD5-hashed password read from the local configuration.
        /// </summary>
        /// <param name="xeSource">Root element of a page that contains the form with id "navbar_loginform".</param>
        /// <returns>
        /// The <c>Http</c> object returned by the http manager for the form submission.
        /// This method does not check whether the login actually succeeded.
        /// </returns>
        /// <exception cref="PBException">The login form is not present in the page.</exception>
        public static Http Login(XXElement xeSource)
        {
            XmlConfig localConfig = new XmlConfig(XmlConfig.CurrentConfig.GetExplicit("LocalConfig"));
            string login = localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Login");
            // Only the lowercase hex MD5 of the password is placed in the form; the clear password field stays empty.
            string hashPassword = Crypt.ComputeMD5Hash(localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Password")).zToHex(lowercase: true);

            // The form action is relative to the document base:
            // <base href="http://www.ebookdz.com/forum/" />
            string urlBase = xeSource.XPathValue("//head//base/@href");
            XXElement xeForm = xeSource.XPathElement("//form[@id='navbar_loginform']");
            if (xeForm.XElement == null)
                throw new PBException("element form not found \"//form[@id='navbar_loginform']\"");

            string urlForm = zurl.GetUrl(urlBase, xeForm.XPathValue("@action"));
            string method = xeForm.XPathValue("@method");

            // Build the request body from every named <input> of the form.
            StringBuilder sb = new StringBuilder();
            foreach (XXElement xeInput in xeForm.XPathElements(".//input"))
            {
                string name = xeInput.XPathValue("@name");
                if (name == null)
                    continue;

                string value;
                if (name == "vb_login_username")
                    value = login;
                else if (name == "vb_login_password")
                    value = null;
                else if (name == "vb_login_md5password" || name == "vb_login_md5password_utf")
                    value = hashPassword;
                else
                    value = xeInput.XPathValue("@value");

                if (sb.Length > 0)
                    sb.Append("&");
                // Percent-encode both sides so that characters such as '&', '=', '+', spaces or
                // non-ASCII letters in the login or in a hidden field cannot break the pair list.
                // NOTE(review): assumes the http layer sends Content verbatim — confirm it does not encode again.
                sb.Append(System.Uri.EscapeDataString(name));
                sb.Append("=");
                sb.Append(System.Uri.EscapeDataString(value ?? ""));
            }
            string content = sb.ToString();

            HttpRequest httpRequest = new HttpRequest { Url = urlForm, Content = content, Method = Http.GetHttpRequestMethod(method) };
            HttpRequestParameters httpRequestParameters = new HttpRequestParameters();
            Http http = HttpManager.CurrentHttpManager.Load(httpRequest, httpRequestParameters);
            return http;
        }
Пример #4
0
 /// <summary>
 /// Makes sure a logged-in session exists before pages are loaded: loads the main page, logs in
 /// and saves the cookies when the page shows no login, then remembers the result in <c>__isLoggedIn</c>.
 /// </summary>
 /// <exception cref="PBException">The parameterless <c>IsLoggedIn()</c> check still fails after the login attempt.</exception>
 public static void InitLoadFromWeb()
 {
     if (!__isLoggedIn)
     {
         Http mainPage = LoadMainPage();
         XXElement xeMainPage = new XXElement(mainPage.zGetXDocument().Root);
         bool alreadyLoggedIn = IsLoggedIn(xeMainPage);
         if (!alreadyLoggedIn)
         {
             Http loginResponse = Login(xeMainPage);
             SaveCookies(loginResponse.RequestParameters.Cookies);
             // Re-check with the parameterless overload once the cookies are saved.
             bool loginSucceeded = IsLoggedIn();
             if (!loginSucceeded)
                 throw new PBException("unable login to http://www.ebookdz.com/");
         }
         __isLoggedIn = true;
     }
 }
Пример #5
0
 /// <summary>
 /// Evaluates <paramref name="xpath"/> on the XElement wrapped by <paramref name="xxelement"/> and lets
 /// <c>XPathResultConcatText</c> turn the result into a single string.
 /// </summary>
 /// <param name="xxelement">Wrapper whose <c>XElement</c> may be null.</param>
 /// <param name="xpath">XPath expression to evaluate.</param>
 /// <param name="separator">Forwarded to <c>XPathResultConcatText</c>.</param>
 /// <param name="xplicit">When true, a missing value or a null element is reported through <c>XXElement.WriteLine</c>.</param>
 /// <param name="resultFunc">Forwarded to <c>XPathResultConcatText</c>.</param>
 /// <param name="itemFunc">Forwarded to <c>XPathResultConcatText</c>.</param>
 /// <returns>The string produced by <c>XPathResultConcatText</c>, or null when the wrapped element is null.</returns>
 private static string _XPathConcatText(XXElement xxelement, string xpath, string separator, bool xplicit, Func<string, string> resultFunc = null, Func<string, string> itemFunc = null)
 {
     XElement xelement = xxelement.XElement;
     if (xelement == null)
     {
         // Nothing to evaluate against: optionally report it and give up.
         if (xplicit)
             XXElement.WriteLine(1, "error value not found \"{0}\" from null element \"{1}\"", xpath, xxelement.XPath);
         return null;
     }

     string value = null;
     object xpathResult = xelement.XPathEvaluate(xpath);
     bool found = XPathResultConcatText(xpathResult, out value, separator, resultFunc, itemFunc);
     if (found)
     {
         XXElement.WriteLine(2, "get value \"{0}\" from element \"{1}\" (\"{2}\")", xpath, xxelement.XPath, value);
     }
     else if (xplicit)
     {
         XXElement.WriteLine(1, "error value not found \"{0}\" from element \"{1}\"", xpath, xxelement.XPath);
     }
     return value;
 }
Пример #6
0
        /// <summary>
        /// Manual test: traces the login state of the main page and, when not logged in, logs in,
        /// saves the cookies, reloads the main page and traces the login state again.
        /// </summary>
        public static void Test_Login_02()
        {
            XXElement page = new XXElement(Ebookdz.LoadMainPage().zGetXDocument().Root);
            Trace.WriteLine("Login        : \"{0}\"", Ebookdz.GetLogin(page));
            bool isLoggedIn = Ebookdz.IsLoggedIn(page);
            Trace.WriteLine("Is logged in : \"{0}\"", isLoggedIn);
            if (isLoggedIn)
                return;

            // Not logged in yet: submit the login form and keep the resulting cookies.
            Http loginResponse = Ebookdz.Login(page);
            Ebookdz.SaveCookies(loginResponse.RequestParameters.Cookies);

            // Reload the main page to see whether the login took effect.
            Http reloaded = Ebookdz.LoadMainPage();
            page = new XXElement(reloaded.zGetXDocument().Root);
            Trace.WriteLine("Login        : \"{0}\"", Ebookdz.GetLogin(page));
            Trace.WriteLine("Is logged in : \"{0}\"", Ebookdz.IsLoggedIn(page));
        }
Пример #7
0
        //private static Ebookdz_HeaderPage GetData(WebResult webResult)
        /// <summary>
        /// Builds a header page from a loaded thread-list page: the "next page" url and one
        /// post header (title and detail url) per item of the thread list.
        /// </summary>
        /// <param name="webResult">Loaded page together with the request that produced it.</param>
        /// <returns>The populated <c>Ebookdz_HeaderPage_v2</c>.</returns>
        private static IEnumDataPages<IHeaderData> GetData(WebResult webResult)
        {
            XXElement page = new XXElement(webResult.Http.zGetXDocument().Root);
            string url = webResult.WebRequest.HttpRequest.Url;

            Ebookdz_HeaderPage_v2 data = new Ebookdz_HeaderPage_v2
            {
                SourceUrl = url,
                LoadFromWebDate = webResult.LoadFromWebDate
            };

            // Pager markup:
            // <div class="threadpagenav">
            // <span class="prev_next">
            // <a rel="next" href="forumdisplay.php?f=157&amp;page=2&amp;s=fec27f3bac2b58debbb727ab8725c8a4" title="Page suivante - Résultats de 21 à 40 sur 61">
            string nextHref = page.XPathValue("//div[@class='threadpagenav']//span[@class='prev_next']//a[@rel='next']/@href");
            data.UrlNextPage = zurl.GetUrl(url, nextHref);

            // Thread list markup:
            // <div class="body_bd">
            // <div id="threadlist" class="threadlist">
            // <ol id="threads" class="threads">
            // <li class="threadbit " id="thread_111977">
            //   <h3 class="threadtitle">
            //   <a title="" class="title" href="showthread.php?t=111977&amp;s=fec27f3bac2b58debbb727ab8725c8a4" id="thread_title_111977">La  Provence Marseille du lundi 26 janvier 2015</a>
            List<Ebookdz_PostHeader> headers = new List<Ebookdz_PostHeader>();
            foreach (XXElement threadItem in page.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li"))
            {
                Ebookdz_PostHeader header = new Ebookdz_PostHeader
                {
                    SourceUrl = url,
                    LoadFromWebDate = webResult.LoadFromWebDate
                };

                XXElement titleLink = threadItem.XPathElement(".//h3[@class='threadtitle']//a[@class='title']");
                header.Title = titleLink.XPathValue(".//text()");
                string detailUrl = zurl.GetUrl(url, titleLink.XPathValue("@href"));
                header.UrlDetail = Ebookdz_v1.GetUrl(detailUrl);

                headers.Add(header);
            }

            data.PostHeaders = headers.ToArray();
            return data;
        }
Пример #8
0
        /// <summary>
        /// Builds a header page listing the forums found in the "forums" list of the loaded page,
        /// keeping only those accepted by <c>__forumFilter</c> when that filter is set.
        /// </summary>
        /// <param name="webResult">Loaded page together with the request that produced it.</param>
        /// <returns>The populated <c>PostHeaderDataPage_v1</c>; its <c>UrlNextPage</c> is always null.</returns>
        protected override IEnumDataPages<IHeaderData> GetDataPage(WebResult webResult)
        {
            XXElement page = new XXElement(webResult.Http.zGetXDocument().Root);
            string url = webResult.WebRequest.HttpRequest.Url;

            PostHeaderDataPage_v1 data = new PostHeaderDataPage_v1
            {
                SourceUrl = url,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Id = GetPageKey(webResult.WebRequest.HttpRequest),
                UrlNextPage = null
            };

            Predicate<string> filter = __forumFilter;
            List<EbookdzForumData> forums = new List<EbookdzForumData>();

            // <ol id="forums" class="floatcontainer">
            // Entries seen there: Accueil de la Board, Forum de l'entraide, Journaux, MAGAZINES, Les Livres, Sujet supprimés ou à supprimer
            foreach (XXElement forumItem in page.XPathElements("//ol[@id='forums']/li"))
            {
                // e.g. http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
                XXElement link = forumItem.XPathElement(".//a");
                string forumName = link.XPathValue(".//text()");

                bool accepted = filter == null || filter(forumName);
                if (accepted)
                {
                    EbookdzForumData forum = new EbookdzForumData
                    {
                        SourceUrl = url,
                        LoadFromWebDate = webResult.LoadFromWebDate,
                        Name = forumName
                    };
                    forum.UrlDetail = GetUrl(zurl.GetUrl(url, link.XPathValue("@href")));
                    forums.Add(forum);
                }
            }

            data.Headers = forums.ToArray();
            return data;
        }
Пример #9
0
 /// <summary>
 /// Manual test: loads a cached detail page and traces every string returned by
 /// <c>DescendantTexts()</c> for its "maincont" block.
 /// </summary>
 public static void Test_XXElement_DescendantTextList_01()
 {
     string url = @"c:\pib\dev_data\exe\runsource\download\sites\rapide-ddl\cache\detail\39000\ebooks_magazine_39023-multi-lautomobile-no821-octobre-2014.html";
     pb.old.Http_v2.LoadUrl(url);

     // Narrow down in two steps: the main column first, then its content block.
     XXElement root = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root);
     XXElement mainColumn = root.XPathElement("//div[@class='lcolomn mainside']");
     XXElement content = mainColumn.XPathElement(".//div[@class='maincont']");

     foreach (string text in content.DescendantTexts())
         Trace.WriteLine(text);
 }
Пример #10
0
        /// <summary>
        /// Builds a header page from a loaded listing page: the "next page" url and one post header
        /// (title and detail url) per child div of the content block, skipping the pager div.
        /// </summary>
        /// <param name="webResult">Loaded page together with the request that produced it.</param>
        /// <returns>The populated <c>Vosbooks_HeaderPage_v2</c>.</returns>
        private static IEnumDataPages<IHeaderData> GetHeaderPageData(WebResult webResult)
        {
            XXElement page = new XXElement(webResult.Http.zGetXDocument().Root);
            string url = webResult.WebRequest.HttpRequest.Url;

            Vosbooks_HeaderPage_v2 data = new Vosbooks_HeaderPage_v2
            {
                SourceUrl = url,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Id = GetPageKey(webResult.WebRequest.HttpRequest)
            };

            // Page layout:
            // <div id="page">
            // <div id="wrapper">
            // <table id="layout">
            // <tr>
            // <td></td>
            // <td>
            // <div id="left-col">
            // <div id="content-padding">
            // <div id="content">
            //   <div style="height:264px;" class="cover_global" data-zt="divbyzt">...</div>
            //   ...
            // </div>

            // The "next" link is the '>' anchor in the last item of the pager.
            string nextHref = page.XPathValue("//div[@class='page-nav']//li[last()]//a[text()='>']/@href");
            data.UrlNextPage = zurl.GetUrl(url, nextHref);

            List<Vosbooks_PostHeader_v1> headers = new List<Vosbooks_PostHeader_v1>();
            foreach (XXElement postBlock in page.XPathElements("//table[@id='layout']//div[@id='content']/div"))
            {
                Vosbooks_PostHeader_v1 header = new Vosbooks_PostHeader_v1
                {
                    SourceUrl = url,
                    LoadFromWebDate = webResult.LoadFromWebDate
                };

                // The pager div sits among the post blocks; it is not a post.
                bool isPager = postBlock.XPathValue("@class") == "page-nav";
                if (!isPager)
                {
                    // Post block markup:
                    // <div style="" data-zt="divbyzt">
                    // <div data-zt="divbyzt">
                    // <div data-zt="divbyzt">
                    // <center>
                    // <strong>
                    // <a href="http://www.vosbooks.net/74231-journaux/pack-journaux-francais-du-28-janvier-2015.html" title="">
                    // Pack Journaux Français Du 28 Janvier 2015
                    // <span class="detail_release" data-zt="spanbyzt"></span>
                    // </a>
                    // </strong>
                    // </center>
                    // </div>
                    // </div>
                    // </div>
                    XXElement link = postBlock.XPathElement(".//div/div/div//a");
                    header.Title = link.XPathValue(".//text()");
                    header.UrlDetail = link.XPathValue("./@href");

                    headers.Add(header);
                }
            }

            data.PostHeaders = headers.ToArray();
            return data;
        }
Пример #11
0
 /// <summary>
 /// Tells whether the page shows a login, i.e. whether <c>Test_GetLogin_01</c> returns a non-null value for it.
 /// </summary>
 /// <param name="xeSource">Root element of the parsed page.</param>
 /// <returns>True when a login value was found, false otherwise.</returns>
 public static bool Test_IsLoggedIn_01(XXElement xeSource)
 {
     string login = Test_GetLogin_01(xeSource);
     bool loggedIn = login != null;
     return loggedIn;
 }
Пример #12
0
        /// <summary>
        /// Builds a header page from a loaded news page: one post header (title and detail url)
        /// per "collapse" block found under the "vba_news4" div.
        /// </summary>
        /// <param name="webResult">Loaded page together with the request that produced it.</param>
        /// <returns>The populated page; its <c>UrlNextPage</c> is always null.</returns>
        public static IEnumDataPages<PostHeader> GetHeaderPageData(WebResult webResult)
        {
            XXElement page = new XXElement(webResult.Http.zGetXDocument().Root);
            string url = webResult.WebRequest.HttpRequest.Url;

            PostHeaderDataPage<PostHeader> data = new PostHeaderDataPage<PostHeader>
            {
                SourceUrl = url,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Id = GetPageKey(webResult.WebRequest.HttpRequest),
                UrlNextPage = null
            };

            // <div id="vba_news4">
            List<PostHeader> headers = new List<PostHeader>();
            foreach (XXElement newsBlock in page.XPathElements("//div[@id='vba_news4']//div[@class='collapse']"))
            {
                PostHeader header = new PostHeader
                {
                    SourceUrl = url,
                    LoadFromWebDate = webResult.LoadFromWebDate
                };

                // The title is taken from the second link of the block head
                // (an earlier variant excluded the 'mcbadge mcbadge_r' link by class instead).
                XXElement titleLink = newsBlock.XPathElement(".//h2[@class='blockhead']//a[2]");
                header.Title = titleLink.XPathValue(".//text()");
                header.UrlDetail = titleLink.XPathValue("./@href");

                headers.Add(header);
            }

            data.Headers = headers.ToArray();
            return data;
        }
Пример #13
0
 /// <summary>
 /// Tells whether the page shows a login, i.e. whether <c>GetLogin</c> returns a non-null value for it.
 /// </summary>
 /// <param name="xeSource">Root element of the parsed page.</param>
 /// <returns>True when a login value was found, false otherwise.</returns>
 public static bool IsLoggedIn(XXElement xeSource)
 {
     string login = GetLogin(xeSource);
     bool loggedIn = login != null;
     return loggedIn;
 }
Пример #14
0
        /// <summary>
        /// Manual test of the whole login sequence with the old http helper: loads the site root,
        /// submits the navbar login form, loads <paramref name="url"/> with the resulting cookies,
        /// traces the login state before and after, and saves the cookies to a file.
        /// </summary>
        /// <param name="url">Page loaded after the login attempt to check the login state.</param>
        public static void Test_Login_01(string url)
        {
            XmlConfig localConfig = new XmlConfig(XmlConfig.CurrentConfig.GetExplicit("LocalConfig"));
            string login = localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Login");
            // Only the lowercase hex MD5 of the password is placed in the form; the clear password field stays empty.
            string hashPassword = Crypt.ComputeMD5Hash(localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Password")).zToHex(lowercase: true);

            // Step 1: load the site root and trace the current login state.
            string urlSite = "http://www.ebookdz.com/";
            HttpRequestParameters_v1 requestParameters = new HttpRequestParameters_v1();
            pb.old.Http_v2.LoadUrl(urlSite, requestParameters);
            XXElement xeSource = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root);
            Trace.WriteLine("Login        : \"{0}\"", Test_GetLogin_01(xeSource));
            Trace.WriteLine("Is logged in : \"{0}\"", Test_IsLoggedIn_01(xeSource));

            // Step 2: locate the login form; its action is relative to the document base.
            // <base href="http://www.ebookdz.com/forum/" />
            string urlBase = xeSource.XPathValue("//head//base/@href");
            Trace.WriteLine("urlBase : \"{0}\"", urlBase);
            XXElement xeForm = xeSource.XPathElement("//form[@id='navbar_loginform']");
            if (xeForm.XElement == null)
            {
                Trace.WriteLine("element not found \"//form[@id='navbar_loginform']\"");
                return;
            }
            Trace.WriteLine("form action : \"{0}\"", xeForm.XPathValue("@action"));
            string urlForm = zurl.GetUrl(urlBase, xeForm.XPathValue("@action"));
            string method = xeForm.XPathValue("@method");
            Trace.WriteLine("urlForm : \"{0}\" method {1}", urlForm, method);

            // Step 3: build the request body from every named <input> of the form.
            StringBuilder sb = new StringBuilder();
            foreach (XXElement xeInput in xeForm.XPathElements(".//input"))
            {
                string name = xeInput.XPathValue("@name");
                if (name == null)
                    continue;

                string value;
                if (name == "vb_login_username")
                    value = login;
                else if (name == "vb_login_password")
                    value = null;
                else if (name == "vb_login_md5password" || name == "vb_login_md5password_utf")
                    value = hashPassword;
                else
                    value = xeInput.XPathValue("@value");

                if (sb.Length > 0)
                    sb.Append("&");
                // Percent-encode both sides so that characters such as '&', '=', '+', spaces or
                // non-ASCII letters in the login or in a hidden field cannot break the pair list.
                // NOTE(review): assumes the http layer sends the content verbatim — confirm it does not encode again.
                sb.Append(System.Uri.EscapeDataString(name));
                sb.Append("=");
                sb.Append(System.Uri.EscapeDataString(value ?? ""));
            }
            string content = sb.ToString();
            Trace.WriteLine("content : \"{0}\"", content);

            // Step 4: submit the form, reusing the same request parameters (and therefore the same cookies).
            requestParameters.content = content;
            requestParameters.method = Http.GetHttpRequestMethod(method);
            pb.old.Http_v2.LoadUrl(urlForm, requestParameters);

            // Step 5: load the target page with a plain GET and trace the login state again.
            requestParameters.method = HttpRequestMethod.Get;
            requestParameters.content = null;
            pb.old.Http_v2.LoadUrl(url, requestParameters);
            xeSource = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root);
            Trace.WriteLine("Login        : \"{0}\"", Test_GetLogin_01(xeSource));
            Trace.WriteLine("Is logged in : \"{0}\"", Test_IsLoggedIn_01(xeSource));

            // Step 6: save the cookies of the site for later runs.
            string cookiesFile = Path.Combine(XmlConfig.CurrentConfig.GetExplicit("Ebookdz/CookiesDir"), "cookies.txt");
            Trace.WriteLine("save cookies to \"{0}\"", cookiesFile);
            zcookies.SaveCookies(requestParameters.cookies, urlSite, cookiesFile);
        }
Пример #15
0
        /// <summary>
        /// Lazily enumerates the categories of a forum page and, under each category, its child sub-forums.
        /// </summary>
        /// <param name="url">Address of the forum page to load.</param>
        /// <param name="forum">Forum name copied into every returned entry.</param>
        /// <param name="filter">Optional predicate on the sub-forum name; it is not applied to the category entries. Null keeps everything.</param>
        /// <param name="reload">Passed through to the web request as its reload flag.</param>
        /// <returns>
        /// For each category: one entry without <c>Name</c>, followed by one entry per accepted sub-forum.
        /// Nothing when the load result is false.
        /// </returns>
        public IEnumerable<Ebookdz_Forum> LoadSubForum(string url, string forum, Predicate<string> filter = null, bool reload = false)
        {
            LoadDataFromWeb_v4 loadDataFromWeb = Load(new RequestFromWeb_v4(new HttpRequest { Url = url }, reload: reload));
            if (!loadDataFromWeb.LoadResult)
                yield break;

            XXElement page = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);

            // Markup:
            // <div class="body_bd">
            // <div id="forumbits" class="forumbits">
            // <ol>
            // <li id="forum10" class="forumbit_post new L1">
            //   <div class="forumrow">
            //   <ol id="childforum_for_161" class="childsubforum">
            //     <div class="titleline">
            foreach (XXElement categoryItem in page.XPathElements("//div[@id='forumbits']/ol/li"))
            {
                // The category itself is returned first.
                XXElement categoryLink = categoryItem.XPathElement(".//div[@class='forumrow']//a");
                string category = categoryLink.XPathValue(".//text()");
                string categoryUrl = Ebookdz.GetUrl(zurl.GetUrl(loadDataFromWeb.WebRequest.HttpRequest.Url, categoryLink.XPathValue("@href")));
                yield return new Ebookdz_Forum { Forum = forum, Category = category, Url = categoryUrl };

                // Then every child sub-forum that passes the filter.
                foreach (XXElement subForumLink in categoryItem.XPathElements(".//ol[@class='childsubforum']/li//div[@class='titleline']//a"))
                {
                    string subForumName = subForumLink.XPathValue(".//text()");

                    bool accepted = filter == null || filter(subForumName);
                    if (accepted)
                    {
                        string subForumUrl = Ebookdz.GetUrl(zurl.GetUrl(loadDataFromWeb.WebRequest.HttpRequest.Url, subForumLink.XPathValue("@href")));
                        yield return new Ebookdz_Forum { Forum = forum, Category = category, Name = subForumName, Url = subForumUrl };
                    }
                }
            }
        }
Пример #16
0
        /// <summary>
        /// Builds a header page from a loaded forum thread-list page: the "next page" url and one
        /// post header (title and detail url) per item of the thread list.
        /// </summary>
        /// <param name="loadDataFromWeb">Loaded page together with the request that produced it.</param>
        /// <returns>The populated <c>Ebookdz_HeaderPage</c>, cast to <c>IKeyData&lt;int&gt;</c>.</returns>
        public static IKeyData<int> GetForumHeaderPageData(LoadDataFromWeb_v4 loadDataFromWeb)
        {
            XXElement page = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);
            string url = loadDataFromWeb.WebRequest.HttpRequest.Url;

            Ebookdz_HeaderPage data = new Ebookdz_HeaderPage
            {
                SourceUrl = url,
                LoadFromWebDate = loadDataFromWeb.LoadFromWebDate
            };

            // Pager markup:
            // <div id="above_threadlist" class="above_threadlist">
            // <div class="threadpagenav">
            // <span class="prev_next">
            // <a rel="next" href="forumdisplay.php?f=74&amp;page=2&amp;s=4807e931448c05da34dd54fbd0308479" title="Page suivante - Résultats de 21 à 40 sur 66">
            string nextHref = page.XPathValue("//div[@id='above_threadlist']//span[@class='prev_next']//a[@rel='next']/@href");
            data.UrlNextPage = GetUrl(zurl.GetUrl(url, nextHref));

            // <div class="body_bd">
            // NOTE(review): this element is looked up but never used below; kept because the lookup itself may trace.
            XXElement xePost = page.XPathElement("//div[@class='body_bd']");

            // Breadcrumb markup (not parsed here):
            // <div id="breadcrumb" class="breadcrumb">
            // <ul class="floatcontainer">
            // <li class="navbit">
            // Forum / Journaux / Presse quotidienne / Autres Journaux

            // Thread list markup:
            // <div id="threadlist" class="threadlist">
            // <ol id="threads" class="threads">
            List<Ebookdz_PostHeader> headers = new List<Ebookdz_PostHeader>();
            foreach (XXElement threadItem in page.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li"))
            {
                Ebookdz_PostHeader header = new Ebookdz_PostHeader
                {
                    SourceUrl = url,
                    LoadFromWebDate = loadDataFromWeb.LoadFromWebDate
                };

                // <div class="threadinfo" title="">
                // <div class="inner">
                // <a title="" class="title" href="showthread.php?t=111210&amp;s=4807e931448c05da34dd54fbd0308479" id="thread_title_111210">L'OPINION du mardi  20 janvier 2015</a>
                XXElement titleLink = threadItem.XPathElement(".//div[@class='threadinfo']//a[@class='title']");
                header.Title = titleLink.XPathValue(".//text()");
                string detailUrl = zurl.GetUrl(loadDataFromWeb.WebRequest.HttpRequest.Url, titleLink.XPathValue("@href"));
                header.UrlDetail = GetUrl(detailUrl);

                headers.Add(header);
            }

            data.PostHeaders = headers.ToArray();
            return (IKeyData<int>)data;
        }
Пример #17
0
 /// <summary>
 /// Manual test: loads <paramref name="url"/> with the cookies read from the configured cookies file
 /// and traces the login state of the resulting page.
 /// </summary>
 /// <param name="url">Page to load with the saved cookies.</param>
 public static void Test_LoadWithCookies_01(string url)
 {
     string cookiesFile = XmlConfig.CurrentConfig.GetExplicit("Ebookdz/CookiesFile");
     HttpRequestParameters_v1 requestParameters = new HttpRequestParameters_v1();
     requestParameters.cookies = zcookies.LoadCookies(cookiesFile);
     Trace.WriteLine("load cookies from \"{0}\"", cookiesFile);

     pb.old.Http_v2.LoadUrl(url, requestParameters);
     XXElement page = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root);

     string login = Test_GetLogin_01(page);
     Trace.WriteLine("Login        : \"{0}\"", login);
     bool loggedIn = Test_IsLoggedIn_01(page);
     Trace.WriteLine("Is logged in : \"{0}\"", loggedIn);
 }
Пример #18
0
        public static TelechargementPlus_PostDetail LoadPostDetailFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
        {
            XXElement xeSource = new XXElement(request.GetXmlDocument().Root);
            TelechargementPlus_PostDetail data = new TelechargementPlus_PostDetail();
            data.sourceUrl = request.Url;
            data.loadFromWebDate = DateTime.Now;

            XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");
            XXElement xe = xePost.XPathElement(".//div[@class='heading']//div[@class='binner']");
            // xe.XPathValue(".//text()", TelechargementPlus.TrimFunc1)
            data.title = TelechargementPlus.ExtractTextValues(data.infos, TelechargementPlus.TrimFunc1(xe.XPathValue(".//text()")));
            data.creationDate = TelechargementPlus.ParseDateTime(xe.XPathValue(".//a//text()"));
            //data.category = xe.DescendantTextList(".//div[@class='storeinfo']").Skip(2).Select(TelechargementPlus.TrimFunc1).Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "").zToStringValues("/");
            data.category = xe.XPathElements(".//div[@class='storeinfo']").DescendantTexts().Skip(2).Select(TelechargementPlus.TrimFunc1).Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "").zToStringValues("/");

            //<div class="base">
            //    <div class="heading">
            //        <div class="binner">
            //            <h1>
            //                Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct] Gratuit</h1>
            //            <div class="storeinfo">
            //                <a href="http://www.telechargement-plus.com/2013/10/14/">Aujourd'hui, 11:59</a>
            //                | Cat&eacute;gorie: 
            //                <a href="http://www.telechargement-plus.com/e-book-magazines/">E-Book / Magazines</a>, 
            //                <a href="http://www.telechargement-plus.com/e-book-magazines/journaux/">Journaux</a>, 
            //                <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/">Magazines</a>
            //                <!-- | Views: 16-->
            //            </div>
            //        </div>
            //    </div>
            //    <div class="maincont">
            //        <div class="binner">
            //            <div class="shortstory">
            //                <div class="story-text">
            //                    <center>
            //                        <span id="post-img">
            //                            <img src="/templates/film-gratuit/images/prez/livre.png" alt="E-Book / Magazines, Journaux, Magazines" />
            //                        </span>
            //                    </center>
            //                    <span id="post-img">
            //                        <div style="text-align: center;">
            //                            <br />
            //                            <!--dle_image_begin:http://www.hapshack.com/images/TX72Y.jpg|-->
            //                            <img src="http://www.hapshack.com/images/TX72Y.jpg" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                            <br />
            //                            <br />
            //                            <b>Editeur :</b> Presse Fr<br />
            //                            <b>Date de sortie :</b> 2013
            //                            <br />
            //                            <b>H�bergeur : </b>Multi / 
            //                            <b>
            //                                <!--colorstart:#FF0000-->
            //                                <span style="color: #FF0000">
            //                                    <!--/colorstart-->
            //                                    [Link Direct]<!--colorend-->
            //                                </span><!--/colorend-->
            //                            </b>
            //                            <br />
            //                            <br />
            //                            <!--dle_image_begin:http://prezup.eu/prez/infossurlebook.png|-->
            //                            <img src="http://prezup.eu/prez/infossurlebook.png" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                <br />
            //                            <br />
            //                            <b>Advanced Cr�ation Photoshop HS�rie N�19 - Novembre 2013 [Lien Direct]</b>
            //                            <br />
            //                            PDF | French | 186 pages | 100 MB<br />
            //                            <br />
            //                            <b>Le CD | zipper/22 Fichiers &+ | 520 MB</b><br />
            //                            37 Projets complets<br />
            //                            SAVOIR TOUT FAIRE : Avec Photoshop Volume XIII<br />
            //                            SPECIAL PHOTOMONTAGE & PEINTURE NUMERIQUE<br />
            //                            BONUS : 2 Tutoriels Illustrator<br />
            //                            / / /
            //                            <br />
            //                            <br />
            //                        </div>
            //                    </span>
            //                    <span id="post-img">
            //                        <div id="news-id-86887" style="display: inline;">
            //                            *<br />
            //                            *<br />
            //                            *<br />
            //                            <div style="text-align: center;">
            //                                <b>
            //                                    <!--sizestart:6-->
            //                                    <span style="font-size: 24pt;">
            //                                        <!--/sizestart-->
            //                                        <!--colorstart:#FF6600-->
            //                                        <span style="color: #FF6600">
            //                                            <!--/colorstart-->
            //                                            Cloudzer<!--colorend-->
            //                                        </span><!--/colorend--><!--sizeend-->
            //                                    </span><!--/sizeend-->
            //                                    =
            //                                    <!--colorstart:#FF0000-->
            //                                    <span style="color: #FF0000">
            //                                        <!--/colorstart-->
            //                                        [Link Direct]<!--colorend-->
            //                                    </span><!--/colorend-->
            //                                </b>
            //                                <br />
            //                                <br />
            //                                <a href="http://clz.to/q83zrwga" target="_blank">
            //                                    <!--dle_image_begin:http://www.hapshack.com/images/0THnp.gif|-->
            //                                    <img src="http://www.hapshack.com/images/0THnp.gif" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                        title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                </a>
            //                                <br />
            //                                <a href="http://ul.to/ukqruco3" target="_blank">
            //                                    <!--dle_image_begin:http://www.hapshack.com/images/9MfYk.gif|-->
            //                                    <img src="http://www.hapshack.com/images/9MfYk.gif" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                        title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                </a>
            //                                <br />
            //                                <br />
            //                                <a href="http://hulkfile.eu/gap3aafrlmaj.html" target="_blank">
            //                                    <!--dle_image_begin:http://www.hapshack.com/images/Js84x.jpg|-->
            //                                    <img src="http://www.hapshack.com/images/Js84x.jpg" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                        title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                </a>
            //                                <br />
            //                                <br />
            //                                <a href="http://turbobit.net/blki3znuvzeg.html" target="_blank">
            //                                    <!--dle_image_begin:http://www.hapshack.com/images/QYeW0.gif|-->
            //                                    <img src="http://www.hapshack.com/images/QYeW0.gif" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                        title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                </a>
            //                                <br />
            //                                <br />
            //                                *<br />
            //                                *<br />
            //                                <b>Le CD &+ : </b>
            //                                <br />
            //                                http://clz.to/o58urag6<br />
            //                                http://ul.to/rpqjypm4<br />
            //                                http://hulkfile.eu/i2k3bbz835zg.html<br />
            //                                http://turbobit.net/v644k3dd8izl.html<br />
            //                                <br />
            //                                <br />
            //                                Bonne lecture<br />
            //                                *************
            //                            </div>
            //                        </div>
            //                    </span>

            xe = xePost.XPathElement(".//div[@class='maincont']//div[@class='binner']//div[@class='story-text']");
            //data.images = xe.XPathElements(".//span[@id='post-img']").XPathImages(request.Url, TelechargementPlus.ImagesToSkip, node => node is XElement && ((XElement)node).Name == "a" ? false : true);
            //data.images = xe.XPathElements(".//span[@id='post-img']").XPathImages(request.Url, imageHtml => !TelechargementPlus.ImagesToSkip.ContainsKey(imageHtml.Source), node => node is XElement && ((XElement)node).Name == "a" ? false : true);
            //data.images = xe.XPathElements(".//span[@id='post-img']").XPathImages(xeImg => new ImageHtml(xeImg, request.Url), imageHtml => !TelechargementPlus.ImagesToSkip.ContainsKey(imageHtml.Source),
            //    node => node is XElement && ((XElement)node).Name == "a" ? false : true).ToList();
            //data.images = xe.XPathElements(".//span[@id='post-img']").XPathImages(xeImg => new ImageHtml(xeImg, request.Url), imageHtml => !TelechargementPlus.ImagesToSkip.ContainsKey(imageHtml.Source),
            //    node => node is XElement && ((XElement)node).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode).ToList();
            data.images = xe.XPathElements(".//span[@id='post-img']")
                .DescendantNodes(node => XmlDescendant.ImageFilter(node, node2 => node2 is XElement && ((XElement)node2).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode))
                .Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, request.Url))
                .Where(imageHtml => !TelechargementPlus.ImagesToSkip.ContainsKey(imageHtml.Source))
                .ToList();

            if (request.LoadImage)
                pb.old.Http_v2.LoadImageFromWeb(data.images);

            //data.SetTextValues(xe.DescendantTextList(".//span[@id='post-img']", node => node is XElement && ((XElement)node).Name == "a" ? false : true));
            data.SetTextValues(xe.XPathElements(".//span[@id='post-img']").DescendantTexts(node => node is XElement && ((XElement)node).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode));
            data.downloadLinks.AddRange(xe.XPathValues(".//span[@id='post-img']//a/@href"));

            ////<h1 class="shd">
            ////    <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/86236-multi-ici-paris-n3562-9-au-15-octobre-2013.html">
            ////        [Multi] Ici Paris N°3562 - 9 au 15 Octobre 2013
            ////    </a>
            ////</h1>
            //XXElement xe = xePost.XPathElement(".//*[@class='shd']//a");
            //_print.url = xe.XPathValue("@href");
            //_print.title = TrimString(ExtractTextValues(xe.XPathValue(".//text()")));

            ////<div class="shdinf">
            ////    <div class="shdinf">
            ////      <span class="rcol">Auteur: 
            ////          <a onclick="ShowProfile('bakafa', 'http://www.telechargement-plus.com/user/bakafa/', '0'); return false;" href="http://www.telechargement-plus.com/user/bakafa/">
            ////              bakafa
            ////          </a>
            ////      </span> 
            ////      <span class="date">
            ////          <b><a href="http://www.telechargement-plus.com/2013/10/09/">Aujourd'hui, 17:13</a></b>
            ////      </span>
            ////      <span class="lcol">Cat&eacute;gorie: 
            ////          <a href="http://www.telechargement-plus.com/e-book-magazines/">
            ////              E-Book / Magazines
            ////          </a> &raquo; 
            ////          <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/">
            ////              Magazines
            ////          </a>
            ////      </span>
            ////    </div>
            ////</div>
            //xe = xePost.XPathElement(".//div[@class='shdinf']/div[@class='shdinf']");
            //_print.postAuthor = xe.XPathValue(".//span[@class='rcol']//a//text()");
            //string postDate = xe.XPathValue(".//span[@class='date']//text()");
            //// Aujourd'hui, 17:13
            //if (postDate != null)
            //    _print.infos.SetValue("postDate", new ZString(postDate));
            //_print.category = xe.DescendantTextList(".//span[@class='lcol']").Select(s => TrimString(s)).Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "").zToStringValues("/");
            ////.zForEach(s => s.Trim())

            ////<span id="post-img">
            ////    <div id="news-id-86236" style="display: inline;">
            ////        <div style="text-align: center;">
            ////            <!--dle_image_begin:http://zupimages.net/up/3/1515486591.jpeg|-->
            ////            <img src="http://zupimages.net/up/3/1515486591.jpeg" alt="[Multi] Ici Paris N°3562 - 9 au 15 Octobre 2013"
            ////                title="[Multi] Ici Paris N°3562 - 9 au 15 Octobre 2013" /><!--dle_image_end-->
            ////            <br />
            ////            <b>
            ////                <br />
            ////                Ici Paris N°3562 - 9 au 15 Octobre 2013<br />
            ////                French | 52 pages | HQ PDF | 101 MB
            ////            </b>
            ////            <br />
            ////            <br />
            ////            Ici Paris vous fait partager la vie publique et privée de celles et ceux qui font
            ////            l'actualité : exclusivités, interviews, enquêtes (la face cachée du showbiz, les
            ////            coulisses de la télé) indiscrétions, potins.<br />
            ////        </div>
            ////    </div>
            ////</span>
            //xe = xePost.XPathElement(".//span[@id='post-img']//div[starts-with(@id, 'news-id')]");
            //_print.images = xe.XPathImages(".//img", _imagesToSkip);
            //if (_loadImage)
            //    Http2.LoadImageFromWeb(_print.images);

            return data;
        }
Пример #19
0
        /// <summary>
        /// Parses a RapideDdl post page into a <see cref="RapideDdl_PostDetail"/>:
        /// category, title, author, images, text values and download links.
        /// </summary>
        /// <param name="request">Request whose XML document and URL are read.</param>
        /// <returns>The populated post detail.</returns>
        public static RapideDdl_PostDetail LoadPostDetailFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
        {
            XXElement xeSource = new XXElement(request.GetXmlDocument().Root);
            RapideDdl_PostDetail data = new RapideDdl_PostDetail();
            data.sourceUrl = request.Url;
            data.loadFromWebDate = DateTime.Now;

            XXElement xePost = xeSource.XPathElement("//div[@class='lcolomn mainside']");

            // Category: trimmed texts of the links inside the "spbar" div, without the "Accueil" entry and empty strings.
            data.category = xePost.XPathElements(".//div[@class='spbar']//a").DescendantTexts()
                .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
                .Where(s => s != "Accueil" && s != "")
                .zToStringValues("/");

            // Title: last trimmed text found in the "spbar" div (links included).
            data.title = xePost.XPathElements(".//div[@class='spbar']").DescendantTexts()
                .Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1)
                .LastOrDefault();

            XXElement xe = xePost.XPathElement(".//div[@class='shdinfo']");
            // NOTE: creationDate is not populated here; the ParseDateTime call on span[@class='date'] is disabled.
            data.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

            xe = xePost.XPathElement(".//div[@class='maincont']");
            data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
                .Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, request.Url))
                .ToList();

            // Images are always loaded, regardless of request.LoadImage
            // (original note: forced in order to get image width and height).
            pb.old.Http_v2.LoadImageFromWeb(data.images);

            data.SetTextValues(xe.XPathElements(".//div").DescendantTexts());

            // Download links: every href under "div/div" except links to known image hosts or image files.
            // Examples of rejected links:
            //   http://prezup.eu http://pixhst.com/avaxhome/27/36/002e3627.jpeg http://www.zupmage.eu/i/R1UgqdXn4F.jpg
            //   http://i.imgur.com/Gu7hagN.jpg http://img11.hostingpics.net/pics/591623liens.png http://www.hapshack.com/images/jUfTZ.gif
            //   http://pixhst.com/pictures/3029467
            // URLs are compared ordinally so the result does not depend on the current culture.
            // Only the ".jpg" suffix carries a dot; the other suffixes are kept exactly as they were.
            foreach (XXElement xe2 in xe.XPathElements("div/div"))
            {
                data.downloadLinks.AddRange(xe2.XPathValues(".//a/@href").Where(url =>
                    !url.StartsWith("http://prezup.eu", StringComparison.Ordinal)
                    && !url.StartsWith("http://pixhst.com", StringComparison.Ordinal)
                    && !url.EndsWith(".jpg", StringComparison.Ordinal)
                    && !url.EndsWith("jpeg", StringComparison.Ordinal)
                    && !url.EndsWith("png", StringComparison.Ordinal)
                    && !url.EndsWith("gif", StringComparison.Ordinal)));
            }

            return data;
        }
Пример #20
0
        /// <summary>
        /// Reads a telechargement-plus listing page: the url of the next page and one
        /// <see cref="TelechargementPlus_PostHeader"/> per "base shortstory" block.
        /// </summary>
        /// <param name="loadDataFromWeb">Loaded page; its XML document and Url are read.</param>
        /// <param name="loadImage">When true, the images of each post header are loaded from the web.</param>
        /// <returns>The header page with its next-page url and post headers.</returns>
        public static TelechargementPlus_HeaderPage LoadHeaderFromWeb_GetData(LoadDataFromWeb_v1<TelechargementPlus_HeaderPage> loadDataFromWeb, bool loadImage = false)
        {
            XXElement root = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
            string pageUrl = loadDataFromWeb.Url;
            TelechargementPlus_HeaderPage page = new TelechargementPlus_HeaderPage();

            // Next page link:
            //   <div class="navigation">
            //     <div align="center">
            //       <span>Prev.</span> <span>1</span>
            //       <a href="http://www.telechargement-plus.com/e-book-magazines/page/2/">2</a>
            //       ...
            //       <a href="http://www.telechargement-plus.com/e-book-magazines/page/2/">Next</a>
            //     </div>
            //   </div>
            string nextHref = root.XPathValue("//div[@class='navigation']//a[text()='Next']/@href");
            page.urlNextPage = zurl.GetUrl(pageUrl, nextHref);

            // Post list: one <div class="base shortstory"> per post.
            List<TelechargementPlus_PostHeader> posts = new List<TelechargementPlus_PostHeader>();
            foreach (XXElement postElement in root.XPathElements("//div[@class='base shortstory']"))
            {
                TelechargementPlus_PostHeader post = new TelechargementPlus_PostHeader
                {
                    sourceUrl = pageUrl,
                    loadFromWebDate = DateTime.Now
                };

                // Title link:
                //   <h1 class="shd">
                //     <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/86236-multi-ici-paris-n3562-9-au-15-octobre-2013.html">
                //       [Multi] Ici Paris N°3562 - 9 au 15 Octobre 2013
                //     </a>
                //   </h1>
                XXElement titleLink = postElement.XPathElement(".//*[@class='shd']//a");
                post.urlDetail = zurl.GetUrl(pageUrl, titleLink.XPathValue("@href"));
                string rawTitle = TelechargementPlus.TrimFunc1(titleLink.XPathValue(".//text()"));
                post.title = TelechargementPlus.ExtractTextValues(post.infos, rawTitle);

                // Author, date and category:
                //   <div class="shdinf">
                //     <div class="shdinf">
                //       <span class="rcol">Auteur: <a href="http://www.telechargement-plus.com/user/bakafa/">bakafa</a></span>
                //       <span class="date"><b><a href="http://www.telechargement-plus.com/2013/10/09/">Aujourd'hui, 17:13</a></b></span>
                //       <span class="lcol">Cat&eacute;gorie:
                //         <a href="http://www.telechargement-plus.com/e-book-magazines/">E-Book / Magazines</a> &raquo;
                //         <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/">Magazines</a>
                //       </span>
                //     </div>
                //   </div>
                XXElement infoBlock = postElement.XPathElement(".//div[@class='shdinf']/div[@class='shdinf']");
                post.postAuthor = infoBlock.XPathValue(".//span[@class='rcol']//a//text()");
                string dateText = infoBlock.XPathValue(".//span[@class='date']//text()");
                post.creationDate = TelechargementPlus.ParseDateTime(dateText);
                post.category = infoBlock.XPathElements(".//span[@class='lcol']")
                    .DescendantTexts()
                    .Select(TelechargementPlus.TrimFunc1)
                    .Where(s => !(s == "E-Book / Magazines" || s == "Catégorie:" || s == ""))
                    .zToStringValues("/");

                // Post body:
                //   <span id="post-img">
                //     <div id="news-id-86236" style="display: inline;">
                //       <div style="text-align: center;">
                //         <img src="http://zupimages.net/up/3/1515486591.jpeg" alt="..." title="..." />
                //         <b>Ici Paris N°3562 - 9 au 15 Octobre 2013<br />French | 52 pages | HQ PDF | 101 MB</b>
                //         ... description text ...
                //       </div>
                //     </div>
                //   </span>
                XXElement bodyBlock = postElement.XPathElement(".//span[@id='post-img']//div[starts-with(@id, 'news-id')]");

                // Keep every image of the body except those listed in TelechargementPlus.ImagesToSkip.
                post.images = bodyBlock
                    .DescendantNodes(node => XmlDescendant.ImageFilter(node))
                    .Select(imageNode => new pb.old.ImageHtml((XElement)imageNode, pageUrl))
                    .Where(image => !TelechargementPlus.ImagesToSkip.ContainsKey(image.Source))
                    .ToList();
                if (loadImage)
                    pb.old.Http_v2.LoadImageFromWeb(post.images);

                post.SetTextValues(bodyBlock.DescendantTexts());

                posts.Add(post);
            }

            page.postHeaders = posts.ToArray();
            return page;
        }
Пример #21
0
        /// <summary>
        /// Builds an <see cref="Ebookdz_PostDetail_v2"/> from a loaded ebookdz thread page:
        /// title, category, print type, creation date, author, images, description/infos and download links.
        /// </summary>
        /// <param name="webResult">Loaded page; its document, request url, LoadImage flag and load date are read.</param>
        /// <returns>The populated post detail.</returns>
        private static Ebookdz_PostDetail_v2 GetData(WebResult webResult)
        {
            XXElement xeSource = new XXElement(webResult.Http.zGetXDocument().Root);
            Ebookdz_PostDetail_v2 data = new Ebookdz_PostDetail_v2();
            data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = webResult.LoadFromWebDate;
            data.Id = _GetPostDetailKey(webResult.WebRequest.HttpRequest);

            // <div class="body_bd">
            XXElement xePost = xeSource.XPathElement("//div[@class='body_bd']");

            // Title sample: "Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015"
            // zNotNullFunc is used instead of calling Trim directly on the XPath value.
            data.Title = xePost.XPathValue(".//div[@id='pagetitle']//a//text()").zNotNullFunc(s => s.Trim(DownloadPrint.TrimChars));
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
            if (titleInfos.foundInfo)
            {
                data.OriginalTitle = data.Title;
                data.Title = titleInfos.title;
                data.Infos.SetValues(titleInfos.infos);
            }

            // Breadcrumb sample:
            //   Forum / Journaux / Presse quotidienne / Le Monde / Journal Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
            // The category keeps every breadcrumb text except "forum" and the entry ending with the post title.
            // The title-suffix test only runs when there is a non-empty title:
            //   - EndsWith(null) throws ArgumentNullException,
            //   - EndsWith("") is true for every text and would drop the whole category.
            string lowerTitle = data.Title != null ? data.Title.ToLowerInvariant() : null;
            bool hasTitle = !string.IsNullOrEmpty(lowerTitle);
            data.Category = xePost.XPathElements(".//div[@id='breadcrumb']//a").DescendantTexts()
                .Where(text =>
                {
                    text = text.ToLowerInvariant();
                    // Both strings are already invariant-lowercased, so an ordinal comparison is enough.
                    return text != "forum" && !(hasTitle && text.EndsWith(lowerTitle, System.StringComparison.Ordinal));
                })
                .Select(DownloadPrint.Trim).zToStringValues("/");
            string category = data.Category.ToLowerInvariant();
            data.PrintType = GetPrintType(category);

            // <div id="postlist" class="postlist restrain">
            XXElement xe = xePost.XPathElement(".//div[@id='postlist']");

            // Date samples: "Aujourd'hui, 07h32" - "Aujourd'hui, 10h51" - "Hier, 12h55" - "22/02/2014, 21h09"
            // The date text is the content of the "posthead" div with <a> nodes skipped.
            XXElement xe2 = xe.XPathElement(".//div[@class='posthead']");
            string date = xe2.DescendantTexts(node => node.zGetName() != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode).zToStringValues("");
            // Replace non-breaking spaces (U+00A0) with regular spaces before parsing.
            date = date.Replace('\xA0', ' ');
            data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, webResult.LoadFromWebDate, @"d/M/yyyy, HH\hmm", @"d-M-yyyy, HH\hmm");
            if (data.PostCreationDate == null)
                pb.Trace.WriteLine("unknow post creation date \"{0}\"", date);
            if (__trace)
                pb.Trace.WriteLine("post creation date {0} - \"{1}\"", data.PostCreationDate, date);

            data.PostAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()").zNotNullFunc(s => s.Trim(DownloadPrint.TrimChars));

            // <div class="postbody">
            xe = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote/div");

            // One WebImage per image node, with its src resolved against the page url.
            data.Images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src")))).ToArray();

            // Load the images when the request asks for it (original note: needed to get image width and height).
            if (webResult.WebRequest.LoadImage)
                data.Images = DownloadPrint.LoadImages(data.Images).ToArray();

            // Description and infos (original note: language, size, nbPages) come from the post texts, <a> elements skipped.
            PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode), data.Title);
            data.Description = textValues.description;
            data.Infos.SetValues(textValues.infos);

            // Links are collected from the whole blockquote, not only from its "div" child,
            // in order to get the links of http://www.ebookdz.com/forum/showthread.php?t=113291
            data.DownloadLinks = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote").XPathValues(".//a/@href").ToArray();

            if (__trace)
                pb.Trace.WriteLine(data.zToJson());

            return data;
        }