Exemple #1
0
        protected override bool _MoveNext()
        {
            while (_xmlEnum.MoveNext())
            {
                // xe = xeArticle.XPathElement("./header//a");
                // url = xe.XPathValue("@href");
                // title = xe.XPathValue(".//text()");
                // xe = xeArticle.XPathElement(".//div[@class='entry_top']");
                // xe2 = xe.XPathElement(".//img");

                XXElement xeArticle = _xmlEnum.Current;
                _postHeader           = new Pdf4frPostHeader();
                _postHeader.sourceUrl = _sourceUrl;
                XXElement xe = xeArticle.XPathElement("./header//a");
                _postHeader.url   = xe.XPathValue("@href");
                _postHeader.title = xe.XPathValue(".//text()");
                xe = xeArticle.XPathElement(".//div[@class='entry_top']");
                ////_postHeader.image = xe.XPathImage(".//img", _url);
                //foreach (string s in xe.XElement.zDescendantTextList())
                foreach (string s in xe.XElement.zDescendantTexts())
                {
                    _postHeader.SetInfo(s);
                }

                if (_loadImage && _postHeader.image.Source != null)
                {
                    Http_v2.LoadImageFromWeb(_postHeader.image.Source);
                }
                xe = xeArticle.XPathElement(".//footer");
                return(true);
            }
            return(false);
        }
Exemple #2
0
        public bool MoveNext()
        {
            while (_xmlEnum.MoveNext())
            {
                XXElement xeHeader = _xmlEnum.Current;
                _header                 = new Gesat_HeaderCompany();
                _header.sourceUrl       = _url;
                _header.loadFromWebDate = DateTime.Now;

                //<span class="NOM"><a title="ESAT BETTY LAUNAY-MOULIN VERT" href="/Gesat/Hauts-de-Seine,92/Bois-Colombes,35494/esat-betty-launay-moulin-vert-competences-et-handicap-92,e1837/">ESAT BETTY LAUNAY-MOULIN VERT</a></span>
                //_header.companyName = xeHeader.ExplicitXPathValue(".//span[@class='NOM']//a//text()");
                XXElement xe = xeHeader.XPathElement(".//span[@class='NOM']//a");
                if (xe != null)
                {
                    _header.url = GetUrl(xe.ExplicitXPathValue("@href"));
                    //_header.name = xe.ExplicitXPathValue(".//text()", _trimFunc1);
                    _header.name = _trimFunc1(xe.ExplicitXPathValue(".//text()"));
                }
                //<span class="VILLE">E.S.A.T.<br />Bois-Colombes (92)</span>
                xe = xeHeader.XPathElement(".//span[@class='VILLE']");
                if (xe != null)
                {
                    //IEnumerator<string> texts = xe.DescendantTextList().GetEnumerator();
                    IEnumerator <string> texts = xe.DescendantTexts().GetEnumerator();
                    if (texts.MoveNext())
                    {
                        _header.type = texts.Current.Trim();
                    }
                    else
                    {
                        Trace.CurrentTrace.WriteLine("error companyType not found");
                    }
                    if (texts.MoveNext())
                    {
                        _header.location = texts.Current.Trim();
                    }
                    else
                    {
                        Trace.CurrentTrace.WriteLine("error companyLocation not found");
                    }
                }
                // <span class="TELEPHONE">01 47 86 11 48</span>
                //_header.phone = xeHeader.ExplicitXPathValue(".//span[@class='TELEPHONE']//text()", _trimFunc1);
                _header.phone = _trimFunc1(xeHeader.ExplicitXPathValue(".//span[@class='TELEPHONE']//text()"));
                //<img info_bulle="Signataire de la charte Ethique et Valeurs" border="0" alt="/images/bullesGesat/pictoCharte.png" src="/images/bullesGesat/pictoCharte.png" style=" border: 0;" />
                //<img info_bulle="Lauréat des trophées HandiResponsables 2013" border="0" alt="/images/bullesGesat/LAURIERS-OR-2013.png" src="/images/bullesGesat/LAURIERS-OR-2013.png" style=" border: 0;" />
                //_header.infos = xeHeader.XPathValues(".//img/@info_bulle", _trimFunc1);
                _header.infos = xeHeader.XPathValues(".//img/@info_bulle").Select(_trimFunc1).ToArray();
                //_header.SetInfo(xeHeader.XPathValues(".//img/@info_bulle"));
                return(true);
            }
            return(false);
        }
Exemple #3
0
        public static RapideDdl_PostDetail LoadPostDetailFromWeb(pb.Web.v1.RequestFromWeb_v2 request)
        {
            XXElement            xeSource = new XXElement(request.GetXmlDocument().Root);
            RapideDdl_PostDetail data     = new RapideDdl_PostDetail();

            data.sourceUrl       = request.Url;
            data.loadFromWebDate = DateTime.Now;

            XXElement xePost = xeSource.XPathElement("//div[@class='lcolomn mainside']");

            //data.category = xePost.DescendantTextList(".//div[@class='spbar']//a").Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1).Where(s => s != "Accueil" && s != "").zToStringValues("/");
            data.category = xePost.XPathElements(".//div[@class='spbar']//a").DescendantTexts().Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1).Where(s => s != "Accueil" && s != "").zToStringValues("/");

            //data.title = RapideDdl.ExtractTextValues(data.infos, xePost.XPathValue(".//div[@class='base fullstory']//text()", RapideDdl.TrimFunc1));
            //data.title = xePost.DescendantTextList(".//div[@class='spbar']", node => !(node is XElement) || ((XElement)node).Name != "a", RapideDdl.TrimFunc1).FirstOrDefault();
            //data.title = xePost.XPathValue(".//div[@class='spbar']/text()", RapideDdl.TrimFunc1);
            //data.title = xePost.DescendantTextList(".//div[@class='spbar']", func: Download.Print.RapideDdl.RapideDdl.TrimFunc1).LastOrDefault();
            data.title = xePost.XPathElements(".//div[@class='spbar']").DescendantTexts().Select(Download.Print.RapideDdl.RapideDdl.TrimFunc1).LastOrDefault();

            XXElement xe = xePost.XPathElement(".//div[@class='shdinfo']");

            //////////////data.creationDate = Download.Print.RapideDdl.RapideDdl.ParseDateTime(xe.XPathValue(".//span[@class='date']//text()"), (DateTime)data.loadFromWebDate);
            data.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

            xe = xePost.XPathElement(".//div[@class='maincont']");
            //data.images = xe.XPathImages(request.Url, nodeFilter: node => node is XElement && ((XElement)node).Name == "a");
            //data.images = xe.XPathImages(request.Url);
            //data.images = xe.XPathImages(xeImg => new ImageHtml(xeImg, request.Url)).ToList();
            data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, request.Url)).ToList();

            //if (request.LoadImage)
            // force load image to get image width and height
            pb.old.Http_v2.LoadImageFromWeb(data.images);

            //data.SetTextValues(xe.DescendantTextList(".//span[@id='post-img']", node => node is XElement && ((XElement)node).Name == "a" ? false : true));
            //data.SetTextValues(xe.DescendantTextList(".//div"));
            data.SetTextValues(xe.XPathElements(".//div").DescendantTexts());

            //data.downloadLinks.AddRange(xe.XPathValues(".//div[2]//a/@href"));
            //foreach (XXElement xe2 in xe.XPathElements("div/div").Skip(1))
            foreach (XXElement xe2 in xe.XPathElements("div/div"))
            {
                // http://prezup.eu http://pixhst.com/avaxhome/27/36/002e3627.jpeg http://www.zupmage.eu/i/R1UgqdXn4F.jpg
                // http://i.imgur.com/Gu7hagN.jpg http://img11.hostingpics.net/pics/591623liens.png http://www.hapshack.com/images/jUfTZ.gif
                // http://pixhst.com/pictures/3029467
                //data.downloadLinks.AddRange(xe2.XPathValues(".//a/@href").Where(url => !url.StartsWith("http://prezup.eu") && !url.StartsWith("http://pixhst.com") && !url.StartsWith("http://www.zupmage.eu")));
                data.downloadLinks.AddRange(xe2.XPathValues(".//a/@href").Where(url => !url.StartsWith("http://prezup.eu") && !url.StartsWith("http://pixhst.com") &&
                                                                                !url.EndsWith(".jpg") && !url.EndsWith("jpeg") && !url.EndsWith("png") && !url.EndsWith("gif")));
            }

            return(data);
        }
Exemple #4
0
        protected override bool _MoveNext()
        {
            while (_xmlEnum.MoveNext())
            {
                // xe = xeArticle.XPathElement("./header//a");
                // url = xe.XPathValue("@href");
                // title = xe.XPathValue(".//text()");
                // xe = xeArticle.XPathElement(".//div[@class='entry_top']");
                // xe2 = xe.XPathElement(".//img");

                XXElement xeArticle = _xmlEnum.Current;
                _postHeader           = new Magazine3kPostHeader();
                _postHeader.sourceUrl = _sourceUrl;

                //<h2 class="title">
                //    <a href="http://zone-ebooks.com/journaux/le-parisien-journal-de-paris-supp-economie-du-lundi-07-octobre-pdf.html"
                //        rel="bookmark" title="Lien permanent: Le Parisien + Journal de Paris &amp; supp Economie du lundi 07 octobre">
                //        Le Parisien + Journal de Paris &amp; supp Economie du lundi 07 octobre</a>
                //</h2>

                XXElement xe = xeArticle.XPathElement(".//a");
                _postHeader.url   = xe.XPathValue("@href");
                _postHeader.title = xe.XPathValue(".//text()");
                //xe = xeArticle.XPathElement(".//div[@class='entry_top']");
                //_postHeader.image = xe.XPathImage(".//img");
                //foreach (string s in xe.XElement.zDescendantTexts())
                //    _postHeader.SetInfo(s);

                //if (_loadImage && _postHeader.image.Source != null)
                //    Http2.LoadImageFromWeb(_postHeader.image.Source);
                //xe = xeArticle.XPathElement(".//footer");
                return(true);
            }
            return(false);
        }
        // header get data, from WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>
        protected override IEnumDataPages <PostHeader> GetHeaderPageData(HttpResult <string> httpResult)
        {
            XXElement xeSource = httpResult.zGetXDocument().zXXElement();
            string    url      = httpResult.Http.HttpRequest.Url;
            PostHeaderHeaderDataPages_v2 data = new PostHeaderHeaderDataPages_v2();

            data.SourceUrl       = url;
            data.LoadFromWebDate = httpResult.Http.RequestTime;
            data.Id = GetPageKey(httpResult.Http.HttpRequest);

            // <div id='dle-content'>
            // <div class="leftpane">
            // <div class="movieposter" title="Watch Movie Pachamama : Cuisine des premières nations">
            // <a href="http://www.telecharger-magazine.com/livres/3833-pachamama-cuisine-des-premires-nations.html">
            // <a href="http://www.telecharger-magazine.com/livres/3833-pachamama-cuisine-des-premires-nations.html" title="Pachamama : Cuisine des premières nations">
            // <img src="http://pxhst.co/avaxhome/cd/2a/00152acd.jpeg" width="110" height="150" alt="télécharger Pachamama : Cuisine des premières nations" title="télécharger Pachamama : Cuisine des premières nations" />
            // </a>
            // </div>
            // </div>
            // ...
            // <div class="navigation" align="center">
            // <div class="clear"></div>
            // <span>&#8592; Previous</span> <span>1</span>
            // <a href="http://www.telecharger-magazine.com/page/2/">2</a>
            // ...
            // <a href="http://www.telecharger-magazine.com/page/2/">Next &#8594;</a>
            // <div class="clear"></div>
            // </div>

            XXElement xe = xeSource.XPathElement("//div[@id='dle-content']");

            data.UrlNextPage = zurl.GetUrl(url, xe.XPathValue(".//a[starts-with(text(), 'Next')]/@href"));

            IEnumerable <XXElement> xeHeaders = xe.XPathElements(".//div[@class='leftpane']");
            List <PostHeader>       headers   = new List <PostHeader>();

            foreach (XXElement xeHeader in xeHeaders)
            {
                PostHeader header = new PostHeader();
                header.SourceUrl       = url;
                header.LoadFromWebDate = httpResult.Http.RequestTime;

                if (xeHeader.XPathValue("@class") == "page-nav")
                {
                    continue;
                }

                XXElement xe2 = xeHeader.XPathElement(".//a/a");
                header.Title     = xe2.AttribValue("title");
                header.UrlDetail = xe2.AttribValue("href");

                headers.Add(header);
            }
            data.Data = headers.ToArray();

            return(data);
        }
Exemple #6
0
        protected override Unea_DetailCompany1 GetData()
        {
            XXElement           xeSource = new XXElement(GetXmlDocument().Root);
            Unea_DetailCompany1 data     = new Unea_DetailCompany1();

            data.sourceUrl       = Url;
            data.loadFromWebDate = DateTime.Now;

            // <div class="fiche">
            XXElement xeFiche = xeSource.XPathElement(".//div[@class='fiche']");

            //// <div class="fiche_bloc">
            //IEnumerator<XXElement> xeFicheBlocs = xeFiche.XPathElements(".//div[@class='fiche_bloc']").GetEnumerator();

            //// fiche_bloc no 1
            //if (!xeFicheBlocs.MoveNext())
            //{
            //    Trace.CurrentTrace.WriteLine("error fiche_bloc no 1 not found \"<div class='fiche_bloc'>\"");
            //    return data;
            //}
            //XXElement xe = xeFicheBlocs.Current;
            //GetDataFicheBlocNo1(data, xe);

            //// fiche_bloc no 2
            //if (!xeFicheBlocs.MoveNext())
            //{
            //    Trace.CurrentTrace.WriteLine("error fiche_bloc no 2 not found \"<div class='fiche_bloc'>\"");
            //    return data;
            //}
            //xe = xeFicheBlocs.Current;
            //GetDataFicheBlocNo2(data, xe);

            //// fiche_bloc no 3
            //if (!xeFicheBlocs.MoveNext())
            //{
            //    Trace.CurrentTrace.WriteLine("error fiche_bloc no 3 not found \"<div class='fiche_bloc'>\"");
            //    return data;
            //}
            //xe = xeFicheBlocs.Current;
            //GetDataFicheBlocNo3(data, xe);

            GetNewDataFicheBloc(data, xeFiche);

            return(data);
        }
Exemple #7
0
        protected void _GetDetailData(XXElement xeSource, BlogDemoorDetailData data)
        {
            // <div id="content">

            XXElement xe = xeSource.XPathElement("//div[@id='content']//div[@class='item_div']");

            data.Title = xe.XPathValue(".//h2//text()");
            string date = xe.XPathValue(".//div[@class='dateheader']/text()");
            Date   d;

            if (Date.TryParseExact(date, "d MMMM yyyy", __cultureInfo, DateTimeStyles.None, out d))
            {
                data.Date = d;
            }
            else
            {
                Trace.WriteLine($"date not found \"{date}\"");
            }

            //<div class="articlebody" itemprop="articleBody">
            XXElement xeBody = xe.XPathElement(".//div[@class='articlebody']");

            if (xeBody.XElement != null)
            {
                data.Content = xeBody.XElement.ToString();
            }

            //data.Images = xeBody.XPathValues(".//a/@href").Where(url => new Uri(url).Host.EndsWith(".canalblog.com")).Select(url => new WebImage(zurl.GetUrl(data.SourceUrl, url))).ToArray();
            data.Images = GetImageUrls(xeBody.XPathElements(".//img")).Select(url => new WebImage(zurl.GetUrl(data.SourceUrl, url))).ToArray();

            // force load image to get image width and height
            //if (webResult.WebRequest.LoadImage)
            //    data.LoadImages();

            //if (__trace)
            //    pb.Trace.WriteLine(data.zToJson());
        }
Exemple #8
0
        public static void Test_DebridLink_01()
        {
            //HttpRun.Load("https://api.debrid-link.fr/rest/token/1R6858wC6lO15X8i/new");
            string urlBase = "https://api.debrid-link.fr/rest/";
            //string login = RunSource.CurrentRunSource.Config.GetConfig("LocalConfig").GetExplicit("DownloadAutomateManager/DebridLink/Login");
            string login    = XmlConfig.CurrentConfig.GetConfig("LocalConfig").GetExplicit("DownloadAutomateManager/DebridLink/Login");
            string password = XmlConfig.CurrentConfig.GetConfig("LocalConfig").GetExplicit("DownloadAutomateManager/DebridLink/Password");
            //string publickey = "1R6858wC6lO15X8i";
            string publickey = XmlConfig.CurrentConfig.GetConfig("LocalConfig").GetExplicit("DownloadAutomateManager/DebridLink/PublicKey");
            //string sessidTime = "all";
            string sessidTime = XmlConfig.CurrentConfig.GetConfig("LocalConfig").GetExplicit("DownloadAutomateManager/DebridLink/SessidTime");
            string url        = urlBase + string.Format("token/{0}/new", publickey);
            HttpRequestParameters requestParameters = new HttpRequestParameters {
                Encoding = Encoding.UTF8
            };
            Http http = HttpManager.CurrentHttpManager.Load(new HttpRequest {
                Url = url
            }, requestParameters);
            DateTime dt = DateTime.Now;

            http.ResultText.zTraceJson();
            BsonDocument doc           = BsonSerializer.Deserialize <BsonDocument>(http.ResultText);
            string       token         = doc.zGet("value.token").zAsString();
            string       validTokenUrl = doc.zGet("value.validTokenUrl").zAsString();
            string       key           = doc.zGet("value.key").zAsString();
            int          ts            = doc.zGet("ts").zAsInt();

            Trace.WriteLine("request time   : \"{0:dd/MM/yyyy HH:mm:ss}\"", dt);
            Trace.WriteLine("result         : \"{0}\"", doc.zGet("result").zAsString());
            Trace.WriteLine("token          : \"{0}\"", token);
            Trace.WriteLine("validTokenUrl  : \"{0}\"", validTokenUrl);
            Trace.WriteLine("key            : \"{0}\"", key);
            Trace.WriteLine("ts             : \"{0}\"", ts);
            Trace.WriteLine("ts             : \"{0:dd/MM/yyyy HH:mm:ss}\"", zdate.UnixTimeStampToDateTime(ts));
            Trace.WriteLine("ts             : \"{0}\"", zdate.UnixTimeStampToDateTime(ts) - dt);

            // validTokenUrl : "https://secure.debrid-link.fr/user/2_2d481d8991e4db60f43d24d9d387b75699db7a0157182967/login"
            http = HttpManager.CurrentHttpManager.Load(new HttpRequest {
                Url = validTokenUrl
            }, requestParameters);

            // <script>if (window!=window.top) { top.location.href='https://secure.debrid-link.fr/login'; }</script>
            // <form action='' method='POST' class='form-horizontal'>
            // <input type='text' class='form-control' name='user'>
            // <input type='password' class='form-control' name='password'>
            // <input type='hidden' value='10_a3a206c4398f195283a4843d44f017f3211275e443747173' name='token'>
            // <input type='submit' style='display:none'>
            // <button type='submit' name='authorizedToken' value='1' class='btn btn-dl'>Envoyer</button>

            XXElement xeSource = http.zGetXDocument().zXXElement();

            // script : if (window!=window.top) { top.location.href='https://secure.debrid-link.fr/login'; }
            string script = xeSource.XPathValue("//head//script//text()");

            if (script == null)
            {
                Trace.WriteLine("//head//script not found");
                return;
            }
            Trace.WriteLine("script : \"{0}\"", script);
            Regex rg    = new Regex("top\\.location\\.href=[\"'](.*)[\"']", RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);
            Match match = rg.Match(script);

            if (!match.Success)
            {
                Trace.WriteLine("top.location.href='...' not found in script");
                return;
            }
            url = match.Groups[1].Value;
            Trace.WriteLine("login url : \"{0}\"", url);

            XXElement xeForm = xeSource.XPathElement("//form");
            string    action = xeForm.AttribValue("action");

            Trace.WriteLine("form action : \"{0}\"", action);
            if (action != null && action != "")
            {
                url = action;
            }
            HttpRequestMethod method = Http.GetHttpRequestMethod(xeForm.AttribValue("method"));

            Trace.WriteLine("form method : {0}", method);

            //XmlConfig localConfig = new XmlConfig(RunSource.CurrentRunSource.Config.GetExplicit("LocalConfig"));
            //string login = localConfig.GetExplicit("DownloadAutomateManager/DebridLink/Login");
            //string password = localConfig.GetExplicit("DownloadAutomateManager/DebridLink/Password");
            StringBuilder content = new StringBuilder();
            bool          first = true;
            string        name, value;

            //foreach (XXElement xe in xeForm.XPathElements(".//input"))
            foreach (XXElement xe in xeForm.DescendantFormItems())
            {
                name = xe.AttribValue("name");
                if (name == null)
                {
                    continue;
                }
                if (name == "user")
                {
                    value = login;
                }
                else if (name == "password")
                {
                    value = password;
                }
                else if (name == "sessidTime")
                {
                    value = sessidTime;
                }
                else
                {
                    value = xe.AttribValue("value");
                }
                if (!first)
                {
                    content.Append('&');
                }
                content.AppendFormat("{0}={1}", name, value);
                Trace.WriteLine("{0}={1}", name, value);
                first = false;
            }
            //XXElement xeButton = xeForm.XPathElement(".//button");
            //name = xeButton.AttribValue("name");
            //value = xeButton.AttribValue("value");
            //if (name != null && value != null)
            //{
            //    content.AppendFormat("&{0}={1}", name, value);
            //    Trace.WriteLine("{0}={1}", name, value);
            //}

            // "user=la_beuze&password=xxxxxx&sessidTime=all&token=10_56b51ee12ad5dabcac620230cda436cab94bd37154742765&authorizedToken=1"
            //  user=la_beuze&password=pbeuz0&sessidTime=all&token=10_3205776c76bb0479b1d57e9bf834b38ae2c5d10669848384&authorizedToken=1
            Trace.WriteLine("content : \"{0}\"", content.ToString());
            http = HttpManager.CurrentHttpManager.Load(new HttpRequest {
                Url = url, Method = method, Content = content.ToString()
            }, requestParameters);

            // <div class='panel-body'>
            // <div class='alert alert-success'>
            // La session a bien été activée. Vous pouvez utiliser l'application API Test
            // </div>
            // </div>
            xeSource = http.zGetXDocument().zXXElement();
            string loginMessage = xeSource.XPathValue("//div[@class='panel-body']//text()").Trim();

            Trace.WriteLine("login message : \"{0}\"", loginMessage);
        }
Exemple #9
0
        private void _Login(string url)
        {
            // https://debrid-link.fr/user/2_21c744ba958f13fac08ee5c8855f72ab9a3b3e3224789126/login
            Http.Http http = HttpManager.CurrentHttpManager.Load(new HttpRequest {
                Url = url
            }, _requestParameters);
            XXElement xeSource = HttpManager.CurrentHttpManager.GetXDocument(http).zXXElement();

            string    loginUrl = __loginUrl;
            XXElement xeForm   = xeSource.XPathElement("//form");

            string action = xeForm.AttribValue("action");

            if (action != null && action != "")
            {
                loginUrl = action;
            }

            string            method     = xeForm.AttribValue("method");
            HttpRequestMethod httpMethod = HttpRequestMethod.Get;

            if (method != null && method != "")
            {
                httpMethod = HttpTools.GetHttpRequestMethod(method);
            }

            StringBuilder content = new StringBuilder();
            bool          first = true;
            string        name, value;

            foreach (XXElement xe in xeForm.DescendantFormItems())
            {
                name = xe.AttribValue("name");
                if (name == null)
                {
                    continue;
                }
                if (name == "user")
                {
                    value = _login;
                }
                else if (name == "password")
                {
                    value = _password;
                }
                //else if (name == "sessidTime")
                //    value = GetConnexionLifetime(_connexionLifetime);
                else
                {
                    value = xe.AttribValue("value");
                }
                if (!first)
                {
                    content.Append('&');
                }
                content.AppendFormat("{0}={1}", name, value);
                if (__trace)
                {
                    if (name != "password")
                    {
                        pb.Trace.WriteLine("  {0}={1}", name, value);
                    }
                    else
                    {
                        pb.Trace.WriteLine("  {0}=xxx", name);
                    }
                }
                first = false;
            }

            if (__trace)
            {
                pb.Trace.WriteLine("  form login url              : \"{0}\"", loginUrl);
                pb.Trace.WriteLine("  form action                 : \"{0}\"", action);
                pb.Trace.WriteLine("  form method                 : {0}", httpMethod);
                //pb.Trace.WriteLine("  form values                 : {0}", content.ToString());
            }

            http = HttpManager.CurrentHttpManager.Load(new HttpRequest {
                Url = loginUrl, Method = httpMethod, Content = content.ToString()
            }, _requestParameters);

            xeSource = http.zGetXDocument().zXXElement();
            //<div class="alert alert-success">
            XXElement xeLogin = xeSource.XPathElement("//div[@class='alert alert-success']");

            if (xeLogin.XElement == null)
            {
                throw new PBException("can't login to debrid-link.fr");
            }
        }
Exemple #10
0
        //protected override GoldenDdl_PostDetail GetDataFromWeb(LoadDataFromWeb loadDataFromWeb)
        protected override IPost GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
        {
            XXElement            xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
            GoldenDdl_PostDetail data     = new GoldenDdl_PostDetail();

            data.sourceUrl       = loadDataFromWeb.request.Url;
            data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
            data.id = GetPostDetailKey(data.sourceUrl);

            XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");

            //data.category = xePost.DescendantTextList(".//div[@class='hdiin']//a").Select(DownloadPrint.TrimFunc1).zToStringValues("/");
            data.category = xePost.XPathElements(".//div[@class='hdiin']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
            string category = data.category.ToLowerInvariant();

            data.printType = GetPrintType(category);
            //pb.Trace.WriteLine("category \"{0}\" printType {1}", category, data.printType);

            //data.title = xePost.XPathValue(".//div[@class='bheading']//text()", DownloadPrint.Trim);
            data.title = xePost.XPathValue(".//div[@class='bheading']//text()").Trim(DownloadPrint.TrimChars);
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.title);

            if (titleInfos.foundInfo)
            {
                data.originalTitle = data.title;
                data.title         = titleInfos.title;
                data.infos.SetValues(titleInfos.infos);
            }

            string date = xePost.XPathValue(".//div[@class='datenews']//text()");

            data.creationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
            if (data.creationDate == null)
            {
                pb.Trace.WriteLine("unknow date time \"{0}\"", date);
            }
            if (__trace)
            {
                pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.creationDate, date);
            }

            data.postAuthor = xePost.XPathValue(".//div[@class='argr']//a//text()");

            XXElement xe = xePost.XPathElement(".//div[@class='maincont']");

            //data.images = xe.XPathImages(xeImg => new UrlImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).ToArray();
            data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).ToArray();


            // force load image to get image width and height
            if (loadDataFromWeb.request.LoadImage)
            {
                data.images = DownloadPrint.LoadImages(data.images).ToArray();
            }

            // get infos, description, language, size, nbPages
            //PrintTextValues_old textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_old(xe.DescendantTextList(nodeFilter: node => !(node is XElement) || ((XElement)node).Name != "a"), data.title);
            PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode), data.title);

            data.description = textValues.description;
            data.language    = textValues.language;
            data.size        = textValues.size;
            data.nbPages     = textValues.nbPages;
            data.infos.SetValues(textValues.infos);

            List <string> downloadLinks = new List <string>();

            foreach (XXElement xe2 in xe.XPathElements("div/div"))
            {
                // http://prezup.eu http://pixhst.com/avaxhome/27/36/002e3627.jpeg http://www.zupmage.eu/i/R1UgqdXn4F.jpg
                // http://i.imgur.com/Gu7hagN.jpg http://img11.hostingpics.net/pics/591623liens.png http://www.hapshack.com/images/jUfTZ.gif
                // http://pixhst.com/pictures/3029467
                downloadLinks.AddRange(xe2.XPathValues(".//a/@href").Where(url => !url.StartsWith("http://prezup.eu") && !url.StartsWith("http://pixhst.com") &&
                                                                           !url.EndsWith(".jpg") && !url.EndsWith("jpeg") && !url.EndsWith("png") && !url.EndsWith("gif")));
            }
            data.downloadLinks = downloadLinks.ToArray();

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
Exemple #11
0
        protected override IPost GetData(LoadDataFromWeb_v4 loadDataFromWeb)
        {
            XXElement          xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);
            Ebookdz_PostDetail data     = new Ebookdz_PostDetail();

            data.SourceUrl       = loadDataFromWeb.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;
            data.Id = GetPostDetailKey(loadDataFromWeb.WebRequest.HttpRequest);

            // <div class="body_bd">
            XXElement xePost = xeSource.XPathElement("//div[@class='body_bd']");

            // Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
            //data.Title = xePost.XPathValue(".//div[@id='pagetitle']//a//text()", DownloadPrint.Trim);
            data.Title = xePost.XPathValue(".//div[@id='pagetitle']//a//text()").Trim(DownloadPrint.TrimChars);
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);

            if (titleInfos.foundInfo)
            {
                data.OriginalTitle = data.Title;
                data.Title         = titleInfos.title;
                data.Infos.SetValues(titleInfos.infos);
            }

            // Forum / Journaux / Presse quotidienne / Le Monde / Journal Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
            string lowerTitle = null;

            if (data.Title != null)
            {
                lowerTitle = data.Title.ToLowerInvariant();
            }
            //data.Category = xePost.DescendantTextList(".//div[@id='breadcrumb']//a").Where(text => { text = text.ToLowerInvariant(); return text != "forum" && !text.EndsWith(lowerTitle); }).Select(DownloadPrint.TrimFunc1).zToStringValues("/");
            data.Category = xePost.XPathElements(".//div[@id='breadcrumb']//a").DescendantTexts().Where(text => { text = text.ToLowerInvariant(); return(text != "forum" && !text.EndsWith(lowerTitle)); }).Select(DownloadPrint.Trim).zToStringValues("/");
            string category = data.Category.ToLowerInvariant();

            data.PrintType = GetPrintType(category);
            //Trace.WriteLine("category \"{0}\" printType {1}", category, data.printType);

            // <div id="postlist" class="postlist restrain">
            XXElement xe = xePost.XPathElement(".//div[@id='postlist']");

            // Aujourd'hui, 07h32 - Aujourd'hui, 10h51 - Hier, 12h55 - 22/02/2014, 21h09
            //string date = xe.DescendantTextList(".//div[@class='posthead']//text()", nodeFilter: node => node.zGetName() != "a").zToStringValues("");
            XXElement xe2 = xe.XPathElement(".//div[@class='posthead']");
            //string date = xe2.DescendantTextList(nodeFilter: node => node.zGetName() != "a").zToStringValues("");
            string date = xe2.DescendantTexts(node => node.zGetName() != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode).zToStringValues("");

            date = date.Replace('\xA0', ' ');
            data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.LoadFromWebDate, @"d/M/yyyy, HH\hmm", @"d-M-yyyy, HH\hmm");
            if (data.PostCreationDate == null)
            {
                pb.Trace.WriteLine("unknow post creation date \"{0}\"", date);
            }
            if (__trace)
            {
                pb.Trace.WriteLine("post creation date {0} - \"{1}\"", data.PostCreationDate, date);
            }

            //data.PostAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()", DownloadPrint.Trim);
            data.PostAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()").Trim(DownloadPrint.TrimChars);

            // <div class="postbody">
            xe = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote/div");

            //data.Images = xe.XPathImages(xeImg => new UrlImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src")))).ToArray();
            data.Images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src")))).ToArray();

            // force load image to get image width and height
            if (loadDataFromWeb.WebRequest.LoadImage)
            {
                data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
            }

            // get infos, description, language, size, nbPages
            // xe.DescendantTextList(nodeFilter: node => !(node is XElement) || ((XElement)node).Name != "a")
            PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode), data.Title);

            data.Description = textValues.description;
            data.Language    = textValues.language;
            data.Size        = textValues.size;
            data.NbPages     = textValues.nbPages;
            data.Infos.SetValues(textValues.infos);

            data.DownloadLinks = xe.XPathValues(".//a/@href").ToArray();

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
Exemple #12
0
        public void Connexion()
        {
            if (_connexionFile == null)
            {
                throw new PBException("DebriderDebridLink connexion file is null");
            }
            //if (_serverTimeFile == null)
            //    throw new PBException("DebriderDebridLink server time file is null");

            pb.Trace.WriteLine("{0:dd-MM-yyyy HH:mm:ss} - new connexion to debrid-link.fr", DateTime.Now);

            if (__trace)
            {
                pb.Trace.WriteLine("DebriderDebridLink.Connexion() :");
            }

            string url = __url + string.Format("/token/{0}/new", _publicKey);

            HttpRequestParameters requestParameters = new HttpRequestParameters {
                Encoding = Encoding.UTF8
            };
            DateTime dt = DateTime.Now;

            Http.Http http = HttpManager.CurrentHttpManager.Load(new HttpRequest {
                Url = url
            }, requestParameters);
            BsonDocument result = BsonSerializer.Deserialize <BsonDocument>(http.ResultText);

            if (__trace)
            {
                pb.Trace.WriteLine("  result                      :");
                pb.Trace.WriteLine(result.zToJson());
            }
            DebridLinkConnexion connexion = new DebridLinkConnexion();

            //DebridLinkServerTime serverTime = new DebridLinkServerTime();
            connexion.ConnexionTime = dt;
            //string token = doc.zGet("value.token").zAsString();
            connexion.Token = result.zGet("value.token").zAsString();
            string validTokenUrl = result.zGet("value.validTokenUrl").zAsString();

            //string key = doc.zGet("value.key").zAsString();
            connexion.Key = result.zGet("value.key").zAsString();
            int ts = result.zGet("ts").zAsInt();

            connexion.ClientTime        = dt;
            connexion.ServerTime        = zdate.UnixTimeStampToDateTime(ts);
            connexion.ServerTimeGap     = connexion.ServerTime - dt;
            connexion.ConnexionLifetime = _connexionLifetime;
            connexion.EndConnexionTime  = connexion.ConnexionTime + GetConnexionTimespan(connexion.ConnexionLifetime) - TimeSpan.FromMinutes(5);
            if (__trace)
            {
                pb.Trace.WriteLine("  request time                : \"{0:dd/MM/yyyy HH:mm:ss}\"", dt);
                pb.Trace.WriteLine("  result                      : \"{0}\"", result.zGet("result").zAsString());
                pb.Trace.WriteLine("  token                       : \"{0}\"", connexion.Token);
                pb.Trace.WriteLine("  validTokenUrl               : \"{0}\"", validTokenUrl);
                pb.Trace.WriteLine("  key                         : \"{0}\"", connexion.Key);
                pb.Trace.WriteLine("  server time                 : {0} - {1:dd/MM/yyyy HH:mm:ss}", ts, connexion.ServerTime);
                pb.Trace.WriteLine("  server time gap             : {0}", connexion.ServerTimeGap);
            }

            // validTokenUrl : "https://secure.debrid-link.fr/user/2_2d481d8991e4db60f43d24d9d387b75699db7a0157182967/login"
            http = HttpManager.CurrentHttpManager.Load(new HttpRequest {
                Url = validTokenUrl
            }, requestParameters);

            // <script>if (window!=window.top) { top.location.href='https://secure.debrid-link.fr/login'; }</script>
            // <form action='' method='POST' class='form-horizontal'>
            // <input type='text' class='form-control' name='user'>
            // <input type='password' class='form-control' name='password'>
            // <select name='sessidTime' class='form-control'>
            // <option value='all'  selected='selected'> Toujours</option>
            // ...
            // </select>
            // <input type='hidden' value='10_a3a206c4398f195283a4843d44f017f3211275e443747173' name='token'>
            // <button type='submit' name='authorizedToken' value='1' class='btn btn-dl'>Envoyer</button>
            // <input type='submit' style='display:none'>

            XXElement xeSource = http.zGetXDocument().zXXElement();

            // le script n'est plus là dans la page html 24/03/2015
            // script : if (window!=window.top) { top.location.href='https://secure.debrid-link.fr/login'; }
            //string script = xeSource.XPathValue("//head//script//text()");
            //if (script == null)
            //{
            //    //Trace.WriteLine("//head//script not found");
            //    //return;
            //    throw new PBException("DebriderDebridLink.Connect() : //head//script not found");
            //}
            //if (__trace)
            //    pb.Trace.WriteLine("  script                      : \"{0}\"", script);
            //Regex rg = new Regex("top\\.location\\.href=[\"'](.*)[\"']", RegexOptions.CultureInvariant | RegexOptions.IgnoreCase);
            //Match match = rg.Match(script);
            //if (!match.Success)
            //{
            //    //Trace.WriteLine("top.location.href='...' not found in script");
            //    //return;
            //    throw new PBException("DebriderDebridLink.Connect() : top.location.href='...' not found in script");
            //}
            //url = match.Groups[1].Value;

            url = "https://secure.debrid-link.fr/login";
            if (__trace)
            {
                pb.Trace.WriteLine("  login url                   : \"{0}\"", url);
            }

            XXElement xeForm = xeSource.XPathElement("//form");
            string    action = xeForm.AttribValue("action");

            if (__trace)
            {
                pb.Trace.WriteLine("  form action                 : \"{0}\"", action);
            }
            if (action != null && action != "")
            {
                url = action;
            }
            HttpRequestMethod method = HttpTools.GetHttpRequestMethod(xeForm.AttribValue("method"));

            if (__trace)
            {
                pb.Trace.WriteLine("  form method                 : {0}", method);
            }

            StringBuilder content = new StringBuilder();
            bool          first = true;
            string        name, value;

            foreach (XXElement xe in xeForm.DescendantFormItems())
            {
                name = xe.AttribValue("name");
                if (name == null)
                {
                    continue;
                }
                if (name == "user")
                {
                    value = _login;
                }
                else if (name == "password")
                {
                    value = _password;
                }
                else if (name == "sessidTime")
                {
                    value = GetConnexionLifetime(_connexionLifetime);
                }
                else
                {
                    value = xe.AttribValue("value");
                }
                if (!first)
                {
                    content.Append('&');
                }
                content.AppendFormat("{0}={1}", name, value);
                if (__trace)
                {
                    if (name != "password")
                    {
                        pb.Trace.WriteLine("  {0}={1}", name, value);
                    }
                    else
                    {
                        pb.Trace.WriteLine("  {0}=xxx", name);
                    }
                }
                first = false;
            }

            // "user=la_beuze&password=xxxxxx&sessidTime=all&token=10_56b51ee12ad5dabcac620230cda436cab94bd37154742765&authorizedToken=1"
            //if (__trace)
            //    pb.Trace.WriteLine("content : \"{0}\"", content.ToString());

            http = HttpManager.CurrentHttpManager.Load(new HttpRequest {
                Url = url, Method = method, Content = content.ToString()
            }, requestParameters);

            // <div class='panel-body'>
            // <div class='alert alert-success'>
            // La session a bien été activée. Vous pouvez utiliser l'application API Test
            // </div>
            // </div>
            xeSource = http.zGetXDocument().zXXElement();
            //string loginMessage = xeSource.ExplicitXPathValue("//div[@class='panel-body']//text()");
            string loginMessage = xeSource.ExplicitXPathValue("//div[@class='alert alert-success']//text()");

            if (__trace)
            {
                pb.Trace.WriteLine("  login message               : \"{0}\"", loginMessage);
            }
            //if (loginMessage == null || !loginMessage.Trim().StartsWith("La session a bien été activée", StringComparison.InvariantCultureIgnoreCase))
            if (loginMessage == null || !loginMessage.Trim().StartsWith("Vous avez été connecté avec succès", StringComparison.InvariantCultureIgnoreCase))
            {
                throw new PBException("DebriderDebridLink.Connect() : wrong login message \"{0}\"", loginMessage);
            }


            connexion.zSave(_connexionFile);
            _connexion = connexion;
            //serverTime.zSave(_serverTimeFile);
            //_serverTime = serverTime;
        }
Exemple #13
0
        protected override bool _MoveNext()
        {
            while (_xmlEnum.MoveNext())
            {
                // xe = xeArticle.XPathElement("./header//a");
                // url = xe.XPathValue("@href");
                // title = xe.XPathValue(".//text()");
                // xe = xeArticle.XPathElement(".//div[@class='entry_top']");
                // xe2 = xe.XPathElement(".//img");

                XXElement xeArticle = _xmlEnum.Current;
                _postHeader           = new ZoneEbooksPostHeader();
                _postHeader.sourceUrl = _sourceUrl;

                //<h2 class="title">
                //    <a href="http://zone-ebooks.com/journaux/le-parisien-journal-de-paris-supp-economie-du-lundi-07-octobre-pdf.html"
                //        rel="bookmark" title="Lien permanent: Le Parisien + Journal de Paris &amp; supp Economie du lundi 07 octobre">
                //        Le Parisien + Journal de Paris &amp; supp Economie du lundi 07 octobre</a>
                //</h2>

                XXElement xe = xeArticle.XPathElement(".//*[@class='title']//a");
                _postHeader.url   = xe.XPathValue("@href");
                _postHeader.title = xe.XPathValue(".//text()");

                //<div class="post-date">
                //    <span class="ext">Il y a 2 heures</span>
                //</div>
                string postDate = xeArticle.XPathValue(".//div[@class='post-date']//text()");
                //WriteLine("post date \"{0}\"", postDate);
                //Il y a 57 secondes
                //Il y a 3 minutes
                //Il y a 1 heure
                //Il y a 1 jour
                //Il y a 2 semaines
                //Il y a 2 mois
                if (postDate != null)
                {
                    _postHeader.infos.Add("postDate", new ZString(postDate));
                }

                //<div class="post-info">
                //    <span class="a">par
                //        <a href="http://zone-ebooks.com/author/admin" title="Articles par admin ">
                //            admin
                //        </a>
                //    </span>
                //    dans
                //    <a href="http://zone-ebooks.com/category/journaux" rel="tag" title="Journaux (158 sujets)">Journaux</a>
                //</div>
                xe = xeArticle.XPathElement(".//div[@class='post-info']");
                _postHeader.postAuthor = xe.XPathValue(".//a//text()");
                _postHeader.category   = xe.XPathValue("./a//text()");

                //<div class="post-content clear-block">
                xe = xeArticle.XPathElement(".//div[starts-with(@class, 'post-content')]");

                //<img title="Le Parisien + Journal de Paris &amp; supp Economie du lundi 07 octobre  PDF"
                //    alt="Le Parisien + Journal de Paris & supp Economie du lundi 07 octobre  PDF"
                //    src="http://i.imgur.com/f7aWDHF.jpg" width="362" height="446" />
                //_postHeader.images = xe.XPathImages(".//img", _url, _imagesToSkip);
                //_postHeader.images = xe.XPathImages(_url, _imagesToSkip);
                //_postHeader.images = xe.XPathImages(_url, imageHtml => !_imagesToSkip.ContainsKey(imageHtml.Source));
                //_postHeader.images = xe.XPathImages(xeImg => new ImageHtml(xeImg, _url), imageHtml => !_imagesToSkip.ContainsKey(imageHtml.Source)).ToList();
                //_postHeader.images = xe.XPathImages(xeImg => new ImageHtml(xeImg, _url), imageHtml => !_imagesToSkip.ContainsKey(imageHtml.Source)).ToList();
                _postHeader.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, _url)).Where(imageHtml => !_imagesToSkip.ContainsKey(imageHtml.Source)).ToList();

                if (_loadImage)
                {
                    pb.old.Http_v2.LoadImageFromWeb(_postHeader.images);
                }
                // image "infos sur le livre" http://i.imgur.com/GTPfRoB.png
                // image "description" http://i.imgur.com/Ruuh4CP.png
                //**********************************************************************************************************************************************************************************
                // pb image "infos sur le livre"
                //   zone-ebooks.com_img_info_livre_02_02.html
                //   zone-ebooks.com_img_info_livre_02_02.xml
                //   <div style="text-align: center;">
                // image ok
                //     <img title="Florence Bellot, &quot;Tresses et bracelets bresiliens&quot;" alt="Florence Bellot, Tresses et bracelets bresiliens PDF" src="http://i.imgur.com/RHWAvUQ.jpg" />
                //     <p>
                // image "infos sur le livre"
                //       <img title="Florence Bellot, &quot;Tresses et bracelets bresiliens&quot;" alt="Florence Bellot, Tresses et bracelets bresiliens PDF" src="http://i.imgur.com/GTPfRoB.png" />
                //     </p>
                //   ...
                //     <p>
                // image "description"
                //       <img title="Florence Bellot, &quot;Tresses et bracelets bresiliens&quot;" alt="Florence Bellot, Tresses et bracelets bresiliens PDF" src="http://i.imgur.com/Ruuh4CP.png" />
                //     </p>
                //**********************************************************************************************************************************************************************************


                //xe = xeArticle.XPathElement(".//div[@class='entry_top']");
                //_postHeader.image = xe.XPathImage(".//img");
                //foreach (string s in xe.XElement.zDescendantTexts())
                //    _postHeader.SetInfo(s);

                //if (_loadImage && _postHeader.image.Source != null)
                //    Http2.LoadImageFromWeb(_postHeader.image.Source);
                //xe = xeArticle.XPathElement(".//footer");
                return(true);
            }
            return(false);
        }
Exemple #14
0
        public static void Test_Connexion_01()
        {
            string exportDirectory = @"c:\pib\drive\google\dev_data\exe\runsource\download\sites\debrid-link.fr\model\login\new";

            bool trace = DebridLinkFr_v2.Trace;

            DebridLinkFr_v2.Trace = false;

            Trace.WriteLine("test connexion to debrid-link.fr");
            Trace.WriteLine("  export directory      : \"{0}\"", exportDirectory);

            XmlConfig localConfig = GetLocalConfig();

            string publicKey = localConfig.GetExplicit("DownloadAutomateManager/DebridLink/PublicKey");

            Trace.WriteLine("  publicKey             : \"{0}\"", publicKey);
            Trace.WriteLine();

            string newTokenUrl = string.Format("https://debrid-link.fr/api/token/{0}/new", publicKey);
            string exportFile  = "01_debrid-link.fr_api_new_token.txt";

            Trace.WriteLine("  get new token key     : \"{0}\"", newTokenUrl);
            Trace.WriteLine("  export to file        : \"{0}\"", exportFile);
            DateTime requestTime = DateTime.Now;

            Http.Http http = Http.Http.LoadAsText(new HttpRequest {
                Url = newTokenUrl
            }, exportFile: zPath.Combine(exportDirectory, exportFile));
            BsonDocument result        = BsonDocument.Parse(http.ResultText);
            int          serverTs      = result.zGet("ts").zAsInt();
            DateTime     serverTime    = zdate.UnixTimeStampToDateTime(serverTs);
            TimeSpan     serverTimeGap = serverTime - requestTime;

            Trace.WriteLine("  server time           : request time {0} server timestamp {1} server time {2} gap {3}", requestTime, serverTs, serverTime, serverTimeGap);
            Trace.WriteLine("  result                :");
            Trace.WriteLine(result.zToJson());
            Trace.WriteLine();

            string validTokenUrl = result.zGet("value.validTokenUrl").zAsString();

            exportFile = "02_debrid-link.fr_api_valid_token.html";
            Trace.WriteLine("  load valid token url  : \"{0}\"", validTokenUrl);
            Trace.WriteLine("  export to file        : \"{0}\"", exportFile);
            HttpRequestParameters httpRequestParameters = new HttpRequestParameters {
                Encoding = Encoding.UTF8
            };

            http = Http.Http.LoadAsText(new HttpRequest {
                Url = validTokenUrl
            }, httpRequestParameters, exportFile: zPath.Combine(exportDirectory, exportFile));
            Trace.WriteLine();

            string loginUrl = "https://debrid-link.fr/login";

            exportFile = "03_debrid-link.fr_login.html";
            Trace.WriteLine("  send login info       : \"{0}\"", loginUrl);
            Trace.WriteLine("  export to file        : \"{0}\"", exportFile);
            string content      = string.Format("user={0}&password={1}&understand=true", localConfig.GetExplicit("DownloadAutomateManager/DebridLink/Login"), localConfig.GetExplicit("DownloadAutomateManager/DebridLink/Password"));
            string traceContent = string.Format("user={0}&password={1}&understand=true", "xxxxxx", "xxxxxx");

            Trace.WriteLine("  content               : \"{0}\"", traceContent);
            http = Http.Http.LoadAsText(new HttpRequest {
                Url = loginUrl, Method = HttpRequestMethod.Post, Content = content
            }, httpRequestParameters, exportFile: zPath.Combine(exportDirectory, exportFile));
            Trace.WriteLine();

            XXElement xe = http.zGetXDocument().zXXElement();

            //<div class="alert alert-success">
            xe = xe.XPathElement("//div[@class='alert alert-success']");
            Trace.WriteLine("  verify login          : {0}", xe.XElement != null ? "login ok" : "login error");
            Trace.WriteLine();

            string request    = "/account/infos";
            string urlRequest = "https://debrid-link.fr/api" + request;

            exportFile = "04_debrid-link.fr_account_infos.txt";
            Trace.WriteLine("  get account infos     : \"{0}\"", urlRequest);
            Trace.WriteLine("  export to file        : \"{0}\"", exportFile);
            string   key       = result.zGet("value.key").zAsString();
            DateTime time      = DateTime.Now + serverTimeGap;
            int      timestamp = zdate.DateTimeToUnixTimeStamp(time);
            string   signature = DebridLinkFr_v2.GetSignature(timestamp, request, key);

            Trace.WriteLine("  signature             : timestamp {0} request \"{1}\" key \"{2}\" signature \"{3}\"", timestamp, request, key, signature);
            string token = result.zGet("value.token").zAsString();

            httpRequestParameters = new HttpRequestParameters {
                Encoding = Encoding.UTF8
            };
            httpRequestParameters.Headers["x-dl-token"] = token;
            httpRequestParameters.Headers["x-dl-sign"]  = signature;
            httpRequestParameters.Headers["x-dl-ts"]    = timestamp.ToString();
            Trace.WriteLine("  set header            : \"{0}\" = \"{1}\"", "x-dl-token", token);
            Trace.WriteLine("  set header            : \"{0}\" = \"{1}\"", "x-dl-sign", signature);
            Trace.WriteLine("  set header            : \"{0}\" = \"{1}\"", "x-dl-ts", timestamp);
            DateTime dt = DateTime.Now;

            http = Http.Http.LoadAsText(new HttpRequest {
                Url = urlRequest
            }, httpRequestParameters, exportFile: zPath.Combine(exportDirectory, exportFile));
            result = BsonDocument.Parse(http.ResultText);
            // control server time
            int      newTimestamp     = result.zGet("ts").zAsInt();
            DateTime newServerTime    = zdate.UnixTimeStampToDateTime(newTimestamp);
            TimeSpan newServerTimeGap = newServerTime - dt;

            Trace.WriteLine("  new server time       : {0} gap {1} timestamp {2} timestamp gap {3}", newServerTime, newServerTimeGap, newTimestamp, timestamp - newTimestamp);
            Trace.WriteLine("  result                :");
            Trace.WriteLine(result.zToJson());
            Trace.WriteLine();

            DebridLinkFr_v2.Trace = trace;
        }
        //protected override Telechargementz_PostDetail GetDataFromWeb(LoadDataFromWeb loadDataFromWeb)
        protected override IPost GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
        {
            XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
            Telechargementz_PostDetail data = new Telechargementz_PostDetail();

            data.SourceUrl       = loadDataFromWeb.request.Url;
            data.LoadFromWebDate = loadDataFromWeb.loadFromWebDate;
            data.Id = GetPostDetailKey(data.SourceUrl);

            XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");

            data.PostAuthor = xePost.XPathValue(".//div[@class='title-info']//a//text()");

            // , 26.12.14
            string date = xePost.XPathValue(".//div[@class='title-info']//a/following-sibling::text()");

            if (date != null)
            {
                data.PostCreationDate = zdate.ParseDateTimeLikeToday(date.Trim(' ', ','), loadDataFromWeb.loadFromWebDate, "dd.MM.yy");
                if (data.PostCreationDate == null)
                {
                    pb.Trace.WriteLine("unknow date time \"{0}\"", date);
                }
                if (__trace)
                {
                    pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.PostCreationDate, date);
                }
            }
            else
            {
                pb.Trace.WriteLine("creationDate not found \"{0}\"", data.SourceUrl);
            }

            //data.Title = xePost.XPathElement(".//div[@class='post-title']").DescendantTextList(func: DownloadPrint.TrimFunc1).FirstOrDefault();
            data.Title = xePost.XPathElement(".//div[@class='post-title']").DescendantTexts().Select(DownloadPrint.Trim).FirstOrDefault();
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);

            if (titleInfos.foundInfo)
            {
                data.OriginalTitle = data.Title;
                data.Title         = titleInfos.title;
                data.infos.SetValues(titleInfos.infos);
            }

            XXElement xe = xePost.XPathElement(".//div[starts-with(@id, 'news-id-')]");

            if (xe.XElement == null)
            {
                pb.Trace.WriteLine("element not found \".//div[starts-with(@id, 'news-id-')]\"");
            }

            //data.Images = new List<UrlImage>();
            //data.Images.Add(xe.XPathImages(xeImg => new UrlImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).FirstOrDefault());
            //data.Images = new UrlImage[] { xe.XPathImages(xeImg => new UrlImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).FirstOrDefault() };
            WebImage image = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).FirstOrDefault();

            if (image != null)
            {
                data.Images = new WebImage[] { image }
            }
            ;

            // force load image to get image width and height
            if (loadDataFromWeb.request.LoadImage)
            {
                data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
            }

            data.DownloadLinks = xe.XPathValues(".//a/@href").ToArray();

            //data.category = xePost.DescendantTextList(".//div[@class='hdiin']//a").Select(DownloadPrint.TrimFunc1).zToStringValues("/");
            //string category = data.category.ToLowerInvariant();
            //data.printType = GetPrintType(category);
            ////pb.Trace.WriteLine("category \"{0}\" printType {1}", category, data.printType);


            // get infos, description, language, size, nbPages
            // nodeFilter: not <a> and not <span>
            //   nodeFilter: node => !(node is XElement) || (((XElement)node).Name != "a" && ((XElement)node).Name != "span")
            // nodeFilter: not <a>
            //PrintTextValues_old textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_old(xe.DescendantTextList(nodeFilter: node => !(node is XElement) || ((XElement)node).Name != "a"), data.Title);
            PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode), data.Title);

            data.description = textValues.description;
            data.language    = textValues.language;
            data.size        = textValues.size;
            data.nbPages     = textValues.nbPages;
            data.infos.SetValues(textValues.infos);

            data.PrintType = PrintType.UnknowEBook;
            if (data.infos.ContainsKey("Bd") || data.infos.ContainsKey("bd") || data.infos.ContainsKey("BD"))
            {
                data.PrintType = PrintType.Comics;
            }
            // Editeur : Presse fr
            else if (data.infos.ContainsKey("editeur") && data.infos["editeur"] is ZString && ((string)data.infos["editeur"]).ToLowerInvariant() == "presse fr")
            {
                data.PrintType = PrintType.Print;
            }
            else if (data.infos.ContainsKey("isbn"))
            {
                data.PrintType = PrintType.Book;
            }


            //pb.Trace.WriteLine(xe.DescendantNodes(returnNodeFilter: node => !(node is XElement) || ((XElement)node).Name != "a").Select(node => new { type = node.NodeType, name = node is XElement ? ((XElement)node).Name.LocalName : null, value = node is XText ? ((XText)node).Value : null }).zToJson());
            //pb.Trace.WriteLine(xe.DescendantNodes(nodeFilter: node => !(node is XElement) || ((XElement)node).Name != "a").Select(node => new { type = node.NodeType, name = node is XElement ? ((XElement)node).Name.LocalName : null, value = node is XText ? ((XText)node).Value : null }).zToJson());
            //pb.Trace.WriteLine(xe.DescendantNodes(returnNodeFilter: node => node is XText).Select(node => new { type = node.NodeType, name = node is XElement ? ((XElement)node).Name.LocalName : null, value = node is XText ? ((XText)node).Value : null }).zToJson());
            //pb.Trace.WriteLine(xe.DescendantNodes(nodeFilter: node => !(node is XElement) || (((XElement)node).Name != "a" && ((XElement)node).Name != "span"), returnNodeFilter: node => node is XText).Select(node => new { type = node.NodeType, name = node is XElement ? ((XElement)node).Name.LocalName : null, value = node is XText ? ((XText)node).Value : null }).zToJson());

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
Exemple #16
0
        private static Cdefi_Detail GetData(WebResult webResult)
        {
            XXElement    xeSource = webResult.Http.zGetXDocument().zXXElement();
            Cdefi_Detail data     = new Cdefi_Detail();

            data.SourceUrl       = webResult.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = webResult.LoadFromWebDate;
            data.Id = GetKey(webResult.WebRequest.HttpRequest);

            XXElement xeData = xeSource.XPathElement("//div[@id='body']//div[@class='wBloc']");

            data.Institution = Cdefi.Trim(xeData.XPathValue(".//div[@class='schoolContentInfo']//div[@class='schoolContentInfo_infos verticalCenterToContainer']//h2/text()"));
            data.Address     = Cdefi.Trim(xeData.XPathValue(".//div[@class='wPage'][1]//span[@class='editable']//text()"));

            foreach (string s in xeData.XPathElement(".//div[@class='wPage'][2]").DescendantTexts().Select(s => Cdefi.Trim(s)))
            {
                //string s2 = Cdefi.Trim(s);
                if (s.StartsWith("Nom du directeur", StringComparison.InvariantCultureIgnoreCase))
                {
                    int i = s.IndexOf(':');
                    if (i != -1)
                    {
                        data.Director = Cdefi.Trim(s.Substring(i + 1));
                    }
                }
                else if (s.StartsWith("Département", StringComparison.InvariantCultureIgnoreCase))
                {
                    int i = s.IndexOf(':');
                    if (i != -1)
                    {
                        data.Department = Cdefi.Trim(s.Substring(i + 1));
                    }
                }
                else if (s.StartsWith("Numéro de téléphone", StringComparison.InvariantCultureIgnoreCase))
                {
                    int i = s.IndexOf(':');
                    if (i != -1)
                    {
                        data.Tel = Cdefi.Trim(s.Substring(i + 1));
                    }
                }
                else if (s.StartsWith("Adresse email de contact", StringComparison.InvariantCultureIgnoreCase))
                {
                    int i = s.IndexOf(':');
                    if (i != -1)
                    {
                        data.Mail = Cdefi.Trim(s.Substring(i + 1));
                    }
                }
                else if (s.StartsWith("Adresse du site internet", StringComparison.InvariantCultureIgnoreCase))
                {
                    int i = s.IndexOf(':');
                    if (i != -1)
                    {
                        data.WebSite = Cdefi.Trim(s.Substring(i + 1));
                    }
                }
                else if (s.StartsWith("Nature de l'établissement", StringComparison.InvariantCultureIgnoreCase))
                {
                    int i = s.IndexOf(':');
                    if (i != -1)
                    {
                        data.InstitutionType = Cdefi.Trim(s.Substring(i + 1));
                    }
                }
                //else
                //    pb.Trace.WriteLine("text : \"{0}\"", s);
            }

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
Exemple #17
0
        public static Http Login(XXElement xeSource)
        {
            XmlConfig localConfig = new XmlConfig(XmlConfig.CurrentConfig.GetExplicit("LocalConfig"));
            string    login       = localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Login");
            //string hashPassword = Crypt.ComputeMD5Hash(localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Password"));
            string hashPassword = Crypt.ComputeMD5Hash(localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Password")).zToHex(lowercase: true);

            // <base href="http://www.ebookdz.com/forum/" />
            string urlBase = xeSource.XPathValue("//head//base/@href");
            //string urlBase = xeSource.XPathValue("//body//base/@href");
            //Trace.WriteLine("urlBase : \"{0}\"", urlBase);
            XXElement xeForm = xeSource.XPathElement("//form[@id='navbar_loginform']");

            if (xeForm.XElement == null)
            {
                //Trace.WriteLine("element not found \"//form[@id='navbar_loginform']\"");
                throw new PBException("element form not found \"//form[@id='navbar_loginform']\"");
            }
            //Trace.WriteLine("form action : \"{0}\"", xeForm.XPathValue("@action"));
            string urlForm = zurl.GetUrl(urlBase, xeForm.XPathValue("@action"));
            string method  = xeForm.XPathValue("@method");
            //Trace.WriteLine("urlForm : \"{0}\" method {1}", urlForm, method);
            StringBuilder sb    = new StringBuilder();
            bool          first = true;

            foreach (XXElement xeInput in xeForm.XPathElements(".//input"))
            {
                string name = xeInput.XPathValue("@name");
                if (name == null)
                {
                    continue;
                }
                string value = null;
                if (name == "vb_login_username")
                {
                    value = login;
                }
                else if (name == "vb_login_password")
                {
                    value = null;
                }
                else if (name == "vb_login_md5password" || name == "vb_login_md5password_utf")
                {
                    value = hashPassword;
                }
                else
                {
                    value = xeInput.XPathValue("@value");
                }
                if (!first)
                {
                    sb.Append("&");
                }
                sb.AppendFormat("{0}={1}", name, value);
                first = false;
            }
            string content = sb.ToString();
            //Trace.WriteLine("content : \"{0}\"", content);

            HttpRequest httpRequest = new HttpRequest {
                Url = urlForm, Content = content, Method = Http.GetHttpRequestMethod(method)
            };
            HttpRequestParameters httpRequestParameters = new HttpRequestParameters();
            Http http = HttpManager.CurrentHttpManager.Load(httpRequest, httpRequestParameters);

            //xeSource = new XXElement(http.zGetXmlDocument().Root);
            //if (!IsLoggedIn(xeSource))
            //    throw new PBException("unable login to http://www.ebookdz.com/");
            return(http);
        }
Exemple #18
0
        protected void InitXml()
        {
            _data                 = new Gesat_Company();
            _data.url             = _url;
            _data.loadFromWebDate = DateTime.Now;

            if (_header != null)
            {
                _data.name     = _header.name;
                _data.type     = _header.type;
                _data.location = _header.location;
                _data.phone    = _header.phone;
                _data.infos    = _header.infos;
            }

            // <div class="PAGES" id="content">
            XXElement xe = _xeSource.XPathElement(".//div[@id='content']");

            // <h1><span>ESAT BETTY LAUNAY-MOULIN VERT >></span><br />Coordonnées & activités</h1>
            //string s = xe.XPathValue(".//h1//text()", _trimFunc2);
            string s = _trimFunc2(xe.XPathValue(".//h1//text()"));

            //s = s.Trim(' ', '>');
            if (!s.Equals(_data.name, StringComparison.InvariantCultureIgnoreCase))
            {
                _data.headerName = _data.name;
                _data.name       = s;
            }

            // <div class="BLOC B100 ACCROCHE">
            // <div class="CONTENU-BLOC">Cet E.S.A.T. est ouvert depuis 1989 et accueille 55 personnes reconnues travailleurs handicapés.  Il est situé dans la ville de
            // <a href="/Gesat/Hauts-de-Seine,92/Bois-Colombes,35494/" title="Bois-Colombes // Les ESAT et EA de la ville">Bois-Colombes</a> (
            // <a href="/Gesat/Hauts-de-Seine,92/" title="Hauts-de-Seine // Les ESAT et EA du département">Hauts-de-Seine</a>)
            // </div></div>
            _data.descryption = xe.XPathConcatText(".//div[@class='BLOC B100 ACCROCHE']//text()", resultFunc: _trimFunc1);
            _data.descryption = _data.descryption.Replace("\r", "");
            _data.descryption = _data.descryption.Replace("\n", "");
            _data.descryption = _data.descryption.Replace("\t", "");
            //_data.city = xe.XPathValue(".//div[@class='BLOC B100 ACCROCHE']//a[1]//text()", _trimFunc1);
            _data.city = _trimFunc1(xe.XPathValue(".//div[@class='BLOC B100 ACCROCHE']//a[1]//text()"));
            //_data.department = xe.XPathValue(".//div[@class='BLOC B100 ACCROCHE']//a[2]//text()", _trimFunc1);
            _data.department = _trimFunc1(xe.XPathValue(".//div[@class='BLOC B100 ACCROCHE']//a[2]//text()"));

            // <div class="ADRESSE">78, RUE RASPAIL<br />92270  Bois-Colombes</div>
            _data.address = xe.XPathConcatText(".//div[@class='ADRESSE']//text()", " ", itemFunc: _trimFunc1);
            _data.address = _data.address.Replace("\r", "");
            _data.address = _data.address.Replace("\n", "");
            _data.address = _data.address.Replace("\t", "");

            // <div class="TEL">01 47 86 11 48</div>
            //s = xe.XPathValue(".//div[@class='TEL']//text()", _trimFunc1);
            s = _trimFunc1(xe.XPathValue(".//div[@class='TEL']//text()"));
            if (!s.Equals(_data.phone, StringComparison.InvariantCultureIgnoreCase))
            {
                _data.headerPhone = _data.phone;
                _data.phone       = s;
            }

            // <div class="FAX">01 47 82 42 64</div>
            //_data.fax = xe.XPathValue(".//div[@class='FAX']//text()", _trimFunc1);
            _data.fax = _trimFunc1(xe.XPathValue(".//div[@class='FAX']//text()"));

            // <div class="EMAIL">production.launay<img border="0" alt="arobase.png" src="/images/bulles/arobase.png" style=" border: 0;" />lemoulinvert.org</div>
            _data.email = xe.XPathConcatText(".//div[@class='EMAIL']//text()", "@", itemFunc: _trimFunc1);

            // <div class="WWW"><a href="http://www.esat-b-launay.com" target="_blank">www.esat-b-launay.com</a></div>
            //_data.webSite = xe.XPathValue(".//div[@class='WWW']//a/@href", _trimFunc1);
            _data.webSite = _trimFunc1(xe.XPathValue(".//div[@class='WWW']//a/@href"));

            // <div class="BLOC-FICHE BLOC-ACTIVITES">
            // <dl><dt>Conditionnement, travaux &agrave; fa&ccedil;on</dt></dl>
            // <dl><dt>Assemblage, montage</dt></dl>
            // <dl><dt>Mise sous pli, mailing, routage</dt></dl>
            // <dl><dt>Toutes activit&eacute;s en entreprise </dt></dl>
            // <dl><dt>Num&eacute;risation, saisie informatique</dt></dl>
            // <dl><dt>Remplissage, ensachage, flaconnage</dt></dl>
            // <dl><dt>Etiquetage, codage, badges</dt></dl>
            // <dl><dt>Secr&eacute;tariat, travaux administratifs</dt></dl>
            // <dl><dt>Artisanats divers</dt></dl>
            // </div>
            //_data.activities = xe.XPathValues(".//div[@class='BLOC-FICHE BLOC-ACTIVITES']//dl//text()", _trimFunc1);
            _data.activities = xe.XPathValues(".//div[@class='BLOC-FICHE BLOC-ACTIVITES']//dl//text()").Select(_trimFunc1).ToArray();
        }
Exemple #19
0
        // detail get data
        protected override Test_PostDetail GetDetailData(WebResult webResult)
        {
            XXElement       xeSource = webResult.Http.zGetXDocument().zXXElement();
            Test_PostDetail data     = new Test_PostDetail();

            data.SourceUrl       = webResult.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = webResult.LoadFromWebDate;
            data.Id = _GetDetailKey(webResult.WebRequest.HttpRequest);

            XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

            XXElement xe = xePost.XPathElement(".//table[@id='post-head']");

            //string[] dates = xe.DescendantTextList(".//td[@id='head-date']", func: Vosbooks.TrimFunc1).ToArray();
            string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
            data.PostCreationDate = GetDate(dates, _lastPostDate);
            if (data.PostCreationDate != null)
            {
                _lastPostDate = new Date(data.PostCreationDate.Value);
            }
            if (__trace)
            {
                pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());
            }

            data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);

            if (titleInfos.foundInfo)
            {
                data.OriginalTitle = data.Title;
                data.Title         = titleInfos.title;
                data.Infos.SetValues(titleInfos.infos);
            }

            data.Category  = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
            data.PrintType = GetPrintType(data.Category);

            xe          = xePost.XPathElement(".//div[@class='entry']");
            data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src"))) };

            // force load image to get image width and height
            //if (webResult.WebRequest.LoadImage)
            //    data.Images = DownloadPrint.LoadImages(data.Images).ToArray();

            // get infos, description, language, size, nbPages
            PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
                xe.XPathElements(".//p").DescendantTexts(
                    node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    //if (text.StartsWith("Lien Direct", StringComparison.InvariantCultureIgnoreCase))
                    if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                    {
                        return(XNodeFilter.Stop);
                    }
                }
                if (node is XElement)
                {
                    XElement xe2 = (XElement)node;
                    if (xe2.Name == "p" && xe2.zAttribValue("class") == "submeta")
                    {
                        return(XNodeFilter.Stop);
                    }
                }
                return(XNodeFilter.SelectNode);
            }
                    ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title);

            data.Description = textValues.description;
            data.Infos.SetValues(textValues.infos);

            data.DownloadLinks = xe.DescendantNodes(
                node =>
            {
                if (!(node is XElement))
                {
                    return(XNodeFilter.DontSelectNode);
                }
                XElement xe2 = (XElement)node;
                if (xe2.Name == "a")
                {
                    return(XNodeFilter.SelectNode);
                }
                if (xe2.Name != "p")
                {
                    return(XNodeFilter.DontSelectNode);
                }
                XAttribute xa = xe2.Attribute("class");
                if (xa == null)
                {
                    return(XNodeFilter.DontSelectNode);
                }
                if (xa.Value != "submeta")
                {
                    return(XNodeFilter.DontSelectNode);
                }
                //return XNodeFilter.SkipNode;
                return(XNodeFilter.Stop);
            })
                                 .Select(node => ((XElement)node).Attribute("href").Value).ToArray();

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
Exemple #20
0
        public static IKeyData <int> GetForumHeaderPageData(LoadDataFromWeb_v4 loadDataFromWeb)
        {
            XXElement          xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);
            string             url      = loadDataFromWeb.WebRequest.HttpRequest.Url;
            Ebookdz_HeaderPage data     = new Ebookdz_HeaderPage();

            data.SourceUrl       = url;
            data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;
            //data.Id = Ebookdz_LoadHeaderPagesManager.GetHeaderPageKey(loadDataFromWeb.WebRequest.HttpRequest);

            // <div id="above_threadlist" class="above_threadlist">
            // <div class="threadpagenav">
            // <span class="prev_next">
            // <a rel="next" href="forumdisplay.php?f=74&amp;page=2&amp;s=4807e931448c05da34dd54fbd0308479" title="Page suivante - Résultats de 21 à 40 sur 66">
            data.UrlNextPage = GetUrl(zurl.GetUrl(url, xeSource.XPathValue("//div[@id='above_threadlist']//span[@class='prev_next']//a[@rel='next']/@href")));

            // <div class="body_bd">
            XXElement xePost = xeSource.XPathElement("//div[@class='body_bd']");

            // <div id="breadcrumb" class="breadcrumb">
            // <ul class="floatcontainer">
            // <li class="navbit">
            // Forum / Journaux / Presse quotidienne / Autres Journaux

            // <div id="threadlist" class="threadlist">
            // <ol id="threads" class="threads">

            IEnumerable <XXElement>   xeHeaders = xeSource.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li");
            List <Ebookdz_PostHeader> headers   = new List <Ebookdz_PostHeader>();

            foreach (XXElement xeHeader in xeHeaders)
            {
                Ebookdz_PostHeader header = new Ebookdz_PostHeader();
                header.SourceUrl       = url;
                header.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;

                // <div class="threadinfo" title="">
                // <div class="inner">
                // <a title="" class="title" href="showthread.php?t=111210&amp;s=4807e931448c05da34dd54fbd0308479" id="thread_title_111210">L'OPINION du mardi  20 janvier 2015</a>

                XXElement xe = xeHeader.XPathElement(".//div[@class='threadinfo']//a[@class='title']");
                header.Title     = xe.XPathValue(".//text()");
                header.UrlDetail = GetUrl(zurl.GetUrl(loadDataFromWeb.WebRequest.HttpRequest.Url, xe.XPathValue("@href")));

                //header.images = xeHeader.XPathImages(xeImg => new UrlImage(zurl.GetUrl(url, xeImg.zAttribValue("src")))).ToList();

                //XXElement xe = xeHeader.XPathElement(".//*[@class='shd']//a");
                //header.urlDetail = zurl.GetUrl(url, xe.XPathValue("@href"));
                //header.title = RapideDdl.ExtractTextValues(header.infos, xe.XPathValue(".//text()", RapideDdl.TrimFunc1));

                //xe = xeHeader.XPathElement(".//div[@class='shdinfo']");
                //header.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");
                //// Aujourd'hui, 17:13
                //header.creationDate = RapideDdl.ParseDateTime(xe.XPathValue(".//span[@class='date']//text()"), loadDataFromWeb.loadFromWebDate);

                //xe = xeHeader.XPathElement(".//div[@class='maincont']");
                //header.images = xe.XPathImages(xeImg => new UrlImage(zurl.GetUrl(url, xeImg.zAttribValue("src")))).ToList();

                //RapideDdl.SetTextValues(header, xe.DescendantTextList());

                //xe = xeHeader.XPathElement(".//div[@class='morelink']//span[@class='arg']");
                //header.category = xe.DescendantTextList(".//a").Select(RapideDdl.TrimFunc1).Where(s => !s.StartsWith("Commentaires")).zToStringValues("/");

                headers.Add(header);
            }
            data.PostHeaders = headers.ToArray();
            //return (IEnumDataPages_new2<int, IHeaderData_new>)data;
            return((IKeyData <int>)data);
        }
Exemple #21
0
        protected override TelechargementPlus_PostDetail GetData()
        {
            XXElement xeSource = new XXElement(GetXmlDocument().Root);
            TelechargementPlus_PostDetail data = new TelechargementPlus_PostDetail();

            data.sourceUrl       = Url;
            data.loadFromWebDate = DateTime.Now;

            XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");
            XXElement xe     = xePost.XPathElement(".//div[@class='heading']//div[@class='binner']");

            //data.title = TelechargementPlus.TrimString(TelechargementPlus.ExtractTextValues(data.infos, xe.XPathValue(".//text()")));
            // xe.XPathValue(".//text()", TelechargementPlus.TrimFunc1)
            data.title        = TelechargementPlus.ExtractTextValues(data.infos, TelechargementPlus.TrimFunc1(xe.XPathValue(".//text()")));
            data.creationDate = TelechargementPlus.ParseDateTime(xe.XPathValue(".//a//text()"));
            //data.category = xe.DescendantTextList(".//div[@class='storeinfo']").Skip(2).Select(TelechargementPlus.TrimFunc1).Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "").zToStringValues("/");
            data.category = xe.XPathElements(".//div[@class='storeinfo']").DescendantTexts().Skip(2).Select(TelechargementPlus.TrimFunc1).Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "").zToStringValues("/");

            //TelechargementPlus_Print print = new TelechargementPlus_Print();
            //print.url = Url;
            //print.loadFromWebDate = DateTime.Now;
            //data.infos.SetValues(data.infos);

            //<div class="base">
            //    <div class="heading">
            //        <div class="binner">
            //            <h1>
            //                Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct] Gratuit</h1>
            //            <div class="storeinfo">
            //                <a href="http://www.telechargement-plus.com/2013/10/14/">Aujourd'hui, 11:59</a>
            //                | Cat&eacute;gorie:
            //                <a href="http://www.telechargement-plus.com/e-book-magazines/">E-Book / Magazines</a>,
            //                <a href="http://www.telechargement-plus.com/e-book-magazines/journaux/">Journaux</a>,
            //                <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/">Magazines</a>
            //                <!-- | Views: 16-->
            //            </div>
            //        </div>
            //    </div>
            //    <div class="maincont">
            //        <div class="binner">
            //            <div class="shortstory">
            //                <div class="story-text">
            //                    <center>
            //                        <span id="post-img">
            //                            <img src="/templates/film-gratuit/images/prez/livre.png" alt="E-Book / Magazines, Journaux, Magazines" />
            //                        </span>
            //                    </center>
            //                    <span id="post-img">
            //                        <div style="text-align: center;">
            //                            <br />
            //                            <!--dle_image_begin:http://www.hapshack.com/images/TX72Y.jpg|-->
            //                            <img src="http://www.hapshack.com/images/TX72Y.jpg" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                            <br />
            //                            <br />
            //                            <b>Editeur :</b> Presse Fr<br />
            //                            <b>Date de sortie :</b> 2013
            //                            <br />
            //                            <b>H�bergeur : </b>Multi /
            //                            <b>
            //                                <!--colorstart:#FF0000-->
            //                                <span style="color: #FF0000">
            //                                    <!--/colorstart-->
            //                                    [Link Direct]<!--colorend-->
            //                                </span><!--/colorend-->
            //                            </b>
            //                            <br />
            //                            <br />
            //                            <!--dle_image_begin:http://prezup.eu/prez/infossurlebook.png|-->
            //                            <img src="http://prezup.eu/prez/infossurlebook.png" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                <br />
            //                            <br />
            //                            <b>Advanced Cr�ation Photoshop HS�rie N�19 - Novembre 2013 [Lien Direct]</b>
            //                            <br />
            //                            PDF | French | 186 pages | 100 MB<br />
            //                            <br />
            //                            <b>Le CD | zipper/22 Fichiers &+ | 520 MB</b><br />
            //                            37 Projets complets<br />
            //                            SAVOIR TOUT FAIRE : Avec Photoshop Volume XIII<br />
            //                            SPECIAL PHOTOMONTAGE & PEINTURE NUMERIQUE<br />
            //                            BONUS : 2 Tutoriels Illustrator<br />
            //                            / / /
            //                            <br />
            //                            <br />
            //                        </div>
            //                    </span>
            //                    <span id="post-img">
            //                        <div id="news-id-86887" style="display: inline;">
            //                            *<br />
            //                            *<br />
            //                            *<br />
            //                            <div style="text-align: center;">
            //                                <b>
            //                                    <!--sizestart:6-->
            //                                    <span style="font-size: 24pt;">
            //                                        <!--/sizestart-->
            //                                        <!--colorstart:#FF6600-->
            //                                        <span style="color: #FF6600">
            //                                            <!--/colorstart-->
            //                                            Cloudzer<!--colorend-->
            //                                        </span><!--/colorend--><!--sizeend-->
            //                                    </span><!--/sizeend-->
            //                                    =
            //                                    <!--colorstart:#FF0000-->
            //                                    <span style="color: #FF0000">
            //                                        <!--/colorstart-->
            //                                        [Link Direct]<!--colorend-->
            //                                    </span><!--/colorend-->
            //                                </b>
            //                                <br />
            //                                <br />
            //                                <a href="http://clz.to/q83zrwga" target="_blank">
            //                                    <!--dle_image_begin:http://www.hapshack.com/images/0THnp.gif|-->
            //                                    <img src="http://www.hapshack.com/images/0THnp.gif" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                        title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                </a>
            //                                <br />
            //                                <a href="http://ul.to/ukqruco3" target="_blank">
            //                                    <!--dle_image_begin:http://www.hapshack.com/images/9MfYk.gif|-->
            //                                    <img src="http://www.hapshack.com/images/9MfYk.gif" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                        title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                </a>
            //                                <br />
            //                                <br />
            //                                <a href="http://hulkfile.eu/gap3aafrlmaj.html" target="_blank">
            //                                    <!--dle_image_begin:http://www.hapshack.com/images/Js84x.jpg|-->
            //                                    <img src="http://www.hapshack.com/images/Js84x.jpg" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                        title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                </a>
            //                                <br />
            //                                <br />
            //                                <a href="http://turbobit.net/blki3znuvzeg.html" target="_blank">
            //                                    <!--dle_image_begin:http://www.hapshack.com/images/QYeW0.gif|-->
            //                                    <img src="http://www.hapshack.com/images/QYeW0.gif" alt="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]"
            //                                        title="Advanced Cr�ation Photoshop H-S�rie N�19 - Novembre 2013 [Lien Direct]" /><!--dle_image_end-->
            //                                </a>
            //                                <br />
            //                                <br />
            //                                *<br />
            //                                *<br />
            //                                <b>Le CD &+ : </b>
            //                                <br />
            //                                http://clz.to/o58urag6<br />
            //                                http://ul.to/rpqjypm4<br />
            //                                http://hulkfile.eu/i2k3bbz835zg.html<br />
            //                                http://turbobit.net/v644k3dd8izl.html<br />
            //                                <br />
            //                                <br />
            //                                Bonne lecture<br />
            //                                *************
            //                            </div>
            //                        </div>
            //                    </span>

            //XXElement xe = _xePost.XPathElement(".//div[@class='heading']//div[@class='binner']");
            //_post.title = _print.title = TelechargementPlus.TrimString(TelechargementPlus.ExtractTextValues(_print.infos, xe.XPathValue(".//text()")));
            //string postDate = xe.XPathValue(".//a//text()");
            ////WriteLine("postDate : \"{0}\"", postDate);
            //// Aujourd'hui, 17:13
            ////if (postDate != null)
            ////    _print.infos.SetValue("postDate", new ZString(postDate));
            ////_print.creationDate = FrboardPrint.GetDateTime(date.Trim(_trimAll), time.Trim(_trimAll));
            //_post.creationDate = TelechargementPlus.ParseDateTime(postDate);
            //_print.category = xe.DescendantTextList(".//div[@class='storeinfo']").Skip(2).Select(s => TelechargementPlus.TrimString(s)).Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "").zToStringValues("/");

            //print.title = data.title;
            //print.category = data.category;

            xe = xePost.XPathElement(".//div[@class='maincont']//div[@class='binner']//div[@class='story-text']");
            //data.images = xe.XPathElements(".//span[@id='post-img']").XPathImages(Url, TelechargementPlus.ImagesToSkip, node => node is XElement && ((XElement)node).Name == "a" ? false : true);
            //data.images = xe.XPathElements(".//span[@id='post-img']").XPathImages(Url, imageHtml => !TelechargementPlus.ImagesToSkip.ContainsKey(imageHtml.Source), node => node is XElement && ((XElement)node).Name == "a" ? false : true);
            //data.images = xe.XPathElements(".//span[@id='post-img']").XPathImages(xeImg => new ImageHtml(xeImg, Url), imageHtml => !TelechargementPlus.ImagesToSkip.ContainsKey(imageHtml.Source),
            //    node => node is XElement && ((XElement)node).Name == "a" ? false : true).ToList();
            //data.images = xe.XPathElements(".//span[@id='post-img']").XPathImages(xeImg => new ImageHtml(xeImg, Url), imageHtml => !TelechargementPlus.ImagesToSkip.ContainsKey(imageHtml.Source),
            //    node => node is XElement && ((XElement)node).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode).ToList();
            data.images = xe.XPathElements(".//span[@id='post-img']")
                          .DescendantNodes(node => XmlDescendant.ImageFilter(node, node2 => node2 is XElement && ((XElement)node2).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode))
                          .Select(xeImg => new pb.old.ImageHtml((XElement)xeImg, Url))
                          .Where(imageHtml => !TelechargementPlus.ImagesToSkip.ContainsKey(imageHtml.Source))
                          .ToList();
            if (_loadImage)
            {
                pb.old.Http_v2.LoadImageFromWeb(data.images);
            }

            //data.SetTextValues(xe.DescendantTextList(".//span[@id='post-img']", node => node is XElement && ((XElement)node).Name == "a" ? false : true));
            data.SetTextValues(xe.XPathElements(".//span[@id='post-img']").DescendantTexts(node => node is XElement && ((XElement)node).Name == "a" ? XNodeFilter.SkipNode : XNodeFilter.SelectNode));

            data.downloadLinks.AddRange(xe.XPathValues(".//span[@id='post-img']//a/@href"));

            ////<h1 class="shd">
            ////    <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/86236-multi-ici-paris-n3562-9-au-15-octobre-2013.html">
            ////        [Multi] Ici Paris N°3562 - 9 au 15 Octobre 2013
            ////    </a>
            ////</h1>
            //XXElement xe = xePost.XPathElement(".//*[@class='shd']//a");
            //_print.url = xe.XPathValue("@href");
            //_print.title = TrimString(ExtractTextValues(xe.XPathValue(".//text()")));

            ////<div class="shdinf">
            ////    <div class="shdinf">
            ////      <span class="rcol">Auteur:
            ////          <a onclick="ShowProfile('bakafa', 'http://www.telechargement-plus.com/user/bakafa/', '0'); return false;" href="http://www.telechargement-plus.com/user/bakafa/">
            ////              bakafa
            ////          </a>
            ////      </span>
            ////      <span class="date">
            ////          <b><a href="http://www.telechargement-plus.com/2013/10/09/">Aujourd'hui, 17:13</a></b>
            ////      </span>
            ////      <span class="lcol">Cat&eacute;gorie:
            ////          <a href="http://www.telechargement-plus.com/e-book-magazines/">
            ////              E-Book / Magazines
            ////          </a> &raquo;
            ////          <a href="http://www.telechargement-plus.com/e-book-magazines/magazines/">
            ////              Magazines
            ////          </a>
            ////      </span>
            ////    </div>
            ////</div>
            //xe = xePost.XPathElement(".//div[@class='shdinf']/div[@class='shdinf']");
            //_print.postAuthor = xe.XPathValue(".//span[@class='rcol']//a//text()");
            //string postDate = xe.XPathValue(".//span[@class='date']//text()");
            //// Aujourd'hui, 17:13
            //if (postDate != null)
            //    _print.infos.SetValue("postDate", new ZString(postDate));
            //_print.category = xe.DescendantTextList(".//span[@class='lcol']").Select(s => TrimString(s)).Where(s => s != "E-Book / Magazines" && s != "Catégorie:" && s != "").zToStringValues("/");
            ////.zForEach(s => s.Trim())

            ////<span id="post-img">
            ////    <div id="news-id-86236" style="display: inline;">
            ////        <div style="text-align: center;">
            ////            <!--dle_image_begin:http://zupimages.net/up/3/1515486591.jpeg|-->
            ////            <img src="http://zupimages.net/up/3/1515486591.jpeg" alt="[Multi] Ici Paris N°3562 - 9 au 15 Octobre 2013"
            ////                title="[Multi] Ici Paris N°3562 - 9 au 15 Octobre 2013" /><!--dle_image_end-->
            ////            <br />
            ////            <b>
            ////                <br />
            ////                Ici Paris N°3562 - 9 au 15 Octobre 2013<br />
            ////                French | 52 pages | HQ PDF | 101 MB
            ////            </b>
            ////            <br />
            ////            <br />
            ////            Ici Paris vous fait partager la vie publique et privée de celles et ceux qui font
            ////            l'actualité : exclusivités, interviews, enquêtes (la face cachée du showbiz, les
            ////            coulisses de la télé) indiscrétions, potins.<br />
            ////        </div>
            ////    </div>
            ////</span>
            //xe = xePost.XPathElement(".//span[@id='post-img']//div[starts-with(@id, 'news-id')]");
            //_print.images = xe.XPathImages(".//img", _imagesToSkip);
            //if (_loadImage)
            //    Http2.LoadImageFromWeb(_print.images);

            return(data);
        }
Exemple #22
0
        // detail get data
        protected override MagazinesGratuits_PostDetail GetDetailData(WebResult webResult)
        {
            XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();
            MagazinesGratuits_PostDetail data = new MagazinesGratuits_PostDetail();

            data.SourceUrl       = webResult.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = webResult.LoadFromWebDate;
            data.Key             = _GetDetailKey(webResult.WebRequest.HttpRequest);

            XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

            XXElement xe = xePost.XPathElement(".//table[@id='post-head']");

            string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
            data.PostCreationDate = GetDate(dates, _lastPostDate);
            if (data.PostCreationDate != null)
            {
                _lastPostDate = new Date(data.PostCreationDate.Value);
            }
            if (__trace)
            {
                pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());
            }

            data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);

            if (titleInfos.foundInfo)
            {
                data.OriginalTitle = data.Title;
                data.Title         = titleInfos.title;
                data.Infos.SetValues(titleInfos.infos);
            }

            // Ebooks en Epub / Livre
            data.Category  = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
            data.PrintType = GetPrintType(data.Category);
            //pb.Trace.WriteLine("category \"{0}\" printType {1}", category, data.printType);

            xe          = xePost.XPathElement(".//div[@class='entry']");
            data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src"))) };

            // force load image to get image width and height
            if (webResult.WebRequest.LoadImage)
            {
                data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
            }

            // get infos, description, language, size, nbPages
            // xe.DescendantTextList(".//p")
            PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
                xe.XPathElements(".//p").DescendantTexts(
                    node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    //if (text.StartsWith("Lien Direct", StringComparison.InvariantCultureIgnoreCase))
                    if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                    {
                        return(XNodeFilter.Stop);
                    }
                }
                if (node is XElement)
                {
                    XElement xe2 = (XElement)node;
                    if (xe2.Name == "p" && xe2.zAttribValue("class") == "submeta")
                    {
                        return(XNodeFilter.Stop);
                    }
                }
                return(XNodeFilter.SelectNode);
            }
                    ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title);

            data.Description = textValues.description;
            //data.Language = textValues.language;
            //data.Size = textValues.size;
            //data.NbPages = textValues.nbPages;
            data.Infos.SetValues(textValues.infos);

            //data.DownloadLinks = xe.DescendantNodes(
            //    node =>
            //        {
            //            if (!(node is XElement))
            //                return true;
            //            XElement xe2 = (XElement)node;
            //            if (xe2.Name != "p")
            //                return true;
            //            XAttribute xa = xe2.Attribute("class");
            //            if (xa == null)
            //                return true;
            //            if (xa.Value != "submeta")
            //                return true;
            //            return false;
            //        },
            //    node => node is XElement && ((XElement)node).Name == "a")
            //    .Select(node => ((XElement)node).Attribute("href").Value).ToArray();
            data.DownloadLinks = xe.DescendantNodes(
                node =>
            {
                if (!(node is XElement))
                {
                    return(XNodeFilter.DontSelectNode);
                }
                XElement xe2 = (XElement)node;
                if (xe2.Name == "a")
                {
                    return(XNodeFilter.SelectNode);
                }
                if (xe2.Name != "p")
                {
                    return(XNodeFilter.DontSelectNode);
                }
                XAttribute xa = xe2.Attribute("class");
                if (xa == null)
                {
                    return(XNodeFilter.DontSelectNode);
                }
                if (xa.Value != "submeta")
                {
                    return(XNodeFilter.DontSelectNode);
                }
                //return XNodeFilter.SkipNode;
                return(XNodeFilter.Stop);
            })
                                 .Select(node => ((XElement)node).Attribute("href").Value).ToArray();


            //// <div id="postlist" class="postlist restrain">
            //xe = xePost.XPathElement(".//div[@id='postlist']");

            //// Aujourd'hui, 07h32 - Aujourd'hui, 10h51 - Hier, 12h55 - 22/02/2014, 21h09
            ////string date = xe.DescendantTextList(".//div[@class='posthead']//text()", nodeFilter: node => node.zGetName() != "a").zToStringValues("");
            //XXElement xe2 = xe.XPathElement(".//div[@class='posthead']");
            //string date = xe2.DescendantTextList(nodeFilter: node => node.zGetName() != "a").zToStringValues("");
            //date = date.Replace('\xA0', ' ');
            //data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, webResult.LoadFromWebDate, @"d/M/yyyy, HH\hmm", @"d-M-yyyy, HH\hmm");
            //if (data.PostCreationDate == null)
            //    pb.Trace.WriteLine("unknow post creation date \"{0}\"", date);

            //data.PostAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()", DownloadPrint.TrimFunc1);

            //// <div class="postbody">
            //xe = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote/div");

            //data.Images = xe.XPathImages(xeImg => new UrlImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src")))).ToArray();


            //// get infos, description, language, size, nbPages
            //PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(xe.DescendantTextList(nodeFilter: node => !(node is XElement) || ((XElement)node).Name != "a"), data.Title);
            //data.Description = textValues.description;
            //data.Language = textValues.language;
            //data.Size = textValues.size;
            //data.NbPages = textValues.nbPages;
            //data.Infos.SetValues(textValues.infos);

            //data.DownloadLinks = xe.XPathValues(".//a/@href");

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
Exemple #23
0
        // detail get data
        protected override TelechargerMagazine_PostDetail GetDetailData(WebResult webResult)
        {
            XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();
            TelechargerMagazine_PostDetail data = new TelechargerMagazine_PostDetail();

            data.SourceUrl       = webResult.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = webResult.LoadFromWebDate;
            data.Id = GetDetailKey(webResult.WebRequest.HttpRequest);

            // la date est juste la date du jour
            // <div id="calendar-layer">
            // <table id="calendar" cellpadding="3" class="calendar">
            // ...
            // <tr>
            // ...
            // <td  class="day-active-v day-current" ><a class="day-active-v" href="http://www.telecharger-magazine.com/2015/07/17/" title="Article posté dans 17 Juillet 2015">17</a></td>
            // ...
            // </tr>
            // ...
            // </table>
            // </div>

            // <div id='dle-content'>
            // ...
            // <div class="right-full">
            //
            // <div class="cat_name">
            // Posted in:
            // <a href="http://www.telecharger-magazine.com/journaux/">Journaux</a>
            // </div>
            //
            // <h2 class="title">
            // <img src="/templates/MStarter/images/title.png" alt="" class="img" />
            // Journaux Français Du 17 Juillet 2015
            // </h2>
            //
            // <div class="contenttext">

            // la date est juste la date du jour
            // http://www.telecharger-magazine.com/2015/07/17/
            //xeSource.XPathValue("//div[@id='calendar-layer']//table[@id='calendar']//td[@class='day-active-v day-current']//a/@href");


            XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']//div[@class='right-full']");

            // Journaux
            data.Category  = xePost.XPathValues(".//div[@class='cat_name']//a/text()").Select(DownloadPrint.Trim).zToStringValues("/");
            data.PrintType = GetPrintType(data.Category);
            //pb.Trace.WriteLine("category \"{0}\" printType {1}", category, data.printType);

            data.Title = xePost.XPathValue(".//h2[@class='title']//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);

            if (titleInfos.foundInfo)
            {
                data.OriginalTitle = data.Title;
                data.Title         = titleInfos.title;
                data.Infos.SetValues(titleInfos.infos);
            }

            XXElement xeContent = xePost.XPathElement(".//div[@class='contenttext']");

            data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xeContent.XPathValue(".//img/@src"))) };

            // force load image to get image width and height
            //if (webResult.WebRequest.LoadImageFromWeb)
            //    data.Images = DownloadPrint.LoadImages(data.Images).ToArray();

            // get infos, description, language, size, nbPages
            PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
                xeContent.DescendantTexts(
                    node =>
            {
                if (node is XText)
                {
                    string text = ((XText)node).Value.Trim();
                    if (text.ToLowerInvariant() == "description")
                    {
                        return(XNodeFilter.DontSelectNode);
                    }
                }
                if (node is XElement)
                {
                    XElement xe = (XElement)node;
                    if (xe.Name == "a")
                    {
                        return(XNodeFilter.Stop);
                    }
                }
                return(XNodeFilter.SelectNode);
            }
                    ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title, extractValuesFromText: false);

            data.Description = textValues.description;
            data.Infos.SetValues(textValues.infos);

            data.DownloadLinks = xeContent.DescendantNodes(
                node =>
            {
                if (!(node is XElement))
                {
                    return(XNodeFilter.DontSelectNode);
                }
                XElement xe2 = (XElement)node;
                if (xe2.Name == "a")
                {
                    return(XNodeFilter.SelectNode);
                }
                if (xe2.Name != "p")
                {
                    return(XNodeFilter.DontSelectNode);
                }
                XAttribute xa = xe2.Attribute("class");
                if (xa == null)
                {
                    return(XNodeFilter.DontSelectNode);
                }
                if (xa.Value != "submeta")
                {
                    return(XNodeFilter.DontSelectNode);
                }
                //return XNodeFilter.SkipNode;
                return(XNodeFilter.Stop);
            })
                                 .Select(node => ((XElement)node).Attribute("href").Value).ToArray();
            data.DownloadLinks = xeContent.XPathValues(".//a/@href").ToArray();

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
Exemple #24
0
        private static ExtremeDown_PostDetail_v2 GetData(WebResult webResult)
        {
            XXElement xeSource             = webResult.Http.zGetXDocument().zXXElement();
            ExtremeDown_PostDetail_v2 data = new ExtremeDown_PostDetail_v2();

            data.SourceUrl       = webResult.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = webResult.LoadFromWebDate;
            data.Id = GetPostDetailKey(webResult.WebRequest.HttpRequest);

            XXElement xePost = xeSource.XPathElement("//div[@id='dle-content']");

            //data.Title = xePost.XPathValue(".//h2[@class='blocktitle']//text()", DownloadPrint.Trim);
            data.Title = xePost.XPathValue(".//h2[@class='blocktitle']//text()").Trim(DownloadPrint.TrimChars);
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);

            if (titleInfos.foundInfo)
            {
                data.OriginalTitle = data.Title;
                data.Title         = titleInfos.title;
                data.Infos.SetValues(titleInfos.infos);
            }

            XXElement xeDiv = xePost.XPathElement(".//div[@class='blockheader']");

            data.Category = xeDiv.XPathValues(".//i[@class='icon-cats']/ancestor::span//a//text()").Select(DownloadPrint.Trim).zToStringValues("/");
            //string category = data.Category.ToLowerInvariant();
            data.PrintType = GetPrintType(data.Category);

            data.PostAuthor = xeDiv.XPathValue(".//span/i[@class='icon-user']/ancestor::span//a//text()");

            string date = xeDiv.XPathValue(".//span/i[@class='icon-date']/ancestor::span//a//text()");

            data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, webResult.LoadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
            if (data.PostCreationDate == null)
            {
                pb.Trace.WriteLine("unknow date time \"{0}\"", date);
            }
            if (__trace)
            {
                pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.PostCreationDate, date);
            }

            xeDiv = xePost.XPathElement(".//div[@class='blockcontent']");

            List <string> description = new List <string>();

            description.AddRange(xeDiv.XPathValues(".//p[@class='release-name']//text()"));

            //data.Images = xeDiv.XPathElement(".//table//td[@class='image-block']").XPathImages(xeImg => new UrlImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).ToArray();
            data.Images = xeDiv.XPathElement(".//table//td[@class='image-block']").DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src")))).ToArray();

            // force load image to get image width and height
            if (webResult.WebRequest.LoadImage)
            {
                data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
            }

            description.AddRange(xeDiv.XPathValues(".//table//td//blockquote//text()"));

            //xeDiv = xePost.XPathElement(".//div[@class='clearfix']");
            xeDiv = xePost.XPathElement(".//div[@class='upload-infos clearfix']");
            description.AddRange(xeDiv.XPathValues(".//table//text()"));

            data.Description = description.ToArray();

            string title = null;
            // xePost.XPathElements(".//script/parent::div//following-sibling::h2")
            Func <XXElement, XNodeFilter> filter =
                xe =>
            {
                if (xe.XElement.Name == "h2")
                {
                    title = xe.XPathValue(".//text()");
                }
                else if (xe.XElement.Name == "script")
                {
                    return(XNodeFilter.Stop);
                }
                else if (xe.XElement.Name == "div")
                {
                    return(XNodeFilter.SelectNode);
                }
                return(XNodeFilter.DontSelectNode);
            };

            foreach (XXElement xe in xePost.XPathElements(".//div[@class='prez_2']//following-sibling::*").zFilterElements(filter))
            {
                //string s = xe.XPathValue(".//text()");
                //// Liens de téléchargement - Pack 1
                //if (s.StartsWith("Liens de téléchargement"))
                //{
                //    s = s.Substring(23).Trim(' ', '-');
                //    if (s == "")
                //        s = title;
                //    else if (title != null)
                //        s = title + " - " + s;
                //    title = null;
                data.DownloadLinks_new.AddItem(title);
                title = null;
                //foreach (XXElement xe2 in xe.XPathElements("following-sibling::div[1]//a"))
                foreach (XXElement xe2 in xe.XPathElements(".//a"))
                {
                    //s = xe2.DescendantTextList().FirstOrDefault();
                    // <strong class="hebergeur">
                    string server = xe2.XPathValue(".//strong[@class='hebergeur']//text()");
                    string link   = xe2.XPathValue("@href");
                    if (__getLinksExtremeProtect && __extremeProtect.IsLinkProtected(link))
                    {
                        data.DownloadLinks_new.AddServer(server, link);
                        data.DownloadLinks_new.AddLinks(__extremeProtect.UnprotectLink(link));
                    }
                    else
                    {
                        data.DownloadLinks_new.AddServer(server);
                        data.DownloadLinks_new.AddLink(link);
                    }
                }
                //}
                //else if (s != null)
                //    title = s;
            }

            //xeDiv = xePost.XPathElement(".//div[@class='blockfooter links']");
            ////data.category = xeDiv.DescendantTextList(".//i[@class='icon-cats']/parent::span//a").Select(DownloadPrint.TrimFunc1).zToStringValues("/");
            //data.Category = xeDiv.XPathElements(".//i[@class='icon-cats']/parent::span//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
            //string category = data.Category.ToLowerInvariant();
            //data.PrintType = GetPrintType(category);
            ////pb.Trace.WriteLine("category \"{0}\" printType {1}", category, data.printType);

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
Exemple #25
0
        public static void Test_Login_01(string url)
        {
            XmlConfig localConfig  = new XmlConfig(XmlConfig.CurrentConfig.GetExplicit("LocalConfig"));
            string    login        = localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Login");
            string    hashPassword = Crypt.ComputeMD5Hash(localConfig.GetExplicit("DownloadAutomateManager/Ebookdz/Password")).zToHex(lowercase: true);

            string urlSite = "http://www.ebookdz.com/";
            HttpRequestParameters_v1 requestParameters = new HttpRequestParameters_v1();

            pb.old.Http_v2.LoadUrl(urlSite, requestParameters);
            XXElement xeSource = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root);

            Trace.WriteLine("Login        : \"{0}\"", Test_GetLogin_01(xeSource));
            Trace.WriteLine("Is logged in : \"{0}\"", Test_IsLoggedIn_01(xeSource));
            // <base href="http://www.ebookdz.com/forum/" />
            string urlBase = xeSource.XPathValue("//head//base/@href");

            //string urlBase = xeSource.XPathValue("//body//base/@href");
            Trace.WriteLine("urlBase : \"{0}\"", urlBase);
            XXElement xeForm = xeSource.XPathElement("//form[@id='navbar_loginform']");

            if (xeForm.XElement == null)
            {
                Trace.WriteLine("element not found \"//form[@id='navbar_loginform']\"");
                return;
            }
            Trace.WriteLine("form action : \"{0}\"", xeForm.XPathValue("@action"));
            string urlForm = zurl.GetUrl(urlBase, xeForm.XPathValue("@action"));
            string method  = xeForm.XPathValue("@method");

            Trace.WriteLine("urlForm : \"{0}\" method {1}", urlForm, method);
            StringBuilder sb    = new StringBuilder();
            bool          first = true;

            foreach (XXElement xeInput in xeForm.XPathElements(".//input"))
            {
                string name = xeInput.XPathValue("@name");
                if (name == null)
                {
                    continue;
                }
                string value = null;
                if (name == "vb_login_username")
                {
                    value = login;
                }
                else if (name == "vb_login_password")
                {
                    value = null;
                }
                else if (name == "vb_login_md5password" || name == "vb_login_md5password_utf")
                {
                    value = hashPassword;
                }
                else
                {
                    value = xeInput.XPathValue("@value");
                }
                if (!first)
                {
                    sb.Append("&");
                }
                sb.AppendFormat("{0}={1}", name, value);
                first = false;
            }
            string content = sb.ToString();

            Trace.WriteLine("content : \"{0}\"", content);

            requestParameters.content = content;
            requestParameters.method  = Http.GetHttpRequestMethod(method);
            pb.old.Http_v2.LoadUrl(urlForm, requestParameters);

            //CookieCollection cookies = requestParameters.cookies.GetCookies(new Uri(urlSite));
            //Trace.WriteLine("cookies :");
            //Trace.WriteLine(cookies.zToJson());

            requestParameters.method  = HttpRequestMethod.Get;
            requestParameters.content = null;
            pb.old.Http_v2.LoadUrl(url, requestParameters);
            xeSource = new XXElement(pb.old.Http_v2.HtmlReader.XDocument.Root);
            Trace.WriteLine("Login        : \"{0}\"", Test_GetLogin_01(xeSource));
            Trace.WriteLine("Is logged in : \"{0}\"", Test_IsLoggedIn_01(xeSource));

            string cookiesFile = Path.Combine(XmlConfig.CurrentConfig.GetExplicit("Ebookdz/CookiesDir"), "cookies.txt");

            Trace.WriteLine("save cookies to \"{0}\"", cookiesFile);
            //zfile.CreateFileDirectory(cookiesFile);
            //CookieCollection cookies = requestParameters.cookies.GetCookies(new Uri(urlSite));
            //cookies.zSave(cookiesFile);
            zcookies.SaveCookies(requestParameters.cookies, urlSite, cookiesFile);

            //cookies = requestParameters.cookies.GetCookies(new Uri(urlSite));
            //Trace.WriteLine("cookies :");
            //Trace.WriteLine(cookies.zToJson());
        }
Exemple #26
0
        protected void _GetDetailData(XXElement xeSource, Vosbooks_PostDetail_v6 data)
        {
            // <div id="page">
            // <div id="wrapper">
            // <table id="layout">
            // <tr>...</tr>
            // <tr>
            // <td class="sidebars">...</td>
            // <td>
            // <div id="left-col">
            // <div id="content-padding">
            // <div id="content">
            // ...
            // <div class="post" id="post-74299" style="margin-top: 0;">
            //
            // <table id="post-head">
            // <tr>
            // <td id="head-date">
            // <div class="date"><span>jan</span> 29</div>
            // </td>
            // <td>
            // <div class="title">
            // <h2><a href="http://www.vosbooks.net/74299-livre/les-imposteurs-francois-cavanna.html" rel="bookmark" title="Les imposteurs – François Cavanna" >Les imposteurs – François Cavanna </a></h2>
            // <div class="postdata">
            // <span class="category">
            // <a href="http://www.vosbooks.net/category/livre/ebooks-epub" rel="category tag">Ebooks en Epub</a>, 
            // <a href="http://www.vosbooks.net/category/livre" rel="category tag">Livre</a>
            // </span>
            // </div>
            // </div>
            // </td>
            // </tr>
            // </table>
            //
            // <div class="entry">
            // ...
            // <p style="text-align: center;"> 
            // <img class="alignnone"  src="http://imageshack.com/a/img538/3859/6JXSxu.jpg" alt="Les imposteurs – François Cavanna" title="Les imposteurs – François Cavanna" height="540" width="420" />
            // </p>

            // </tr>


            XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

            XXElement xe = xePost.XPathElement(".//table[@id='post-head']");
            string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
            data.PostCreationDate = GetDate(dates, _lastPostDate);
            if (data.PostCreationDate != null)
                _lastPostDate = new Date(data.PostCreationDate.Value);
            //if (__trace)
            //    pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());

            data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
            if (titleInfos.foundInfo)
            {
                data.OriginalTitle = data.Title;
                data.Title = titleInfos.title;
                data.Infos.SetValues(titleInfos.infos);
            }

            // Ebooks en Epub / Livre
            data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
            data.PrintType = GetPrintType(data.Category);
            //pb.Trace.WriteLine("category \"{0}\" printType {1}", category, data.printType);

            xe = xePost.XPathElement(".//div[@class='entry']");
            //data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src"))) };
            //string urlImage = xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src");
            string urlImage = xe.XPathValue("h3/following-sibling::p/img/@src");
            if (urlImage != null)
                data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, urlImage)) };

            // force load image to get image width and height
            //if (webResult.WebRequest.LoadImageFromWeb)
            //    data.Images = DownloadPrint.LoadImages(data.Images).ToArray();

            // get infos, description, language, size, nbPages
            // xe.DescendantTextList(".//p")
            PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
                xe.XPathElements(".//p").DescendantTexts(
                node =>
                {
                    if (node is XText)
                    {
                        string text = ((XText)node).Value.Trim();
                        if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
                            return XNodeFilter.Stop;
                    }
                    if (node is XElement)
                    {
                        XElement xe2 = (XElement)node;
                        if (xe2.Name == "p" && xe2.zAttribValue("class") == "submeta")
                            return XNodeFilter.Stop;
                    }
                    return XNodeFilter.SelectNode;
                }
                ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title);
            data.Description = textValues.description;
            //data.Language = textValues.language;
            //data.Size = textValues.size;
            //data.NbPages = textValues.nbPages;
            data.Infos.SetValues(textValues.infos);

            //data.DownloadLinks = xe.DescendantNodes(
            //    node => 
            //        {
            //            if (!(node is XElement))
            //                return true;
            //            XElement xe2 = (XElement)node;
            //            if (xe2.Name != "p")
            //                return true;
            //            XAttribute xa = xe2.Attribute("class");
            //            if (xa == null)
            //                return true;
            //            if (xa.Value != "submeta")
            //                return true;
            //            return false;
            //        },
            //    node => node is XElement && ((XElement)node).Name == "a")
            //    .Select(node => ((XElement)node).Attribute("href").Value).ToArray();
            data.DownloadLinks = xe.DescendantNodes(
                node =>
                {
                    if (!(node is XElement))
                        return XNodeFilter.DontSelectNode;
                    XElement xe2 = (XElement)node;
                    if (xe2.Name == "a")
                        return XNodeFilter.SelectNode;
                    if (xe2.Name != "p")
                        return XNodeFilter.DontSelectNode;
                    XAttribute xa = xe2.Attribute("class");
                    if (xa == null)
                        return XNodeFilter.DontSelectNode;
                    if (xa.Value != "submeta")
                        return XNodeFilter.DontSelectNode;
                    //return XNodeFilter.SkipNode;
                    return XNodeFilter.Stop;
                })
                .Select(node => ((XElement)node).Attribute("href").Value).Where(zurl.CheckUrl).ToArray();


            //// <div id="postlist" class="postlist restrain">
            //xe = xePost.XPathElement(".//div[@id='postlist']");

            //// Aujourd'hui, 07h32 - Aujourd'hui, 10h51 - Hier, 12h55 - 22/02/2014, 21h09
            ////string date = xe.DescendantTextList(".//div[@class='posthead']//text()", nodeFilter: node => node.zGetName() != "a").zToStringValues("");
            //XXElement xe2 = xe.XPathElement(".//div[@class='posthead']");
            //string date = xe2.DescendantTextList(nodeFilter: node => node.zGetName() != "a").zToStringValues("");
            //date = date.Replace('\xA0', ' ');
            //data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, webResult.LoadFromWebDate, @"d/M/yyyy, HH\hmm", @"d-M-yyyy, HH\hmm");
            //if (data.PostCreationDate == null)
            //    pb.Trace.WriteLine("unknow post creation date \"{0}\"", date);

            //data.PostAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()", DownloadPrint.TrimFunc1);

            //// <div class="postbody">
            //xe = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote/div");

            //data.Images = xe.XPathImages(xeImg => new UrlImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src")))).ToArray();


            //// get infos, description, language, size, nbPages
            //PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(xe.DescendantTextList(nodeFilter: node => !(node is XElement) || ((XElement)node).Name != "a"), data.Title);
            //data.Description = textValues.description;
            //data.Language = textValues.language;
            //data.Size = textValues.size;
            //data.NbPages = textValues.nbPages;
            //data.Infos.SetValues(textValues.infos);

            //data.DownloadLinks = xe.XPathValues(".//a/@href");

            //if (__trace)
            //    pb.Trace.WriteLine(data.zToJson());
        }
Exemple #27
0
        protected override RapideDdl_PostDetail GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
        {
            XXElement            xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
            RapideDdl_PostDetail data     = new RapideDdl_PostDetail();

            data.sourceUrl       = loadDataFromWeb.request.Url;
            data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
            data.id = GetPostDetailKey(data.sourceUrl);

            XXElement xePost = xeSource.XPathElement("//div[@class='lcolomn mainside']");

            //data.category = xePost.DescendantTextList(".//div[@class='spbar']//a").Select(DownloadPrint.TrimFunc1).Where(
            data.category = xePost.XPathElements(".//div[@class='spbar']//a").DescendantTexts().Select(DownloadPrint.Trim).Where(
                s =>
            {
                s = s.ToLowerInvariant();
                return(s != "" && !s.Contains("acceuil") && !s.Contains("accueil"));
            }
                ).zToStringValues("/");
            string category = data.category.ToLowerInvariant();

            data.printType = GetPostType(category);

            //data.title = xePost.DescendantTextList(".//div[@class='spbar']", func: DownloadPrint.TrimFunc1).LastOrDefault();
            data.title = xePost.XPathElements(".//div[@class='spbar']").DescendantTexts().Select(DownloadPrint.Trim).LastOrDefault();
            //ExtractTitleInfos(data);
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.title);

            if (titleInfos.foundInfo)
            {
                data.originalTitle = data.title;
                data.title         = titleInfos.title;
                data.infos.SetValues(titleInfos.infos);
            }

            XXElement xe   = xePost.XPathElement(".//div[@class='shdinfo']");
            string    date = xe.XPathValue(".//span[@class='date']//text()");

            //data.creationDate = Download.Print.RapideDdl.RapideDdl.ParseDateTime(date, loadDataFromWeb.loadFromWebDate);
            data.creationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
            if (data.creationDate == null)
            {
                pb.Trace.WriteLine("unknow date time \"{0}\"", date);
            }
            if (__trace)
            {
                pb.Trace.WriteLine("creationDate {0} - \"{1}\"", data.creationDate, date);
            }
            data.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

            xe = xePost.XPathElement(".//div[@class='maincont']");
            //data.images = xe.XPathImages(xeImg => new UrlImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).ToArray();
            data.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(loadDataFromWeb.request.Url, xeImg.zAttribValue("src")))).ToArray();

            if (loadDataFromWeb.request.LoadImage)
            {
                data.images = DownloadPrint.LoadImages(data.images).ToArray();
            }

            //RapideDdl.SetTextValues(data, xe.DescendantTextList(nodeFilter: node => !(node is XElement) || ((XElement)node).Name != "a" ));
            // xe.DescendantTextList(nodeFilter: node => !(node is XElement) || ((XElement)node).Name != "a")
            PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(xe.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode), data.title);

            data.description = textValues.description;
            data.language    = textValues.language;
            data.size        = textValues.size;
            data.nbPages     = textValues.nbPages;
            data.infos.SetValues(textValues.infos);

            List <string> downloadLinks = new List <string>();

            foreach (XXElement xe2 in xe.XPathElements("div/div"))
            {
                // http://prezup.eu http://pixhst.com/avaxhome/27/36/002e3627.jpeg http://www.zupmage.eu/i/R1UgqdXn4F.jpg
                // http://i.imgur.com/Gu7hagN.jpg http://img11.hostingpics.net/pics/591623liens.png http://www.hapshack.com/images/jUfTZ.gif
                // http://pixhst.com/pictures/3029467
                downloadLinks.AddRange(xe2.XPathValues(".//a/@href").Where(url => !url.StartsWith("http://prezup.eu") && !url.StartsWith("http://pixhst.com") &&
                                                                           !url.EndsWith(".jpg") && !url.EndsWith("jpeg") && !url.EndsWith("png") && !url.EndsWith("gif")));
            }
            data.downloadLinks = downloadLinks.ToArray();

            //if (__trace)
            //    RapideDdl_LoadPostDetail.Trace_RapideDdl_PostDetail(data);

            return(data);
        }
Exemple #28
0
        private static OnisepInstitution_Detail GetData(WebResult webResult)
        {
            XXElement xeSource            = webResult.Http.zGetXDocument().zXXElement();
            OnisepInstitution_Detail data = new OnisepInstitution_Detail();

            data.SourceUrl       = webResult.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = webResult.LoadFromWebDate;
            data.Id = GetKey(webResult.WebRequest.HttpRequest);

            XXElement xeData = xeSource.XPathElement("//div[@id='oni_content-page']//div[@class='oni_innerContent']//div[@id='oni_zoom-block']");

            data.Institution = OnisepInstitution.Trim(xeData.XPathValue(".//h1/text()"));
            // <span class="oni_span-title">Code UAI : 0062080D</span>
            string s = OnisepInstitution.Trim(xeData.XPathValue(".//span[@class='oni_span-title']/text()"));

            if (s != null && s.StartsWith("Code UAI :", StringComparison.InvariantCultureIgnoreCase))
            {
                data.UAICode = OnisepInstitution.Trim(s.Substring(10));
            }

            XXElement xe = xeData.XPathElement(".//div[@class='oni_fiche-info-1']");

            data.Address    = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='street-address']/text()"));
            data.PostalCode = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='postal-code']/text()"));
            data.City       = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='locality']/text()"));
            data.Tel        = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='tel']/text()"));
            s = xe.XPathValues(".//p[@class='vcard']//text()").Select(OnisepInstitution.Trim).Where(t => t.StartsWith("Fax :", StringComparison.InvariantCultureIgnoreCase)).FirstOrDefault();
            if (s != null)
            {
                data.Fax = OnisepInstitution.Trim(s.Substring(5));
            }
            s = xe.XPathValue(".//a[@class='email']/@href");
            if (s != null && s.StartsWith("mailto:", StringComparison.InvariantCultureIgnoreCase))
            {
                s = s.Substring(7);
            }
            data.Mail    = s;
            data.WebSite = xe.DescendantTextNodes().Where(xt => string.Equals(OnisepInstitution.Trim(xt.Value), "site :", StringComparison.InvariantCultureIgnoreCase)).FirstOrDefault()
                           //.zXPathValue(".//following-sibling::a/@href");
                           .zXPathValue(".//following::a/@href");

            foreach (XXElement xe2 in xeData.XPathElements(".//div[@class='oni_fiche-info-2']//li"))
            {
                string[] values = xe2.DescendantTexts().Take(2).ToArray();
                if (values.Length != 2)
                {
                    continue;
                }
                switch (OnisepInstitution.Trim(values[0]).ToLower())
                {
                case "statut de l'établissement :":
                    data.InstitutionStatus = OnisepInstitution.Trim(values[1]);
                    break;

                case "hébergement :":
                    data.Lodging = OnisepInstitution.Trim(values[1]);
                    break;

                case "présence d'une ulis":
                    data.Ulis = OnisepInstitution.Trim(values[1]);
                    break;
                }
            }

            data.StudyLevels = xeData.XPathElements(".//div[@class='oni_nav-in']//ul[@class='oni_nav-in-ul']//li").Select(li => li.DescendantTexts().zConcatStrings()).Where(txt => txt != null).ToArray();

            data.BacLevel = GetBacLevel(data.StudyLevels);

            // Address    = text in <span class="street-address">
            // PostalCode = text in <span class="postal-code">
            // City       = text in <span class="locality">
            // Tel        = text in <span class="tel">
            // Fax        = text start with "Fax :"
            // Mail       = @href start with mailto: in <a class="email">

            //bool address = false;
            //foreach (XNode node in xeData.XPathElement(".//div[@class='oni_fiche-info-1']//p[@class='vcard']").DescendantNodes())
            //{
            //    if (node is XElement)
            //    {
            //        XElement xe = (XElement)node;
            //        if (xe.Name == "span")
            //        {
            //            XAttribute attribute = xe.Attribute("class");
            //            if (attribute != null && attribute.Value == "street-address")
            //                address = true;
            //        }
            //        else
            //            address = false;
            //    }
            //    if (node is XText)
            //    {
            //        if (address)
            //        {
            //            data.Address = OnisepInstitution.Trim(((XText)node).Value);
            //            address = false;
            //        }
            //    }
            //}

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
Exemple #29
0
        protected override Unea_DetailCompany2 GetData()
        {
            XXElement           xeSource = new XXElement(GetXmlDocument().Root);
            Unea_DetailCompany2 data     = new Unea_DetailCompany2();

            data.sourceUrl       = Url;
            data.loadFromWebDate = DateTime.Now;

            // <div class='ctn_content-article'>
            XXElement xeContent = xeSource.XPathElement(".//div[@class='ctn_content-article']");

            //IEnumerator<string> texts = xeContent.DescendantTextList(nodeFilter: node => !(node is XElement) || (((XElement)node).Name != "script" && ((XElement)node).Name != "table"), func: __trimFunc2).GetEnumerator();
            IEnumerator <string> texts = xeContent.DescendantTexts(node => !(node is XElement) || (((XElement)node).Name != "script" && ((XElement)node).Name != "table") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode).Select(__trimFunc2).GetEnumerator();

            // <h1>
            // <img src="http://unea.griotte.biz/BaseDocumentaire/Docs/Public/4017/LOGOAmpouleC.JPG" style='border-width:2px;border-color:#5593C9;' height='60px' />
            // <span>Entreprise Adapt&eacute;e</span><br />
            // ALSACE ENTREPRISE ADAPTEE
            // </h1>
            if (texts.MoveNext() && texts.MoveNext())
            {
                data.name = texts.Current;
            }

            // <h2>ALSACE ENTREPRISE ADAPTEE est implant&eacute;e sur les sites de Colmar et Mulhouse avec un effectif de 106 salari&eacute;s, avec les activit&eacute;s sous-traitance : assemblage de pi&egrave;ces, cintrage de tuyaux, montage complexe, ainsi qu'une activit&eacute; prestation de service en espaces verts, m&eacute;nage et transport.</h2>
            if (texts.MoveNext())
            {
                data.presentation = texts.Current;
            }

            Unea_TextType textType = Unea_TextType.unknow;

            //foreach (XText xtext in xeContent.DescendantTextNodeList(".//table"))
            foreach (XText xtext in xeContent.XPathElements(".//table").DescendantTextNodes())
            {
                string text = __trimFunc2(xtext.Value);
                if (text == "")
                {
                    continue;
                }
                if (text.Equals("NOS ACTIVITES", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.activity;
                }
                else if (text.Equals("FILIERES METIER UNEA", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.sector;
                }
                else if (text.Equals("DOCUMENTS TÉLÉCHARGEABLES", StringComparison.InvariantCultureIgnoreCase))
                {
                    foreach (XXElement xe2 in new XXElement(xtext.Parent).XPathElements("following-sibling::ul//a"))
                    {
                        string url = xe2.XPathValue("@href");
                        //string name = name = xe2.XPathValue(".//text()", __trimFunc2);
                        string name = __trimFunc2(xe2.XPathValue(".//text()"));
                        if (!data.downloadDocuments.ContainsKey(url))
                        {
                            data.downloadDocuments.Add(url, new Unea_Document()
                            {
                                name = name, url = url
                            });
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning download document already exists \"{0}\" \"{1}\"", name, url);
                        }
                    }
                    // textType = novalues pour ne pas avoir Plaquette_AEA.pdf dans unknowInfos
                    textType = Unea_TextType.novalues;
                }
                else if (text.Equals("NOUS CONTACTER", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.novalue;
                }
                else if (text.Equals("ADRESSE", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.address;
                }
                else if (text.Equals("TELEPHONE", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.phone;
                }
                else if (text.Equals("FAX", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.fax;
                }
                else if (text.Equals("EMAIL", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.email;
                }
                else if (text.Equals("SITE", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.webSite;
                }
                else if (text.Equals("QUI SOMMES NOUS", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.novalue;
                }
                else if (text.Equals("DIRIGEANT", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.leader;
                }
                else if (text.Equals("NOMBRE DE SALARIÉS", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.employeNumber;
                }
                else if (text.Equals("CHIFFRE D'AFFAIRE DE L'ANNÉE ÉCOULÉE", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.lastYearRevenue;
                }
                else if (text.Equals("NUMÉRO SIRET", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.siret;
                }
                else if (text.Equals("CERTIFICATION", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.certification;
                }
                else if (text.Equals("PRINCIPAUX CLIENTS", StringComparison.InvariantCultureIgnoreCase))
                {
                    textType = Unea_TextType.client;
                }
                else
                {
                    switch (textType)
                    {
                    case Unea_TextType.activity:
                        if (!data.activities.ContainsKey(text))
                        {
                            data.activities.Add(text, null);
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning activity already exists \"{0}\"", text);
                        }
                        break;

                    case Unea_TextType.sector:
                        //data.sectors.Add(text);
                        if (!data.sectors.ContainsKey(text))
                        {
                            data.sectors.Add(text, null);
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("warning sector already exists \"{0}\"", text);
                        }
                        break;

                    case Unea_TextType.address:
                        if (data.address == null)
                        {
                            data.address = text;
                        }
                        else
                        {
                            data.address += " " + text;
                        }
                        break;

                    case Unea_TextType.phone:
                        data.phone = text;
                        textType   = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.fax:
                        data.fax = text;
                        textType = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.email:
                        data.email = text;
                        textType   = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.webSite:
                        data.webSite = text;
                        textType     = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.leader:
                        data.leader = text;
                        textType    = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.employeNumber:
                        int employeNumber;
                        if (int.TryParse(text, out employeNumber))
                        {
                            data.employeNumber = employeNumber;
                        }
                        else
                        {
                            Trace.CurrentTrace.WriteLine("error unknow employe number \"{0}\"", text);
                        }
                        textType = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.lastYearRevenue:
                        if (text != "€")
                        {
                            data.lastYearRevenue = text;
                        }
                        textType = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.siret:
                        data.siret = text;
                        textType   = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.certification:
                        data.certification = text;
                        textType           = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.client:
                        data.clients = text;
                        textType     = Unea_TextType.unknow;
                        break;

                    case Unea_TextType.novalues:
                        break;

                    default:
                        data.unknowInfos.Add(text);
                        break;
                    }
                }
            }

            foreach (XXElement xe in xeContent.XPathElements(".//table//td/a/img"))
            {
                string url = xe.XPathValue("@src");
                if (!data.photos.ContainsKey(url))
                {
                    data.photos.Add(url, null);
                }
                else
                {
                    Trace.CurrentTrace.WriteLine("warning photo already exists \"{0}\"", url);
                }
            }

            return(data);
        }