示例#1
0
        //protected override IEnumDataPages<int, IHeaderData> GetDataFromWeb(LoadDataFromWeb loadDataFromWeb)
        /// <summary>
        /// Builds an <see cref="Ebookdz_HeaderPage"/> from an already loaded page: one post header
        /// (title and detail url) is read from each "collapse" entry found under the "vba_news4" block.
        /// </summary>
        /// <param name="loadDataFromWeb">Load result providing the parsed document, the original request and the load date.</param>
        /// <returns>The header page, cast to the paging interface. Its next-page url is always null.</returns>
        protected override IEnumDataPages_v2<int, IHeaderData_v2> GetData(LoadDataFromWeb_v4 loadDataFromWeb)
        {
            XXElement root = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);
            string pageUrl = loadDataFromWeb.WebRequest.HttpRequest.Url;

            Ebookdz_HeaderPage page = new Ebookdz_HeaderPage();
            page.SourceUrl = pageUrl;
            page.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;
            page.Id = Ebookdz_LoadHeaderPagesManager.GetHeaderPageKey(loadDataFromWeb.WebRequest.HttpRequest);

            // no next-page link is extracted for this page
            page.UrlNextPage = null;

            // entries live under <div id="vba_news4">
            IEnumerable<XXElement> entries = root.XPathElements("//div[@id='vba_news4']//div[@class='collapse']");
            List<Ebookdz_PostHeader> postHeaders = new List<Ebookdz_PostHeader>();

            foreach (XXElement entry in entries)
            {
                Ebookdz_PostHeader postHeader = new Ebookdz_PostHeader
                {
                    SourceUrl = pageUrl,
                    LoadFromWebDate = loadDataFromWeb.LoadFromWebDate
                };

                // the second anchor of the block head carries both the title text and the detail href
                XXElement titleLink = entry.XPathElement(".//h2[@class='blockhead']//a[2]");
                postHeader.Title = titleLink.XPathValue(".//text()");
                postHeader.UrlDetail = titleLink.XPathValue("./@href");

                postHeaders.Add(postHeader);
            }

            page.PostHeaders = postHeaders.ToArray();
            return (IEnumDataPages_v2<int, IHeaderData_v2>)page;
        }
示例#2
0
        /// <summary>
        /// Loads the document described by <paramref name="webRequest"/>, going through the url cache when one is configured.
        /// </summary>
        /// <remarks>
        /// With a url cache, the page is first downloaded to its cache file when a reload is requested or the file
        /// does not exist, then the document is loaded from that file and the file's last write time is used as load date.
        /// Without a url cache, the document is loaded from the request itself and the load date is the current time.
        /// <c>InitLoadFromWeb</c> is called once, before the first request whose url starts with "http://".
        /// </remarks>
        /// <param name="webRequest">Request to load; it is stored in the returned object.</param>
        /// <returns>
        /// The load result. <c>LoadResult</c> and <c>LoadFromWebDate</c> are only set when a document was obtained;
        /// when the download to the cache file fails the result is returned without <c>Http</c> being assigned.
        /// </returns>
        public LoadDataFromWeb_v4 Load(RequestFromWeb_v4 webRequest)
        {
            LoadDataFromWeb_v4 result = new LoadDataFromWeb_v4 { WebRequest = webRequest };
            HttpRequest request = webRequest.HttpRequest;
            DateTime loadDate;

            if (_urlCache == null)
            {
                loadDate = DateTime.Now;
            }
            else
            {
                string cachePath = _urlCache.GetUrlPath(request);
                bool mustDownload = webRequest.ReloadFromWeb || !zFile.Exists(cachePath);

                if (mustDownload)
                {
                    if (_firstLoadFromWeb && request.Url.StartsWith("http://"))
                    {
                        InitLoadFromWeb();
                        _firstLoadFromWeb = false;
                    }

                    bool downloaded = HttpManager.CurrentHttpManager.LoadToFile(request, cachePath, _exportRequest, GetHttpRequestParameters());
                    if (!downloaded)
                    {
                        return result;
                    }
                }

                // from here on the document is read from the cache file
                request = new HttpRequest { Url = cachePath };

                // use the last write time as load date: the creation time is modified when the file is copied
                loadDate = zFile.CreateFileInfo(cachePath).LastWriteTime;
            }

            if (_firstLoadFromWeb && request.Url.StartsWith("http://"))
            {
                InitLoadFromWeb();
                _firstLoadFromWeb = false;
            }

            result.Http = HttpManager.CurrentHttpManager.Load(request, GetHttpRequestParameters());
            if (result.Http == null)
            {
                return result;
            }

            result.LoadResult = true;
            result.LoadFromWebDate = loadDate;
            return result;
        }
示例#3
0
        /// <summary>
        /// Loads a forum page and lazily enumerates its categories and their child sub-forums.
        /// </summary>
        /// <remarks>
        /// For each item of the "forumbits" list one entry is returned for the category itself (no name),
        /// followed by one entry per child sub-forum accepted by <paramref name="filter"/>.
        /// Nothing is returned when the page could not be loaded.
        /// </remarks>
        /// <param name="url">Url of the forum page to load.</param>
        /// <param name="forum">Forum name copied into every returned entry.</param>
        /// <param name="filter">Optional predicate on the sub-forum name; it is not applied to category entries.</param>
        /// <param name="reload">Passed to the request as its reload flag.</param>
        /// <returns>A deferred sequence of <see cref="Ebookdz_Forum"/> entries.</returns>
        public IEnumerable<Ebookdz_Forum> LoadSubForum(string url, string forum, Predicate<string> filter = null, bool reload = false)
        {
            LoadDataFromWeb_v4 page = Load(new RequestFromWeb_v4(new HttpRequest { Url = url }, reload: reload));
            if (!page.LoadResult)
            {
                yield break;
            }

            XXElement root = new XXElement(page.Http.zGetXDocument().Root);

            // page layout:
            // <div id="forumbits" class="forumbits">
            //   <ol>
            //     <li id="forum10" class="forumbit_post new L1">
            //       <div class="forumrow">
            //       <ol id="childforum_for_161" class="childsubforum">
            //         <div class="titleline">
            foreach (XXElement forumItem in root.XPathElements("//div[@id='forumbits']/ol/li"))
            {
                XXElement categoryLink = forumItem.XPathElement(".//div[@class='forumrow']//a");
                string category = categoryLink.XPathValue(".//text()");
                string categoryUrl = Ebookdz.GetUrl(zurl.GetUrl(page.WebRequest.HttpRequest.Url, categoryLink.XPathValue("@href")));

                yield return new Ebookdz_Forum { Forum = forum, Category = category, Url = categoryUrl };

                foreach (XXElement childLink in forumItem.XPathElements(".//ol[@class='childsubforum']/li//div[@class='titleline']//a"))
                {
                    string name = childLink.XPathValue(".//text()");
                    if (filter == null || filter(name))
                    {
                        string childUrl = Ebookdz.GetUrl(zurl.GetUrl(page.WebRequest.HttpRequest.Url, childLink.XPathValue("@href")));
                        yield return new Ebookdz_Forum { Forum = forum, Category = category, Name = name, Url = childUrl };
                    }
                }
            }
        }
示例#4
0
        /// <summary>
        /// Loads the main forum page (<c>__urlForum</c>) and lazily enumerates the top level forums it lists.
        /// </summary>
        /// <param name="filter">Optional predicate on the forum name; forums it rejects are skipped.</param>
        /// <param name="reload">Passed to the request as its reload flag.</param>
        /// <returns>
        /// A deferred sequence with one <see cref="Ebookdz_Forum"/> (name and url) per accepted forum;
        /// empty when the page could not be loaded.
        /// </returns>
        public IEnumerable<Ebookdz_Forum> LoadMainForum(Predicate<string> filter = null, bool reload = false)
        {
            LoadDataFromWeb_v4 page = Load(new RequestFromWeb_v4(new HttpRequest { Url = __urlForum }, reload: reload));
            if (!page.LoadResult)
            {
                yield break;
            }

            XXElement root = new XXElement(page.Http.zGetXDocument().Root);

            // forums are the items of <ol id="forums" class="floatcontainer">, for example:
            // Accueil de la Board, Forum de l'entraide, Journaux, MAGAZINES, Les Livres, Sujet supprimés ou à supprimer
            // http://www.ebookdz.com/forum/forumdisplay.php?f=1&s=1fdf76d35a57d09aa11e75ff6f0d9985
            foreach (XXElement forumItem in root.XPathElements("//ol[@id='forums']/li"))
            {
                XXElement forumLink = forumItem.XPathElement(".//a");
                string forumName = forumLink.XPathValue(".//text()");

                if (filter == null || filter(forumName))
                {
                    string forumUrl = Ebookdz.GetUrl(zurl.GetUrl(page.WebRequest.HttpRequest.Url, forumLink.XPathValue("@href")));
                    yield return new Ebookdz_Forum { Forum = forumName, Url = forumUrl };
                }
            }
        }
示例#5
0
        /// <summary>
        /// Parses a loaded post page into an <see cref="Ebookdz_PostDetail"/>: title, category (from the breadcrumb),
        /// print type, creation date, author, images, text values and download links.
        /// </summary>
        /// <param name="loadDataFromWeb">Load result providing the parsed document, the original request and the load date.</param>
        /// <returns>The filled post detail.</returns>
        protected override IPost GetData(LoadDataFromWeb_v4 loadDataFromWeb)
        {
            XXElement          xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);
            Ebookdz_PostDetail data     = new Ebookdz_PostDetail();

            data.SourceUrl       = loadDataFromWeb.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;
            data.Id = GetPostDetailKey(loadDataFromWeb.WebRequest.HttpRequest);

            // <div class="body_bd">
            XXElement xePost = xeSource.XPathElement("//div[@class='body_bd']");

            // example title: Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
            data.Title = xePost.XPathValue(".//div[@id='pagetitle']//a//text()").Trim(DownloadPrint.TrimChars);
            PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);

            if (titleInfos.foundInfo)
            {
                // keep the raw title and replace it with the one stripped from its infos
                data.OriginalTitle = data.Title;
                data.Title         = titleInfos.title;
                data.Infos.SetValues(titleInfos.infos);
            }

            // example breadcrumb: Forum / Journaux / Presse quotidienne / Le Monde / Journal Le Monde + Magazine + 2 suppléments du samedi 03 janvier 2015
            string lowerTitle = null;

            if (data.Title != null)
            {
                lowerTitle = data.Title.ToLowerInvariant();
            }

            // The breadcrumb entry that repeats the post title is dropped from the category. That filter is only
            // applied when a title is available: string.EndsWith(null) throws ArgumentNullException and
            // EndsWith("") is true for every entry, which would wrongly remove the whole category.
            bool filterOnTitle = !string.IsNullOrEmpty(lowerTitle);

            data.Category = xePost.XPathElements(".//div[@id='breadcrumb']//a").DescendantTexts()
                .Where(text =>
                {
                    text = text.ToLowerInvariant();
                    return text != "forum" && (!filterOnTitle || !text.EndsWith(lowerTitle));
                })
                .Select(DownloadPrint.Trim)
                .zToStringValues("/");
            string category = data.Category.ToLowerInvariant();

            data.PrintType = GetPrintType(category);

            // <div id="postlist" class="postlist restrain">
            XXElement xe = xePost.XPathElement(".//div[@id='postlist']");

            // example dates: Aujourd'hui, 07h32 - Aujourd'hui, 10h51 - Hier, 12h55 - 22/02/2014, 21h09
            // the date is the text of the post head, skipping "a" nodes
            XXElement xe2 = xe.XPathElement(".//div[@class='posthead']");
            string date = xe2.DescendantTexts(node => node.zGetName() != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode).zToStringValues("");

            // replace non-breaking spaces before parsing
            date = date.Replace('\xA0', ' ');
            data.PostCreationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.LoadFromWebDate, @"d/M/yyyy, HH\hmm", @"d-M-yyyy, HH\hmm");
            if (data.PostCreationDate == null)
            {
                pb.Trace.WriteLine("unknow post creation date \"{0}\"", date);
            }
            if (__trace)
            {
                pb.Trace.WriteLine("post creation date {0} - \"{1}\"", data.PostCreationDate, date);
            }

            data.PostAuthor = xe.XPathValue(".//div[@class='userinfo']//a//text()").Trim(DownloadPrint.TrimChars);

            // <div class="postbody">
            xe = xePost.XPathElement(".//div[@class='postbody']//div[@class='content']//blockquote/div");

            // image urls are resolved against the page url
            data.Images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(data.SourceUrl, xeImg.zAttribValue("src")))).ToArray();

            // force load image to get image width and height
            if (loadDataFromWeb.WebRequest.LoadImage)
            {
                data.Images = DownloadPrint.LoadImages(data.Images).ToArray();
            }

            // get infos, description, language, size, nbPages from the post texts, skipping "a" elements
            PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
                xe.DescendantTexts(node => (!(node is XElement) || ((XElement)node).Name != "a") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
                data.Title);

            data.Description = textValues.description;
            data.Language    = textValues.language;
            data.Size        = textValues.size;
            data.NbPages     = textValues.nbPages;
            data.Infos.SetValues(textValues.infos);

            data.DownloadLinks = xe.XPathValues(".//a/@href").ToArray();

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);
        }
示例#6
0
文件: Ebookdz.cs 项目: labeuze/source
        /// <summary>
        /// Builds an <see cref="Ebookdz_HeaderPage"/> from a loaded forum thread-list page: the next-page url
        /// and one post header (title and detail url) per thread of the list.
        /// </summary>
        /// <param name="loadDataFromWeb">Load result providing the parsed document, the original request and the load date.</param>
        /// <returns>The header page, cast to <c>IKeyData&lt;int&gt;</c>. Its <c>Id</c> is not set here.</returns>
        public static IKeyData<int> GetForumHeaderPageData(LoadDataFromWeb_v4 loadDataFromWeb)
        {
            XXElement xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);
            string url = loadDataFromWeb.WebRequest.HttpRequest.Url;
            Ebookdz_HeaderPage data = new Ebookdz_HeaderPage();
            data.SourceUrl = url;
            data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;

            // next page link:
            // <div id="above_threadlist" class="above_threadlist">
            // <div class="threadpagenav">
            // <span class="prev_next">
            // <a rel="next" href="forumdisplay.php?f=74&amp;page=2&amp;s=4807e931448c05da34dd54fbd0308479" title="Page suivante - Résultats de 21 à 40 sur 66">
            data.UrlNextPage = GetUrl(zurl.GetUrl(url, xeSource.XPathValue("//div[@id='above_threadlist']//span[@class='prev_next']//a[@rel='next']/@href")));

            // thread list:
            // <div id="threadlist" class="threadlist">
            // <ol id="threads" class="threads">
            IEnumerable<XXElement> xeHeaders = xeSource.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li");
            List<Ebookdz_PostHeader> headers = new List<Ebookdz_PostHeader>();
            foreach (XXElement xeHeader in xeHeaders)
            {
                Ebookdz_PostHeader header = new Ebookdz_PostHeader();
                header.SourceUrl = url;
                header.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;

                // <div class="threadinfo" title="">
                // <div class="inner">
                // <a title="" class="title" href="showthread.php?t=111210&amp;s=4807e931448c05da34dd54fbd0308479" id="thread_title_111210">L'OPINION du mardi  20 janvier 2015</a>
                XXElement xe = xeHeader.XPathElement(".//div[@class='threadinfo']//a[@class='title']");
                header.Title = xe.XPathValue(".//text()");

                // the thread href is resolved against the page url, like the next-page link
                header.UrlDetail = GetUrl(zurl.GetUrl(url, xe.XPathValue("@href")));

                headers.Add(header);
            }
            data.PostHeaders = headers.ToArray();
            return (IKeyData<int>)data;
        }
示例#7
0
文件: Ebookdz.cs 项目: 24/source_04
        /// <summary>
        /// Builds an <see cref="Ebookdz_HeaderPage"/> from a loaded forum thread-list page: the next-page url
        /// and one post header (title and detail url) per thread of the list.
        /// </summary>
        /// <param name="loadDataFromWeb">Load result providing the parsed document, the original request and the load date.</param>
        /// <returns>The header page, cast to <c>IKeyData&lt;int&gt;</c>. Its <c>Id</c> is not set here.</returns>
        public static IKeyData <int> GetForumHeaderPageData(LoadDataFromWeb_v4 loadDataFromWeb)
        {
            XXElement          xeSource = new XXElement(loadDataFromWeb.Http.zGetXDocument().Root);
            string             url      = loadDataFromWeb.WebRequest.HttpRequest.Url;
            Ebookdz_HeaderPage data     = new Ebookdz_HeaderPage();

            data.SourceUrl       = url;
            data.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;

            // next page link:
            // <div id="above_threadlist" class="above_threadlist">
            // <div class="threadpagenav">
            // <span class="prev_next">
            // <a rel="next" href="forumdisplay.php?f=74&amp;page=2&amp;s=4807e931448c05da34dd54fbd0308479" title="Page suivante - Résultats de 21 à 40 sur 66">
            data.UrlNextPage = GetUrl(zurl.GetUrl(url, xeSource.XPathValue("//div[@id='above_threadlist']//span[@class='prev_next']//a[@rel='next']/@href")));

            // thread list:
            // <div id="threadlist" class="threadlist">
            // <ol id="threads" class="threads">
            IEnumerable <XXElement>   xeHeaders = xeSource.XPathElements("//div[@id='threadlist']//ol[@id='threads']/li");
            List <Ebookdz_PostHeader> headers   = new List <Ebookdz_PostHeader>();

            foreach (XXElement xeHeader in xeHeaders)
            {
                Ebookdz_PostHeader header = new Ebookdz_PostHeader();
                header.SourceUrl       = url;
                header.LoadFromWebDate = loadDataFromWeb.LoadFromWebDate;

                // <div class="threadinfo" title="">
                // <div class="inner">
                // <a title="" class="title" href="showthread.php?t=111210&amp;s=4807e931448c05da34dd54fbd0308479" id="thread_title_111210">L'OPINION du mardi  20 janvier 2015</a>
                XXElement xe = xeHeader.XPathElement(".//div[@class='threadinfo']//a[@class='title']");
                header.Title     = xe.XPathValue(".//text()");

                // the thread href is resolved against the page url, like the next-page link
                header.UrlDetail = GetUrl(zurl.GetUrl(url, xe.XPathValue("@href")));

                headers.Add(header);
            }
            data.PostHeaders = headers.ToArray();
            return((IKeyData <int>)data);
        }