Пример #1
0
        /// <summary>
        /// Reads the url-cache and Mongo settings from <paramref name="xe"/> into the static
        /// fields of the class, then creates the current header-pages manager.
        /// </summary>
        /// <param name="xe">Element the setting values are read from.</param>
        public static void ClassInit(XElement xe)
        {
            // Url cache settings ("false" is handed to zTryParseAs as the fallback argument).
            __useUrlCache = xe.zXPathValue("UseUrlCache").zTryParseAs(false);
            __cacheDirectory = xe.zXPathValue("CacheDirectory");

            // Mongo settings: UseMongo and MongoServer pass their current value as second
            // argument (presumably used as the default when the setting is absent).
            __useMongo = xe.zXPathValue("UseMongo").zTryParseAs(__useMongo);
            __mongoServer = xe.zXPathValue("MongoServer", __mongoServer);
            __mongoDatabase = xe.zXPathValue("MongoDatabase");
            __mongoCollectionName = xe.zXPathValue("MongoCollection");
            __mongoDocumentItemName = xe.zXPathValue("MongoDocumentItemName");

            // The store stays null when Mongo is disabled; the manager receives null in that case.
            IDocumentStore_v3<int, RapideDdl_HeaderPage> headerPageStore = null;
            if (__useMongo)
            {
                headerPageStore = new MongoDocumentStore_v3<int, RapideDdl_HeaderPage>(
                    __mongoServer,
                    __mongoDatabase,
                    __mongoCollectionName,
                    __mongoDocumentItemName);
                headerPageStore.DefaultSort = "{ 'download.id': 1 }";
                headerPageStore.GetDataKey = page => page.id;
            }

            // GetUrlCache() is called after the cache settings above have been assigned.
            var webLoader = new RapideDdl_LoadHeaderPageFromWebManager(GetUrlCache());
            __currentLoadHeaderPagesManager = new RapideDdl_LoadHeaderPagesManager(webLoader, headerPageStore);
        }
Пример #2
0
        /// <summary>
        /// Builds a <see cref="RapideDdl_HeaderPage"/> from the xml document of a loaded web page:
        /// the page-level fields (source url, load date, id, next page url) and one
        /// <see cref="RapideDdl_PostHeader"/> for each div with class "base shortstory".
        /// </summary>
        /// <param name="loadDataFromWeb">Loaded page; supplies the xml document, the request url and the load date.</param>
        /// <returns>The header page, with <c>postHeaders</c> filled in document order.</returns>
        protected override RapideDdl_HeaderPage GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
        {
            XXElement            xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
            string               url      = loadDataFromWeb.request.Url;
            RapideDdl_HeaderPage data     = new RapideDdl_HeaderPage();

            data.sourceUrl       = url;
            data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
            data.id              = RapideDdl_LoadHeaderPagesManager.GetHeaderPageKey(url);

            // Link of the "next" entry in the page navigation, combined with the page url.
            data.urlNextPage = zurl.GetUrl(url, xeSource.XPathValue("//div[@class='basenavi']//span[@class='nnext']//a/@href"));

            IEnumerable<XXElement>     xeHeaders = xeSource.XPathElements("//div[@class='base shortstory']");
            List<RapideDdl_PostHeader> headers   = new List<RapideDdl_PostHeader>();

            foreach (XXElement xeHeader in xeHeaders)
            {
                RapideDdl_PostHeader header = new RapideDdl_PostHeader();
                header.sourceUrl       = url;
                header.loadFromWebDate = loadDataFromWeb.loadFromWebDate;

                // Title link: detail url and raw title text.
                XXElement xeTitleLink = xeHeader.XPathElement(".//*[@class='shd']//a");
                header.urlDetail = zurl.GetUrl(url, xeTitleLink.XPathValue("@href"));
                header.title     = xeTitleLink.XPathValue(".//text()").Trim(DownloadPrint.TrimChars);

                // When infos are found in the title, keep the cleaned title and store the infos.
                PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(header.title);
                if (titleInfos.foundInfo)
                {
                    header.title = titleInfos.title;
                    header.infos.SetValues(titleInfos.infos);
                }

                // Author and creation date.
                XXElement xeInfo = xeHeader.XPathElement(".//div[@class='shdinfo']");
                header.postAuthor = xeInfo.XPathValue(".//span[@class='arg']//a//text()");

                // Sample date text seen on the site: "Aujourd'hui, 17:13".
                string date = xeInfo.XPathValue(".//span[@class='date']//text()");
                header.creationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
                if (header.creationDate == null)
                {
                    pb.Trace.WriteLine("unknow date time \"{0}\"", date);
                }
                if (__trace)
                {
                    pb.Trace.WriteLine("creationDate {0} - \"{1}\"", header.creationDate, date);
                }

                // Main content: images, then the text values.
                XXElement xeContent = xeHeader.XPathElement(".//div[@class='maincont']");
                header.images = xeContent.DescendantNodes(node => XmlDescendant.ImageFilter(node)).Select(xeImg => new WebImage(zurl.GetUrl(url, xeImg.zAttribValue("src")))).ToArray();

                // Description, language, size, nbPages and infos come from the descendant texts;
                // "a" elements are given XNodeFilter.SkipNode, every other node XNodeFilter.SelectNode.
                PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(xeContent.DescendantTexts(node => !(node is XElement) || ((XElement)node).Name != "a" ? XNodeFilter.SelectNode : XNodeFilter.SkipNode), header.title);
                header.description = textValues.description;
                header.language    = textValues.language;
                header.size        = textValues.size;
                header.nbPages     = textValues.nbPages;
                header.infos.SetValues(textValues.infos);

                // Category: link texts joined with "/", without the "Commentaires..." link.
                // Ordinal comparison keeps the filter independent of the current thread culture.
                XXElement xeCategory = xeHeader.XPathElement(".//div[@class='morelink']//span[@class='arg']");
                header.category = xeCategory.XPathElements(".//a").DescendantTexts().Select(DownloadPrint.Trim).Where(s => !s.StartsWith("Commentaires", System.StringComparison.Ordinal)).zToStringValues("/");

                headers.Add(header);
            }

            data.postHeaders = headers.ToArray();
            return data;
        }