/// <summary>
/// Reads the URL-cache and MongoDB settings from <paramref name="xe"/> into the class's
/// static fields, then creates the current header-pages manager.
/// </summary>
/// <param name="xe">Element queried (via zXPathValue) for each setting value.</param>
public static void ClassInit(XElement xe)
{
    // URL cache settings.
    __useUrlCache = xe.zXPathValue("UseUrlCache").zTryParseAs(false);
    __cacheDirectory = xe.zXPathValue("CacheDirectory");

    // MongoDB settings. For UseMongo and MongoServer the current field value is passed as the
    // second argument (presumably the fallback used when the setting is absent - TODO confirm).
    __useMongo = xe.zXPathValue("UseMongo").zTryParseAs(__useMongo);
    __mongoServer = xe.zXPathValue("MongoServer", __mongoServer);
    __mongoDatabase = xe.zXPathValue("MongoDatabase");
    __mongoCollectionName = xe.zXPathValue("MongoCollection");
    __mongoDocumentItemName = xe.zXPathValue("MongoDocumentItemName");

    // The document store stays null when Mongo is disabled; the manager receives null in that case.
    IDocumentStore_v3<int, RapideDdl_HeaderPage> headerPageStore = null;
    if (__useMongo)
    {
        headerPageStore = new MongoDocumentStore_v3<int, RapideDdl_HeaderPage>(
            __mongoServer,
            __mongoDatabase,
            __mongoCollectionName,
            __mongoDocumentItemName);
        headerPageStore.DefaultSort = "{ 'download.id': 1 }";
        headerPageStore.GetDataKey = headerPage => headerPage.id;
    }

    // Built after the store so that GetUrlCache() is still invoked at the same point as before.
    RapideDdl_LoadHeaderPageFromWebManager webLoader = new RapideDdl_LoadHeaderPageFromWebManager(GetUrlCache());
    __currentLoadHeaderPagesManager = new RapideDdl_LoadHeaderPagesManager(webLoader, headerPageStore);
}
/// <summary>
/// Builds a <see cref="RapideDdl_HeaderPage"/> from a loaded listing page: records the source url,
/// load date, page key and next-page url, then creates one <see cref="RapideDdl_PostHeader"/> for
/// every <c>div[@class='base shortstory']</c> element found in the document.
/// </summary>
/// <param name="loadDataFromWeb">Loaded page; provides the xml document, the request url and the load date.</param>
/// <returns>The header page with its <c>postHeaders</c> array filled in document order.</returns>
protected override RapideDdl_HeaderPage GetDataFromWeb(LoadDataFromWeb_v3 loadDataFromWeb)
{
    XXElement xeSource = new XXElement(loadDataFromWeb.GetXmlDocument().Root);
    string url = loadDataFromWeb.request.Url;

    RapideDdl_HeaderPage data = new RapideDdl_HeaderPage();
    data.sourceUrl = url;
    data.loadFromWebDate = loadDataFromWeb.loadFromWebDate;
    data.id = RapideDdl_LoadHeaderPagesManager.GetHeaderPageKey(url);
    // The "next" navigation link is resolved against the page url.
    data.urlNextPage = zurl.GetUrl(url, xeSource.XPathValue("//div[@class='basenavi']//span[@class='nnext']//a/@href"));

    IEnumerable<XXElement> xeHeaders = xeSource.XPathElements("//div[@class='base shortstory']");
    List<RapideDdl_PostHeader> headers = new List<RapideDdl_PostHeader>();
    foreach (XXElement xeHeader in xeHeaders)
    {
        RapideDdl_PostHeader header = new RapideDdl_PostHeader();
        header.sourceUrl = url;
        header.loadFromWebDate = loadDataFromWeb.loadFromWebDate;

        // Title link: detail url and raw title text.
        XXElement xe = xeHeader.XPathElement(".//*[@class='shd']//a");
        header.urlDetail = zurl.GetUrl(url, xe.XPathValue("@href"));
        header.title = xe.XPathValue(".//text()").Trim(DownloadPrint.TrimChars);

        // When infos are found in the title, keep the cleaned title and store the infos.
        PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(header.title);
        if (titleInfos.foundInfo)
        {
            header.title = titleInfos.title;
            header.infos.SetValues(titleInfos.infos);
        }

        // Author and creation date.
        xe = xeHeader.XPathElement(".//div[@class='shdinfo']");
        header.postAuthor = xe.XPathValue(".//span[@class='arg']//a//text()");

        // Sample date text from the site: "Aujourd'hui, 17:13" (French for "Today, 17:13").
        string date = xe.XPathValue(".//span[@class='date']//text()");
        header.creationDate = zdate.ParseDateTimeLikeToday(date, loadDataFromWeb.loadFromWebDate, "d-M-yyyy, HH:mm", "d M yyyy", "d MMMM yyyy");
        if (header.creationDate == null)
        {
            pb.Trace.WriteLine("unknow date time \"{0}\"", date);
        }
        if (__trace)
        {
            pb.Trace.WriteLine("creationDate {0} - \"{1}\"", header.creationDate, date);
        }

        // Main content: images and text values.
        xe = xeHeader.XPathElement(".//div[@class='maincont']");
        header.images = xe.DescendantNodes(node => XmlDescendant.ImageFilter(node))
            .Select(xeImg => new WebImage(zurl.GetUrl(url, xeImg.zAttribValue("src"))))
            .ToArray();

        // Text values are collected from the descendant texts; "a" elements are skipped by the filter.
        PrintTextValues_v1 textValues = DownloadPrint.PrintTextValuesManager.GetTextValues_v1(
            xe.DescendantTexts(node => (!(node is XElement) || ((XElement)node).Name != "a") ? XNodeFilter.SelectNode : XNodeFilter.SkipNode),
            header.title);
        header.description = textValues.description;
        header.language = textValues.language;
        header.size = textValues.size;
        header.nbPages = textValues.nbPages;
        header.infos.SetValues(textValues.infos);

        // Category: texts of the links, minus the entry starting with "Commentaires".
        // Ordinal comparison keeps the filter independent of the thread's current culture.
        xe = xeHeader.XPathElement(".//div[@class='morelink']//span[@class='arg']");
        header.category = xe.XPathElements(".//a")
            .DescendantTexts()
            .Select(DownloadPrint.Trim)
            .Where(s => !s.StartsWith("Commentaires", System.StringComparison.Ordinal))
            .zToStringValues("/");

        headers.Add(header);
    }

    data.postHeaders = headers.ToArray();
    return data;
}