/// <summary>
/// Builds the header page data from a loaded institution search-result page:
/// the page url, load date and key, the url of the next result page, and one
/// header (institution name, detail url, city, postal code) per result row.
/// </summary>
/// <param name="webResult">Loaded page; its http document, request and load date are read.</param>
/// <returns>The page data, with <c>Headers</c> holding one entry per row of the result table.</returns>
private static OnisepInstitution_HeaderPage GetData(WebResult webResult)
{
    XXElement xeSource = new XXElement(webResult.Http.zGetXDocument().Root);
    string url = webResult.WebRequest.HttpRequest.Url;

    OnisepInstitution_HeaderPage data = new OnisepInstitution_HeaderPage();
    data.SourceUrl = url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetPageKey(webResult.WebRequest.HttpRequest);

    // Sample of the markup being parsed:
    //
    // <table class="oni_tableSearchResults" style="width: 463px; margin-left: 5px" summary="">
    //   ...
    //   <tbody>
    //     <tr class=oni_odd>
    //       <td class="oni_first"><a href="/Ressources/Univers-Postbac/Postbac/Aquitaine/Pyrenees-Atlantiques/Academie-Basque-du-Sport"> Académie Basque du Sport </a></td>
    //       <td>Biarritz</td>
    //       <td>64200</td>
    //     </tr>
    //     ...
    //   </tbody>
    // </table>
    // <div class="pagenavigator">
    //   <p>
    //     <span class="pages">
    //       <span class="current">1</span>
    //       <span class="other"><a href="/content/search/(offset)/10?&SubTreeArray=243418&etabRecherche=1&idFormation=&limit=10">2</a></span>
    //       ...
    //     </span>
    //     <span class="next"><a href="/content/search/(offset)/10?&SubTreeArray=243418&etabRecherche=1&idFormation=&limit=10"><span class="text">Suivant »</span></a></span>
    //   </p>
    //   <div class="break"></div>
    // </div>

    // The "next" link href is combined with the page url by zurl.GetUrl
    // (presumably to resolve the relative href - confirm against zurl).
    data.UrlNextPage = zurl.GetUrl(url, xeSource.XPathValue("//div[@class='pagenavigator']//span[@class='next']//a/@href"));

    IEnumerable<XXElement> xeHeaders = xeSource.XPathElements("//table[@class='oni_tableSearchResults']//tbody//tr");
    List<OnisepInstitution_Header> headers = new List<OnisepInstitution_Header>();
    foreach (XXElement xeHeader in xeHeaders)
    {
        OnisepInstitution_Header header = new OnisepInstitution_Header();
        header.SourceUrl = url;
        // Use the load date of the page the row comes from, consistent with
        // data.LoadFromWebDate, instead of the time at which the row is parsed.
        header.LoadFromWebDate = webResult.LoadFromWebDate;

        // First cell: institution name (text) and link to the detail page.
        XXElement xeFirstCell = xeHeader.XPathElement(".//td[1]");
        header.Institution = OnisepInstitution.Trim(xeFirstCell.XPathValue(".//text()"));
        header.UrlDetail = zurl.GetUrl(url, xeFirstCell.XPathValue(".//a/@href"));

        // Second and third cells: city and postal code.
        header.City = OnisepInstitution.Trim(xeHeader.XPathValue(".//td[2]//text()"));
        header.PostalCode = OnisepInstitution.Trim(xeHeader.XPathValue(".//td[3]//text()"));

        headers.Add(header);
    }
    data.Headers = headers.ToArray();

    return data;
}
/// <summary>
/// Builds the institution detail data from a loaded institution detail page.
/// </summary>
/// <remarks>
/// Fields read from the <c>oni_zoom-block</c> element:
/// Institution (h1 text), UAICode (text after "Code UAI :"),
/// Address / PostalCode / City / Tel (spans <c>street-address</c>, <c>postal-code</c>,
/// <c>locality</c>, <c>tel</c>), Fax (vcard text starting with "Fax :"),
/// Mail (href of <c>a.email</c> without the "mailto:" prefix), WebSite (href of the
/// first link following the "site :" text), InstitutionStatus / Lodging / Ulis
/// (label/value list items of <c>oni_fiche-info-2</c>), StudyLevels and BacLevel.
/// </remarks>
/// <param name="webResult">Loaded page; its http document, request and load date are read.</param>
/// <returns>The detail data extracted from the page.</returns>
private static OnisepInstitution_Detail GetData(WebResult webResult)
{
    XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();

    OnisepInstitution_Detail data = new OnisepInstitution_Detail();
    data.SourceUrl = webResult.WebRequest.HttpRequest.Url;
    data.LoadFromWebDate = webResult.LoadFromWebDate;
    data.Id = GetKey(webResult.WebRequest.HttpRequest);

    XXElement xeData = xeSource.XPathElement("//div[@id='oni_content-page']//div[@class='oni_innerContent']//div[@id='oni_zoom-block']");

    data.Institution = OnisepInstitution.Trim(xeData.XPathValue(".//h1/text()"));

    // <span class="oni_span-title">Code UAI : 0062080D</span>
    string s = OnisepInstitution.Trim(xeData.XPathValue(".//span[@class='oni_span-title']/text()"));
    if (s != null && s.StartsWith("Code UAI :", StringComparison.InvariantCultureIgnoreCase))
    {
        // 10 = length of the "Code UAI :" prefix.
        data.UAICode = OnisepInstitution.Trim(s.Substring(10));
    }

    // Contact block (vcard).
    XXElement xe = xeData.XPathElement(".//div[@class='oni_fiche-info-1']");
    data.Address = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='street-address']/text()"));
    data.PostalCode = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='postal-code']/text()"));
    data.City = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='locality']/text()"));
    data.Tel = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='tel']/text()"));

    // Fax has no dedicated element: take the first vcard text starting with "Fax :".
    // The null check guards against Trim returning null (its null behaviour is not visible here).
    s = xe.XPathValues(".//p[@class='vcard']//text()")
        .Select(OnisepInstitution.Trim)
        .Where(t => t != null && t.StartsWith("Fax :", StringComparison.InvariantCultureIgnoreCase))
        .FirstOrDefault();
    if (s != null)
    {
        // 5 = length of the "Fax :" prefix.
        data.Fax = OnisepInstitution.Trim(s.Substring(5));
    }

    s = xe.XPathValue(".//a[@class='email']/@href");
    if (s != null && s.StartsWith("mailto:", StringComparison.InvariantCultureIgnoreCase))
    {
        // 7 = length of the "mailto:" prefix.
        s = s.Substring(7);
    }
    data.Mail = s;

    // Web site: href of the first <a> after the "site :" text node. The following:: axis
    // is used, so the link does not have to be a sibling of the label text node.
    data.WebSite = xe.DescendantTextNodes()
        .Where(xt => string.Equals(OnisepInstitution.Trim(xt.Value), "site :", StringComparison.InvariantCultureIgnoreCase))
        .FirstOrDefault()
        .zXPathValue(".//following::a/@href");

    // Each list item is expected to hold a label text followed by a value text.
    foreach (XXElement xe2 in xeData.XPathElements(".//div[@class='oni_fiche-info-2']//li"))
    {
        string[] values = xe2.DescendantTexts().Take(2).ToArray();
        if (values.Length != 2)
        {
            continue;
        }

        string label = OnisepInstitution.Trim(values[0]);
        if (label == null)
        {
            continue;
        }

        // Lower-case with the invariant culture so label matching does not depend on the
        // current culture (e.g. under tr-TR "I" lower-cases to a dotless "ı", which would
        // break the "ulis" match if the page writes the acronym in capitals).
        switch (label.ToLowerInvariant())
        {
            case "statut de l'établissement :":
                data.InstitutionStatus = OnisepInstitution.Trim(values[1]);
                break;
            case "hébergement :":
                data.Lodging = OnisepInstitution.Trim(values[1]);
                break;
            case "présence d'une ulis":
                data.Ulis = OnisepInstitution.Trim(values[1]);
                break;
        }
    }

    // One entry per navigation list item, built from the concatenated texts of the item.
    data.StudyLevels = xeData.XPathElements(".//div[@class='oni_nav-in']//ul[@class='oni_nav-in-ul']//li")
        .Select(li => li.DescendantTexts().zConcatStrings())
        .Where(txt => txt != null)
        .ToArray();
    data.BacLevel = GetBacLevel(data.StudyLevels);

    if (__trace)
    {
        pb.Trace.WriteLine(data.zToJson());
    }

    return data;
}