Пример #1
0
        /// <summary>
        /// Builds an <see cref="OnisepInstitution_HeaderPage"/> from one loaded search-result page:
        /// the page-level fields (source url, load date, page key, next-page url) plus one
        /// <see cref="OnisepInstitution_Header"/> per row of the result table.
        /// </summary>
        /// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
        /// <returns>The populated page object; <c>Headers</c> holds one entry per matched table row.</returns>
        private static OnisepInstitution_HeaderPage GetData(WebResult webResult)
        {
            XXElement xeSource = new XXElement(webResult.Http.zGetXDocument().Root);
            string url = webResult.WebRequest.HttpRequest.Url;

            OnisepInstitution_HeaderPage data = new OnisepInstitution_HeaderPage
            {
                SourceUrl = url,
                LoadFromWebDate = webResult.LoadFromWebDate,
                Id = GetPageKey(webResult.WebRequest.HttpRequest)
            };

            // Result table markup:
            // <table class="oni_tableSearchResults" style="width: 463px; margin-left: 5px" summary="">
            // ...
            //   <tbody>
            //     <tr class=oni_odd>
            //       <td class="oni_first"><a href="/Ressources/Univers-Postbac/Postbac/Aquitaine/Pyrenees-Atlantiques/Academie-Basque-du-Sport"> Académie Basque du Sport                        </a></td>
            //       <td>Biarritz</td>
            //       <td>64200</td>
            //     </tr>
            //     ...
            //   </tbody>
            // </table>

            // Pager markup:
            // <div class="pagenavigator">
            //   <p>
            //     <span class="pages">
            //       <span class="current">1</span>
            //       <span class="other"><a href="/content/search/(offset)/10?&amp;SubTreeArray=243418&amp;etabRecherche=1&amp;idFormation=&amp;limit=10">2</a></span>
            //       ...
            //     </span>
            //     <span class="next"><a href="/content/search/(offset)/10?&amp;SubTreeArray=243418&amp;etabRecherche=1&amp;idFormation=&amp;limit=10"><span class="text">Suivant&nbsp;&raquo;</span></a></span>
            //   </p>
            //   <div class="break"></div>
            // </div>

            // The "next" link href is resolved against the current page url.
            data.UrlNextPage = zurl.GetUrl(url, xeSource.XPathValue("//div[@class='pagenavigator']//span[@class='next']//a/@href"));

            List<OnisepInstitution_Header> headers = new List<OnisepInstitution_Header>();

            foreach (XXElement xeRow in xeSource.XPathElements("//table[@class='oni_tableSearchResults']//tbody//tr"))
            {
                // First cell carries both the institution name (text) and the detail link (href).
                XXElement xeFirstCell = xeRow.XPathElement(".//td[1]");

                headers.Add(new OnisepInstitution_Header
                {
                    SourceUrl = url,
                    // Use the page's load date rather than DateTime.Now: every row was loaded
                    // together with the page, and this keeps the value consistent with
                    // data.LoadFromWebDate and reproducible when the same result is re-parsed.
                    LoadFromWebDate = webResult.LoadFromWebDate,
                    Institution = OnisepInstitution.Trim(xeFirstCell.XPathValue(".//text()")),
                    UrlDetail = zurl.GetUrl(url, xeFirstCell.XPathValue(".//a/@href")),
                    City = OnisepInstitution.Trim(xeRow.XPathValue(".//td[2]//text()")),
                    PostalCode = OnisepInstitution.Trim(xeRow.XPathValue(".//td[3]//text()"))
                });
            }

            data.Headers = headers.ToArray();
            return data;
        }
Пример #2
0
        /// <summary>
        /// Builds an <see cref="OnisepInstitution_Detail"/> from one loaded institution detail page:
        /// name, UAI code, contact block (address, phone, fax, mail, web site), status fields
        /// and the list of study levels.
        /// </summary>
        /// <param name="webResult">Loaded page; supplies the parsed document, the request and the load date.</param>
        /// <returns>The populated detail object. Fields whose markup is not found are left unset.</returns>
        private static OnisepInstitution_Detail GetData(WebResult webResult)
        {
            // Prefixes are matched ordinally so that the number of characters stripped
            // afterwards (prefix.Length) always equals the number of characters matched.
            const string uaiPrefix  = "Code UAI :";
            const string faxPrefix  = "Fax :";
            const string mailPrefix = "mailto:";

            XXElement xeSource = webResult.Http.zGetXDocument().zXXElement();
            OnisepInstitution_Detail data = new OnisepInstitution_Detail();

            data.SourceUrl       = webResult.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = webResult.LoadFromWebDate;
            data.Id = GetKey(webResult.WebRequest.HttpRequest);

            XXElement xeData = xeSource.XPathElement("//div[@id='oni_content-page']//div[@class='oni_innerContent']//div[@id='oni_zoom-block']");

            data.Institution = OnisepInstitution.Trim(xeData.XPathValue(".//h1/text()"));

            // <span class="oni_span-title">Code UAI : 0062080D</span>
            string title = OnisepInstitution.Trim(xeData.XPathValue(".//span[@class='oni_span-title']/text()"));
            if (title != null && title.StartsWith(uaiPrefix, StringComparison.OrdinalIgnoreCase))
            {
                data.UAICode = OnisepInstitution.Trim(title.Substring(uaiPrefix.Length));
            }

            // Contact block:
            // Address    = text in <span class="street-address">
            // PostalCode = text in <span class="postal-code">
            // City       = text in <span class="locality">
            // Tel        = text in <span class="tel">
            // Fax        = text start with "Fax :"
            // Mail       = @href start with mailto: in <a class="email">
            XXElement xe = xeData.XPathElement(".//div[@class='oni_fiche-info-1']");

            data.Address    = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='street-address']/text()"));
            data.PostalCode = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='postal-code']/text()"));
            data.City       = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='locality']/text()"));
            data.Tel        = OnisepInstitution.Trim(xe.XPathValue(".//span[@class='tel']/text()"));

            // Trim is treated as nullable elsewhere in this method, so guard before StartsWith.
            string fax = xe.XPathValues(".//p[@class='vcard']//text()")
                .Select(OnisepInstitution.Trim)
                .Where(t => t != null && t.StartsWith(faxPrefix, StringComparison.OrdinalIgnoreCase))
                .FirstOrDefault();
            if (fax != null)
            {
                data.Fax = OnisepInstitution.Trim(fax.Substring(faxPrefix.Length));
            }

            // The href is stored as-is when it does not start with "mailto:".
            string mail = xe.XPathValue(".//a[@class='email']/@href");
            if (mail != null && mail.StartsWith(mailPrefix, StringComparison.OrdinalIgnoreCase))
            {
                mail = mail.Substring(mailPrefix.Length);
            }
            data.Mail = mail;

            // Web site: href of the first <a> following the text node "site :".
            data.WebSite = xe.DescendantTextNodes().Where(xt => string.Equals(OnisepInstitution.Trim(xt.Value), "site :", StringComparison.InvariantCultureIgnoreCase)).FirstOrDefault()
                           .zXPathValue(".//following::a/@href");

            // Each <li> is read as a (label, value) pair from its first two descendant texts.
            foreach (XXElement xeItem in xeData.XPathElements(".//div[@class='oni_fiche-info-2']//li"))
            {
                string[] values = xeItem.DescendantTexts().Take(2).ToArray();
                if (values.Length != 2)
                {
                    continue;
                }

                string label = OnisepInstitution.Trim(values[0]);
                if (label == null)
                {
                    continue;
                }

                // Invariant lower-casing: the labels are fixed page text, not user-locale data.
                switch (label.ToLowerInvariant())
                {
                case "statut de l'établissement :":
                    data.InstitutionStatus = OnisepInstitution.Trim(values[1]);
                    break;

                case "hébergement :":
                    data.Lodging = OnisepInstitution.Trim(values[1]);
                    break;

                // NOTE(review): unlike the labels above, this one has no trailing " :" —
                // confirm against the page markup.
                case "présence d'une ulis":
                    data.Ulis = OnisepInstitution.Trim(values[1]);
                    break;
                }
            }

            data.StudyLevels = xeData.XPathElements(".//div[@class='oni_nav-in']//ul[@class='oni_nav-in-ul']//li")
                .Select(li => li.DescendantTexts().zConcatStrings())
                .Where(txt => txt != null)
                .ToArray();

            data.BacLevel = GetBacLevel(data.StudyLevels);

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return data;
        }