Example #1
0
        // header get data, from WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>
        // Builds the header page object from a downloaded result page : fills the page
        // metadata (source url, request time, page key), the url of the next page and
        // one Handeco_Header_v2 per row of the result table.
        //   httpResult : http result whose content is read through zGetXDocument()
        //   returns    : a Handeco_HeaderDataPages with Data set to the parsed headers
        protected override IEnumDataPages <Handeco_Header_v2> GetHeaderPageData(HttpResult <string> httpResult)
        {
            XXElement root    = httpResult.zGetXDocument().zXXElement();
            string    pageUrl = httpResult.Http.HttpRequest.Url;

            Handeco_HeaderDataPages page = new Handeco_HeaderDataPages
            {
                SourceUrl       = pageUrl,
                LoadFromWebDate = httpResult.Http.RequestTime,
                Id              = GetPageKey(httpResult.Http.HttpRequest)
            };

            // next page link is read from the element before the last one of the pagination control
            // <div class="paginationControl">
            // page n    : <a href="/fournisseurs/rechercher/page/2#resultats">&gt;</a> |
            // last page : <span class="disabled">&gt;</span> |
            // NOTE(review): on the last page there is no @href, the resulting value depends on
            // how zurl.GetUrl() and zurl.RemoveFragment() handle a missing value - to confirm
            string nextPageHref = root.XPathValue("//div[@class='paginationControl']//*[position()=last()-1]/@href");
            page.UrlNextPage = zurl.RemoveFragment(zurl.GetUrl(pageUrl, nextPageHref));

            // one header per table row, the first row is skipped (position() > 1)
            page.Data = root.XPathElements("//table//tr[position() > 1]")
                .Select(row => new Handeco_Header_v2
                {
                    SourceUrl       = pageUrl,
                    // time of the parsing, not the request time used for the page itself
                    LoadFromWebDate = DateTime.Now,
                    Name            = Handeco.Trim(row.XPathValue(".//td[1]//text()")),
                    UrlDetail       = zurl.RemoveFragment(zurl.GetUrl(pageUrl, row.XPathValue(".//td[1]//a/@href"))),
                    Type            = Handeco.Trim(row.XPathValue(".//td[2]//text()")),
                    Groupes         = row.XPathValues(".//td[3]//text()").Select(Handeco.Trim).ToArray(),
                    Activités       = row.XPathValues(".//td[4]//text()").Select(Handeco.Trim).ToArray(),
                    PostalCode      = Handeco.Trim(row.XPathValue(".//td[5]//text()"))
                })
                .ToArray();

            return page;
        }
Example #2
0
 // Returns the text nodes found under the <td> elements of xe, one array item per text node.
 // Each text goes through _badCharacters.Replace(text, " ") then Handeco.Trim().
 private static string[] GetTextValues(XXElement xe)
 {
     return (from text in xe.XPathValues(".//td//text()")
             select Handeco.Trim(_badCharacters.Replace(text, " "))).ToArray();
 }
Example #3
0
        // Fills data from a detail page :
        //   - the last update date (data.DernièreMiseàjour)
        //   - the activities (data.Activités) and the contacts (data.Contacts), one per
        //     table "fiche organisation" whose id starts with "fiche_activite_" / "fiche_contact_"
        //   - the company values, read from the rows of every other "fiche organisation" table
        // Every value that cannot be interpreted is appended to data.UnknowInfos.
        //   xeSource : root element of the detail page
        //   data     : detail object to fill, data.UnknowInfos must already be created
        protected void _GetDetailData(XXElement xeSource, Handeco_Detail_v2 data)
        {
            //<div style="text-align: right; font-size: 10px;">
            //<em>Dernière mise à jour le 18-01-2013</em>
            //</div>
            string lastUpdate = Handeco.Trim(xeSource.XPathValue("//em[starts-with(text(), 'Dernière mise à jour')]/text()"));

            if (lastUpdate != null)
            {
                Match    match = _lastUpdateRegex.Match(lastUpdate);
                DateTime date;
                // "dd-MM-yyyy" is a fixed numeric format coming from the web page : parse it with the
                // invariant culture so the result does not depend on the culture / calendar of the machine
                if (match.Success && DateTime.TryParseExact(match.Value, "dd-MM-yyyy", System.Globalization.CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.None, out date))
                {
                    data.DernièreMiseàjour = date;
                }
                else
                {
                    // keep the raw text so the unparsed date is not lost
                    data.UnknowInfos.Add(lastUpdate);
                }
            }
            else
            {
                pb.Trace.WriteLine("error \"Dernière mise à jour\" not found");
            }

            // NOTRE OFFRE - activities - multiple
            //<select style="width: 200px; display: none;" onchange="change_activite(this.selectedIndex);" id="select_activites">
            //    <option>Sous-traitance industrielle - Autre</option>
            //    <option>Assemblage mécanique</option>
            //    <option>Energie renouvelable - Autre</option>
            //</select>
            string[] activityTypes = xeSource.XPathValues("//select[@id = 'select_activites']/option/text()").Select(Handeco.Trim).ToArray();

            // CONTACTS - multiple
            //<select style="width: 200px; display: none;" onchange="change_contact(this.selectedIndex);" id="select_contacts">
            //    <option>Jacky STEINLE (Chef d'atelier)</option>
            //</select>
            string[] contactDescriptions = xeSource.XPathValues("//select[@id = 'select_contacts']/option/text()").Select(Handeco.Trim).ToArray();

            int             indexActivityType       = 0;
            int             indexContactDescription = 0;
            List <Activity> activities = new List <Activity>();
            List <Contact>  contacts   = new List <Contact>();

            foreach (XXElement xxe in xeSource.XPathElements("//table[@class = 'fiche organisation']"))
            {
                // the table id tells what the table describes; the prefix is compared with an ordinal,
                // case insensitive comparison so the test does not depend on the current culture
                string id         = xxe.XPathValue("@id");
                bool   isActivity = id != null && id.StartsWith("fiche_activite_", StringComparison.OrdinalIgnoreCase);
                bool   isContact  = !isActivity && id != null && id.StartsWith("fiche_contact_", StringComparison.OrdinalIgnoreCase);

                Activity activity = null;
                Contact  contact  = null;
                if (isActivity)
                {
                    activity = new Activity();
                    activities.Add(activity);
                    // the n-th activity table is paired with the n-th <option> of select_activites
                    if (indexActivityType < activityTypes.Length)
                    {
                        activity.Type = activityTypes[indexActivityType++];
                    }
                    else
                    {
                        pb.Trace.WriteLine("warning miss an activity type in html (<select id='select_activites'>)");
                    }
                }
                else if (isContact)
                {
                    contact = new Contact();
                    contacts.Add(contact);
                    // the n-th contact table is paired with the n-th <option> of select_contacts
                    if (indexContactDescription < contactDescriptions.Length)
                    {
                        contact.Description = contactDescriptions[indexContactDescription++];
                    }
                    else
                    {
                        pb.Trace.WriteLine("warning miss a contact description in html (<select id='select_contacts'>)");
                    }
                }

                foreach (XXElement xeRow in xxe.XPathElements(".//tr"))
                {
                    // the <th> text is the name of the value, rows without it are ignored
                    string valueName = Handeco.Trim(xeRow.XPathValue(".//th//text()"));
                    if (valueName == null)
                    {
                        continue;
                    }

                    // the row goes to the activity, the contact or the company itself
                    bool   known;
                    string unknownLabel;
                    if (activity != null)
                    {
                        known        = SetActivityValue(activity, valueName, xeRow);
                        unknownLabel = "valeur activité inconnu : ";
                    }
                    else if (contact != null)
                    {
                        known        = SetContactValue(contact, valueName, xeRow);
                        unknownLabel = "valeur contact inconnu : ";
                    }
                    else
                    {
                        known        = SetValue(data, valueName, xeRow);
                        unknownLabel = "valeur inconnu : ";
                    }

                    if (!known)
                    {
                        data.UnknowInfos.Add(unknownLabel + valueName + " = " + GetTextValue(xeRow));
                    }
                }
            }
            data.Activités = activities.ToArray();
            data.Contacts  = contacts.ToArray();
        }