Пример #1
0
        private static bool SetValue(Handeco_Detail detail, string valueName, XXElement xe)
        {
            bool ret = true;

            switch (valueName.ToLower())
            {
            // LES INFOS CLES (html)
            case "logo":
                detail.Logo = zurl.GetUrl(detail.SourceUrl, xe.XPathValue(".//td//img/@src"));
                break;

            case "raison sociale":
                detail.RaisonSociale = GetTextValue(xe);
                break;

            case "date de création":
                detail.DateCréation = GetTextValue(xe);
                break;

            case "statut juridique":
                detail.StatutJuridique = GetTextValue(xe);
                break;

            case "type de structure":
                detail.TypeStructure = GetTextValue(xe);
                break;

            case "site web":
                detail.SiteWeb = GetTextValue(xe);
                break;

            case "n° siret":
                detail.Siret = GetTextValue(xe);
                break;

            case "localisation géographique":
                detail.Localisation = GetTextValue(xe);
                break;

            case "normes, habilitations et certifications":
                detail.Normes = GetTextValue(xe);
                break;

            case "chiffre d'affaires annuel":
                detail.ChiffreAffairesAnnuel = GetTextValue(xe);
                break;

            case "effectif total (etp)":
                detail.EffectifTotal = GetTextValue(xe);
                break;

            case "effectif de production (etp)":
                detail.EffectifProduction = GetTextValue(xe);
                break;

            case "effectif d'encadrement (etp)":
                detail.EffectifEncadrement = GetTextValue(xe);
                break;

            case "nombre de travailleurs handicapés (etp)":
                detail.NombreTravailleursHandicapés = GetTextValue(xe);
                break;

            case "nombre de personnes handicapées accompagnées par an":
                detail.NombreHandicapéAccompagné = GetTextValue(xe);
                break;

            // RESEAUX ET PARTENAIRES (html)
            case "appartenance à un groupe":
                detail.AppartenanceGroupe = GetTextValue(xe);
                break;

            case "présentation du groupe":
                detail.PrésentationGroupe = GetTextValue(xe);
                break;

            case "site web du groupe":
                detail.SiteWebGroupe = GetTextValue(xe);
                break;

            case "adhésion à des réseaux du handicap":
                detail.AdhésionRéseauxHandicap = GetTextValue(xe);
                break;

            case "adhésion à des groupements et fédérations professionnels":
                detail.Groupes = GetTextValues(xe);
                break;

            case "expérience de co-traitance ou de gme avec":
                detail.Cotraitance = GetTextValue(xe);
                break;

            // NOS COORDONNEES (html)
            case "adresse principale":
                detail.AdressePrincipale = GetTextValue(xe);
                break;

            case "adresse du siège":
                detail.AdresseSiège = GetTextValue(xe);
                break;

            case "adresse des antennes":
                detail.AdresseAntennes = GetTextValue(xe);
                break;

            case "e-mail":
                detail.Email = GetEmail(GetTextValue(xe));
                break;

            case "tél":
                detail.Tel = GetTextValue(xe);
                break;

            case "fax":
                detail.Fax = GetTextValue(xe);
                break;

            case "code ape":
                detail.CodeApe = GetTextValue(xe);
                break;

            case "n° finess":
                detail.NumeroFiness = GetTextValue(xe);
                break;

            default:
                //company.unknowInfos.Add(valueName + " : " + value);
                ret = false;
                break;
            }
            return(ret);
        }
Пример #2
0
        private static Handeco_Detail GetData(WebResult webResult)
        {
            XXElement      xeSource = webResult.Http.zGetXDocument().zXXElement();
            Handeco_Detail data     = new Handeco_Detail();

            data.SourceUrl       = webResult.WebRequest.HttpRequest.Url;
            data.LoadFromWebDate = webResult.LoadFromWebDate;
            data.Id = GetKey(webResult.WebRequest.HttpRequest);

            //<div style="text-align: right; font-size: 10px;">
            //<em>Dernière mise à jour le 18-01-2013</em>
            //</div>
            string lastUpdate = Handeco.Trim(xeSource.XPathValue("//em[starts-with(text(), 'Dernière mise à jour')]/text()"));

            if (lastUpdate != null)
            {
                Match    match = __lastUpdateRegex.Match(lastUpdate);
                DateTime date;
                if (match.Success && DateTime.TryParseExact(match.Value, "dd-MM-yyyy", System.Globalization.CultureInfo.CurrentCulture, System.Globalization.DateTimeStyles.None, out date))
                {
                    data.DernièreMiseàjour = date;
                }
                else
                {
                    data.UnknowInfos.Add(lastUpdate);
                }
            }
            else
            {
                pb.Trace.WriteLine("error \"Dernière mise à jour\" not found");
            }

            // NOTRE OFFRE - activities - multiple
            //<select style="width: 200px; display: none;" onchange="change_activite(this.selectedIndex);" id="select_activites">
            //    <option>Sous-traitance industrielle - Autre</option>
            //    <option>Assemblage mécanique</option>
            //    <option>Energie renouvelable - Autre</option>
            //</select>
            string[] activityTypes = xeSource.XPathValues("//select[@id = 'select_activites']/option/text()").Select(Handeco.Trim).ToArray();

            // CONTACTS - multiple
            //<select style="width: 200px; display: none;" onchange="change_contact(this.selectedIndex);" id="select_contacts">
            //    <option>Jacky STEINLE (Chef d'atelier)</option>
            //</select>
            string[] contactDescriptions = xeSource.XPathValues("//select[@id = 'select_contacts']/option/text()").Select(Handeco.Trim).ToArray();

            int             indexActivityType       = 0;
            int             indexContactDescription = 0;
            List <Activity> activities = new List <Activity>();
            List <Contact>  contacts   = new List <Contact>();

            foreach (XXElement xxe in xeSource.XPathElements("//table[@class = 'fiche organisation']"))
            {
                //string id = xxe.XPathValue("@id").ToLower();
                string id = xxe.XPathValue("@id");
                if (id != null)
                {
                    id = id.ToLower();
                }

                if (__trace)
                {
                    pb.Trace.WriteLine("table id = \"{0}\"", id);
                }

                Activity activity = null;
                Contact  contact  = null;
                if (id != null && id.StartsWith("fiche_activite_"))
                {
                    activity = new Activity();
                    activities.Add(activity);
                    if (indexActivityType < activityTypes.Length)
                    {
                        activity.Type = activityTypes[indexActivityType++];
                    }
                    else
                    {
                        pb.Trace.WriteLine("warning miss an activity type in html (<select id='select_activites'>)");
                    }
                }
                else if (id != null && id.StartsWith("fiche_contact_"))
                {
                    contact = new Contact();
                    contacts.Add(contact);
                    if (indexContactDescription < contactDescriptions.Length)
                    {
                        contact.Description = contactDescriptions[indexContactDescription++];
                    }
                    else
                    {
                        pb.Trace.WriteLine("warning miss an activity type in html (<select id='select_contacts'>)");
                    }
                }

                foreach (XXElement xxe2 in xxe.XPathElements(".//tr"))
                {
                    string valueName = Handeco.Trim(xxe2.XPathValue(".//th//text()"));
                    //_currentElement = xxe2;
                    XXElement currentElement = xxe2;

                    if (valueName == null)
                    {
                        continue;
                    }

                    if (activity != null)
                    {
                        if (__trace)
                        {
                            pb.Trace.Write("activité ");
                        }
                        if (!SetActivityValue(activity, valueName, currentElement))
                        {
                            if (__trace)
                            {
                                pb.Trace.Write("error ");
                            }
                            data.UnknowInfos.Add("valeur activité inconnu : " + valueName + " = " + GetTextValue(currentElement));
                        }
                        else if (__trace)
                        {
                            pb.Trace.Write("      ");
                        }
                        if (__trace)
                        {
                            pb.Trace.WriteLine("\"{0}\" =  \"{1}\"", valueName, GetTextValue(currentElement));
                        }
                    }
                    else if (contact != null)
                    {
                        if (__trace)
                        {
                            pb.Trace.Write("contact  ");
                        }
                        if (!SetContactValue(contact, valueName, currentElement))
                        {
                            if (__trace)
                            {
                                pb.Trace.Write("error ");
                            }
                            data.UnknowInfos.Add("valeur contact inconnu : " + valueName + " = " + GetTextValue(currentElement));
                        }
                        else if (__trace)
                        {
                            pb.Trace.Write("      ");
                        }
                        if (__trace)
                        {
                            pb.Trace.WriteLine("\"{0}\" =  \"{1}\"", valueName, GetTextValue(currentElement));
                        }
                    }
                    else
                    {
                        if (__trace)
                        {
                            pb.Trace.Write("société  ");
                        }
                        if (!SetValue(data, valueName, currentElement))
                        {
                            if (__trace)
                            {
                                pb.Trace.Write("error ");
                            }
                            data.UnknowInfos.Add("valeur inconnu : " + valueName + " = " + GetTextValue(currentElement));
                        }
                        else if (__trace)
                        {
                            pb.Trace.Write("      ");
                        }
                        if (__trace)
                        {
                            pb.Trace.WriteLine("\"{0}\" =  \"{1}\"", valueName, GetTextValue(currentElement));
                        }
                    }
                }
            }
            data.Activités = activities.ToArray();
            data.Contacts  = contacts.ToArray();

            if (__trace)
            {
                pb.Trace.WriteLine(data.zToJson());
            }

            return(data);

            //XXElement xePost = xeSource.XPathElement("//table[@id='layout']//div[@id='content']//div[@class='post']");

            //XXElement xe = xePost.XPathElement(".//table[@id='post-head']");
            ////string[] dates = xe.DescendantTextList(".//td[@id='head-date']", func: Vosbooks.TrimFunc1).ToArray();
            //string[] dates = xe.XPathElement(".//td[@id='head-date']").DescendantTexts().Select(DownloadPrint.Trim).ToArray();
            //data.PostCreationDate = GetDate(dates, __lastPostDate);
            //if (data.PostCreationDate != null)
            //    __lastPostDate = new Date(data.PostCreationDate.Value);
            //if (__trace)
            //    pb.Trace.WriteLine("post creation date {0} - {1}", data.PostCreationDate, dates.zToStringValues());

            ////data.Title = xePost.XPathValue(".//div[@class='title']//a//text()", DownloadPrint.TrimFunc1);
            //data.Title = xePost.XPathValue(".//div[@class='title']//a//text()").zFunc(DownloadPrint.ReplaceChars).zFunc(DownloadPrint.Trim);
            //PrintTitleInfos titleInfos = DownloadPrint.PrintTextValuesManager.ExtractTitleInfos(data.Title);
            //if (titleInfos.foundInfo)
            //{
            //    data.OriginalTitle = data.Title;
            //    data.Title = titleInfos.title;
            //    data.Infos.SetValues(titleInfos.infos);
            //}

            //// Ebooks en Epub / Livre
            ////data.Category = xePost.DescendantTextList(".//div[@class='postdata']//span[@class='category']//a").Select(DownloadPrint.TrimFunc1).zToStringValues("/");
            //data.Category = xePost.XPathElements(".//div[@class='postdata']//span[@class='category']//a").DescendantTexts().Select(DownloadPrint.Trim).zToStringValues("/");
            //data.PrintType = GetPrintType(data.Category);
            ////pb.Trace.WriteLine("category \"{0}\" printType {1}", category, data.printType);

            //xe = xePost.XPathElement(".//div[@class='entry']");
            //data.Images = new WebImage[] { new WebImage(zurl.GetUrl(data.SourceUrl, xe.XPathValue("div[starts-with(@class, 'post-views')]/following-sibling::h3/following-sibling::p/img/@src"))) };

            //// force load image to get image width and height
            //if (webResult.WebRequest.LoadImage)
            //    data.Images = DownloadPrint.LoadImages(data.Images).ToArray();

            //// get infos, description, language, size, nbPages
            //// xe.DescendantTextList(".//p")
            //PrintTextValues textValues = DownloadPrint.PrintTextValuesManager.GetTextValues(
            //    xe.XPathElements(".//p").DescendantTexts(
            //    node =>
            //    {
            //        if (node is XText)
            //        {
            //            string text = ((XText)node).Value.Trim();
            //            //if (text.StartsWith("Lien Direct", StringComparison.InvariantCultureIgnoreCase))
            //            if (text.StartsWith("lien ", StringComparison.InvariantCultureIgnoreCase))
            //                return XNodeFilter.Stop;
            //        }
            //        if (node is XElement)
            //        {
            //            XElement xe2 = (XElement)node;
            //            if (xe2.Name == "p" && xe2.zAttribValue("class") == "submeta")
            //                return XNodeFilter.Stop;
            //        }
            //        return XNodeFilter.SelectNode;
            //    }
            //    ).Select(DownloadPrint.ReplaceChars).Select(DownloadPrint.TrimWithoutColon), data.Title);
            //data.Description = textValues.description;
            //data.Infos.SetValues(textValues.infos);

            //data.DownloadLinks = xe.DescendantNodes(
            //    node =>
            //    {
            //        if (!(node is XElement))
            //            return XNodeFilter.DontSelectNode;
            //        XElement xe2 = (XElement)node;
            //        if (xe2.Name == "a")
            //            return XNodeFilter.SelectNode;
            //        if (xe2.Name != "p")
            //            return XNodeFilter.DontSelectNode;
            //        XAttribute xa = xe2.Attribute("class");
            //        if (xa == null)
            //            return XNodeFilter.DontSelectNode;
            //        if (xa.Value != "submeta")
            //            return XNodeFilter.DontSelectNode;
            //        //return XNodeFilter.SkipNode;
            //        return XNodeFilter.Stop;
            //    })
            //    .Select(node => ((XElement)node).Attribute("href").Value).ToArray();
        }