// Header page extraction; per the original note this member comes from
// WebHeaderDetailMongoManagerBase_v2<THeaderData, TDetailData>.
/// <summary>
/// Builds the header page object for one downloaded result page: page-level
/// metadata, the url of the next page, and one <c>Handeco_Header_v2</c> per
/// table row after the first.
/// </summary>
/// <param name="httpResult">Download result whose document and request are read.</param>
/// <returns>The populated <c>Handeco_HeaderDataPages</c> instance.</returns>
protected override IEnumDataPages<Handeco_Header_v2> GetHeaderPageData(HttpResult<string> httpResult)
{
    var root = httpResult.zGetXDocument().zXXElement();
    var pageUrl = httpResult.Http.HttpRequest.Url;

    // Sample pagination markup:
    //   <div class="paginationControl">
    //   page n    : <a href="/fournisseurs/rechercher/page/2#resultats">></a> |
    //   last page : <span class="disabled">></span> |
    // The next-page link is read from the element just before the last one
    // inside the pagination div; the fragment part of the url is removed.
    var page = new Handeco_HeaderDataPages
    {
        SourceUrl = pageUrl,
        LoadFromWebDate = httpResult.Http.RequestTime,
        Id = GetPageKey(httpResult.Http.HttpRequest),
        UrlNextPage = zurl.RemoveFragment(zurl.GetUrl(pageUrl, root.XPathValue("//div[@class='paginationControl']//*[position()=last()-1]/@href")))
    };

    // Every row except the first one of each table yields one header.
    page.Data = root.XPathElements("//table//tr[position() > 1]")
        .Select(row => new Handeco_Header_v2
        {
            SourceUrl = pageUrl,
            LoadFromWebDate = DateTime.Now,
            Name = Handeco.Trim(row.XPathValue(".//td[1]//text()")),
            UrlDetail = zurl.RemoveFragment(zurl.GetUrl(pageUrl, row.XPathValue(".//td[1]//a/@href"))),
            // Siret is intentionally not read (disabled in the original code):
            // header.Siret = Handeco.Trim(xeHeader.XPathValue(".//td[2]//text()"));
            Type = Handeco.Trim(row.XPathValue(".//td[2]//text()")),
            Groupes = row.XPathValues(".//td[3]//text()").Select(Handeco.Trim).ToArray(),
            Activités = row.XPathValues(".//td[4]//text()").Select(Handeco.Trim).ToArray(),
            PostalCode = Handeco.Trim(row.XPathValue(".//td[5]//text()"))
        })
        .ToArray();

    return page;
}
/// <summary>
/// Returns the text nodes found under the <c>td</c> descendants of
/// <paramref name="xe"/>, each one passed through <c>_badCharacters.Replace</c>
/// (replacement is a single space) and then through <c>Handeco.Trim</c>.
/// </summary>
/// <param name="xe">Element the XPath query is evaluated against.</param>
/// <returns>One cleaned string per text node, in query order.</returns>
private static string[] GetTextValues(XXElement xe)
{
    List<string> cleaned = new List<string>();
    foreach (string raw in xe.XPathValues(".//td//text()"))
    {
        string withoutBadCharacters = _badCharacters.Replace(raw, " ");
        cleaned.Add(Handeco.Trim(withoutBadCharacters));
    }
    return cleaned.ToArray();
}
/// <summary>
/// Fills <paramref name="data"/> from a detail page: the "last update" date,
/// then every <c>table</c> of class "fiche organisation". Each table is treated
/// as an activity, a contact, or company-level data depending on its id, and
/// each of its rows is handed to the matching setter.
/// </summary>
/// <param name="xeSource">Element the XPath queries are evaluated against.</param>
/// <param name="data">
/// Detail record to populate. Values no setter recognises, and a last-update
/// text that cannot be parsed, are appended to its <c>UnknowInfos</c>.
/// </param>
protected void _GetDetailData(XXElement xeSource, Handeco_Detail_v2 data)
{
    // Sample markup:
    //   <div style="text-align: right; font-size: 10px;">
    //     <em>Dernière mise à jour le 18-01-2013</em>
    //   </div>
    string lastUpdate = Handeco.Trim(xeSource.XPathValue("//em[starts-with(text(), 'Dernière mise à jour')]/text()"));
    if (lastUpdate != null)
    {
        Match match = _lastUpdateRegex.Match(lastUpdate);
        DateTime date;
        // The page uses a fixed dd-MM-yyyy format, so parse with the invariant
        // culture: the current culture's calendar must not influence the year.
        if (match.Success && DateTime.TryParseExact(match.Value, "dd-MM-yyyy", System.Globalization.CultureInfo.InvariantCulture, System.Globalization.DateTimeStyles.None, out date))
        {
            data.DernièreMiseàjour = date;
        }
        else
        {
            // Keep the raw text so the unparsed value is not lost.
            data.UnknowInfos.Add(lastUpdate);
        }
    }
    else
    {
        pb.Trace.WriteLine("error \"Dernière mise à jour\" not found");
    }

    // "NOTRE OFFRE" - activities, possibly several. Sample markup:
    //   <select style="width: 200px; display: none;" onchange="change_activite(this.selectedIndex);" id="select_activites">
    //     <option>Sous-traitance industrielle - Autre</option>
    //     <option>Assemblage mécanique</option>
    //     <option>Energie renouvelable - Autre</option>
    //   </select>
    string[] activityTypes = xeSource.XPathValues("//select[@id = 'select_activites']/option/text()").Select(Handeco.Trim).ToArray();

    // "CONTACTS" - possibly several. Sample markup:
    //   <select style="width: 200px; display: none;" onchange="change_contact(this.selectedIndex);" id="select_contacts">
    //     <option>Jacky STEINLE (Chef d'atelier)</option>
    //   </select>
    string[] contactDescriptions = xeSource.XPathValues("//select[@id = 'select_contacts']/option/text()").Select(Handeco.Trim).ToArray();

    // The n-th activity/contact table is paired with the n-th <option> of the
    // corresponding <select>.
    int indexActivityType = 0;
    int indexContactDescription = 0;
    List<Activity> activities = new List<Activity>();
    List<Contact> contacts = new List<Contact>();

    foreach (XXElement xeTable in xeSource.XPathElements("//table[@class = 'fiche organisation']"))
    {
        string id = xeTable.XPathValue("@id");
        Activity activity = null;
        Contact contact = null;

        // Ids are identifiers, not linguistic text: match them case-insensitively
        // with ordinal rules so the result does not depend on the current culture.
        if (id != null && id.StartsWith("fiche_activite_", StringComparison.OrdinalIgnoreCase))
        {
            activity = new Activity();
            activities.Add(activity);
            if (indexActivityType < activityTypes.Length)
            {
                activity.Type = activityTypes[indexActivityType++];
            }
            else
            {
                pb.Trace.WriteLine("warning miss an activity type in html (<select id='select_activites'>)");
            }
        }
        else if (id != null && id.StartsWith("fiche_contact_", StringComparison.OrdinalIgnoreCase))
        {
            contact = new Contact();
            contacts.Add(contact);
            if (indexContactDescription < contactDescriptions.Length)
            {
                contact.Description = contactDescriptions[indexContactDescription++];
            }
            else
            {
                pb.Trace.WriteLine("warning miss a contact description in html (<select id='select_contacts'>)");
            }
        }

        foreach (XXElement xeRow in xeTable.XPathElements(".//tr"))
        {
            // The <th> text names the value; rows without it are skipped.
            string valueName = Handeco.Trim(xeRow.XPathValue(".//th//text()"));
            if (valueName == null)
            {
                continue;
            }

            if (activity != null)
            {
                if (!SetActivityValue(activity, valueName, xeRow))
                {
                    data.UnknowInfos.Add("valeur activité inconnu : " + valueName + " = " + GetTextValue(xeRow));
                }
            }
            else if (contact != null)
            {
                if (!SetContactValue(contact, valueName, xeRow))
                {
                    data.UnknowInfos.Add("valeur contact inconnu : " + valueName + " = " + GetTextValue(xeRow));
                }
            }
            else
            {
                // Neither an activity nor a contact table: company-level value.
                if (!SetValue(data, valueName, xeRow))
                {
                    data.UnknowInfos.Add("valeur inconnu : " + valueName + " = " + GetTextValue(xeRow));
                }
            }
        }
    }

    data.Activités = activities.ToArray();
    data.Contacts = contacts.ToArray();
}