/// <summary>
/// Crawls the pap.fr Ile-de-France "propriete en vente" search pages and raises
/// <c>AnnouncementParsed</c> once for every search-list item found.
/// </summary>
/// <remarks>
/// Twenty-five pages are requested; the zero-based loop index is appended to the
/// listing URL. Items without a link are still reported, with a null
/// <c>LinkUrl</c> and an empty <c>ID</c>.
/// </remarks>
/// <returns>A task that completes once every page has been loaded and processed.</returns>
public async Task Start()
{
    const int max = 25;

    // The loader configuration does not depend on the page, so build it once.
    var config = Configuration.Default.WithDefaultLoader();

    for (int i = 0; i < max; i++)
    {
        // Parenthesised so the 1-based page number is computed arithmetically
        // instead of being concatenated as the two strings i and "1".
        Console.WriteLine("loading pap fr page " + (i + 1));

        var baseAddress = "https://www.pap.fr/annonce/propriete-en-vente-ile-de-france-g471-" + i;
        var context = BrowsingContext.New(config);
        var document = await context.OpenAsync(baseAddress);
        var list = document.QuerySelectorAll(".main-content .search-results-list .search-list-item");

        foreach (var element in list)
        {
            var model = new GenericAnnouncementModel("pap.fr");

            var imgSrc = element.QuerySelector(" div.col-left > a > img")?.Attributes["src"]?.Value;
            var url = element.QuerySelector(" div.col-left > a")?.Attributes["href"]?.Value;
            var photoCount = element.QuerySelector(" div.col-left > .item-photo-count")?.Text();

            // Some list items carry no anchor at all; only turn a relative link
            // into an absolute one when there actually is a link.
            if (!string.IsNullOrEmpty(url) && !url.StartsWith("https://", StringComparison.Ordinal))
            {
                url = "https://www.pap.fr" + url;
            }

            var title = element.QuerySelector("div.col-right > a.item-title > span.h1")?.TextContent;
            var itemTags = element.QuerySelectorAll("div.col-right > a.item-title > ul > li")
                ?.Select(li => li?.TextContent)
                .ToArray();
            var itemPrice = element.QuerySelector(".col-right .item-price")?.TextContent;
            var priceDescription = element.QuerySelector(".col-right .mensualite-prix")?.TextContent;
            var itemDescription = element.QuerySelector(".col-right > p.item-description")?.TextContent;
            var itemTransport = element.QuerySelector(".col-right > .item-transports")?.TextContent;

            // The identifier is taken from the last path segment of the link:
            // drop any query string, then keep what follows the final dash.
            var id = url?.Split('/').Last();
            if (string.IsNullOrEmpty(id))
            {
                id = string.Empty;
            }

            if (id.Contains("?"))
            {
                id = id.Split('?').First();
            }

            if (id.Contains("-"))
            {
                id = id.Split('-').Last();
            }

            model.ID = id;
            model.Images = new List<string> { imgSrc };
            model.LinkUrl = url;
            model.Title = title;
            model.Price = new List<string> { itemPrice };
            model.Description = itemDescription;
            model.Extra = new { photoCount, itemTags, priceDescription, itemTransport };

            AnnouncementParsed?.Invoke(model);
        }
    }
}
/// <summary>
/// Projects this record onto a <c>GenericAnnouncementModel</c> tagged with the
/// source name "leboncoin".
/// </summary>
/// <returns>
/// A new model carrying the identifier, dates, category, text, link, owner name,
/// price, image URLs and location of this record. Fields that have no dedicated
/// slot on the generic model are bundled into <c>Extra</c>.
/// </returns>
public GenericAnnouncementModel ToGeneric()
{
    var pictures = this.images;
    var place = this.location;

    // The thumbnail list always starts with the single thumb URL (which may be
    // null) and is followed by the additional thumbnails when they are present.
    var thumbnails = new List<string> { pictures?.thumb_url };
    if (pictures?.urls_thumb != null)
    {
        thumbnails.AddRange(pictures.urls_thumb);
    }

    return new GenericAnnouncementModel("leboncoin")
    {
        ID = this.list_id.ToString(),
        first_publication_date = this.first_publication_date,
        expiration_date = this.expiration_date,
        category_id = this.category_id,
        category_name = this.category_name,
        Title = this.subject,
        Description = this.body,
        LinkUrl = this.url,
        Owner = this.owner?.name,
        Price = this.price,
        SmallImages = new List<string> { pictures?.small_url },
        ThumbImages = thumbnails,
        LargeUrls = pictures?.urls_large?.ToList(),
        Latitude = place?.lat,
        Langitude = place?.lng,
        City = place?.city,
        Region = place?.region_name,
        Extra = new { index_date, status, ad_type, price_calendar, options, has_phone, attributes }
    };
}
/// <summary>
/// Builds a <c>GenericAnnouncementModel</c> tagged "superimmo" from one listing element.
/// </summary>
/// <param name="apart">The DOM element of a single listing.</param>
/// <returns>
/// The populated model. Any value whose node or attribute is missing is left null;
/// the price list always holds exactly one (possibly null) entry.
/// </returns>
public GenericAnnouncementModel ParseAnnouncement(IElement apart)
{
    var result = new GenericAnnouncementModel("superimmo");

    // The same node carries both the creation date and the listing id.
    var marker = apart.QuerySelector(" section > div.media-left > div:nth-child(1) > b");
    result.first_publication_date = marker?.Attributes["data-created-at"]?.Value;
    result.ID = marker?.Attributes["data-listing-id"]?.Value;

    var counterText = apart.QuerySelector("section > div.media-left > div:nth-child(1) > span")?.TextContent;

    // Collect the non-empty image sources, keeping the first occurrence of each.
    var seenSources = new HashSet<string>();
    var pictures = new List<string>();
    foreach (var img in apart.QuerySelectorAll("section > div.media-left > div:nth-child(1) img"))
    {
        var source = img?.Attributes["src"]?.Value;
        if (!string.IsNullOrEmpty(source) && seenSources.Add(source))
        {
            pictures.Add(source);
        }
    }

    result.Images = pictures;

    var smallImage = apart
        .QuerySelector("section > div.media-left > div.media > div.media-left > img")
        ?.Attributes["src"]
        ?.Value;
    result.SmallImages = new List<string> { smallImage };

    result.LinkText = apart
        .QuerySelector("section > div.media-left > div.media > div.media-body > small")
        ?.TextContent;

    var priceText = apart.QuerySelector("section > div.media-body > p > a > b.prix")?.TextContent;
    result.Price = new List<string> { priceText };

    var pricePerSquareText = apart.QuerySelector(" section > div.media-body > p > a > small")?.TextContent;
    result.Title = apart.QuerySelector("section > div.media-body > p > a > b.titre")?.TextContent;
    var anchorText = apart.QuerySelector(" section > div.media-body > p > a")?.TextContent;
    var boldText = apart.QuerySelector("section > div.media-body > b")?.TextContent;
    result.Description = apart.QuerySelector("section > div.media-body > div > p")?.TextContent;

    // Property names are spelled out so the shape of Extra stays the same
    // regardless of what the locals are called.
    result.Extra = new
    {
        Text1 = anchorText,
        Text2 = boldText,
        PricePerSquare = pricePerSquareText,
        NBR = counterText
    };

    return result;
}
/// <summary>
/// Builds a <c>GenericAnnouncementModel</c> tagged "seloger" from one listing element.
/// </summary>
/// <param name="item">The DOM element of a single listing.</param>
/// <returns>
/// The populated model. Values whose node, attribute or JSON member is missing are
/// left null instead of aborting the parse.
/// </returns>
public GenericAnnouncementModel ParseAnnouncement(IElement item)
{
    GenericAnnouncementModel model = new GenericAnnouncementModel("seloger");

    var infoLink = item.QuerySelector("div.c-pa-info > a");
    model.LinkUrl = infoLink?.Attributes["href"]?.Value;
    if (string.IsNullOrEmpty(model.LinkUrl))
    {
        // Fall back to the link wrapped around the picture slides.
        model.LinkUrl = item
            .QuerySelector("div.c-pa-pic div.slideContent > a")
            ?.Attributes["href"]?.Value;
    }

    model.LinkText = infoLink?.TextContent;

    var parameters = item.QuerySelectorAll("div.c-pa-info > div.c-pa-criterion em")
        ?.Select(i => i?.TextContent)
        .ToArray();

    // Price fragments: strip the "|" separators and drop fragments that end up empty.
    model.Price = (item.QuerySelectorAll("div.c-pa-info > div.c-pa-price span")
            ?.Select(d => d.TextContent.Replace("|", ""))
            ?? new string[0])
        .Where(d => !string.IsNullOrEmpty(d))
        .ToList();

    model.Title = item.QuerySelector("div.c-pa-info > div.c-pa-loan > a")?.TextContent;
    model.City = item.QuerySelector("div.c-pa-info > div.c-pa-city")?.TextContent;

    // Each slide keeps its picture in a JSON "data-lazy" attribute. A payload
    // without a "url" member yields a null entry rather than a NullReferenceException.
    model.Images = item
        .QuerySelectorAll(" div.c-pa-pic > div.c-pa-visual >div.c-pa-imgs >div.slideContent > a>div")
        ?.Select(div => div?.Attributes["data-lazy"]?.Value?.ParseJson()?.SelectToken("url")?.ToString())
        .ToList();

    // Logo URL and agency name are both read from the first agency node.
    var agencyNode = item.QuerySelector("div.c-pa-info > div.c-pa-agency>a>div");
    var agencyLogoUrl = agencyNode?.Attributes["data-lazy"]?.Value?.ParseJson()?.SelectToken("url")?.ToString();
    var agencyName = agencyNode?.Attributes["alt"]?.Value;

    model.ID = item.Attributes["data-listing-id"]?.Value;
    var publicationId = item.Attributes["data-publication-id"]?.Value;

    // Property names are spelled out so the shape of Extra does not depend on local names.
    model.Extra = new
    {
        Parameters = parameters,
        AgencyLogoUrl = agencyLogoUrl,
        AgencyName = agencyName,
        PublicationId = publicationId
    };

    return model;
}
/// <summary>
/// Executes the SQL statement generated by <paramref name="model"/> on the shared
/// connection and logs a running count of processed models.
/// </summary>
/// <param name="model">The announcement to persist.</param>
/// <remarks>
/// Best effort: any exception is caught and only its message is logged, so a failing
/// announcement never propagates an error to the caller.
/// </remarks>
public static void SaveAnnouncement(GenericAnnouncementModel model)
{
    try
    {
        // NOTE(review): the statement text comes straight from GenerateQuery(); if it
        // embeds scraped values by string concatenation it is open to SQL injection -
        // confirm, and move to SqlParameter there if so.
        var query = model.GenerateQuery();

        lock (Sync)
        {
            if (connection.State != ConnectionState.Open)
            {
                connection.Open();
            }

            using (var cmd = new SqlCommand(query, connection))
            {
                cmd.ExecuteNonQuery();
            }

            // The counter is shared static state. Incrementing it inside the same
            // lock that serialises the writes keeps concurrent callers from losing
            // updates and keeps the logged numbers in commit order.
            Logger.WriteLine("{1} {0} models processed \t\t\t", count++, model.HostSite);
        }
    }
    catch (Exception e)
    {
        Logger.WriteLine(e.Message);
    }
}