示例#1
0
        /// <summary>
        /// Loads 25 result pages of the pap.fr "propriete-en-vente-ile-de-france" listing,
        /// converts every result item into a <see cref="GenericAnnouncementModel"/> and
        /// invokes <c>AnnouncementParsed</c> (when it has subscribers) for each one.
        /// </summary>
        /// <returns>A task that completes once every page has been downloaded and parsed.</returns>
        public async Task Start()
        {
            const int max = 25;

            // The loader configuration does not depend on the page, so it is built once.
            var config = Configuration.Default.WithDefaultLoader();

            for (int i = 0; i < max; i++)
            {
                // Parentheses are required: without them "+ i + 1" is string concatenation
                // and the log shows "01", "11", ... instead of the 1-based page number.
                Console.WriteLine("loading pap fr page " + (i + 1));
                var baseAddress = "https://www.pap.fr/annonce/propriete-en-vente-ile-de-france-g471-" + i;
                var context     = BrowsingContext.New(config);
                var document    = await context.OpenAsync(baseAddress);

                var list = document.QuerySelectorAll(".main-content .search-results-list .search-list-item");
                foreach (var element in list)
                {
                    var model      = new GenericAnnouncementModel("pap.fr");
                    var imgSrc     = element.QuerySelector(" div.col-left > a > img")?.Attributes["src"]?.Value;
                    var url        = element.QuerySelector(" div.col-left > a")?.Attributes["href"]?.Value;
                    var photoCount = element.QuerySelector(" div.col-left > .item-photo-count")?.Text();

                    // Items without a link leave url null; only relative links get the site prefix.
                    // Previously a missing link raised a NullReferenceException and aborted the crawl.
                    if (url != null && !url.StartsWith("https://", StringComparison.Ordinal))
                    {
                        url = "https://www.pap.fr" + url;
                    }

                    var title            = element.QuerySelector("div.col-right > a.item-title > span.h1")?.TextContent;
                    var itemTags         = element.QuerySelectorAll("div.col-right > a.item-title > ul > li")?.Select(li => li?.TextContent).ToArray();
                    var itemPrice        = element.QuerySelector(".col-right .item-price")?.TextContent;
                    var priceDescription = element.QuerySelector(".col-right .mensualite-prix")?.TextContent;
                    var itemDescription  = element.QuerySelector(".col-right > p.item-description")?.TextContent;
                    var itemTransport    = element.QuerySelector(".col-right > .item-transports")?.TextContent;

                    // The identifier is the last path segment of the link, without its query
                    // string, reduced to the part after the final '-'. Empty when there is no link.
                    var id = url?.Split('/').Last() ?? string.Empty;
                    if (id.Contains("?"))
                    {
                        id = id.Split('?').First();
                    }
                    if (id.Contains("-"))
                    {
                        id = id.Split('-').Last();
                    }

                    model.ID          = id;
                    model.Images      = new List<string> { imgSrc };
                    model.LinkUrl     = url;
                    model.Title       = title;
                    model.Price       = new List<string> { itemPrice };
                    model.Description = itemDescription;
                    // Values without a dedicated model property travel in the anonymous Extra object.
                    model.Extra       = new { photoCount, itemTags, priceDescription, itemTransport };

                    AnnouncementParsed?.Invoke(model);
                }
            }
        }
示例#2
0
        /// <summary>
        /// Copies this announcement's fields into a new <see cref="GenericAnnouncementModel"/>
        /// created for the "leboncoin" source.
        /// </summary>
        /// <returns>The populated generic model.</returns>
        public GenericAnnouncementModel ToGeneric()
        {
            // Read the nested objects once; both may be null.
            var pictures = this.images;
            var place    = this.location;

            var generic = new GenericAnnouncementModel("leboncoin")
            {
                ID                     = this.list_id.ToString(),
                first_publication_date = this.first_publication_date,
                expiration_date        = this.expiration_date,
                category_id            = this.category_id,
                category_name          = this.category_name,
                Title                  = this.subject,
                Description            = this.body,
                LinkUrl                = this.url,
                Owner                  = this.owner?.name,
                Price                  = this.price,
                // Each list starts with the single-URL field, which may be null.
                SmallImages            = new List<string> { pictures?.small_url },
                ThumbImages            = new List<string> { pictures?.thumb_url },
                LargeUrls              = pictures?.urls_large?.ToList(),
                Latitude               = place?.lat,
                Langitude              = place?.lng,
                City                   = place?.city,
                Region                 = place?.region_name,
                // Fields without a dedicated property are carried in an anonymous object.
                Extra                  = new { index_date, status, ad_type, price_calendar, options, has_phone, attributes }
            };

            // Any additional thumbnail URLs follow the primary thumbnail.
            if (pictures?.urls_thumb != null)
            {
                generic.ThumbImages.AddRange(pictures.urls_thumb);
            }

            return generic;
        }
示例#3
0
        /// <summary>
        /// Builds a <see cref="GenericAnnouncementModel"/> for the "superimmo" source from
        /// one listing element by reading fixed CSS selectors below it.
        /// </summary>
        /// <param name="apart">The element that contains a single listing.</param>
        /// <returns>The populated model; values whose selector matches nothing are null.</returns>
        public GenericAnnouncementModel ParseAnnouncement(IElement apart)
        {
            var result = new GenericAnnouncementModel("superimmo");

            // The same <b> element carries both the creation date and the listing id.
            var marker = apart.QuerySelector(" section > div.media-left > div:nth-child(1) > b");
            result.first_publication_date = marker?.Attributes["data-created-at"]?.Value;
            result.ID                     = marker?.Attributes["data-listing-id"]?.Value;

            var counterText = apart.QuerySelector("section > div.media-left > div:nth-child(1) > span")?.TextContent;

            // Distinct, non-empty image sources from the first media block.
            var pictureUrls = new List<string>();
            foreach (var img in apart.QuerySelectorAll("section > div.media-left > div:nth-child(1) img"))
            {
                var src = img?.Attributes["src"]?.Value;
                if (!string.IsNullOrEmpty(src) && !pictureUrls.Contains(src))
                {
                    pictureUrls.Add(src);
                }
            }
            result.Images = pictureUrls;

            var smallImage = apart.QuerySelector("section > div.media-left > div.media > div.media-left > img");
            result.SmallImages = new List<string> { smallImage?.Attributes["src"]?.Value };

            result.LinkText = apart.QuerySelector("section > div.media-left > div.media > div.media-body > small")?.TextContent;

            // The price list always has exactly one entry, which is null when no price is found.
            var priceText = apart.QuerySelector("section > div.media-body > p > a > b.prix")?.TextContent;
            result.Price = new List<string> { priceText };

            var perSquareText = apart.QuerySelector(" section > div.media-body > p > a > small")?.TextContent;

            result.Title = apart.QuerySelector("section > div.media-body > p > a > b.titre")?.TextContent;

            var anchorText = apart.QuerySelector(" section > div.media-body > p > a")?.TextContent;
            var boldText   = apart.QuerySelector("section > div.media-body > b")?.TextContent;

            result.Description = apart.QuerySelector("section > div.media-body > div > p")?.TextContent;

            // Member names are spelled out so the anonymous object keeps its original shape.
            result.Extra = new { Text1 = anchorText, Text2 = boldText, PricePerSquare = perSquareText, NBR = counterText };
            return result;
        }
        /// <summary>
        /// Builds a <see cref="GenericAnnouncementModel"/> for the "seloger" source from
        /// one listing element by reading fixed CSS selectors and data attributes.
        /// </summary>
        /// <param name="item">The element that contains a single listing.</param>
        /// <returns>The populated model; values whose selector or attribute is missing are null.</returns>
        public GenericAnnouncementModel ParseAnnouncement(IElement item)
        {
            GenericAnnouncementModel model = new GenericAnnouncementModel("seloger");

            // The info link supplies both the URL and the link text.
            var infoLink = item.QuerySelector("div.c-pa-info > a");
            model.LinkUrl = infoLink?.Attributes["href"]?.Value;
            if (string.IsNullOrEmpty(model.LinkUrl))
            {
                // Fall back to the link that wraps the picture slide.
                model.LinkUrl = item
                                .QuerySelector(
                    "div.c-pa-pic  div.slideContent > a")
                                ?.Attributes["href"]?.Value;
            }
            model.LinkText = infoLink?.TextContent;
            var Parameters = item.QuerySelectorAll("div.c-pa-info > div.c-pa-criterion em")?
                             .Select(i => i?.TextContent).ToArray();

            // Price fragments with the '|' separator removed; empty fragments are dropped.
            model.Price = ((item
                            .QuerySelectorAll("div.c-pa-info > div.c-pa-price span")?
                            .Select(d => d.TextContent)
                            .Select(d => d.Replace("|", ""))) ?? new string[0])
                          .Where(d => !string.IsNullOrEmpty(d)).ToList();

            model.Title  = item.QuerySelector("div.c-pa-info > div.c-pa-loan > a")?.TextContent;
            model.City   = item.QuerySelector("div.c-pa-info > div.c-pa-city")?.TextContent;

            // The image URL is the "url" token of the JSON held in each data-lazy attribute.
            // SelectToken is null-guarded so JSON without "url" yields null instead of throwing.
            // NOTE(review): ParseJson is assumed to return a Json.NET JToken - confirm.
            model.Images = item
                           .QuerySelectorAll(" div.c-pa-pic > div.c-pa-visual >div.c-pa-imgs >div.slideContent > a>div")?
                           .Select(div => div?.Attributes["data-lazy"]?.Value?.ParseJson()?.SelectToken("url")?.ToString())
                           .ToList();

            // Same extraction and null-guarding as for the images, applied to the first agency element.
            var AgencyLogoUrl = item.QuerySelectorAll("div.c-pa-info > div.c-pa-agency>a>div")?
                                .Select(div =>
                                        div?.Attributes["data-lazy"]?.Value?.ParseJson()?.SelectToken("url")?.ToString())
                                .FirstOrDefault();
            var AgencyName = item.QuerySelectorAll("div.c-pa-info > div.c-pa-agency>a>div")?
                             .Select(div =>
                                     div?.Attributes["alt"]?.Value)
                             .FirstOrDefault();

            // item was already dereferenced above, so no null-conditional is needed here.
            model.ID = item.Attributes["data-listing-id"]?.Value;
            var PublicationId = item.Attributes["data-publication-id"]?.Value;

            model.Extra = new { Parameters, AgencyLogoUrl, AgencyName, PublicationId };
            return model;
        }
示例#5
0
 /// <summary>
 /// Executes the SQL produced by <c>model.GenerateQuery()</c> on the shared connection
 /// and logs the running count of processed models. Exceptions are caught and only
 /// their message is logged; nothing is rethrown to the caller.
 /// </summary>
 /// <param name="model">The announcement to persist.</param>
 public static void SaveAnnouncement(GenericAnnouncementModel model)
 {
     try
     {
         // NOTE(review): the command text is a ready-made string. If GenerateQuery embeds
         // scraped values directly, it should use SqlParameter instead - confirm.
         var query = model.GenerateQuery();
         lock (Sync)
         {
             // The connection is shared, so it is opened on first use under the lock.
             if (connection.State != ConnectionState.Open)
             {
                 connection.Open();
             }
             using (var cmd = new SqlCommand(query, connection))
             {
                 cmd.ExecuteNonQuery();
             }

             // The counter is shared too; incrementing it inside the lock stops
             // concurrent callers from losing updates to the non-atomic count++.
             Logger.WriteLine("{1}  {0} models processed \t\t\t", count++, model.HostSite);
         }
     }
     catch (Exception e)
     {
         Logger.WriteLine(e.Message);
     }
 }