Пример #1
0
        /// <summary>
        /// Runs a two-phase crawl. First it follows the listing pages starting at
        /// <paramref name="url"/>, storing every parsed page in the repository, until a
        /// page reports that no next page exists. Then it downloads and parses each
        /// advertisement URL the repository reports as unfinished.
        /// </summary>
        /// <param name="url">Address of the first listing page to download.</param>
        public async Task StartAsync(string url)
        {
            ListAdvertisements page    = new ListAdvertisements();
            string             pageUrl = url;

            while (true)
            {
                // Follow the link found on the previously parsed page, if it had one.
                if (page.UrlNextPage != default)
                {
                    pageUrl = page.UrlNextPage;
                }

                string listingHtml = await DownloadHtml(pageUrl);

                page = ParsingListAdvertisement(listingHtml);
                repository.AddListAdvertisement(page);

                if (!page.ExistNextPage)
                {
                    break;
                }
            }

            // Second pass: parse every advertisement individually.
            foreach (var advertisementUrl in repository.GetUnfinishedAdvertisementUrls())
            {
                string advertisementHtml = await DownloadHtml(advertisementUrl);

                Advertisement parsed = await ParsingAdvertisementAsync(advertisementHtml);
                parsed.Url = advertisementUrl;

                repository.AddAdvertisement(parsed);
            }
        }
Пример #2
0
 /// <summary>
 /// Passes every advertisement contained in <paramref name="listAdvertisements"/>
 /// to <c>AddAdvertisement</c>, one by one, in enumeration order.
 /// </summary>
 /// <param name="listAdvertisements">Parsed listing page whose advertisements are added.</param>
 public void AddListAdvertisement(ListAdvertisements listAdvertisements)
 {
     var advertisements = listAdvertisements.Advertisements;

     foreach (var advertisement in advertisements)
     {
         AddAdvertisement(advertisement);
     }
 }
Пример #3
0
        /// <summary>
        /// Parses one listing (search results) page. Every <c>div[data-name='OfferCard']</c>
        /// element is turned into an <c>Advertisement</c> with its <c>House</c> and <c>Owner</c>,
        /// and the pagination block is inspected to find the link to the next page.
        /// </summary>
        /// <param name="html">Markup of the listing page.</param>
        /// <returns>
        /// The advertisements found on the page. <c>ExistNextPage</c> and <c>UrlNextPage</c>
        /// are set only when a usable link to the following page was found.
        /// </returns>
        /// <remarks>
        /// Missing or unexpectedly shaped fields are left at their default values instead of
        /// aborting the whole page. A card without a link is skipped, because its URL is
        /// what the later per-advertisement download needs.
        /// </remarks>
        public ListAdvertisements ParsingListAdvertisement(string html)
        {
            const string siteRoot = "https://kazan.cian.ru";

            IHtmlDocument document = new HtmlParser().ParseDocument(html);
            var           cards    = document.QuerySelectorAll("div[data-name='OfferCard']");

            ListAdvertisements result = new ListAdvertisements();

            foreach (var card in cards)
            {
                // QuerySelector yields null when nothing matches, so every lookup is null-safe.
                string advertisementUrl = card.QuerySelector(".c6e8ba5398--header--1fV2A")?.GetAttribute("href");
                if (string.IsNullOrEmpty(advertisementUrl))
                {
                    continue;
                }

                Owner         owner         = new Owner();
                House         house         = new House();
                Advertisement advertisement = new Advertisement();

                advertisement.Url = advertisementUrl;

                // The title lives in either the "Title" or the "TopTitle" element.
                string title = card.QuerySelector("div[data-name='Title']")?.TextContent
                               ?? card.QuerySelector("div[data-name='TopTitle']")?.TextContent;
                ApplyListingTitle(title, advertisement, house);

                // Only the digits of the price text are kept (spaces, currency sign etc. are dropped).
                string priceText = card.QuerySelector(".c6e8ba5398--header--1df-X")?.TextContent;
                if (priceText != null)
                {
                    string priceDigits = string.Concat(priceText.Where(c => char.IsDigit(c)));
                    if (decimal.TryParse(priceDigits,
                                         System.Globalization.NumberStyles.None,
                                         System.Globalization.CultureInfo.InvariantCulture,
                                         out decimal price))
                    {
                        advertisement.Price = price;
                    }
                }

                house.DistanceInfo = card.QuerySelector(".c6e8ba5398--underground-container--1exfN")?.TextContent;

                house.FullAdress = card.QuerySelector("div[data-name='AddressItem']")?.TextContent;
                ApplyListingAddress(house.FullAdress, house);

                advertisement.Description = card.QuerySelector("div[data-name='Description']")?.TextContent;

                owner.Name         = card.QuerySelector("div[data-name='AgentBrandMainInfoComponent']")?.TextContent;
                owner.PhoneNumbers = card.QuerySelector("div[data-name='PhoneButton']")?.TextContent;

                advertisement.DatePublishString = card.QuerySelector("div[data-name='TimeLabel'] .c6e8ba5398--absolute--9uFLj")?.TextContent;
                advertisement.DatePublish       = ParsDateTime(advertisement.DatePublishString);

                advertisement.DateUpdate = DateTime.Now;
                advertisement.FullParse  = false;
                advertisement.House      = house;
                advertisement.Owner      = owner;

                result.Advertisements.Add(advertisement);
            }

            // The first pagination item without an "href" is treated as the current page;
            // the item right after it carries the link to the next page.
            var pages = document.QuerySelectorAll("div[data-name='Pagination'] ._93444fe79c--list-item--2KxXr");
            for (int i = 0; i < pages.Length - 1; i++)
            {
                if (pages[i].OuterHtml.Contains("href"))
                {
                    continue;
                }

                string nextHref = pages[i + 1].QuerySelector("a")?.GetAttribute("href");

                // Report a next page only when its link is usable; a true flag with no URL
                // would leave the caller without a new address to move on to.
                if (!string.IsNullOrEmpty(nextHref))
                {
                    result.ExistNextPage = true;
                    result.UrlNextPage   = nextHref.StartsWith(siteRoot, StringComparison.Ordinal)
                        ? nextHref
                        : siteRoot + nextHref;
                }

                break;
            }

            return result;
        }

        /// <summary>
        /// Fills room count, total area and floor data from a card title such as
        /// "3-комн. кв., 97,4 м², 1/16 этаж". Parts that are absent or malformed are ignored.
        /// </summary>
        private static void ApplyListingTitle(string title, Advertisement advertisement, House house)
        {
            if (title == null)
            {
                return;
            }

            foreach (string part in title.Split(", "))
            {
                if (part.Contains("-комн. кв."))
                {
                    string roomDigits = string.Concat(part.Where(c => char.IsDigit(c)));
                    if (int.TryParse(roomDigits, out int roomCount))
                    {
                        advertisement.RoomCount = roomCount;
                    }
                }

                if (part.Contains("м²"))
                {
                    // The site writes the area with a decimal comma. Normalise it to a dot and
                    // parse with the invariant culture so the result does not depend on the
                    // machine's regional settings.
                    string areaText = string.Concat(part.Where(c => char.IsDigit(c) || c == ',')).Replace(',', '.');
                    if (double.TryParse(areaText,
                                        System.Globalization.NumberStyles.AllowDecimalPoint,
                                        System.Globalization.CultureInfo.InvariantCulture,
                                        out double totalArea))
                    {
                        advertisement.TotalArea = totalArea;
                    }
                }

                if (part.Contains("этаж"))
                {
                    // Expected shape: "<floor>/<floor count> этаж".
                    int slash = part.IndexOf('/');
                    if (slash > 0)
                    {
                        if (int.TryParse(part.Substring(0, slash), out int floor))
                        {
                            advertisement.Floor = floor;
                        }

                        string floorCountDigits = string.Concat(part.Substring(slash + 1).TakeWhile(c => char.IsDigit(c)));
                        if (int.TryParse(floorCountDigits, out int floorCount))
                        {
                            house.FloorCount = floorCount;
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Splits an address such as "Татарстан респ., Казань, р-н Советский, ул. Аделя Кутуя, 83"
        /// on ", " and copies its parts into <paramref name="house"/>. Parts that are not
        /// present are left untouched.
        /// </summary>
        private static void ApplyListingAddress(string fullAddress, House house)
        {
            if (string.IsNullOrEmpty(fullAddress))
            {
                return;
            }

            string[] parts = fullAddress.Split(", ");

            // TODO: revisit the positional mapping, the order of the parts often differs.
            if (parts.Length > 0) { house.Region      = parts[0]; }
            if (parts.Length > 1) { house.City        = parts[1]; }
            if (parts.Length > 2) { house.District    = parts[2]; }
            if (parts.Length > 3) { house.Street      = parts[3]; }
            if (parts.Length > 4) { house.NumberHouse = parts[4]; }

            // Marker-based detection overrides the positional guess above.
            foreach (string part in parts)
            {
                if (part.Contains("р-н"))
                {
                    house.District = part;
                }
                if (part.Contains("мкр."))
                {
                    house.MicroDistrict = part;
                }
                // TODO: consider other address markers.
            }
        }