/// <summary>
/// Crawls the portal's search results for one outcode and records the advertised price of every listing found.
/// </summary>
/// <remarks>
/// The first results page is requested from <c>portal.Url + urlType + "/find.html?..."</c>; the result count
/// read from that page decides how many further pages (24 results each) are requested. A failure on a later
/// page stops paging and the listings gathered so far are still processed; a failure on the first page
/// propagates to the caller.
/// For each distinct property code: when a property row is already joined to a URL with that code, a new
/// <c>PropertyPrice</c> row is queued and saved at the end; otherwise the listing is passed to
/// <c>ProcessProperty</c> together with the existing active <c>Url</c> row, or a newly inserted one when no
/// active row exists.
/// </remarks>
/// <param name="portal">Portal being crawled; its <c>Url</c>, <c>OutCodeKey</c> and <c>Id</c> are read.</param>
/// <param name="postalCode">Outcode to search; its <c>OutCode</c> and <c>Id</c> are read.</param>
/// <param name="propertyType">Selects the sale or rent search (<c>PropertyType.Sell</c> means sale, anything else rent).</param>
/// <param name="urlTypes">Known URL types; a listing is matched to the first one whose <c>UrlPortion</c> occurs in its URL.</param>
/// <param name="processPostalCode">Forwarded unchanged to <c>ProcessProperty</c>.</param>
public void ProcessUpdatePriceUrl(Portal portal, PostalCode postalCode, PropertyType propertyType, List <UrlType> urlTypes, ProcessPostalCode processPostalCode)
        {
            // The search is paged by an index that advances 24 results at a time.
            const int resultsPerPage = 24;

            var type               = propertyType == PropertyType.Sell ? "SALE" : "RENT";
            var urlType            = propertyType == PropertyType.Sell ? "/property-for-sale" : "/property-to-rent";
            var locationIdentifier = portal.OutCodeKey + postalCode.OutCode;

            // assumes AppContext is an EF DbContext (it exposes SaveChanges and DbSet-style members) and is therefore IDisposable — TODO confirm
            using (var appContext = new PropertyCrawler.Data.AppContext(true))
            using (var client = new HttpClient())
            {
                client.DefaultRequestHeaders.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");
                client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36");
                client.DefaultRequestHeaders.Add("Accept-Language", "en-US,en;q=0.9,it;q=0.8,sq;q=0.7");

                // Downloads one page and parses it as HTML.
                HtmlDocument LoadDocument(string url)
                {
                    // .Result blocks the calling thread; kept so the method stays synchronous for existing callers.
                    var html     = client.GetStringAsync(url).Result;
                    var document = new HtmlDocument();
                    document.LoadHtml(html);
                    return document;
                }

                // Extracts the JSON assigned to window.jsonModel in an inline script and deserializes it.
                PropertyUrlDetails ReadListings(HtmlDocument document)
                {
                    var jsonString = document.DocumentNode.InnerHtml.Split("<script>window.jsonModel = ")[1].Split("}</script><script>")[0] + "}";
                    return JsonConvert.DeserializeObject <PropertyUrlDetails>(jsonString);
                }

                var firstQuery = $"/find.html?searchType={type}&locationIdentifier={locationIdentifier}&index=0";
                var firstPage  = LoadDocument(portal.Url + urlType + firstQuery);

                var listOfUrlProperties = new PropertyUrlDetails();
                listOfUrlProperties.properties.AddRange(ReadListings(firstPage).properties);

                var totalcount = int.Parse(firstPage.DocumentNode.SelectSingleNode(".//span[@class=\"searchHeader-resultCount\"]").InnerText.Replace(",", ""));
                var pages      = (totalcount / resultsPerPage) + ((totalcount % resultsPerPage) == 0 ? 0 : 1);

                // NOTE(review): only the follow-up pages send includeSSTC=false, so the total count above may
                // include listings the later pages exclude — confirm which behaviour is intended.
                for (int i = 1; i < pages; i++)
                {
                    try
                    {
                        var pageQuery = $"/find.html?searchType={type}&locationIdentifier={locationIdentifier}&index={resultsPerPage * i}&includeSSTC=false";

                        // Fix: follow-up pages now use the same urlType path segment as the first page.
                        var pageDocument = LoadDocument(portal.Url + urlType + pageQuery);
                        listOfUrlProperties.properties.AddRange(ReadListings(pageDocument).properties);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: keep what was collected so far, but make the failure visible.
                        System.Diagnostics.Trace.TraceWarning($"ProcessUpdatePriceUrl: paging stopped at page {i} for {locationIdentifier}: {ex}");
                        break;
                    }
                }

                var allUrlCrawlerModels = new List <UrlCrawlerModel>();
                foreach (var prop in listOfUrlProperties.properties)
                {
                    var propertyUrl = (prop.propertyUrl ?? string.Empty).Trim();

                    allUrlCrawlerModels.Add(new UrlCrawlerModel
                    {
                        Price          = prop.price.amount,
                        // A listing without display prices gets no qualifier instead of throwing.
                        PriceQualifier = prop.price.displayPrices?.FirstOrDefault()?.displayPriceQualifier,
                        PropertyCode   = prop.id,
                        UrlTypeId      = urlTypes.FirstOrDefault(a => propertyUrl.Contains(a.UrlPortion))?.Id
                    });
                }

                // The same property code can appear more than once in the collected pages; handle each code once.
                allUrlCrawlerModels = allUrlCrawlerModels.GroupBy(x => x.PropertyCode).Select(g => g.First()).ToList();

                var pricePropertyList = new List <PropertyPrice>();
                foreach (var item in allUrlCrawlerModels)
                {
                    // Project to an anonymous type so "no matching property" yields null. The original selected
                    // the bare id, for which FirstOrDefault returns 0 and the null check could never succeed.
                    var existingProperty = (from url in appContext.Urls
                                            join prop in appContext.Properties on url.Id equals prop.UrlId
                                            where url.PropertyCode == item.PropertyCode
                                            select new { prop.Id }).FirstOrDefault();

                    if (existingProperty != null)
                    {
                        var now = DateTime.Now;
                        pricePropertyList.Add(new PropertyPrice
                        {
                            PropertyId     = existingProperty.Id,
                            Price          = item.Price,
                            PriceQualifier = item.PriceQualifier,
                            DateModified   = now,
                            DateAdded      = now,
                            Active         = true
                        });
                        continue;
                    }

                    // No property yet: reuse the active Url row when there is one, otherwise create it.
                    // (The original had this test inverted: it passed null on, and inserted duplicates.)
                    var targetUrl = appContext.Urls.FirstOrDefault(x => x.Active && x.PropertyCode == item.PropertyCode);
                    if (targetUrl == null)
                    {
                        var now = DateTime.Now;
                        targetUrl = new Url
                        {
                            PropertyCode = item.PropertyCode,
                            Type         = (int)propertyType,
                            PortalId     = portal.Id,
                            DateModified = now,
                            DateAdded    = now,
                            Active       = true,
                            PostalCodeId = postalCode.Id,
                            UrlTypeId    = item.UrlTypeId
                        };

                        appContext.Urls.Add(targetUrl);
                        appContext.SaveChanges();
                    }

                    ProcessProperty(portal, targetUrl, urlTypes.FirstOrDefault(x => x.Id == item.UrlTypeId), processPostalCode);
                }

                appContext.PropertyPrices.AddRange(pricePropertyList);
                appContext.SaveChanges();
            }
        }
        // Example #2
        // 0
        /// <summary>
        /// Crawls the portal's search results for one outcode through a proxy and synchronises the stored
        /// URLs, properties and prices with the listings found.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Load the URLs stored for this property type and postal code, and the properties linked to them.
        /// 2. Download every results page (24 results each) and collect the listings from the embedded JSON model.
        /// 3. If every page was read, mark as inactive each stored URL (and its property) that is not an active
        ///    URL present in the listings.
        /// 4. For each distinct listing: insert a new URL and call <c>ProcessProperty</c>; or re-activate a URL that
        ///    has no property yet and call <c>ProcessProperty</c>; or add a <c>PropertyPrice</c> row and update the
        ///    existing property's price.
        /// NOTE(review): with the "LastWeek"/"LastTwoWeeks" process types the listing is restricted to recently
        /// added properties, yet step 3 still deactivates every stored URL not in that listing — confirm this is intended.
        /// NOTE(review): timestamps mix <c>DateTime.Now</c> and <c>DateTime.UtcNow</c>; left as found.
        /// </remarks>
        /// <param name="portal">Portal being crawled; its <c>Url</c>, <c>OutCodeKey</c> and <c>Id</c> are read.</param>
        /// <param name="postalCode">Outcode to search; its <c>OutCode</c> and <c>Id</c> are read.</param>
        /// <param name="propertyType">Selects the sale or rent path (<c>PropertyType.Sell</c> means sale, anything else rent).</param>
        /// <param name="urlTypes">Known URL types; a listing is matched to the first one whose <c>UrlPortion</c> occurs in its URL.</param>
        /// <param name="processPostalCode">Forwarded unchanged to <c>ProcessProperty</c>.</param>
        /// <param name="proxyIp">Proxy address and credentials used for every HTTP request; also forwarded to <c>ProcessProperty</c>.</param>
        /// <param name="processType">When its name is "LastWeek" or "LastTwoWeeks" the search is limited to listings added in the last 7 or 14 days.</param>
        public void ProcessUpdatePriceUrl(Portal portal, PostalCode postalCode, PropertyType propertyType, List <UrlType> urlTypes, ProcessPostalCode processPostalCode, ProxyIp proxyIp, ProcessType processType)
        {
            // The search is paged by an index that advances 24 results at a time.
            const int resultsPerPage = 24;

            var urlType            = propertyType == PropertyType.Sell ? "/property-for-sale" : "/property-to-rent";
            var locationIdentifier = portal.OutCodeKey + postalCode.OutCode;

            // Optional "added within N days" filter; it must be sent with EVERY page request.
            string recencyFilter;
            switch (processType.ToString())
            {
                case "LastWeek":
                    recencyFilter = "&maxDaysSinceAdded=7";
                    break;
                case "LastTwoWeeks":
                    recencyFilter = "&maxDaysSinceAdded=14";
                    break;
                default:
                    recencyFilter = string.Empty;
                    break;
            }

            var proxy = new WebProxy()
            {
                Address               = String.IsNullOrWhiteSpace(proxyIp.Port) ? new Uri($"http://{proxyIp.Ip}") : new Uri($"http://{proxyIp.Ip}:{proxyIp.Port}"),
                BypassProxyOnLocal    = false,
                UseDefaultCredentials = false,

                // These credentials are given to the proxy server, not the web server.
                Credentials = new NetworkCredential(
                    userName: proxyIp.Username,
                    password: proxyIp.Password)
            };

            var httpClientHandler = new HttpClientHandler()
            {
                Proxy = proxy,
            };

            // assumes AppContext is an EF DbContext (it exposes SaveChanges and DbSet-style members) and is therefore IDisposable — TODO confirm
            using (var appContext = new PropertyCrawler.Data.AppContext(true))
            using (var client = new HttpClient(handler: httpClientHandler, disposeHandler: true))
            {
                // Snapshot of what is currently stored for this property type and postal code.
                var currentUrls       = appContext.Urls.Where(x => x.Type == (int)propertyType && x.PostalCodeId == postalCode.Id).ToList();
                var currentUrlIds     = currentUrls.Select(x => x.Id).OrderBy(x => x).ToList();
                var currentProperties = appContext.Properties.Where(x => x.UrlId.HasValue && currentUrlIds.Contains(x.UrlId.Value)).ToList();

                client.DefaultRequestHeaders.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");
                client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36");
                client.DefaultRequestHeaders.Add("Accept-Language", "en-US,en;q=0.9,it;q=0.8,sq;q=0.7");

                // Downloads one page and parses it as HTML.
                HtmlDocument LoadDocument(string url)
                {
                    // .Result blocks the calling thread; kept so the method stays synchronous for existing callers.
                    var html     = client.GetStringAsync(url).Result;
                    var document = new HtmlDocument();
                    document.LoadHtml(html);
                    return document;
                }

                // Extracts the JSON assigned to window.jsonModel in an inline script and deserializes it.
                PropertyUrlDetails ReadListings(HtmlDocument document)
                {
                    var jsonString = document.DocumentNode.InnerHtml.Split("<script>window.jsonModel = ")[1].Split("}</script><script>")[0] + "}";
                    return JsonConvert.DeserializeObject <PropertyUrlDetails>(jsonString);
                }

                var firstQuery = $"/find.html?locationIdentifier={locationIdentifier}&sortType=10&propertyTypes=&includeLetAgreed=false&mustHave=&dontShow=&furnishTypes=&keywords=" + recencyFilter;
                var firstPage  = LoadDocument(portal.Url + urlType + firstQuery);

                var listOfUrlProperties = new PropertyUrlDetails();
                listOfUrlProperties.properties.AddRange(ReadListings(firstPage).properties);

                var totalcount = int.Parse(firstPage.DocumentNode.SelectSingleNode(".//span[@class=\"searchHeader-resultCount\"]").InnerText.Replace(",", ""));
                var pages      = (totalcount / resultsPerPage) + ((totalcount % resultsPerPage) == 0 ? 0 : 1);

                // Tracks whether every page was read; a partial listing must not drive deactivation below.
                var crawlCompleted = true;

                for (int i = 1; i < pages; i++)
                {
                    try
                    {
                        // Fix: the recency filter is now applied to follow-up pages as well as the first page.
                        var pageQuery = $"/find.html?locationIdentifier={locationIdentifier}&sortType=10&index={resultsPerPage * i}&propertyTypes=&includeLetAgreed=false&mustHave=&dontShow=&furnishTypes=&keywords=" + recencyFilter;

                        var pageDocument = LoadDocument(portal.Url + urlType + pageQuery);
                        listOfUrlProperties.properties.AddRange(ReadListings(pageDocument).properties);
                    }
                    catch (Exception ex)
                    {
                        // Best effort: keep what was collected so far, but make the failure visible.
                        System.Diagnostics.Trace.TraceWarning($"ProcessUpdatePriceUrl: paging stopped at page {i} for {locationIdentifier}: {ex}");
                        crawlCompleted = false;
                        break;
                    }
                }

                var allUrlCrawlerModels = new List <UrlCrawlerModel>();
                foreach (var prop in listOfUrlProperties.properties)
                {
                    var propertyUrl = (prop.propertyUrl ?? string.Empty).Trim();

                    allUrlCrawlerModels.Add(new UrlCrawlerModel
                    {
                        Price          = prop.price.amount,
                        // A listing without display prices gets no qualifier instead of throwing.
                        PriceQualifier = prop.price.displayPrices?.FirstOrDefault()?.displayPriceQualifier,
                        PropertyCode   = prop.id,
                        UrlTypeId      = urlTypes.FirstOrDefault(a => propertyUrl.Contains(a.UrlPortion))?.Id
                    });
                }

                // De-duplicate by property code. A plain Distinct() depends on UrlCrawlerModel's equality,
                // which is not visible here; a duplicate code would otherwise insert the same URL twice.
                allUrlCrawlerModels = allUrlCrawlerModels.GroupBy(x => x.PropertyCode).Select(g => g.First()).ToList();

                if (crawlCompleted)
                {
                    var listedCodes = allUrlCrawlerModels.Select(x => x.PropertyCode).ToHashSet();

                    // Every stored URL that is not (active AND still listed) is logically deleted,
                    // together with its property when one exists.
                    var urlsToDeactivate = currentUrls.Where(x => !(x.Active && listedCodes.Contains(x.PropertyCode))).ToList();

                    var dateNow = DateTime.UtcNow;
                    foreach (var url in urlsToDeactivate)
                    {
                        url.Active       = false;
                        url.DateModified = dateNow;

                        var propertyToBeUpdated = currentProperties.FirstOrDefault(x => x.UrlId.HasValue && x.UrlId == url.Id);
                        if (propertyToBeUpdated != null)
                        {
                            propertyToBeUpdated.DateModified = dateNow;
                            propertyToBeUpdated.Active       = false;
                            appContext.Properties.Update(propertyToBeUpdated);
                        }

                        appContext.Urls.Update(url);
                    }

                    appContext.SaveChanges();
                }

                foreach (var item in allUrlCrawlerModels)
                {
                    var itemUrlType = urlTypes.FirstOrDefault(x => x.Id == item.UrlTypeId);
                    var existUrl    = currentUrls.FirstOrDefault(x => x.PropertyCode == item.PropertyCode);

                    if (existUrl == null)
                    {
                        // Unknown listing: store its URL, then hand it to ProcessProperty.
                        var insertUrl = new Url
                        {
                            PropertyCode = item.PropertyCode,
                            Type         = (int)propertyType,
                            PortalId     = portal.Id,
                            DateModified = DateTime.Now,
                            DateAdded    = DateTime.Now,
                            Active       = true,
                            PostalCodeId = postalCode.Id,
                            UrlTypeId    = item.UrlTypeId
                        };

                        appContext.Urls.Add(insertUrl);
                        appContext.SaveChanges();

                        ProcessProperty(portal, insertUrl, itemUrlType, processPostalCode, proxyIp);
                        continue;
                    }

                    var existProperty = currentProperties.FirstOrDefault(x => x.UrlId == existUrl.Id);
                    if (existProperty == null)
                    {
                        // URL is known but has no property row yet: re-activate it and hand it to ProcessProperty.
                        existUrl.Active = true;
                        appContext.Urls.Update(existUrl);
                        appContext.SaveChanges();

                        ProcessProperty(portal, existUrl, itemUrlType, processPostalCode, proxyIp);
                        continue;
                    }

                    // Known property: record the price seen now and refresh the property's latest price.
                    var priceProperty = new PropertyPrice
                    {
                        PropertyId     = existProperty.Id,
                        Price          = item.Price,
                        PriceQualifier = item.PriceQualifier,
                        DateModified   = DateTime.Now,
                        DateAdded      = DateTime.Now,
                        Active         = true
                    };

                    existUrl.Active = true;
                    appContext.Urls.Update(existUrl);

                    existProperty.Price        = item.Price;
                    existProperty.Active       = true;
                    existProperty.DateModified = DateTime.UtcNow;

                    appContext.PropertyPrices.Add(priceProperty);
                    appContext.Properties.Update(existProperty);
                    appContext.SaveChanges();
                }
            }
        }
        /// <summary>
        /// Crawls the portal's search results for one outcode and stores a <c>Url</c> row for every property
        /// code that is not stored yet.
        /// </summary>
        /// <remarks>
        /// Property codes are taken from the digits of each property link's <c>href</c>. A link whose href is
        /// empty, or yields no number that fits an <c>int</c>, gets code 0 (the original fallback for an empty href).
        /// A failure on a follow-up page stops paging and the links gathered so far are still processed;
        /// a failure on the first page propagates to the caller.
        /// NOTE(review): the first page is read with the <c>propertyCard-priceLink</c> selector and follow-up
        /// pages with <c>propertyCard-link</c>; only the follow-up pages send <c>includeSSTC=false</c>. Both are
        /// left as found — confirm they are intended.
        /// </remarks>
        /// <param name="portal">Portal being crawled; its <c>Url</c>, <c>OutCodeKey</c> and <c>Id</c> are read.</param>
        /// <param name="postalCode">Outcode to search; its <c>OutCode</c> and <c>Id</c> are read.</param>
        /// <param name="propertyType">Selects the SALE or RENT search and is stored as the new rows' <c>Type</c>.</param>
        /// <param name="processPostalCode">Not used by this method.</param>
        public void ProcessUrl(Portal portal, PostalCode postalCode, PropertyType propertyType, ProcessPostalCode processPostalCode)
        {
            // The search is paged by an index that advances 24 results at a time.
            const int resultsPerPage = 24;

            var type               = propertyType == PropertyType.Sell ? "SALE" : "RENT";
            var locationIdentifier = portal.OutCodeKey + postalCode.OutCode;

            // assumes AppContext is an EF DbContext (it exposes SaveChanges and DbSet-style members) and is therefore IDisposable — TODO confirm
            using (var appContext = new PropertyCrawler.Data.AppContext(true))
            using (var client = new HttpClient())
            {
                client.DefaultRequestHeaders.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");
                client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36");
                client.DefaultRequestHeaders.Add("Accept-Language", "en-US,en;q=0.9,it;q=0.8,sq;q=0.7");

                var hrefs = new List <string>();

                // Downloads one page and parses it as HTML.
                HtmlDocument LoadDocument(string url)
                {
                    // .Result blocks the calling thread; kept so the method stays synchronous for existing callers.
                    var html     = client.GetStringAsync(url).Result;
                    var document = new HtmlDocument();
                    document.LoadHtml(html);
                    return document;
                }

                // Appends the href of every anchor matching xpath and returns how many anchors matched.
                // SelectNodes yields null (not an empty collection) when nothing matches.
                int CollectHrefs(HtmlDocument document, string xpath)
                {
                    var anchors = document.DocumentNode.SelectNodes(xpath);
                    if (anchors == null)
                    {
                        return 0;
                    }

                    foreach (var anchor in anchors)
                    {
                        hrefs.Add(anchor.Attributes["href"]?.Value);
                    }

                    return anchors.Count;
                }

                // Concatenates the ASCII digits of the href and parses them; 0 when that is not possible.
                int ParsePropertyCode(string href)
                {
                    var trimmed = href?.Trim();
                    if (string.IsNullOrEmpty(trimmed))
                    {
                        return 0;
                    }

                    var digits = string.Concat(trimmed.Where(c => c >= '0' && c <= '9'));
                    return int.TryParse(digits, out int code) ? code : 0;
                }

                var firstQuery = $"find.html?searchType={type}&locationIdentifier={locationIdentifier}&index=0";
                var firstPage  = LoadDocument(portal.Url + firstQuery);
                CollectHrefs(firstPage, ".//a[@class=\"propertyCard-priceLink\"]");

                var totalcount = int.Parse(firstPage.DocumentNode.SelectSingleNode(".//span[@class=\"searchHeader-resultCount\"]").InnerText.Replace(",", ""));
                var pages      = (totalcount / resultsPerPage) + ((totalcount % resultsPerPage) == 0 ? 0 : 1);

                for (int i = 1; i < pages; i++)
                {
                    try
                    {
                        var pageQuery    = $"find.html?searchType={type}&locationIdentifier={locationIdentifier}&index={resultsPerPage * i}&includeSSTC=false";
                        var pageDocument = LoadDocument(portal.Url + pageQuery);

                        // A page without property links ends paging, as it did before (via the caught exception).
                        if (CollectHrefs(pageDocument, ".//a[@class=\"propertyCard-link\"]") == 0)
                        {
                            break;
                        }
                    }
                    catch (Exception ex)
                    {
                        // Best effort: keep what was collected so far, but make the failure visible.
                        System.Diagnostics.Trace.TraceWarning($"ProcessUrl: paging stopped at page {i} for {locationIdentifier}: {ex}");
                        break;
                    }
                }

                var allUrlCrawlerModels = hrefs
                    .Select(href => new UrlCrawlerModel { PropertyCode = ParsePropertyCode(href) })
                    .ToList();

                // Distinct: the same property can be linked more than once across the collected pages.
                var list = allUrlCrawlerModels.Select(x => x.PropertyCode).Distinct().ToList();

                // Materialised once; the original re-queried the database for every code in the list.
                var existing = appContext.Urls.Where(x => list.Contains(x.PropertyCode)).Select(x => x.PropertyCode).ToHashSet();

                var urls = list
                    .Where(x => !existing.Contains(x))
                    .Select(x => new Url
                    {
                        PropertyCode = x,
                        Type         = (int)propertyType,
                        PortalId     = portal.Id,
                        DateModified = DateTime.Now,
                        DateAdded    = DateTime.Now,
                        Active       = true,
                        PostalCodeId = postalCode.Id
                    })
                    .ToList();

                appContext.Urls.AddRange(urls);
                appContext.SaveChanges();
            }
        }