/// <summary>
/// Crawls the portal search results for one outcode through the supplied proxy and
/// synchronises the stored <c>Url</c>, <c>Property</c> and <c>PropertyPrice</c> rows with the listing.
/// </summary>
/// <remarks>
/// Steps, in order:
/// 1. Load the URLs (and their properties) already stored for this postal code and property type.
/// 2. Download every result page (24 listings per page) and read the JSON model embedded in each page.
/// 3. Logically delete (Active = false) stored URLs/properties that are not active-and-listed.
///    This step is skipped when paging was cut short by an error, because a partial listing
///    would otherwise deactivate properties that are still on the portal.
/// 4. For every listed property: insert a new URL and call <c>ProcessProperty</c>, or reactivate
///    the URL and call <c>ProcessProperty</c> when no property row exists yet, or record a new
///    price row and refresh the stored property price.
/// NOTE(review): with the "LastWeek"/"LastTwoWeeks" filters the listing only contains recently
/// added properties, yet step 3 still deactivates everything else - confirm this is intended.
/// NOTE(review): timestamps mix DateTime.UtcNow and DateTime.Now exactly as before; confirm which
/// convention the database expects before unifying them.
/// </remarks>
/// <param name="portal">Portal being crawled; supplies the base URL, the outcode key prefix and the portal id.</param>
/// <param name="postalCode">Postal code whose outcode is searched and whose id scopes the stored URLs.</param>
/// <param name="propertyType">Selects the sale or rent search path and the stored URL type.</param>
/// <param name="urlTypes">Known URL types; a listing is matched to the first one whose UrlPortion occurs in its property URL.</param>
/// <param name="processPostalCode">Passed through unchanged to <c>ProcessProperty</c>.</param>
/// <param name="proxyIp">Proxy address and credentials used for every HTTP request; also passed to <c>ProcessProperty</c>.</param>
/// <param name="processType">When its name is "LastWeek" or "LastTwoWeeks" the search is limited to 7 or 14 days since added.</param>
public void ProcessUpdatePriceUrl(Portal portal, PostalCode postalCode, PropertyType propertyType, List<UrlType> urlTypes, ProcessPostalCode processPostalCode, ProxyIp proxyIp, ProcessType processType)
{
    const int pageSize = 24;
    const string jsonStartMarker = "<script>window.jsonModel = ";
    const string jsonEndMarker = "}</script><script>";

    var urlType = propertyType == PropertyType.Sell ? "/property-for-sale" : "/property-to-rent";

    using (var _appContext = new PropertyCrawler.Data.AppContext(true))
    {
        // Snapshot of what is currently stored for this postal code / property type.
        var currentUrls = _appContext.Urls
            .Where(x => x.Type == (int)propertyType && x.PostalCodeId == postalCode.Id)
            .ToList();
        var currentUrlIds = currentUrls.Select(x => x.Id).OrderBy(x => x).ToList();
        var currentProperties = _appContext.Properties
            .Where(x => x.UrlId.HasValue && currentUrlIds.Contains(x.UrlId.Value))
            .ToList();

        var proxy = new WebProxy()
        {
            Address = String.IsNullOrWhiteSpace(proxyIp.Port)
                ? new Uri($"http://{proxyIp.Ip}")
                : new Uri($"http://{proxyIp.Ip}:{proxyIp.Port}"),
            BypassProxyOnLocal = false,
            UseDefaultCredentials = false,
            // These credentials are given to the proxy server, not the web server.
            Credentials = new NetworkCredential(
                userName: proxyIp.Username,
                password: proxyIp.Password)
        };

        var httpClientHandler = new HttpClientHandler()
        {
            Proxy = proxy,
        };

        using (var client = new HttpClient(handler: httpClientHandler, disposeHandler: true))
        {
            client.DefaultRequestHeaders.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");
            client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36");
            client.DefaultRequestHeaders.Add("Accept-Language", "en-US,en;q=0.9,it;q=0.8,sq;q=0.7");

            // The recency filter must be part of EVERY page request: the total count read from
            // the first page is only valid for the same filter.
            var recencyFilter = string.Empty;
            var processTypeName = processType.ToString();
            if (processTypeName == "LastWeek")
            {
                recencyFilter = "&maxDaysSinceAdded=7";
            }
            else if (processTypeName == "LastTwoWeeks")
            {
                recencyFilter = "&maxDaysSinceAdded=14";
            }

            // Builds the search URL for a zero-based page; page 0 carries no index parameter.
            string BuildSearchUrl(int pageIndex)
            {
                var indexPart = pageIndex == 0 ? string.Empty : $"&index={pageSize * pageIndex}";
                var queryString = $"/find.html?locationIdentifier={portal.OutCodeKey + postalCode.OutCode}&sortType=10{indexPart}&propertyTypes=&includeLetAgreed=false&mustHave=&dontShow=&furnishTypes=&keywords={recencyFilter}";
                return portal.Url + urlType + queryString;
            }

            // Downloads one result page (blocking) and parses it.
            HtmlDocument LoadPage(int pageIndex)
            {
                var html = client.GetStringAsync(BuildSearchUrl(pageIndex)).Result;
                var document = new HtmlDocument();
                document.LoadHtml(html);
                return document;
            }

            // Cuts the embedded "window.jsonModel" object out of the page markup and deserializes it.
            PropertyUrlDetails ReadSearchModel(HtmlDocument document)
            {
                var json = document.DocumentNode.InnerHtml.Split(jsonStartMarker)[1].Split(jsonEndMarker)[0] + "}";
                return JsonConvert.DeserializeObject<PropertyUrlDetails>(json);
            }

            // A failure on the first page is fatal and propagates to the caller.
            var firstPage = LoadPage(0);
            var listOfUrlProperties = new PropertyUrlDetails();
            listOfUrlProperties.properties.AddRange(ReadSearchModel(firstPage).properties);

            var totalcount = int.Parse(firstPage.DocumentNode.SelectSingleNode(".//span[@class=\"searchHeader-resultCount\"]").InnerText.Replace(",", ""));
            var pages = (totalcount / pageSize) + ((totalcount % pageSize) == 0 ? 0 : 1);

            // Remaining pages are best effort: stop at the first failure but remember that the
            // listing is incomplete so nothing gets deactivated based on it.
            var crawlComplete = true;
            for (int i = 1; i < pages; i++)
            {
                try
                {
                    listOfUrlProperties.properties.AddRange(ReadSearchModel(LoadPage(i)).properties);
                }
                catch (Exception ex)
                {
                    crawlComplete = false;
                    System.Diagnostics.Trace.TraceWarning($"ProcessUpdatePriceUrl: paging stopped at page {i + 1} of {pages}: {ex}");
                    break;
                }
            }

            var allUrlCrawlerModels = new List<UrlCrawlerModel>();
            foreach (var prop in listOfUrlProperties.properties)
            {
                allUrlCrawlerModels.Add(new UrlCrawlerModel
                {
                    Price = prop.price.amount,
                    // A listing without display prices simply has no qualifier.
                    PriceQualifier = prop.price.displayPrices?.FirstOrDefault()?.displayPriceQualifier,
                    PropertyCode = prop.id,
                    UrlTypeId = urlTypes.FirstOrDefault(a => prop.propertyUrl.Trim().Contains(a.UrlPortion))?.Id
                });
            }

            // The same listing can show up on two pages when results shift between requests.
            // Keep the first occurrence per property code so a URL is never inserted twice.
            allUrlCrawlerModels = allUrlCrawlerModels
                .GroupBy(x => x.PropertyCode)
                .Select(g => g.First())
                .ToList();

            if (crawlComplete)
            {
                var propertyCodesList = allUrlCrawlerModels.Select(x => x.PropertyCode).ToList();

                // Everything stored except "active and still listed" is marked inactive.
                // Inactive URLs that reappeared are reactivated by the loop further down.
                var toBeLogicDeletedUrls = currentUrls
                    .Where(x => !(x.Active && propertyCodesList.Contains(x.PropertyCode)))
                    .ToList();

                var dateNow = DateTime.UtcNow;
                foreach (var url in toBeLogicDeletedUrls)
                {
                    url.Active = false;
                    url.DateModified = dateNow;

                    var propertyToBeUpdated = currentProperties.FirstOrDefault(x => x.UrlId.HasValue && x.UrlId == url.Id);
                    if (propertyToBeUpdated != null)
                    {
                        propertyToBeUpdated.DateModified = dateNow;
                        propertyToBeUpdated.Active = false;
                        _appContext.Properties.Update(propertyToBeUpdated);
                    }

                    _appContext.Urls.Update(url);
                }

                _appContext.SaveChanges();
            }

            foreach (var item in allUrlCrawlerModels)
            {
                var existUrl = currentUrls.FirstOrDefault(x => x.PropertyCode == item.PropertyCode);

                // Unknown listing: store its URL, then crawl the property itself.
                if (existUrl == null)
                {
                    var insertUrl = new Url
                    {
                        PropertyCode = item.PropertyCode,
                        Type = (int)propertyType,
                        PortalId = portal.Id,
                        DateModified = DateTime.Now,
                        DateAdded = DateTime.Now,
                        Active = true,
                        PostalCodeId = postalCode.Id,
                        UrlTypeId = item.UrlTypeId
                    };
                    _appContext.Urls.Add(insertUrl);
                    _appContext.SaveChanges();

                    ProcessProperty(portal, insertUrl, urlTypes.FirstOrDefault(x => x.Id == item.UrlTypeId), processPostalCode, proxyIp);
                    continue;
                }

                // Known URL: it is listed again, so it is active in every remaining case.
                existUrl.Active = true;
                _appContext.Urls.Update(existUrl);

                var existProperty = currentProperties.FirstOrDefault(x => x.UrlId == existUrl.Id);

                // URL stored but the property was never crawled: crawl it now.
                if (existProperty == null)
                {
                    _appContext.SaveChanges();
                    ProcessProperty(portal, existUrl, urlTypes.FirstOrDefault(x => x.Id == item.UrlTypeId), processPostalCode, proxyIp);
                    continue;
                }

                // Known property: append a price history row and refresh the current price.
                var priceProperty = new PropertyPrice
                {
                    PropertyId = existProperty.Id,
                    Price = item.Price,
                    PriceQualifier = item.PriceQualifier,
                    DateModified = DateTime.Now,
                    DateAdded = DateTime.Now,
                    Active = true
                };

                existProperty.Price = item.Price;
                existProperty.Active = true;
                existProperty.DateModified = DateTime.UtcNow;

                _appContext.PropertyPrices.Add(priceProperty);
                _appContext.Properties.Update(existProperty);
                _appContext.SaveChanges();
            }
        }
    }
}
/// <summary>
/// Crawls the portal search results for one outcode (direct connection, no proxy) and, for every
/// listing, either queues a new <c>PropertyPrice</c> row for the already stored property or makes
/// sure a <c>Url</c> row exists and calls <c>ProcessProperty</c> to crawl the property.
/// </summary>
/// <remarks>
/// Result pages hold 24 listings each. A failure on the first page propagates to the caller;
/// a failure on a later page stops paging and the listings collected so far are still processed.
/// Queued price rows are saved in one batch at the end.
/// NOTE(review): follow-up pages add "includeSSTC=false" while the first page does not (kept as
/// in the original) - confirm which filter is intended.
/// </remarks>
/// <param name="portal">Portal being crawled; supplies the base URL, the outcode key prefix and the portal id.</param>
/// <param name="postalCode">Postal code whose outcode is searched; its id is stored on newly inserted URLs.</param>
/// <param name="propertyType">Selects the sale or rent search path, the searchType value and the stored URL type.</param>
/// <param name="urlTypes">Known URL types; a listing is matched to the first one whose UrlPortion occurs in its property URL.</param>
/// <param name="processPostalCode">Passed through unchanged to <c>ProcessProperty</c>.</param>
public void ProcessUpdatePriceUrl(Portal portal, PostalCode postalCode, PropertyType propertyType, List<UrlType> urlTypes, ProcessPostalCode processPostalCode)
{
    const int pageSize = 24;
    const string jsonStartMarker = "<script>window.jsonModel = ";
    const string jsonEndMarker = "}</script><script>";

    var type = propertyType == PropertyType.Sell ? "SALE" : "RENT";
    var urlType = propertyType == PropertyType.Sell ? "/property-for-sale" : "/property-to-rent";

    using (var _appContext = new PropertyCrawler.Data.AppContext(true))
    using (var client = new HttpClient())
    {
        client.DefaultRequestHeaders.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");
        client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36");
        client.DefaultRequestHeaders.Add("Accept-Language", "en-US,en;q=0.9,it;q=0.8,sq;q=0.7");

        // Downloads one result page (blocking) and parses it.
        HtmlDocument LoadPage(string queryString)
        {
            // Every page is requested under the sale/rent path, exactly like the first one.
            var html = client.GetStringAsync(portal.Url + urlType + queryString).Result;
            var document = new HtmlDocument();
            document.LoadHtml(html);
            return document;
        }

        // Cuts the embedded "window.jsonModel" object out of the page markup and deserializes it.
        PropertyUrlDetails ReadSearchModel(HtmlDocument document)
        {
            var json = document.DocumentNode.InnerHtml.Split(jsonStartMarker)[1].Split(jsonEndMarker)[0] + "}";
            return JsonConvert.DeserializeObject<PropertyUrlDetails>(json);
        }

        var firstPage = LoadPage($"/find.html?searchType={type}&locationIdentifier={portal.OutCodeKey + postalCode.OutCode}&index={pageSize * 0}");
        var listOfUrlProperties = new PropertyUrlDetails();
        listOfUrlProperties.properties.AddRange(ReadSearchModel(firstPage).properties);

        var totalcount = int.Parse(firstPage.DocumentNode.SelectSingleNode(".//span[@class=\"searchHeader-resultCount\"]").InnerText.Replace(",", ""));
        var pages = (totalcount / pageSize) + ((totalcount % pageSize) == 0 ? 0 : 1);

        for (int i = 1; i < pages; i++)
        {
            try
            {
                var nextPage = LoadPage($"/find.html?searchType={type}&locationIdentifier={portal.OutCodeKey + postalCode.OutCode}&index={pageSize * i}&includeSSTC=false");
                listOfUrlProperties.properties.AddRange(ReadSearchModel(nextPage).properties);
            }
            catch (Exception ex)
            {
                // Best effort: keep what was collected, but leave a trace instead of failing silently.
                System.Diagnostics.Trace.TraceWarning($"ProcessUpdatePriceUrl: paging stopped at page {i + 1} of {pages}: {ex}");
                break;
            }
        }

        var allUrlCrawlerModels = new List<UrlCrawlerModel>();
        foreach (var prop in listOfUrlProperties.properties)
        {
            allUrlCrawlerModels.Add(new UrlCrawlerModel
            {
                Price = prop.price.amount,
                // A listing without display prices simply has no qualifier.
                PriceQualifier = prop.price.displayPrices?.FirstOrDefault()?.displayPriceQualifier,
                PropertyCode = prop.id,
                UrlTypeId = urlTypes.FirstOrDefault(a => prop.propertyUrl.Trim().Contains(a.UrlPortion))?.Id
            });
        }

        var pricePropertyList = new List<PropertyPrice>();
        foreach (var item in allUrlCrawlerModels)
        {
            // Project into an anonymous type so "no match" is a real null. Selecting the bare id
            // and calling FirstOrDefault yields the default id value (0) instead, which made the
            // "property not stored yet" branch unreachable.
            var existingProperty = (from url in _appContext.Urls
                                    join prop in _appContext.Properties on url.Id equals prop.UrlId
                                    where url.PropertyCode == item.PropertyCode
                                    select new { prop.Id }).FirstOrDefault();

            if (existingProperty != null)
            {
                pricePropertyList.Add(new PropertyPrice
                {
                    PropertyId = existingProperty.Id,
                    Price = item.Price,
                    PriceQualifier = item.PriceQualifier,
                    DateModified = DateTime.Now,
                    DateAdded = DateTime.Now,
                    Active = true
                });
                continue;
            }

            // No property stored for this listing yet: make sure an active URL row exists, then crawl it.
            var existUrl = _appContext.Urls.FirstOrDefault(x => x.PropertyCode == item.PropertyCode);
            if (existUrl == null)
            {
                var insertUrl = new Url
                {
                    PropertyCode = item.PropertyCode,
                    Type = (int)propertyType,
                    PortalId = portal.Id,
                    DateModified = DateTime.Now,
                    DateAdded = DateTime.Now,
                    Active = true,
                    PostalCodeId = postalCode.Id,
                    UrlTypeId = item.UrlTypeId
                };
                _appContext.Urls.Add(insertUrl);
                _appContext.SaveChanges();

                ProcessProperty(portal, insertUrl, urlTypes.FirstOrDefault(x => x.Id == item.UrlTypeId), processPostalCode);
            }
            else
            {
                // Reuse the stored URL (reactivating it if needed) instead of inserting a duplicate.
                if (!existUrl.Active)
                {
                    existUrl.Active = true;
                    _appContext.Urls.Update(existUrl);
                    _appContext.SaveChanges();
                }

                ProcessProperty(portal, existUrl, urlTypes.FirstOrDefault(x => x.Id == item.UrlTypeId), processPostalCode);
            }
        }

        _appContext.PropertyPrices.AddRange(pricePropertyList);
        _appContext.SaveChanges();
    }
}