/// <summary>
/// Processes every search result page for <paramref name="searchArea"/>: parses each page for
/// application summary paths, then runs the summary, details and contact extraction steps for
/// each application and adds the populated application to <c>_planningApplications</c>.
/// </summary>
/// <param name="searchArea">Area key used to resolve the configuration; also written (upper-cased) to the log.</param>
/// <param name="searchResultPages">Search result page responses whose bodies are parsed as HTML.</param>
/// <param name="cookieContainer">Cookie container handed to every HTTP client created by this method.</param>
/// <param name="cancellationToken">Token passed to the logger and to the per-application extraction steps.</param>
/// <returns><c>_planningApplications</c> after the applications from all pages have been added.</returns>
/// <exception cref="ExtractDataFailedException">
/// Thrown for any failure; carries the failure's message and the failure's <c>InnerException</c>.
/// </exception>
public async Task<IEnumerable<PlanningApplication>> ExtractDataAsync(string searchArea, List<HttpResponseMessage> searchResultPages, CookieContainer cookieContainer, CancellationToken cancellationToken)
{
    try
    {
        _configuration = _configResolver.ResolveConfig(searchArea);
        var currentPage = 0;

        await _logger.LogInformationAsync($"Processing {searchResultPages.Count} search result pages for {searchArea.ToUpper()}", cancellationToken);

        var client = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);
        try
        {
            foreach (var searchResults in searchResultPages)
            {
                currentPage++;

                var searchResultsHtml = await searchResults.Content.ReadAsStringAsync();
                var searchPageResponseDoc = CQ.Create(searchResultsHtml);
                var appSummaryPaths = GetAppSummaryPaths(searchPageResponseDoc);

                await _logger.LogInformationAsync($"Found {appSummaryPaths.Count} planning applications in page {currentPage}...", cancellationToken);

                var row = 0;
                foreach (var appSummaryPath in appSummaryPaths)
                {
                    row++;
                    var planningApplication = new PlanningApplication();

                    await _logger.LogInformationAsync($"Getting application detail for result number {row} application {appSummaryPath}", cancellationToken);

                    await ExtractApplicationSummary(cancellationToken, appSummaryPath, client, planningApplication);
                    // The details step yields the path that the contact step needs.
                    var appDetailsPath = await ExtractApplicationDetails(cancellationToken, appSummaryPath, client, planningApplication);
                    await ExtractApplicationContact(cancellationToken, appDetailsPath, client, planningApplication);

                    _planningApplications.Add(planningApplication);

                    if (_configuration.UseProxy)
                    {
                        // Refresh client/handler to get a new IP address. The replacement is created
                        // first so a failed creation never leaves 'client' pointing at a disposed
                        // instance; the previous client is then released instead of being leaked.
                        // NOTE(review): assumes CreateClient returns a client that owns its handler — confirm.
                        var refreshedClient = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);
                        client.Dispose();
                        client = refreshedClient;
                    }
                }
            }

            await _logger.LogInformationAsync($"Finished extracting planning data for {searchArea.ToUpper()}...", cancellationToken);

            return _planningApplications;
        }
        finally
        {
            // Runs on success and on failure, so the current client is never leaked.
            client.Dispose();
        }
    }
    catch (Exception ex)
    {
        // NOTE(review): the caught exception itself is not chained (only its InnerException is),
        // so its stack trace is dropped. Left unchanged because callers may rely on this shape.
        throw new ExtractDataFailedException(ex.Message, ex.InnerException);
    }
}
/// <summary>
/// Parses the first search result page, and for every <c>table tbody tr</c> row reads the row
/// detail, fetches the planning application detail and adds the populated application to
/// <c>_planningApplications</c>.
/// </summary>
/// <param name="searchArea">Area name; only written (upper-cased) to the log here.</param>
/// <param name="searchResultPages">Search result responses; only the first entry is processed.</param>
/// <param name="cookieContainer">Cookie container handed to every HTTP client created by this method.</param>
/// <param name="cancellationToken">Token passed to the logger and to the per-row extraction steps.</param>
/// <returns><c>_planningApplications</c> after the applications from the page have been added.</returns>
/// <exception cref="ExtractDataFailedException">
/// Thrown for any failure (including an empty <paramref name="searchResultPages"/>); carries the
/// failure's message and the failure's <c>InnerException</c>.
/// </exception>
public async Task<IEnumerable<PlanningApplication>> ExtractDataAsync(string searchArea, List<HttpResponseMessage> searchResultPages, CookieContainer cookieContainer, CancellationToken cancellationToken)
{
    try
    {
        var searchResults = searchResultPages.First();

        var client = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);
        try
        {
            client.DefaultRequestHeaders.Add("Referer", $"{searchResults.RequestMessage.RequestUri}");

            var searchResultsHtml = await searchResults.Content.ReadAsStringAsync();
            var searchPageResponseDoc = CQ.Create(searchResultsHtml);

            // Select the result rows once and reuse the selection for the count and the loop.
            var resultRows = searchPageResponseDoc.Select("table tbody tr");

            await _logger.LogInformationAsync($"Found {resultRows.Length} planning applications for {searchArea.ToUpper()}...", cancellationToken);

            var row = 1;

            // A plain foreach (instead of CQ.Each with a lambda) lets the async steps be awaited
            // rather than blocked on with GetAwaiter().GetResult().
            foreach (var tr in resultRows)
            {
                var planningApplication = new PlanningApplication();

                await GetSeachRowDetailAsync(tr, planningApplication, cancellationToken);

                await _logger.LogInformationAsync($"Getting application detail for result number {row} application reference {planningApplication.ApplicationReference} from {Environment.NewLine}{planningApplication.ApplicationLink}", cancellationToken);

                await GetPlanningApplicationDetailAsync(client, planningApplication, cancellationToken);

                // Document links (GetPlanningApplicationDocumentLinksAsync) are deliberately not
                // fetched: can cause too much data and not really required.

                _planningApplications.Add(planningApplication);

                if (_configuration.UseProxy)
                {
                    // Refresh client/handler to get a new IP address. The replacement is created
                    // first so a failed creation never leaves 'client' pointing at a disposed
                    // instance; the previous client is then released instead of being leaked.
                    // NOTE(review): assumes CreateClient returns a client that owns its handler — confirm.
                    // NOTE(review): the replacement client does not get the Referer header that was
                    // added to the first client — confirm whether that is intended.
                    var refreshedClient = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);
                    client.Dispose();
                    client = refreshedClient;
                }

                row++;
            }

            await _logger.LogInformationAsync($"Finished extracting planning data for {searchArea.ToUpper()}...", cancellationToken);

            return _planningApplications;
        }
        finally
        {
            // Runs on success and on failure, so the current client is never leaked.
            client.Dispose();
        }
    }
    catch (Exception ex)
    {
        // NOTE(review): the caught exception itself is not chained (only its InnerException is),
        // so its stack trace is dropped. Left unchanged because callers may rely on this shape.
        throw new ExtractDataFailedException(ex.Message, ex.InnerException);
    }
}
/// <summary>
/// Processes every search result page: for each <c>table tbody tr</c> row whose proposal cell
/// contains the configured search term, downloads the linked application page, maps its form
/// fields onto a new <see cref="PlanningApplication"/> and adds it to <c>_planningApplications</c>.
/// </summary>
/// <param name="searchArea">Area name; only written (upper-cased) to the log here.</param>
/// <param name="searchResultPages">Search result page responses whose bodies are parsed as HTML.</param>
/// <param name="cookieContainer">Cookie container handed to every HTTP client created by this method.</param>
/// <param name="cancellationToken">Token passed to the logger and to the detail page request.</param>
/// <returns><c>_planningApplications</c> after the matching applications from all pages have been added.</returns>
/// <exception cref="ExtractDataFailedException">
/// Thrown for any failure; carries the failure's message and the failure's <c>InnerException</c>.
/// </exception>
public async Task<IEnumerable<PlanningApplication>> ExtractDataAsync(string searchArea, List<HttpResponseMessage> searchResultPages, CookieContainer cookieContainer, CancellationToken cancellationToken)
{
    try
    {
        await _logger.LogInformationAsync($"Processing {searchResultPages.Count} search result pages for {searchArea.ToUpper()}", cancellationToken);

        var client = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);
        try
        {
            foreach (var searchResults in searchResultPages)
            {
                var searchResultsHtml = await searchResults.Content.ReadAsStringAsync();
                var searchPageResponseDoc = CQ.Create(searchResultsHtml);

                var row = 0;

                // A plain foreach (instead of CQ.Each with a lambda) lets the async steps be awaited
                // rather than blocked on with GetAwaiter().GetResult().
                foreach (var searchRow in searchPageResponseDoc.Select("table tbody tr"))
                {
                    row++;

                    // Filter by proposal text containing search term.
                    if (!searchRow.ChildNodes[4].InnerText.Contains(_configuration.SearchTerm))
                    {
                        continue;
                    }

                    var planningApplication = new PlanningApplication();
                    var appLink = searchRow.ChildNodes[2].ChildNodes[1].Attributes["href"];

                    await _logger.LogInformationAsync($"Getting application detail for result number {row} application {appLink}", cancellationToken);

                    string content;
                    // Dispose the response once its body has been read into a string.
                    using (var appDetailsResponse = await client.GetAsync(appLink, cancellationToken))
                    {
                        content = await appDetailsResponse.Content.ReadAsStringAsync();
                    }

                    var appDetailsResponseDoc = CQ.Create(content);

                    PopulateApplicationFormFields(appDetailsResponseDoc, planningApplication);
                    PopulateLocationFormFields(appDetailsResponseDoc, planningApplication);

                    _planningApplications.Add(planningApplication);

                    if (_configuration.UseProxy)
                    {
                        // Refresh client/handler to get a new IP address. The replacement is created
                        // first so a failed creation never leaves 'client' pointing at a disposed
                        // instance; the previous client is then released instead of being leaked.
                        // NOTE(review): assumes CreateClient returns a client that owns its handler — confirm.
                        var refreshedClient = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);
                        client.Dispose();
                        client = refreshedClient;
                    }
                }
            }

            await _logger.LogInformationAsync($"Finished extracting planning data for {searchArea.ToUpper()}...", cancellationToken);

            return _planningApplications;
        }
        finally
        {
            // Runs on success and on failure, so the current client is never leaked.
            client.Dispose();
        }
    }
    catch (Exception ex)
    {
        // NOTE(review): the caught exception itself is not chained (only its InnerException is),
        // so its stack trace is dropped. Left unchanged because callers may rely on this shape.
        throw new ExtractDataFailedException(ex.Message, ex.InnerException);
    }
}

/// <summary>
/// Maps the labelled fields found under <c>#MainContent_RadPageView1 .form tr</c> onto the application.
/// </summary>
private static void PopulateApplicationFormFields(CQ appDetailsResponseDoc, PlanningApplication planningApplication)
{
    foreach (var tr in appDetailsResponseDoc.Select("#MainContent_RadPageView1 .form tr"))
    {
        foreach (var cell in tr.ChildNodes)
        {
            if (!cell.HasChildren)
            {
                continue;
            }

            foreach (var child in cell.ChildNodes)
            {
                if (child.NodeType != NodeType.ELEMENT_NODE)
                {
                    continue;
                }

                // The value node is looked up only inside a matching case, so cells without a
                // recognised label never touch NextSibling/ChildNodes.
                var matched = true;
                switch (child.InnerText)
                {
                    case "Application No":
                        planningApplication.ApplicationReference = cell.NextSibling.NextSibling.ChildNodes[1].Attributes["Value"].Clean();
                        break;
                    case "Type":
                        planningApplication.ApplicationType = cell.NextSibling.NextSibling.ChildNodes[1].Attributes["Value"].Clean();
                        break;
                    case "Applicant":
                        planningApplication.NameOfApplicant = cell.NextSibling.NextSibling.ChildNodes[1].Attributes["Value"].Clean();
                        break;
                    case "Agent":
                        planningApplication.AgentName = cell.NextSibling.NextSibling.ChildNodes[1].Attributes["Value"].Clean();
                        break;
                    case "Case Officer":
                        planningApplication.CaseOfficer = cell.NextSibling.NextSibling.ChildNodes[1].Attributes["Value"].Clean();
                        break;
                    case "Proposal":
                        // Proposal is read from the node's text content rather than a Value attribute.
                        planningApplication.Proposal = cell.NextSibling.NextSibling.ChildNodes[1].TextContent.Clean();
                        break;
                    case "Received Date":
                        planningApplication.RegisteredDate = cell.NextSibling.NextSibling.ChildNodes[1].Attributes["Value"].Clean();
                        break;
                    default:
                        matched = false;
                        break;
                }

                if (matched)
                {
                    // Stop scanning this cell's children once a label has been handled.
                    break;
                }
            }
        }
    }
}

/// <summary>
/// Maps the labelled fields found under <c>#MainContent_RadPageView2 .form tr</c> onto the application.
/// </summary>
private static void PopulateLocationFormFields(CQ appDetailsResponseDoc, PlanningApplication planningApplication)
{
    foreach (var tr in appDetailsResponseDoc.Select("#MainContent_RadPageView2 .form tr"))
    {
        foreach (var cell in tr.ChildNodes)
        {
            if (!cell.HasChildren)
            {
                continue;
            }

            foreach (var child in cell.ChildNodes)
            {
                if (child.NodeType != NodeType.ELEMENT_NODE)
                {
                    continue;
                }

                // The value node is looked up only inside a matching case, so cells without a
                // recognised label never touch NextSibling/ChildNodes.
                var matched = true;
                switch (child.InnerText)
                {
                    case "Address":
                        // Address is read from the node's text content rather than a Value attribute.
                        planningApplication.SiteAddress = cell.NextSibling.NextSibling.ChildNodes[1].TextContent.Clean();
                        break;
                    case "Ward":
                        planningApplication.Wards = cell.NextSibling.NextSibling.ChildNodes[1].Attributes["Value"].Clean();
                        break;
                    case "Parish":
                        planningApplication.Parishes = cell.NextSibling.NextSibling.ChildNodes[1].Attributes["Value"].Clean();
                        break;
                    default:
                        matched = false;
                        break;
                }

                if (matched)
                {
                    // Stop scanning this cell's children once a label has been handled.
                    break;
                }
            }
        }
    }
}