Example #1
0
        /// <summary>
        /// Walks every supplied search result page and, for each application summary path found on a page,
        /// builds a <see cref="PlanningApplication"/> by running the summary, details and contact extraction
        /// steps in that order.
        /// </summary>
        /// <param name="searchArea">Search area handed to the config resolver to select the configuration; also written (upper-cased) to the log.</param>
        /// <param name="searchResultPages">Search result page responses whose HTML content is parsed for application summary paths.</param>
        /// <param name="cookieContainer">Cookie container passed to every HTTP client created by this method.</param>
        /// <param name="cancellationToken">Token forwarded to the logger and to the extraction helpers.</param>
        /// <returns>The <c>_planningApplications</c> collection, with one entry appended per application processed.</returns>
        /// <exception cref="ExtractDataFailedException">Thrown when any step fails; the original exception is attached as the inner exception.</exception>
        public async Task <IEnumerable <PlanningApplication> > ExtractDataAsync(string searchArea, List <HttpResponseMessage> searchResultPages, CookieContainer cookieContainer, CancellationToken cancellationToken)
        {
            try
            {
                _configuration = _configResolver.ResolveConfig(searchArea);

                await _logger.LogInformationAsync($"Processing {searchResultPages.Count} search result pages for {searchArea.ToUpper()}", cancellationToken);

                var client = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);

                try
                {
                    var currentPage = 0;

                    foreach (var searchResults in searchResultPages)
                    {
                        currentPage++;

                        var searchResultsHtml     = await searchResults.Content.ReadAsStringAsync();
                        var searchPageResponseDoc = CQ.Create(searchResultsHtml);
                        var appSummaryPaths       = GetAppSummaryPaths(searchPageResponseDoc);

                        await _logger.LogInformationAsync($"Found {appSummaryPaths.Count} planning applications in page {currentPage}...", cancellationToken);

                        var row = 0;
                        foreach (var appSummaryPath in appSummaryPaths)
                        {
                            row++;
                            var planningApplication = new PlanningApplication();
                            await _logger.LogInformationAsync($"Getting application detail for result number {row} application {appSummaryPath}", cancellationToken);

                            await ExtractApplicationSummary(cancellationToken, appSummaryPath, client, planningApplication);

                            // The details step yields the path that the contact step then requests.
                            var appDetailsPath = await ExtractApplicationDetails(cancellationToken, appSummaryPath, client, planningApplication);

                            await ExtractApplicationContact(cancellationToken, appDetailsPath, client, planningApplication);

                            _planningApplications.Add(planningApplication);

                            if (_configuration.UseProxy)
                            {
                                // Refresh client/handler to get a new IP address. The previous client is
                                // disposed first so that each refresh does not leak a client.
                                client.Dispose();
                                client = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);
                            }
                        }
                    }

                    await _logger.LogInformationAsync($"Finished extracting planning data for {searchArea.ToUpper()}...", cancellationToken);
                }
                finally
                {
                    // Runs on success and on failure, so the current client is always released.
                    client.Dispose();
                }

                return _planningApplications;
            }
            catch (Exception ex)
            {
                // Wrap the caught exception itself (not ex.InnerException) so the real failure
                // and its stack trace remain reachable from the wrapper.
                throw new ExtractDataFailedException(ex.Message, ex);
            }
        }
Example #2
0
        /// <summary>
        /// Parses the first search result page and, for every <c>table tbody tr</c> row, builds a
        /// <see cref="PlanningApplication"/> from the row and then from its detail page.
        /// </summary>
        /// <param name="searchArea">Search area name; only used (upper-cased) in log messages here.</param>
        /// <param name="searchResultPages">Search result page responses; only the first element is read.</param>
        /// <param name="cookieContainer">Cookie container passed to every HTTP client created by this method.</param>
        /// <param name="cancellationToken">Token forwarded to the logger and to the row/detail helpers.</param>
        /// <returns>The <c>_planningApplications</c> collection, with one entry appended per result row.</returns>
        /// <exception cref="ExtractDataFailedException">Thrown when any step fails; the original exception is attached as the inner exception.</exception>
        public async Task <IEnumerable <PlanningApplication> > ExtractDataAsync(string searchArea, List <HttpResponseMessage> searchResultPages, CookieContainer cookieContainer, CancellationToken cancellationToken)
        {
            try
            {
                var searchResults = searchResultPages.First();
                var client        = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);

                try
                {
                    // NOTE(review): the Referer header is only set on this first client; clients created
                    // by the proxy refresh below do not get it. Kept as in the original - confirm intent.
                    client.DefaultRequestHeaders.Add("Referer", $"{searchResults.RequestMessage.RequestUri}");

                    var searchResultsHtml     = await searchResults.Content.ReadAsStringAsync();
                    var searchPageResponseDoc = CQ.Create(searchResultsHtml);

                    // Run the selector once and reuse it for both the count and the iteration.
                    var resultRows = searchPageResponseDoc.Select("table tbody tr");

                    await _logger.LogInformationAsync($"Found {resultRows.Length} planning applications for {searchArea.ToUpper()}...", cancellationToken);

                    var row = 1;

                    // A plain foreach (instead of CQ.Each with a lambda) lets the async helpers be awaited
                    // rather than blocked on with GetAwaiter().GetResult().
                    foreach (var tr in resultRows)
                    {
                        var planningApplication = new PlanningApplication();
                        await GetSeachRowDetailAsync(tr, planningApplication, cancellationToken);

                        await _logger.LogInformationAsync($"Getting application detail for result number {row} application reference {planningApplication.ApplicationReference} from {Environment.NewLine}{planningApplication.ApplicationLink}", cancellationToken);

                        await GetPlanningApplicationDetailAsync(client, planningApplication, cancellationToken);

                        // Document links (GetPlanningApplicationDocumentLinksAsync) are deliberately not
                        // fetched: can cause too much data and not really required.

                        _planningApplications.Add(planningApplication);

                        if (_configuration.UseProxy)
                        {
                            // Refresh client/handler to get a new IP address. The previous client is
                            // disposed first so that each refresh does not leak a client.
                            client.Dispose();
                            client = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);
                        }

                        row++;
                    }

                    await _logger.LogInformationAsync($"Finished extracting planning data for {searchArea.ToUpper()}...", cancellationToken);
                }
                finally
                {
                    // Runs on success and on failure, so the current client is always released.
                    client.Dispose();
                }

                return _planningApplications;
            }
            catch (Exception ex)
            {
                // Wrap the caught exception itself (not ex.InnerException) so the real failure
                // and its stack trace remain reachable from the wrapper.
                throw new ExtractDataFailedException(ex.Message, ex);
            }
        }
        /// <summary>
        /// Walks every supplied search result page, keeps the <c>table tbody tr</c> rows whose fifth child
        /// node text contains the configured search term, downloads the linked application page for each
        /// kept row and copies the labelled form values into a <see cref="PlanningApplication"/>.
        /// </summary>
        /// <param name="searchArea">Search area name; only used (upper-cased) in log messages here.</param>
        /// <param name="searchResultPages">Search result page responses whose HTML content is parsed for result rows.</param>
        /// <param name="cookieContainer">Cookie container passed to every HTTP client created by this method.</param>
        /// <param name="cancellationToken">Token forwarded to the logger and to the application page request.</param>
        /// <returns>The <c>_planningApplications</c> collection, with one entry appended per matching row.</returns>
        /// <exception cref="ExtractDataFailedException">Thrown when any step fails; the original exception is attached as the inner exception.</exception>
        public async Task <IEnumerable <PlanningApplication> > ExtractDataAsync(string searchArea, List <HttpResponseMessage> searchResultPages, CookieContainer cookieContainer, CancellationToken cancellationToken)
        {
            try
            {
                await _logger.LogInformationAsync($"Processing {searchResultPages.Count} search result pages for {searchArea.ToUpper()}", cancellationToken);

                var client = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);

                try
                {
                    foreach (var searchResults in searchResultPages)
                    {
                        var searchResultsHtml     = await searchResults.Content.ReadAsStringAsync();
                        var searchPageResponseDoc = CQ.Create(searchResultsHtml);

                        var row = 0;

                        // A plain foreach (instead of CQ.Each with a lambda) lets the async calls be awaited
                        // rather than blocked on with GetAwaiter().GetResult().
                        foreach (var searchRow in searchPageResponseDoc.Select("table tbody tr"))
                        {
                            row++;

                            // Filter by proposal text containing search term.
                            if (!searchRow.ChildNodes[4].InnerText.Contains(_configuration.SearchTerm))
                            {
                                continue;
                            }

                            var planningApplication = new PlanningApplication();

                            var appLink = searchRow.ChildNodes[2].ChildNodes[1].Attributes["href"];
                            await _logger.LogInformationAsync($"Getting application detail for result number {row} application {appLink}", cancellationToken);

                            string content;
                            using (var appDetailsResponse = await client.GetAsync(appLink, cancellationToken))
                            {
                                // The response is disposed once its body has been read into a string.
                                content = await appDetailsResponse.Content.ReadAsStringAsync();
                            }

                            var appDetailsResponseDoc = CQ.Create(content);

                            AssignLabelledFormFields(
                                appDetailsResponseDoc.Select("#MainContent_RadPageView1 .form tr"),
                                (label, labelCell) => TryAssignApplicationField(planningApplication, label, labelCell));

                            AssignLabelledFormFields(
                                appDetailsResponseDoc.Select("#MainContent_RadPageView2 .form tr"),
                                (label, labelCell) => TryAssignSiteField(planningApplication, label, labelCell));

                            _planningApplications.Add(planningApplication);

                            if (_configuration.UseProxy)
                            {
                                // Refresh client/handler to get a new IP address. The previous client is
                                // disposed first so that each refresh does not leak a client.
                                client.Dispose();
                                client = HttpClientHelpers.CreateClient(_configuration.BaseUri, _systemConfig, _configuration, _logger, cookieContainer);
                            }
                        }
                    }

                    await _logger.LogInformationAsync($"Finished extracting planning data for {searchArea.ToUpper()}...", cancellationToken);
                }
                finally
                {
                    // Runs on success and on failure, so the current client is always released.
                    client.Dispose();
                }

                return _planningApplications;
            }
            catch (Exception ex)
            {
                // Wrap the caught exception itself (not ex.InnerException) so the real failure
                // and its stack trace remain reachable from the wrapper.
                throw new ExtractDataFailedException(ex.Message, ex);
            }
        }

        /// <summary>
        /// Scans each cell of each form row and offers the text of the cell's element children to
        /// <paramref name="tryAssign"/>; scanning of a cell stops at the first child it accepts.
        /// </summary>
        /// <param name="formRows">The selected form rows to scan.</param>
        /// <param name="tryAssign">Receives the child text and the cell containing it; returns true when the text was a recognised label.</param>
        private static void AssignLabelledFormFields(CQ formRows, Func<string, IDomObject, bool> tryAssign)
        {
            foreach (var formRow in formRows)
            {
                foreach (var cell in formRow.ChildNodes)
                {
                    if (!cell.HasChildren)
                    {
                        continue;
                    }

                    foreach (var child in cell.ChildNodes)
                    {
                        if (child.NodeType == NodeType.ELEMENT_NODE && tryAssign(child.InnerText, cell))
                        {
                            break;
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Copies the value belonging to a recognised label from the <c>#MainContent_RadPageView1</c> form
        /// onto the application. Returns false, without touching the DOM further, for any other label.
        /// </summary>
        private static bool TryAssignApplicationField(PlanningApplication planningApplication, string label, IDomObject labelCell)
        {
            switch (label)
            {
                case "Application No":
                    planningApplication.ApplicationReference = GetLabelledValueNode(labelCell).Attributes["Value"].Clean();
                    return true;

                case "Type":
                    planningApplication.ApplicationType = GetLabelledValueNode(labelCell).Attributes["Value"].Clean();
                    return true;

                case "Applicant":
                    planningApplication.NameOfApplicant = GetLabelledValueNode(labelCell).Attributes["Value"].Clean();
                    return true;

                case "Agent":
                    planningApplication.AgentName = GetLabelledValueNode(labelCell).Attributes["Value"].Clean();
                    return true;

                case "Case Officer":
                    planningApplication.CaseOfficer = GetLabelledValueNode(labelCell).Attributes["Value"].Clean();
                    return true;

                case "Proposal":
                    // Unlike the other fields, the proposal is read from the node's text content, not its Value attribute.
                    planningApplication.Proposal = GetLabelledValueNode(labelCell).TextContent.Clean();
                    return true;

                case "Received Date":
                    planningApplication.RegisteredDate = GetLabelledValueNode(labelCell).Attributes["Value"].Clean();
                    return true;

                default:
                    return false;
            }
        }

        /// <summary>
        /// Copies the value belonging to a recognised label from the <c>#MainContent_RadPageView2</c> form
        /// onto the application. Returns false, without touching the DOM further, for any other label.
        /// </summary>
        private static bool TryAssignSiteField(PlanningApplication planningApplication, string label, IDomObject labelCell)
        {
            switch (label)
            {
                case "Address":
                    // The address is read from the node's text content, not its Value attribute.
                    planningApplication.SiteAddress = GetLabelledValueNode(labelCell).TextContent.Clean();
                    return true;

                case "Ward":
                    planningApplication.Wards = GetLabelledValueNode(labelCell).Attributes["Value"].Clean();
                    return true;

                case "Parish":
                    planningApplication.Parishes = GetLabelledValueNode(labelCell).Attributes["Value"].Clean();
                    return true;

                default:
                    return false;
            }
        }

        /// <summary>
        /// Returns the node holding a label's value: the second child node of the label cell's second next sibling.
        /// </summary>
        private static IDomObject GetLabelledValueNode(IDomObject labelCell)
        {
            return labelCell.NextSibling.NextSibling.ChildNodes[1];
        }