/// <summary>
/// Loads the HTML page at <paramref name="url"/> and builds a lookup of police
/// organisations from the link texts matched by two fixed XPath expressions.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <param name="logger">Receives an error entry if loading or parsing fails.</param>
/// <returns>
/// A lookup whose organisations carry the trimmed link text as name, an empty
/// sector and <c>DataSource.Police</c> as source.
/// </returns>
/// <remarks>
/// Any exception raised while loading or parsing is logged and rethrown.
/// NOTE(review): if an XPath matches nothing the LINQ chain presumably fails and
/// ends up in the catch block - confirm against SelectNodes' no-match behaviour.
/// </remarks>
public PublicSectorOrganisationLookUp Scrape(string url, ILog logger)
{
    // The two list sections differ only in the index of one div (1 vs 5).
    const string englandForcesXPath =
        "//*[@id=\"content\"]/div[2]/div/section/div[2]/div[4]/div/div/div[1]/div/div[2]/div/ul/li/a/text()";
    const string nationalForcesXPath =
        "//*[@id=\"content\"]/div[2]/div/section/div[2]/div[4]/div/div/div[5]/div/div[2]/div/ul/li/a/text()";

    var lookUp = new PublicSectorOrganisationLookUp();

    try
    {
        var htmlWeb = new HtmlWeb();
        var page = htmlWeb.Load(url);

        var englandNames = page.DocumentNode.SelectNodes(englandForcesXPath)
            .Where(node => !string.IsNullOrWhiteSpace(node.InnerText))
            .Select(node => node.InnerText.Trim());

        var nationalNames = page.DocumentNode.SelectNodes(nationalForcesXPath)
            .Where(node => !string.IsNullOrWhiteSpace(node.InnerText))
            .Select(node => node.InnerText.Trim());

        // England entries first, then the national ones.
        var organisations =
            from name in englandNames.Concat(nationalNames)
            select new PublicSectorOrganisation
            {
                Name = name,
                Sector = "",
                Source = DataSource.Police
            };

        lookUp.Organisations = organisations.ToList();
    }
    catch (Exception e)
    {
        logger.Error(e, "Cannot get Police organisations, potential format change");
        throw;
    }

    return lookUp;
}
/// <summary>
/// Serialises <paramref name="orgs"/> as indented JSON into the file at
/// <paramref name="filename"/>, creating or overwriting it, and then writes the
/// number of exported organisations to the console.
/// </summary>
/// <param name="filename">Path of the file to create (FileMode.Create).</param>
/// <param name="orgs">Lookup to serialise.</param>
/// <remarks>
/// Failures during the export are logged through <c>_logger</c> and rethrown;
/// the console message is only written after a successful export.
/// </remarks>
public void ExportFile(string filename, PublicSectorOrganisationLookUp orgs)
{
    try
    {
        // Stacked usings: disposed in reverse order (JSON writer, text writer, file stream).
        using (var fileStream = File.Open(filename, FileMode.Create))
        using (var textWriter = new StreamWriter(fileStream))
        using (var jsonWriter = new JsonTextWriter(textWriter))
        {
            jsonWriter.Formatting = Formatting.Indented;

            var jsonSerializer = new JsonSerializer();
            jsonSerializer.Serialize(jsonWriter, orgs);
        }
    }
    catch (Exception ex)
    {
        _logger.Error(ex, $"An error occurred exporting data to {filename}: {ex.Message}");
        throw;
    }

    Console.WriteLine($"Exported {orgs.Organisations.Count()} records");
}
/// <summary>
/// Reads the "Index$" sheet of the Excel workbook at <paramref name="excelFile"/>
/// through the Jet OLE DB provider and builds a lookup of ONS organisations.
/// </summary>
/// <param name="excelFile">Path of the workbook, used as the connection's data source.</param>
/// <returns>
/// A lookup whose organisations take their name from column F1 and their sector
/// from column F2, with <c>DataSource.Ons</c> as source. Rows whose sector
/// contains "former" (case-insensitively) are left out.
/// </returns>
/// <remarks>
/// Any exception raised while reading is logged through <c>_logger</c> and rethrown.
/// This includes the case where the query returns no rows at all, because the
/// first row is removed unconditionally.
/// </remarks>
public PublicSectorOrganisationLookUp UpdateDatabase(string excelFile)
{
    const string sheetName = "Index$";

    var ol = new PublicSectorOrganisationLookUp();

    try
    {
        var connectionString = $"Provider=Microsoft.Jet.OLEDB.4.0;Extended Properties='Excel 8.0;HDR=NO;';Data Source={excelFile}";

        using (var conn = new OleDbConnection(connectionString))
        using (var cmd = new OleDbCommand())
        {
            conn.Open();
            cmd.Connection = conn;

            // NOTE(review): the returned schema table is discarded; the call is kept
            // only to preserve existing behaviour - confirm whether it can be removed.
            conn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);

            cmd.CommandText = "SELECT F1, F2 FROM [" + sheetName + "] WHERE F1 IS NOT NULL AND F2 IS NOT NULL AND F1 <> 'Index'";

            // The adapter and the table are IDisposable, so they are scoped with
            // using blocks instead of being left for the finaliser.
            using (var da = new OleDbDataAdapter(cmd))
            using (var dt = new DataTable(sheetName))
            {
                da.Fill(dt);

                // Drops the first returned row - presumably a heading row; confirm
                // against the workbook layout.
                dt.Rows.Remove(dt.Rows[0]);

                // Ordinal, case-insensitive match so the filter does not depend on
                // the current culture. F2 cannot be null here because the query
                // already excludes null values.
                ol.Organisations = dt.AsEnumerable()
                    .Where(row => row.Field<string>("F2").IndexOf("former", StringComparison.OrdinalIgnoreCase) < 0)
                    .Select(row => new PublicSectorOrganisation
                    {
                        Name = row.Field<string>("F1"),
                        Sector = row.Field<string>("F2"),
                        Source = DataSource.Ons
                    })
                    .ToList();
            }

            // No explicit conn.Close(): disposing the connection closes it.
        }
    }
    catch (Exception e)
    {
        _logger.Error(e, "Cannot get ONS organisations, potential format change");
        throw;
    }

    return ol;
}
// Test fixture setup: creates six ONS organisations ("Test 1-3", "Example 1-3"),
// makes the mocked Azure service return them for any pair of string arguments,
// and wires the repository under test to the mocked cache, Azure service and logger.
public void Arrange()
{
    _logger = new Mock<ILog>();
    _cacheProvider = new Mock<ICacheProvider>();

    var organisations = new List<PublicSectorOrganisation>
    {
        new PublicSectorOrganisation { Name = "Test 1", Source = DataSource.Ons },
        new PublicSectorOrganisation { Name = "Test 2", Source = DataSource.Ons },
        new PublicSectorOrganisation { Name = "Test 3", Source = DataSource.Ons },
        new PublicSectorOrganisation { Name = "Example 1", Source = DataSource.Ons },
        new PublicSectorOrganisation { Name = "Example 2", Source = DataSource.Ons },
        new PublicSectorOrganisation { Name = "Example 3", Source = DataSource.Ons }
    };

    _lookup = new PublicSectorOrganisationLookUp();
    _lookup.Organisations = organisations;

    // _lookup must be assigned before this setup, since its value is passed to ReturnsAsync here.
    _azureService = new Mock<IAzureService>();
    _azureService
        .Setup(x => x.GetModelFromBlobStorage<PublicSectorOrganisationLookUp>(
            It.IsAny<string>(),
            It.IsAny<string>()))
        .ReturnsAsync(_lookup);

    _repository = new PublicSectorOrganisationRepository(
        _cacheProvider.Object,
        _azureService.Object,
        _logger.Object);
}
/// <summary>
/// Validates the updater configuration, gathers the ONS, NHS and police
/// organisations, merges them into one lookup, exports that lookup as a JSON
/// file in the working folder and uploads the file to storage.
/// </summary>
/// <returns>A task that completes when the export and upload calls have returned.</returns>
/// <exception cref="Exception">
/// Thrown when NhsTrustsUrls is null or empty, or when PoliceForcesUrl, ONSUrl
/// or OnsUrlDateFormat is null or whitespace. Any other failure is rethrown as-is.
/// </exception>
/// <remarks>
/// Every failure, including the configuration error, is logged as fatal through
/// <c>_logger</c> before being rethrown.
/// </remarks>
public async Task RunUpdate()
{
    try
    {
        _logger.Info("Running Public Organisations updater");

        // A null URL list is treated like an empty one, so it is reported as
        // missing configuration rather than surfacing as a NullReferenceException.
        var configurationIsIncomplete =
            _configuration.NhsTrustsUrls == null ||
            !_configuration.NhsTrustsUrls.Any() ||
            string.IsNullOrWhiteSpace(_configuration.PoliceForcesUrl) ||
            string.IsNullOrWhiteSpace(_configuration.ONSUrl) ||
            string.IsNullOrWhiteSpace(_configuration.OnsUrlDateFormat);

        if (configurationIsIncomplete)
        {
            const string errorMessage = "Missing configuration, check table storage configuration for NhsTrustsUrls, PoliceForcesUrl, ONSUrl and ONSUrlDateFormat";

            // The same exception instance is logged and thrown.
            var configurationException = new Exception(errorMessage);
            _logger.Error(configurationException, errorMessage);
            throw configurationException;
        }

        var onsOrgs = await GetOnsOrganisations();
        var nhsOrgs = await GetNhsOrganisations();
        var policeOrgs = GetPoliceOrganisations();

        // Merge order: ONS, then NHS, then police.
        var orgs = new PublicSectorOrganisationLookUp
        {
            Organisations = onsOrgs.Organisations
                .Concat(nhsOrgs.Organisations)
                .Concat(policeOrgs.Organisations)
                .ToList()
        };

        var jsonFilePath = Path.Combine(_workingFolder, _jsonFileName);

        _jsonManager.ExportFile(jsonFilePath, orgs);
        _jsonManager.UploadJsonToStorage(jsonFilePath);
    }
    catch (Exception ex)
    {
        _logger.Fatal(ex, $"The {_jsonFileName} has not been updated");
        throw;
    }
}