Validate(SubjectData subjectData, ExecutionContext executionContext, ImportMessage message) { _logger.LogInformation($"Validating: {message.DataFileName}"); await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName, IStatus.STAGE_1); return(await ValidateCsvFile(subjectData.DataBlob, false) .OnSuccessDo(async() => await ValidateCsvFile(subjectData.MetaBlob, true)) .OnSuccess( async() => { await using var dataFileStream = await _fileStorageService.StreamBlob(subjectData.DataBlob); var dataFileTable = DataTableUtils.CreateFromStream(dataFileStream); await using var metaFileStream = await _fileStorageService.StreamBlob(subjectData.MetaBlob); var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream); return await ValidateMetaHeader(metaFileTable.Columns) .OnSuccess(() => ValidateMetaRows(metaFileTable.Columns, metaFileTable.Rows)) .OnSuccess(() => ValidateObservationHeaders(dataFileTable.Columns)) .OnSuccess( () => ValidateAndCountObservations(dataFileTable.Columns, dataFileTable.Rows, executionContext, message.Release.Id, message.DataFileName) .OnSuccess( result => { _logger.LogInformation( $"Validating: {message.DataFileName} complete"); return result; } ) ); })); }
/// <summary>
/// Imports one batch of observation rows for a data import, inside a retryable
/// transaction. Skips the batch when the import has already finished, and marks
/// the import CANCELLED when a cancellation is in progress.
/// </summary>
/// <param name="message">Identifies the import, the batch number and the batch blob path.</param>
/// <param name="context">Statistics database context used for the transactional insert.</param>
public async Task ImportObservations(ImportObservationsMessage message, StatisticsDbContext context)
{
    var import = await _dataImportService.GetImport(message.Id);

    _logger.LogInformation($"Importing Observations for {import.File.Filename} batchNo {message.BatchNo}");

    // A finished import means this batch is a late/duplicate message - drop it.
    if (import.Status.IsFinished())
    {
        _logger.LogInformation($"Import for {import.File.Filename} already finished with state " +
                               $"{import.Status} - ignoring Observations in file {message.ObservationsFilePath}");
        return;
    }

    // A cancellation is in flight - acknowledge it rather than importing more rows.
    if (import.Status == CANCELLING)
    {
        _logger.LogInformation($"Import for {import.File.Filename} is " +
                               $"{import.Status} - ignoring Observations in file {message.ObservationsFilePath} " +
                               "and marking import as CANCELLED");
        await _dataImportService.UpdateStatus(message.Id, CANCELLED, 100);
        return;
    }

    var subject = await context.Subject.FindAsync(import.SubjectId);

    // Fix: dispose both blob streams once the DataTables have been materialised
    // (they were previously leaked; the other import methods already use await using).
    await using var dataFileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, message.ObservationsFilePath);
    var dataFileTable = DataTableUtils.CreateFromStream(dataFileStream);

    await using var metaFileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, import.MetaFile.Path());
    var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

    // The execution strategy may retry the whole delegate, so the explicit
    // transaction must be created (and committed) inside it.
    await context.Database.CreateExecutionStrategy().Execute(async () =>
    {
        await using var transaction = await context.Database.BeginTransactionAsync();

        await _importerService.ImportObservations(
            import,
            dataFileTable.Columns,
            dataFileTable.Rows,
            subject,
            _importerService.GetMeta(metaFileTable, subject, context),
            message.BatchNo,
            context
        );

        await transaction.CommitAsync();
        await context.Database.CloseConnectionAsync();
    });

    // Batch files are temporary artifacts of splitting - remove this one unless
    // the data file was small enough to be imported as a single batch.
    if (import.NumBatches > 1)
    {
        await _blobStorageService.DeleteBlob(PrivateReleaseFiles, message.ObservationsFilePath);
    }

    await CheckComplete(message, context);
}
/// <summary>
/// Stage 2 of the import pipeline: imports the subject's metadata (filters,
/// indicators) from the meta file, then imports filters and locations derived
/// from the data file.
/// </summary>
/// <param name="importId">Id of the import being processed.</param>
public async Task ProcessStage2(Guid importId)
{
    // Fix: this method creates its own context, so it must also dispose it
    // (it was previously leaked along with the meta file stream).
    await using var statisticsDbContext = DbUtils.CreateStatisticsDbContext();

    var import = await _dataImportService.GetImport(importId);
    var subject = await statisticsDbContext.Subject.FindAsync(import.SubjectId);

    await using var metaFileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, import.MetaFile.Path());
    var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

    _importerService.ImportMeta(metaFileTable, subject, statisticsDbContext);
    await statisticsDbContext.SaveChangesAsync();

    await _fileImportService.ImportFiltersAndLocations(import.Id, statisticsDbContext);
    await statisticsDbContext.SaveChangesAsync();
}
/// <summary>
/// Splits the uploaded data file into batch files when it holds more rows than
/// a single batch allows; otherwise leaves it to be imported in one pass.
/// </summary>
/// <param name="message">Import message carrying the file name and batch size.</param>
/// <param name="subjectData">Blob references for the subject's data and meta files.</param>
public async Task SplitDataFile(
    ImportMessage message,
    SubjectData subjectData)
{
    await using var sourceStream = await _fileStorageService.StreamBlob(subjectData.DataBlob);
    var sourceTable = DataTableUtils.CreateFromStream(sourceStream);

    // Small files fit in a single batch - nothing to do.
    if (sourceTable.Rows.Count <= message.RowsPerBatch)
    {
        _logger.LogInformation($"No splitting of datafile: {message.DataFileName} was necessary");
        return;
    }

    _logger.LogInformation($"Splitting Datafile: {message.DataFileName}");
    await SplitFiles(message, subjectData, sourceTable);
    _logger.LogInformation($"Split of Datafile: {message.DataFileName} complete");
}
/// <summary>
/// Splits the import's data file into batch files when it holds more rows than
/// a single batch allows; otherwise leaves it to be imported in one pass.
/// </summary>
/// <param name="importId">Id of the import whose data file may need splitting.</param>
public async Task SplitDataFile(Guid importId)
{
    var import = await _dataImportService.GetImport(importId);

    // Fix: dispose the blob stream (it was previously leaked; the overload
    // taking an ImportMessage already uses await using here).
    await using var dataFileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, import.File.Path());
    var dataFileTable = DataTableUtils.CreateFromStream(dataFileStream);

    if (dataFileTable.Rows.Count > import.RowsPerBatch)
    {
        _logger.LogInformation($"Splitting Datafile: {import.File.Filename}");
        await SplitFiles(import, dataFileTable);
        _logger.LogInformation($"Split of Datafile: {import.File.Filename} complete");
    }
    else
    {
        _logger.LogInformation($"No splitting of datafile: {import.File.Filename} was necessary");
    }
}
/// <summary>
/// Imports the filters and locations referenced by an import's data file,
/// using the subject metadata parsed from its meta file.
/// </summary>
/// <param name="importId">Id of the import being processed.</param>
/// <param name="context">Statistics database context to import into.</param>
public async Task ImportFiltersAndLocations(Guid importId, StatisticsDbContext context)
{
    var import = await _dataImportService.GetImport(importId);
    var subject = await context.Subject.FindAsync(import.SubjectId);

    // Fix: dispose both blob streams once the DataTables have been materialised
    // (they were previously leaked; sibling methods already use await using).
    await using var dataFileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, import.File.Path());
    var dataFileTable = DataTableUtils.CreateFromStream(dataFileStream);

    await using var metaFileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, import.MetaFile.Path());
    var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

    await _importerService.ImportFiltersAndLocations(
        import,
        dataFileTable.Columns,
        dataFileTable.Rows,
        _importerService.GetMeta(metaFileTable, subject, context),
        context);
}
/// <summary>
/// Sets up the subject for an import: creates or updates the release, imports
/// the subject metadata from the meta file, then imports filters and locations.
/// </summary>
/// <param name="message">Import message identifying the release and data file.</param>
/// <param name="statisticsDbContext">Statistics database context to import into.</param>
/// <param name="contentDbContext">Content database context for release creation.</param>
/// <param name="subjectData">Blob references for the subject's data and meta files.</param>
private async Task ProcessSubject(
    ImportMessage message,
    StatisticsDbContext statisticsDbContext,
    ContentDbContext contentDbContext,
    SubjectData subjectData)
{
    var subject = _releaseProcessorService.CreateOrUpdateRelease(
        subjectData, message, statisticsDbContext, contentDbContext);

    // Parse the meta file into a DataTable, disposing the blob stream afterwards.
    await using var metaStream = await _fileStorageService.StreamBlob(subjectData.MetaBlob);
    var metaTable = DataTableUtils.CreateFromStream(metaStream);

    _importerService.ImportMeta(metaTable, subject, statisticsDbContext);
    await statisticsDbContext.SaveChangesAsync();

    await _fileImportService.ImportFiltersAndLocations(message, statisticsDbContext);
    await statisticsDbContext.SaveChangesAsync();
}
/// <summary>
/// Imports the filters and locations referenced by the message's data file,
/// using the subject metadata parsed from the accompanying meta file.
/// </summary>
/// <param name="message">Import message identifying the release, subject and data file.</param>
/// <param name="context">Statistics database context to import into.</param>
public async Task ImportFiltersAndLocations(ImportMessage message, StatisticsDbContext context)
{
    // Resolve the blobs for this release's data file and its subject link.
    var dataBlobPath = FileStoragePathUtils.AdminReleasePath(
        message.Release.Id, FileType.Data, message.DataFileName);
    var subjectData = await _fileStorageService.GetSubjectData(message.Release.Id, dataBlobPath);
    var releaseSubject = GetReleaseSubjectLink(message.Release.Id, message.SubjectId, context);

    // Materialise both CSVs as DataTables, disposing the blob streams afterwards.
    await using var dataStream = await _fileStorageService.StreamBlob(subjectData.DataBlob);
    var dataTable = DataTableUtils.CreateFromStream(dataStream);

    await using var metaStream = await _fileStorageService.StreamBlob(subjectData.MetaBlob);
    var metaTable = DataTableUtils.CreateFromStream(metaStream);

    var subjectMeta = _importerService.GetMeta(metaTable, releaseSubject.Subject, context);

    await _importerService.ImportFiltersAndLocations(
        dataTable.Columns,
        dataTable.Rows,
        subjectMeta,
        context,
        message.Release.Id,
        message.DataFileName);
}
/// <summary>
/// Imports one batch of observation rows for a release's data file, inside a
/// retryable transaction. Skips the batch when the import has already finished,
/// and marks the import CANCELLED when a cancellation is in progress.
/// </summary>
/// <param name="message">Identifies the release, subject, batch number and batch blob path.</param>
/// <param name="context">Statistics database context used for the transactional insert.</param>
public async Task ImportObservations(ImportObservationsMessage message, StatisticsDbContext context)
{
    var releaseId = message.ReleaseId;

    var status = await _importStatusService.GetImportStatus(releaseId, message.DataFileName);

    // A finished import means this batch is a late/duplicate message - drop it.
    if (status.IsFinished())
    {
        _logger.LogInformation($"Import for {message.DataFileName} already finished with state " +
                               $"{status.Status} - ignoring Observations in file {message.ObservationsFilePath}");
        return;
    }

    // A cancellation is in flight - acknowledge it rather than importing more rows.
    // Fix: the old message rendered as "is CANCELLING CANCELLING"; match the
    // "is {status}" wording used elsewhere.
    if (status.Status == CANCELLING)
    {
        _logger.LogInformation($"Import for {message.DataFileName} is " +
                               $"{status.Status} - ignoring Observations in file {message.ObservationsFilePath} " +
                               "and marking import as CANCELLED");
        await _importStatusService.UpdateStatus(releaseId, message.DataFileName, CANCELLED, 100);
        return;
    }

    var subjectData = await _fileStorageService.GetSubjectData(message.ReleaseId, message.ObservationsFilePath);
    var releaseSubject = GetReleaseSubjectLink(message.ReleaseId, message.SubjectId, context);

    await using var datafileStream = await _fileStorageService.StreamBlob(subjectData.DataBlob);
    var dataFileTable = DataTableUtils.CreateFromStream(datafileStream);

    await using var metaFileStream = await _fileStorageService.StreamBlob(subjectData.MetaBlob);
    var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

    // The execution strategy may retry the whole delegate, so the explicit
    // transaction must be created (and committed) inside it.
    await context.Database.CreateExecutionStrategy().Execute(async () =>
    {
        await using var transaction = await context.Database.BeginTransactionAsync();

        await _importerService.ImportObservations(
            dataFileTable.Columns,
            dataFileTable.Rows,
            releaseSubject.Subject,
            _importerService.GetMeta(metaFileTable, releaseSubject.Subject, context),
            message.BatchNo,
            message.RowsPerBatch,
            context
        );

        await transaction.CommitAsync();
        await context.Database.CloseConnectionAsync();
    });

    // Batch files are temporary artifacts of splitting - remove this one unless
    // the data file was small enough to be imported as a single batch.
    if (message.NumBatches > 1)
    {
        await _fileStorageService.DeleteBlobByPath(message.ObservationsFilePath);
    }

    await CheckComplete(releaseId, message, context);
}