/// <summary>
/// Imports one batch of Observations for the import identified by <paramref name="message"/>.
/// </summary>
/// <remarks>
/// Nothing is imported when the import has already finished. When the import is CANCELLING, it is marked
/// as CANCELLED (100% complete) and the batch is ignored. Otherwise the batch file and the meta file are
/// streamed from blob storage, the Observations are imported inside a database transaction, the batch
/// file is deleted when the import consists of more than one batch, and <c>CheckComplete</c> is called.
/// </remarks>
/// <param name="message">Carries the import id, the batch number and the blob path of the batch file.</param>
/// <param name="context">Statistics database context used for the Subject lookup and the transactional import.</param>
public async Task ImportObservations(ImportObservationsMessage message, StatisticsDbContext context)
{
    var import = await _dataImportService.GetImport(message.Id);

    _logger.LogInformation($"Importing Observations for {import.File.Filename} batchNo {message.BatchNo}");

    if (import.Status.IsFinished())
    {
        _logger.LogInformation($"Import for {import.File.Filename} already finished with state " +
                               $"{import.Status} - ignoring Observations in file {message.ObservationsFilePath}");
        return;
    }

    if (import.Status == CANCELLING)
    {
        _logger.LogInformation($"Import for {import.File.Filename} is " +
                               $"{import.Status} - ignoring Observations in file {message.ObservationsFilePath} " +
                               "and marking import as CANCELLED");

        await _dataImportService.UpdateStatus(message.Id, CANCELLED, 100);
        return;
    }

    var subject = await context.Subject.FindAsync(import.SubjectId);

    var datafileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, message.ObservationsFilePath);
    var dataFileTable = DataTableUtils.CreateFromStream(datafileStream);

    var metaFileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, import.MetaFile.Path());
    var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

    // ExecuteAsync is used instead of the synchronous Execute: handing an async lambda to Execute makes
    // it return at the lambda's first await, so a failure after that point would surface only on the
    // returned task and never be seen (or retried) by the execution strategy.
    await context.Database.CreateExecutionStrategy().ExecuteAsync(async () =>
    {
        await using var transaction = await context.Database.BeginTransactionAsync();

        await _importerService.ImportObservations(
            import,
            dataFileTable.Columns,
            dataFileTable.Rows,
            subject,
            _importerService.GetMeta(metaFileTable, subject, context),
            message.BatchNo,
            context
        );

        await transaction.CommitAsync();
        await context.Database.CloseConnectionAsync();
    });

    // Only clean up the batch file when the import consists of more than one batch.
    if (import.NumBatches > 1)
    {
        await _blobStorageService.DeleteBlob(PrivateReleaseFiles, message.ObservationsFilePath);
    }

    await CheckComplete(message, context);
}
/// <summary>
/// Queue-triggered handler that advances an import by one step, depending on its current status.
/// </summary>
/// <remarks>
/// For QUEUED / PROCESSING_ARCHIVE_FILE and STAGE_1 to STAGE_3 the work for that status is run, the import
/// is moved on to the next stage at 0% progress, and the message is added back onto
/// <paramref name="importStagesMessageQueue"/>. STAGE_4 hands
/// <paramref name="importObservationsMessageQueue"/> to the processor service instead. A CANCELLING
/// import is marked CANCELLED; a CANCELLED import is only logged. Any exception is logged and the import
/// is marked as failed.
/// </remarks>
/// <param name="message">Queue message identifying the import to process.</param>
/// <param name="executionContext">Function execution context, passed through to stage 1.</param>
/// <param name="importStagesMessageQueue">Collector used to re-queue the message for the next stage.</param>
/// <param name="importObservationsMessageQueue">Collector handed to stage 4 processing.</param>
public async Task ProcessUploads(
    [QueueTrigger(ImportsPendingQueue)] ImportMessage message,
    ExecutionContext executionContext,
    [Queue(ImportsPendingQueue)] ICollector<ImportMessage> importStagesMessageQueue,
    [Queue(ImportsAvailableQueue)] ICollector<ImportObservationsMessage> importObservationsMessageQueue
)
{
    try
    {
        var dataImport = await _dataImportService.GetImport(message.Id);

        // Records the next stage at 0% progress and puts the message back on the queue.
        async Task MoveOnTo(DataImportStatus nextStage)
        {
            await _dataImportService.UpdateStatus(dataImport.Id, nextStage, 0);
            importStagesMessageQueue.Add(message);
        }

        _logger.LogInformation(
            $"Processor Function processing import message for {dataImport.File.Filename} at stage {dataImport.Status}");

        switch (dataImport.Status)
        {
            case DataImportStatus.CANCELLING:
                _logger.LogInformation(
                    $"Import for {dataImport.File.Filename} is in the process of being cancelled, so not processing to the next import stage - marking as CANCELLED");
                await _dataImportService.UpdateStatus(dataImport.Id, DataImportStatus.CANCELLED, 100);
                break;

            case DataImportStatus.CANCELLED:
                _logger.LogInformation(
                    $"Import for {dataImport.File.Filename} is cancelled, so not processing any further");
                break;

            case DataImportStatus.QUEUED:
            case DataImportStatus.PROCESSING_ARCHIVE_FILE:
                // Unpacking only applies when the import was uploaded as an archive.
                if (dataImport.ZipFile != null)
                {
                    _logger.LogInformation($"Unpacking archive for {dataImport.ZipFile.Filename}");
                    await _processorService.ProcessUnpackingArchive(dataImport.Id);
                }

                await MoveOnTo(DataImportStatus.STAGE_1);
                break;

            case DataImportStatus.STAGE_1:
                await _processorService.ProcessStage1(dataImport.Id, executionContext);
                await MoveOnTo(DataImportStatus.STAGE_2);
                break;

            case DataImportStatus.STAGE_2:
                await _processorService.ProcessStage2(dataImport.Id);
                await MoveOnTo(DataImportStatus.STAGE_3);
                break;

            case DataImportStatus.STAGE_3:
                await _processorService.ProcessStage3(dataImport.Id);
                await MoveOnTo(DataImportStatus.STAGE_4);
                break;

            case DataImportStatus.STAGE_4:
                await _processorService.ProcessStage4Messages(dataImport.Id, importObservationsMessageQueue);
                break;
        }
    }
    catch (Exception exception)
    {
        var inner = GetInnerException(exception);

        _logger.LogError(inner, $"{GetType().Name} function FAILED for Import: {message.Id} : {inner.Message}");
        _logger.LogError(inner.StackTrace);

        await _dataImportService.FailImport(message.Id);
    }
}
/// <summary>
/// Splits the rows of a data file into batches of <c>dataImport.RowsPerBatch</c> rows and uploads each
/// batch to blob storage as its own CSV file.
/// </summary>
/// <remarks>
/// Batches whose file already exists are not recreated. The import's status is re-read before each batch
/// and the method returns early once the import is finished or aborting. Progress is reported against
/// <c>STAGE_3</c> for each batch that is processed.
/// </remarks>
/// <param name="dataImport">The import whose data file is being split.</param>
/// <param name="dataFileTable">The rows and columns of the data file.</param>
private async Task SplitFiles(DataImport dataImport, DataTable dataFileTable)
{
    var colValues = CsvUtil.GetColumnValues(dataFileTable.Columns);
    var batches = dataFileTable.Rows.OfType<DataRow>().Batch(dataImport.RowsPerBatch);
    var batchCount = 1;
    // NOTE(review): the +1 presumably accounts for the header row - confirm against consumers of the metadata.
    var numRows = dataFileTable.Rows.Count + 1;
    var numBatches = (int)Math.Ceiling((double)dataFileTable.Rows.Count / dataImport.RowsPerBatch);

    var existingBatchFiles = await _batchService.GetBatchFilesForDataFile(dataImport.File);

    // Materialised once up front: left as a deferred query, the batch numbers would be re-parsed from
    // the file names on every Any()/Contains() call made below, i.e. once per batch in the loop.
    var existingBatchFileNumbers = existingBatchFiles
        .Select(blobInfo => GetBatchNumberFromBatchFileName(blobInfo.FileName))
        .ToList();

    // TODO: EES-1608 - this flag keeps a track of whether any batch files have been generated to date.
    // It is used in a legacy check to determine whether or not to generate a "no rows" batch file.
    // EES-1608 will investigate what the circumstances are that could lead to a "no rows" batch file
    // situation, and whether this check can actually be entirely removed or not.
    var batchFilesExist = existingBatchFileNumbers.Any();

    foreach (var batch in batches)
    {
        var currentStatus = await _dataImportService.GetImportStatus(dataImport.Id);

        if (currentStatus.IsFinishedOrAborting())
        {
            _logger.LogInformation(
                $"Import for {dataImport.File.Filename} is finished or aborting - stopping creating batch files");
            return;
        }

        if (existingBatchFileNumbers.Contains(batchCount))
        {
            _logger.LogInformation($"Batch {batchCount} already exists - not recreating");
            batchCount++;
            continue;
        }

        await using var stream = new MemoryStream();
        var writer = new StreamWriter(stream);

        var table = new DataTable();
        CopyColumns(dataFileTable, table);
        CopyRows(table, batch.ToList(), colValues, dataImport.HasSoleGeographicLevel());

        var percentageComplete = (double)batchCount / numBatches * 100;

        await _dataImportService.UpdateStatus(dataImport.Id, DataImportStatus.STAGE_3, percentageComplete);

        // If no lines then don't create a batch unless it's the last one & there are zero
        // lines in total in which case create a zero lines batch
        if (table.Rows.Count == 0 && (batchCount != numBatches || batchFilesExist))
        {
            _logger.LogInformation($"Skipping batch file for row count {table.Rows.Count} with batchCount {batchCount} and numBatches {numBatches} and batchFilesExist {batchFilesExist} and batch {batch.Count()}");
            batchCount++;
            continue;
        }

        WriteDataTableToStream(table, writer);
        await writer.FlushAsync();

        // Rewind so the upload reads the batch from its first byte.
        stream.Seek(0, SeekOrigin.Begin);

        await _blobStorageService.UploadStream(
            containerName: PrivateReleaseFiles,
            path: dataImport.File.BatchPath(batchCount),
            stream: stream,
            contentType: "text/csv",
            metadata: GetDataFileMetaValues(
                metaFileName: dataImport.MetaFile.Filename,
                numberOfRows: numRows
            ));

        batchFilesExist = true;
        batchCount++;
    }
}