        public async Task ImportObservations(ImportObservationsMessage message, StatisticsDbContext context)
        {
            var import = await _dataImportService.GetImport(message.Id);

            _logger.LogInformation($"Importing Observations for {import.File.Filename} batchNo {message.BatchNo}");

            if (import.Status.IsFinished())
            {
                _logger.LogInformation($"Import for {import.File.Filename} already finished with state " +
                                       $"{import.Status} - ignoring Observations in file {message.ObservationsFilePath}");
                return;
            }

            if (import.Status == CANCELLING)
            {
                _logger.LogInformation($"Import for {import.File.Filename} is " +
                                       $"{import.Status} - ignoring Observations in file {message.ObservationsFilePath} " +
                                       "and marking import as CANCELLED");

                await _dataImportService.UpdateStatus(message.Id, CANCELLED, 100);

                return;
            }

            var subject = await context.Subject.FindAsync(import.SubjectId);

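            // Stream the batch's observations CSV and the subject's metadata CSV from blob storage
            // and load each into an in-memory DataTable.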
            var datafileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, message.ObservationsFilePath);

            var dataFileTable = DataTableUtils.CreateFromStream(datafileStream);

            var metaFileStream = await _blobStorageService.StreamBlob(PrivateReleaseFiles, import.MetaFile.Path());

            var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

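            // Import the batch inside a single transaction, executed via EF Core's execution strategy
            // so that transient failures retry the whole unit of work rather than individual commands.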
            await context.Database.CreateExecutionStrategy().ExecuteAsync(async () =>
            {
                await using var transaction = await context.Database.BeginTransactionAsync();

                await _importerService.ImportObservations(
                    import,
                    dataFileTable.Columns,
                    dataFileTable.Rows,
                    subject,
                    _importerService.GetMeta(metaFileTable, subject, context),
                    message.BatchNo,
                    context
                    );

                await transaction.CommitAsync();
                await context.Database.CloseConnectionAsync();
            });

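            // For multi-batch imports, delete the processed batch file from blob storage now that
            // its observations have been committed.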
            if (import.NumBatches > 1)
            {
                await _blobStorageService.DeleteBlob(PrivateReleaseFiles, message.ObservationsFilePath);
            }

            await CheckComplete(message, context);
        }
        public async Task ProcessUploads(
            [QueueTrigger(ImportsPendingQueue)] ImportMessage message,
            ExecutionContext executionContext,
            [Queue(ImportsPendingQueue)] ICollector<ImportMessage> importStagesMessageQueue,
            [Queue(ImportsAvailableQueue)] ICollector<ImportObservationsMessage> importObservationsMessageQueue
            )
        {
            try
            {
                var import = await _dataImportService.GetImport(message.Id);

                _logger.LogInformation($"Processor Function processing import message for " +
                                       $"{import.File.Filename} at stage {import.Status}");

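                // Each stage does its work and then re-queues the same message, so this function
                // advances the import one stage at a time until STAGE_4 queues the observation import messages.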
                switch (import.Status)
                {
                case DataImportStatus.CANCELLING:
                    _logger.LogInformation($"Import for {import.File.Filename} is in the process of being " +
                                           "cancelled, so not processing to the next import stage - marking as " +
                                           "CANCELLED");
                    await _dataImportService.UpdateStatus(import.Id, DataImportStatus.CANCELLED, 100);

                    break;

                case DataImportStatus.CANCELLED:
                    _logger.LogInformation($"Import for {import.File.Filename} is cancelled, so not " +
                                           "processing any further");
                    break;

                case DataImportStatus.QUEUED:
                case DataImportStatus.PROCESSING_ARCHIVE_FILE:
                {
                    if (import.ZipFile != null)
                    {
                        _logger.LogInformation($"Unpacking archive for {import.ZipFile.Filename}");
                        await _processorService.ProcessUnpackingArchive(import.Id);
                    }

                    await _dataImportService.UpdateStatus(import.Id, DataImportStatus.STAGE_1, 0);

                    importStagesMessageQueue.Add(message);
                    break;
                }

                case DataImportStatus.STAGE_1:
                    await _processorService.ProcessStage1(import.Id, executionContext);

                    await _dataImportService.UpdateStatus(import.Id, DataImportStatus.STAGE_2, 0);

                    importStagesMessageQueue.Add(message);
                    break;

                case DataImportStatus.STAGE_2:
                    await _processorService.ProcessStage2(import.Id);

                    await _dataImportService.UpdateStatus(import.Id, DataImportStatus.STAGE_3, 0);

                    importStagesMessageQueue.Add(message);
                    break;

                case DataImportStatus.STAGE_3:
                    await _processorService.ProcessStage3(import.Id);

                    await _dataImportService.UpdateStatus(import.Id, DataImportStatus.STAGE_4, 0);

                    importStagesMessageQueue.Add(message);
                    break;

                case DataImportStatus.STAGE_4:
                    await _processorService.ProcessStage4Messages(import.Id, importObservationsMessageQueue);

                    break;
                }
            }
            catch (Exception e)
            {
                var ex = GetInnerException(e);

                _logger.LogError(ex, $"{GetType().Name} function FAILED for Import: " +
                                 $"{message.Id} : {ex.Message}");
                _logger.LogError(ex.StackTrace);

                await _dataImportService.FailImport(message.Id);
            }
        }
        private async Task SplitFiles(
            DataImport dataImport,
            DataTable dataFileTable)
        {
            var colValues  = CsvUtil.GetColumnValues(dataFileTable.Columns);
            var batches    = dataFileTable.Rows.OfType<DataRow>().Batch(dataImport.RowsPerBatch);
            var batchCount = 1;
            var numRows    = dataFileTable.Rows.Count + 1;
            var numBatches = (int)Math.Ceiling((double)dataFileTable.Rows.Count / dataImport.RowsPerBatch);

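            // Look up any batch files already in blob storage for this data file so that batches
            // created by a previous run can be skipped in the loop below.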
            var existingBatchFiles = await _batchService.GetBatchFilesForDataFile(dataImport.File);

            var existingBatchFileNumbers = existingBatchFiles
                                           .Select(blobInfo => GetBatchNumberFromBatchFileName(blobInfo.FileName))
                                           .ToList();

            // TODO: EES-1608 - this flag keeps track of whether any batch files have been generated to date.
            // It is used in a legacy check to determine whether or not to generate a "no rows" batch file.
            // EES-1608 will investigate the circumstances that could lead to a "no rows" batch file
            // situation, and whether this check can be removed entirely.
            var batchFilesExist = existingBatchFileNumbers.Any();

            foreach (var batch in batches)
            {
                var currentStatus = await _dataImportService.GetImportStatus(dataImport.Id);

                if (currentStatus.IsFinishedOrAborting())
                {
                    _logger.LogInformation(
                        $"Import for {dataImport.File.Filename} is finished or aborting - stopping creating batch files");
                    return;
                }

                if (existingBatchFileNumbers.Contains(batchCount))
                {
                    _logger.LogInformation($"Batch {batchCount} already exists - not recreating");
                    batchCount++;
                    continue;
                }

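                // Build this batch as an in-memory CSV: copy the column structure and the batch's rows
                // into a new DataTable before writing it out below.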
                await using var stream = new MemoryStream();
                var writer = new StreamWriter(stream);
                await writer.FlushAsync();

                var table = new DataTable();
                CopyColumns(dataFileTable, table);
                CopyRows(table, batch.ToList(), colValues, dataImport.HasSoleGeographicLevel());

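                // Report STAGE_3 progress based on the proportion of batches processed so far.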
                var percentageComplete = (double)batchCount / numBatches * 100;

                await _dataImportService.UpdateStatus(dataImport.Id, DataImportStatus.STAGE_3, percentageComplete);

                // If the batch contains no rows, don't create a batch file unless it's the last batch
                // and there are zero rows in total, in which case create a zero-row batch file
                if (table.Rows.Count == 0 && (batchCount != numBatches || batchFilesExist))
                {
                    _logger.LogInformation($"Skipping batch file for row count {table.Rows.Count} with batchCount {batchCount} and numBatches {numBatches} and batchFilesExist {batchFilesExist} and batch {batch.Count()}");
                    batchCount++;
                    continue;
                }

                WriteDataTableToStream(table, writer);
                await writer.FlushAsync();

                stream.Seek(0, SeekOrigin.Begin);

                await _blobStorageService.UploadStream(
                    containerName: PrivateReleaseFiles,
                    path: dataImport.File.BatchPath(batchCount),
                    stream: stream,
                    contentType: "text/csv",
                    metadata: GetDataFileMetaValues(
                        metaFileName: dataImport.MetaFile.Filename,
                        numberOfRows: numRows
                    ));

                batchFilesExist = true;
                batchCount++;
            }
        }