        // NOTE: the modifiers and return type below are inferred from the Either-style
        // OnSuccess chaining in the body; the original snippet omitted them
        public async Task<Either<IEnumerable<ValidationError>, ProcessorStatistics>> Validate(
            SubjectData subjectData, ExecutionContext executionContext, ImportMessage message)
        {
            _logger.LogInformation($"Validating: {message.DataFileName}");

            await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName, IStatus.STAGE_1);

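            // Validate both CSVs structurally first, then load them into DataTables so the
            // meta headers/rows and the observation headers can be checked in turn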
            return (await ValidateCsvFile(subjectData.DataBlob, false)
                .OnSuccessDo(async () => await ValidateCsvFile(subjectData.MetaBlob, true))
                .OnSuccess(async () =>
                {
                    await using var dataFileStream = await _fileStorageService.StreamBlob(subjectData.DataBlob);
                    var dataFileTable = DataTableUtils.CreateFromStream(dataFileStream);

                    await using var metaFileStream = await _fileStorageService.StreamBlob(subjectData.MetaBlob);
                    var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

                    return await ValidateMetaHeader(metaFileTable.Columns)
                        .OnSuccess(() => ValidateMetaRows(metaFileTable.Columns, metaFileTable.Rows))
                        .OnSuccess(() => ValidateObservationHeaders(dataFileTable.Columns))
                        .OnSuccess(() => ValidateAndCountObservations(
                                dataFileTable.Columns, dataFileTable.Rows,
                                executionContext, message.Release.Id, message.DataFileName)
                            .OnSuccess(result =>
                            {
                                _logger.LogInformation($"Validating: {message.DataFileName} complete");
                                return result;
                            }));
                }));
        }
Example #2
        public async Task ImportFiltersAndLocations(DataColumnCollection cols, DataRowCollection rows,
                                                    SubjectMeta subjectMeta, StatisticsDbContext context, Guid releaseId, string dataFileName)
        {
            // Clearing the caches is required here as the seeder shares the cache with all subjects
            _importerFilterService.ClearCache();
            _importerLocationService.ClearCache();

            var headers   = CsvUtil.GetColumnValues(cols);
            var rowCount  = 1;
            var totalRows = rows.Count;

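            // Every STAGE_2_ROW_CHECK rows, poll the import status so a finished or aborting
            // import can stop this loop early, and report percentage progress otherwise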
            foreach (DataRow row in rows)
            {
                if (rowCount % STAGE_2_ROW_CHECK == 0)
                {
                    var currentStatus = await _importStatusService.GetImportStatus(releaseId, dataFileName);

                    if (currentStatus.IsFinishedOrAborting())
                    {
                        _logger.LogInformation($"Import for {dataFileName} has finished or is being aborted, " +
                                               $"so finishing importing Filters and Locations early");
                        return;
                    }

                    await _importStatusService.UpdateStatus(releaseId,
                                                            dataFileName,
                                                            IStatus.STAGE_2,
                                                            (double)rowCount / totalRows * 100);
                }

                CreateFiltersAndLocationsFromCsv(context, CsvUtil.GetRowValues(row), headers, subjectMeta.Filters);
                rowCount++;
            }
        }
        private async Task SplitFiles(
            ImportMessage message,
            SubjectData subjectData,
            DataTable dataFileTable)
        {
            var headerList = CsvUtil.GetColumnValues(dataFileTable.Columns);
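            // Split the source rows into fixed-size batches. numRows adds 1 to the row
            // count, presumably to account for the header row in each uploaded file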
            var batches    = dataFileTable.Rows.OfType <DataRow>().Batch(message.RowsPerBatch);
            var batchCount = 1;
            var numRows    = dataFileTable.Rows.Count + 1;
            var numBatches = (int)Math.Ceiling((double)dataFileTable.Rows.Count / message.RowsPerBatch);

            var existingBatchFiles = await _fileStorageService.GetBatchFilesForDataFile(
                message.Release.Id,
                message.DataFileName);

            // Materialise the batch numbers up front so the query is not re-evaluated on
            // every iteration of the loop below
            var existingBatchFileNumbers = existingBatchFiles
                                           .Select(blobInfo => GetBatchNumberFromBatchFileName(blobInfo.FileName))
                                           .ToList();

            // TODO: EES-1608 - this flag keeps track of whether any batch files have been generated to date.
            // It is used in a legacy check to determine whether or not to generate a "no rows" batch file.
            // EES-1608 will investigate the circumstances that could lead to a "no rows" batch file
            // situation, and whether this check can be removed entirely.
            var batchFilesExist = existingBatchFileNumbers.Any();

            foreach (var batch in batches)
            {
                var currentStatus = await _importStatusService.GetImportStatus(message.Release.Id, message.DataFileName);

                if (currentStatus.IsFinishedOrAborting())
                {
                    _logger.LogInformation($"Import for {message.DataFileName} is finished or aborting - " +
                                           $"stopping creating batch files");
                    return;
                }

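                // Batch numbers are zero-padded (e.g. _000001), presumably so that batch
                // file names sort in batch order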
                var batchFileName = $"{message.DataFileName}_{batchCount:000000}";

                if (existingBatchFileNumbers.Contains(batchCount))
                {
                    _logger.LogInformation($"Batch file {batchFileName} already exists - not recreating");
                    batchCount++;
                    continue;
                }

                var batchFilePath = $"{BatchesDir}/{batchFileName}";

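                // Build the batch CSV in memory: copy the column schema, then this batch's rows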
                await using var stream = new MemoryStream();
                var writer = new StreamWriter(stream);

                var table = new DataTable();
                CopyColumns(dataFileTable, table);
                CopyRows(table, batch.ToList(), headerList);

                var percentageComplete = (double)batchCount / numBatches * 100;

                await _importStatusService.UpdateStatus(message.Release.Id,
                                                        message.DataFileName,
                                                        IStatus.STAGE_3,
                                                        percentageComplete);

                // If the batch has no rows, skip creating a batch file and message, unless
                // this is the last batch and no batch files exist at all, in which case
                // create a zero-row batch file
                if (table.Rows.Count == 0 && (batchCount != numBatches || batchFilesExist))
                {
                    batchCount++;
                    continue;
                }

                WriteDataTableToStream(table, writer);
                await writer.FlushAsync();

                stream.Seek(0, SeekOrigin.Begin);

                await _fileStorageService.UploadStream(
                    message.Release.Id,
                    fileType : FileType.Data,
                    fileName : batchFilePath,
                    stream : stream,
                    contentType : "text/csv",
                    FileStorageUtils.GetDataFileMetaValues(
                        name: subjectData.DataBlob.Name,
                        metaFileName: subjectData.DataBlob.GetMetaFileName(),
                        userName: subjectData.DataBlob.GetUserName(),
                        numberOfRows: numRows
                        )
                    );

                batchFilesExist = true;
                batchCount++;
            }
        }
        public async Task ProcessUploads(
            [QueueTrigger("imports-pending")] ImportMessage message,
            ExecutionContext executionContext,
            [Queue("imports-pending")] ICollector <ImportMessage> importStagesMessageQueue,
            [Queue("imports-available")] ICollector <ImportObservationsMessage> importObservationsMessageQueue
            )
        {
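            // The import runs as a pipeline of stages: after completing a stage, the message
            // is re-queued so the next stage runs as a fresh function invocation, which also
            // allows cancellation to take effect between stages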
            try
            {
                var status = await _importStatusService.GetImportStatus(message.Release.Id, message.DataFileName);

                _logger.LogInformation($"Processor Function processing import message for " +
                                       $"{message.DataFileName} at stage {status.Status}");

                switch (status.Status)
                {
                case IStatus.CANCELLING:
                    _logger.LogInformation($"Import for {message.DataFileName} is in the process of being " +
                                           $"cancelled, so not processing to the next import stage - marking as " +
                                           $"CANCELLED");
                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName, IStatus.CANCELLED,
                                                            100);

                    break;

                case IStatus.CANCELLED:
                    _logger.LogInformation($"Import for {message.DataFileName} is cancelled, so not " +
                                           $"processing any further");
                    break;

                case IStatus.QUEUED:
                case IStatus.PROCESSING_ARCHIVE_FILE:
                {
                    if (message.ArchiveFileName != "")
                    {
                        _logger.LogInformation($"Unpacking archive for {message.DataFileName}");
                        await _processorService.ProcessUnpackingArchive(message);
                    }

                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                                                            IStatus.STAGE_1);

                    importStagesMessageQueue.Add(message);
                    break;
                }

                case IStatus.STAGE_1:
                    await _processorService.ProcessStage1(message, executionContext);

                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                                                            IStatus.STAGE_2);

                    importStagesMessageQueue.Add(message);
                    break;

                case IStatus.STAGE_2:
                    await _processorService.ProcessStage2(message);

                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                                                            IStatus.STAGE_3);

                    importStagesMessageQueue.Add(message);
                    break;

                case IStatus.STAGE_3:
                    await _processorService.ProcessStage3(message);

                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                                                            IStatus.STAGE_4);

                    importStagesMessageQueue.Add(message);
                    break;

                case IStatus.STAGE_4:
                    await _processorService.ProcessStage4Messages(message, importObservationsMessageQueue);

                    break;
                }
            }
            catch (Exception e)
            {
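                // Unwrap to the innermost exception so the recorded validation error and the
                // log message reflect the root cause rather than a wrapper exception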
                var ex = GetInnerException(e);

                await _batchService.FailImport(message.Release.Id,
                                               message.DataFileName,
                                               new List <ValidationError>
                {
                    new ValidationError(ex.Message)
                });

                _logger.LogError(ex, $"{GetType().Name} function FAILED for : Datafile: " +
                                 $"{message.DataFileName} : {ex.Message}");
                _logger.LogError(ex.StackTrace);
            }
        }
        public async Task ImportObservations(ImportObservationsMessage message, StatisticsDbContext context)
        {
            var releaseId = message.ReleaseId;

            var status = await _importStatusService.GetImportStatus(releaseId, message.DataFileName);

            if (status.IsFinished())
            {
                _logger.LogInformation($"Import for {message.DataFileName} already finished with state " +
                                       $"{status.Status} - ignoring Observations in file {message.ObservationsFilePath}");
                return;
            }

            if (status.Status == IStatus.CANCELLING)
            {
                _logger.LogInformation($"Import for {message.DataFileName} is CANCELLING - " +
                                       $"ignoring Observations in file {message.ObservationsFilePath} " +
                                       $"and marking import as CANCELLED");

                await _importStatusService.UpdateStatus(releaseId, message.DataFileName, IStatus.CANCELLED, 100);

                return;
            }

            var subjectData = await _fileStorageService.GetSubjectData(message.ReleaseId, message.ObservationsFilePath);

            var releaseSubject = GetReleaseSubjectLink(message.ReleaseId, message.SubjectId, context);

            await using var datafileStream = await _fileStorageService.StreamBlob(subjectData.DataBlob);

            var dataFileTable = DataTableUtils.CreateFromStream(datafileStream);

            await using var metaFileStream = await _fileStorageService.StreamBlob(subjectData.MetaBlob);

            var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

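            // With a retrying execution strategy, EF Core requires user-initiated transactions
            // to be started inside the strategy's delegate so the whole unit can be retried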
            await context.Database.CreateExecutionStrategy().ExecuteAsync(async () =>
            {
                await using var transaction = await context.Database.BeginTransactionAsync();

                await _importerService.ImportObservations(
                    dataFileTable.Columns,
                    dataFileTable.Rows,
                    releaseSubject.Subject,
                    _importerService.GetMeta(metaFileTable, releaseSubject.Subject, context),
                    message.BatchNo,
                    message.RowsPerBatch,
                    context
                    );

                await transaction.CommitAsync();
                await context.Database.CloseConnectionAsync();
            });

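            // Batch files are temporary artifacts of splitting, so delete each one once it
            // is imported; a single-batch import uses the original data file, which is kept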
            if (message.NumBatches > 1)
            {
                await _fileStorageService.DeleteBlobByPath(message.ObservationsFilePath);
            }

            await CheckComplete(releaseId, message, context);
        }