// Example 1
        public async Task ImportFiltersAndLocations(DataColumnCollection cols, DataRowCollection rows,
                                                    SubjectMeta subjectMeta, StatisticsDbContext context, Guid releaseId, string dataFileName)
        {
            // The seeder shares these caches across all subjects, so start each import
            // from a clean slate.
            _importerFilterService.ClearCache();
            _importerLocationService.ClearCache();

            var columnHeaders = CsvUtil.GetColumnValues(cols);
            var totalRowCount = rows.Count;
            var currentRow    = 1;

            foreach (DataRow row in rows)
            {
                // Every STAGE_2_ROW_CHECK rows: poll the import status so a finished or
                // aborting import stops early, and report percentage progress for stage 2.
                if (currentRow % STAGE_2_ROW_CHECK == 0)
                {
                    var status = await _importStatusService.GetImportStatus(releaseId, dataFileName);

                    if (status.IsFinishedOrAborting())
                    {
                        _logger.LogInformation($"Import for {dataFileName} has finished or is being aborted, " +
                                               $"so finishing importing Filters and Locations early");
                        return;
                    }

                    await _importStatusService.UpdateStatus(releaseId,
                                                            dataFileName,
                                                            IStatus.STAGE_2,
                                                            (double)currentRow / totalRowCount * 100);
                }

                CreateFiltersAndLocationsFromCsv(context, CsvUtil.GetRowValues(row), columnHeaders, subjectMeta.Filters);
                currentRow++;
            }
        }
        private async Task <DataFileInfo> GetDataFileInfo(Guid releaseId, File dataFile)
        {
            // The blob should exist in storage, but if it does not, return a fallback
            // view model so the user is still able to delete the file.
            var exists = await _blobStorageService.CheckBlobExists(PrivateFilesContainerName, dataFile.Path());

            if (!exists)
            {
                return await GetFallbackDataFileInfo(releaseId, dataFile);
            }

            var blob = await _blobStorageService.GetBlob(PrivateFilesContainerName, dataFile.Path());

            // A blob without user-name metadata indicates a partial upload,
            // so treat it the same way as a missing blob.
            if (string.IsNullOrEmpty(blob.GetUserName()))
            {
                return await GetFallbackDataFileInfo(releaseId, dataFile);
            }

            var metaFile = await GetAssociatedReleaseFileReference(dataFile, Metadata);
            var importStatus = await _importStatusService.GetImportStatus(dataFile.ReleaseId, dataFile.Filename);

            return new DataFileInfo
            {
                Id = dataFile.Id,
                FileName = dataFile.Filename,
                Name = dataFile.SubjectId.HasValue ? await GetSubjectName(dataFile) : blob.Name,
                Path = blob.Path,
                Size = blob.Size,
                MetaFileId = metaFile.Id,
                MetaFileName = blob.GetMetaFileName(),
                ReplacedBy = dataFile.ReplacedById,
                Rows = blob.GetNumberOfRows(),
                UserName = blob.GetUserName(),
                Status = importStatus.Status,
                Created = blob.Created,
                Permissions = await _userService.GetDataFilePermissions(releaseId, blob.FileName)
            };
        }
        public async Task <Either <ActionResult, SubjectsMetaViewModel> > GetSubjects(Guid releaseId)
        {
            // Returns the id/label pairs of all data subjects attached to the release
            // whose data-file imports have completed, after verifying the release exists
            // and the current user may view it.
            return(await _contentPersistenceHelper
                   .CheckEntityExists <Release>(releaseId)
                   .OnSuccess(_userService.CheckCanViewRelease)
                   .OnSuccess(release =>
            {
                // All data-type files linked to this release.
                var files = _contentDbContext.ReleaseFiles
                            .Include(file => file.File)
                            .Where(file => file.ReleaseId == releaseId &&
                                   file.File.Type == FileType.Data)
                            .Select(file => file.File);

                // Exclude files that are replacements in progress
                var filesExcludingReplacements = files
                                                 .Where(file => !file.ReplacingId.HasValue)
                                                 .ToList();

                // Keep only subjects whose import has fully completed.
                // NOTE(review): WhereAsync appears to be a project extension that filters with
                // an async predicate inside this synchronous lambda - confirm it awaits/blocks
                // as intended before chaining Select on its result.
                var subjectIds = filesExcludingReplacements
                                 .WhereAsync(
                    async file =>
                {
                    // Not optimal, ideally we should be able to fetch
                    // the status with the file reference itself.
                    // TODO EES-1231 Move imports table into database
                    var importStatus = await _importStatusService
                                       .GetImportStatus(file.ReleaseId, file.Filename);

                    return importStatus.Status == IStatus.COMPLETE;
                }
                    )
                                 .Select(file => file.SubjectId)
                                 .ToList();

                // Map the completed subject ids to id/label pairs held in the statistics store.
                var subjects = _statisticsDbContext.ReleaseSubject
                               .Include(subject => subject.Subject)
                               .Where(subject => subject.ReleaseId == releaseId &&
                                      subjectIds.Contains(subject.SubjectId))
                               .Select(subject =>
                                       new IdLabel(
                                           subject.Subject.Id,
                                           subject.Subject.Name))
                               .ToList();

                return new SubjectsMetaViewModel
                {
                    ReleaseId = releaseId,
                    Subjects = subjects
                };
            }));
        }
        protected override async Task HandleRequirementAsync(AuthorizationHandlerContext ctx,
                                                             CancelSpecificFileImportRequirement requirement,
                                                             ReleaseFileImportInfo import)
        {
            // An import that has finished (or is already aborting) can no longer be
            // cancelled, so never succeed the requirement in that case.
            var importStatus = await _importStatusService.GetImportStatus(import.ReleaseId, import.DataFileName);

            if (importStatus.IsFinishedOrAborting())
            {
                return;
            }

            // Only users holding the "cancel all file imports" claim may cancel this import.
            if (SecurityUtils.HasClaim(ctx.User, SecurityClaimTypes.CancelAllFileImports))
            {
                ctx.Succeed(requirement);
            }
        }
// Example 5
        public async Task <Either <ActionResult, ImportStatus> > GetDataFileImportStatus(Guid releaseId, string dataFileName)
        {
            // Resolve the import status of the named data file for a release the
            // current user is allowed to view.
            return await _persistenceHelper
                .CheckEntityExists <Release>(releaseId)
                .OnSuccess(_userService.CheckCanViewRelease)
                .OnSuccess(async _ =>
                {
                    // Find the link between this release and the named data file.
                    var releaseFile = _context
                        .ReleaseFiles
                        .Include(f => f.File)
                        .FirstOrDefault(f => f.ReleaseId == releaseId && f.File.Filename == dataFileName);

                    // No link means no such data file was ever uploaded for the release.
                    if (releaseFile == null)
                    {
                        return new ImportStatus
                        {
                            Status = IStatus.NOT_FOUND
                        };
                    }

                    return await _importStatusService.GetImportStatus(releaseFile.File.ReleaseId, dataFileName);
                });
        }
        /// <summary>
        /// Splits the uploaded data file into numbered batch files of
        /// <c>message.RowsPerBatch</c> rows each, uploading every batch to blob storage.
        /// Batches already present in storage (from a previously interrupted run) are
        /// skipped, and the whole operation stops early if the import finishes or aborts.
        /// </summary>
        private async Task SplitFiles(
            ImportMessage message,
            SubjectData subjectData,
            DataTable dataFileTable)
        {
            var headerList = CsvUtil.GetColumnValues(dataFileTable.Columns);
            var batches    = dataFileTable.Rows.OfType <DataRow>().Batch(message.RowsPerBatch);
            var batchCount = 1;
            // +1 accounts for the header row of the original data file.
            var numRows    = dataFileTable.Rows.Count + 1;
            var numBatches = (int)Math.Ceiling((double)dataFileTable.Rows.Count / message.RowsPerBatch);

            var existingBatchFiles = await _fileStorageService.GetBatchFilesForDataFile(
                message.Release.Id,
                message.DataFileName);

            // Materialise the existing batch numbers once into a set. The previous deferred
            // query was re-evaluated by every Any()/Contains() call inside the loop,
            // re-parsing each blob file name per batch.
            var existingBatchFileNumbers = existingBatchFiles
                                           .Select(blobInfo => GetBatchNumberFromBatchFileName(blobInfo.FileName))
                                           .ToHashSet();

            // TODO: EES-1608 - this flag keeps a track of whether any batch files have been generated to date.
            // It is used in a legacy check to determine whether or not to generate a "no rows" batch file.
            // EES-1608 will investigate what the circumstances are that could lead to a "no rows" batch file
            // situation, and whether this check can actually be entirely removed or not.
            var batchFilesExist = existingBatchFileNumbers.Any();

            foreach (var batch in batches)
            {
                // Poll the import status so a finished/aborting import stops batch creation early.
                var currentStatus = await _importStatusService.GetImportStatus(message.Release.Id, message.DataFileName);

                if (currentStatus.IsFinishedOrAborting())
                {
                    _logger.LogInformation($"Import for {message.DataFileName} is finished or aborting - " +
                                           $"stopping creating batch files");
                    return;
                }

                var batchFileName = $"{message.DataFileName}_{batchCount:000000}";

                // Skip batches already uploaded by a previous (interrupted) run.
                if (existingBatchFileNumbers.Contains(batchCount))
                {
                    _logger.LogInformation($"Batch file {batchFileName} already exists - not recreating");
                    batchCount++;
                    continue;
                }

                var batchFilePath = $"{BatchesDir}/{batchFileName}";

                await using var stream = new MemoryStream();
                var writer = new StreamWriter(stream);

                var table = new DataTable();
                CopyColumns(dataFileTable, table);
                CopyRows(table, batch.ToList(), headerList);

                var percentageComplete = (double)batchCount / numBatches * 100;

                await _importStatusService.UpdateStatus(message.Release.Id,
                                                        message.DataFileName,
                                                        IStatus.STAGE_3,
                                                        percentageComplete);

                // If no lines then don't create a batch or message unless it's the last one & there are zero
                // lines in total in which case create a zero lines batch
                if (table.Rows.Count == 0 && (batchCount != numBatches || batchFilesExist))
                {
                    batchCount++;
                    continue;
                }

                WriteDataTableToStream(table, writer);
                await writer.FlushAsync();

                stream.Seek(0, SeekOrigin.Begin);

                await _fileStorageService.UploadStream(
                    message.Release.Id,
                    fileType : FileType.Data,
                    fileName : batchFilePath,
                    stream : stream,
                    contentType : "text/csv",
                    FileStorageUtils.GetDataFileMetaValues(
                        name: subjectData.DataBlob.Name,
                        metaFileName: subjectData.DataBlob.GetMetaFileName(),
                        userName: subjectData.DataBlob.GetUserName(),
                        numberOfRows: numRows
                        )
                    );

                batchFilesExist = true;
                batchCount++;
            }
        }
        /// <summary>
        /// Queue-triggered entry point that advances an import through its stages,
        /// re-queueing the message for each subsequent stage. Any exception fails the
        /// import and records the error.
        /// </summary>
        /// <remarks>
        /// Changed from <c>async void</c> to <c>async Task</c>: with <c>async void</c>
        /// exceptions are unobservable and the Functions runtime cannot await completion,
        /// so the message could be dequeued before processing actually finished.
        /// </remarks>
        public async Task ProcessUploads(
            [QueueTrigger("imports-pending")] ImportMessage message,
            ExecutionContext executionContext,
            [Queue("imports-pending")] ICollector <ImportMessage> importStagesMessageQueue,
            [Queue("imports-available")] ICollector <ImportObservationsMessage> importObservationsMessageQueue
            )
        {
            try
            {
                var status = await _importStatusService.GetImportStatus(message.Release.Id, message.DataFileName);

                _logger.LogInformation($"Processor Function processing import message for " +
                                       $"{message.DataFileName} at stage {status.Status}");

                switch (status.Status)
                {
                case IStatus.CANCELLING:
                    _logger.LogInformation($"Import for {message.DataFileName} is in the process of being " +
                                           $"cancelled, so not processing to the next import stage - marking as " +
                                           $"CANCELLED");
                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName, IStatus.CANCELLED,
                                                            100);

                    break;

                case IStatus.CANCELLED:
                    _logger.LogInformation($"Import for {message.DataFileName} is cancelled, so not " +
                                           $"processing any further");
                    break;

                case IStatus.QUEUED:
                case IStatus.PROCESSING_ARCHIVE_FILE:
                {
                    // Archive uploads are unpacked before stage 1 begins.
                    if (message.ArchiveFileName != "")
                    {
                        _logger.LogInformation($"Unpacking archive for {message.DataFileName}");
                        await _processorService.ProcessUnpackingArchive(message);
                    }

                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                                                            IStatus.STAGE_1);

                    importStagesMessageQueue.Add(message);
                    break;
                }

                case IStatus.STAGE_1:
                    await _processorService.ProcessStage1(message, executionContext);

                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                                                            IStatus.STAGE_2);

                    importStagesMessageQueue.Add(message);
                    break;

                case IStatus.STAGE_2:
                    await _processorService.ProcessStage2(message);

                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                                                            IStatus.STAGE_3);

                    importStagesMessageQueue.Add(message);
                    break;

                case IStatus.STAGE_3:
                    await _processorService.ProcessStage3(message);

                    await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                                                            IStatus.STAGE_4);

                    importStagesMessageQueue.Add(message);
                    break;

                case IStatus.STAGE_4:
                    // Final stage fans out observation-import messages rather than re-queueing.
                    await _processorService.ProcessStage4Messages(message, importObservationsMessageQueue);

                    break;
                }
            }
            catch (Exception e)
            {
                var ex = GetInnerException(e);

                // Mark the import as failed and record the error so it surfaces to users.
                await _batchService.FailImport(message.Release.Id,
                                               message.DataFileName,
                                               new List <ValidationError>
                {
                    new ValidationError(ex.Message)
                });

                _logger.LogError(ex, $"{GetType().Name} function FAILED for : Datafile: " +
                                 $"{message.DataFileName} : {ex.Message}");
                _logger.LogError(ex.StackTrace);
            }
        }
        /// <summary>
        /// Imports the Observation rows of a single batch file into the statistics
        /// database inside a transaction, then deletes the batch blob (when batched)
        /// and checks whether the whole import is complete.
        /// Skips the batch when the import is already finished, and marks the import
        /// CANCELLED when it is in the process of being cancelled.
        /// </summary>
        public async Task ImportObservations(ImportObservationsMessage message, StatisticsDbContext context)
        {
            var releaseId = message.ReleaseId;

            var status = await _importStatusService.GetImportStatus(releaseId, message.DataFileName);

            if (status.IsFinished())
            {
                _logger.LogInformation($"Import for {message.DataFileName} already finished with state " +
                                       $"{status.Status} - ignoring Observations in file {message.ObservationsFilePath}");
                return;
            }

            if (status.Status == IStatus.CANCELLING)
            {
                _logger.LogInformation($"Import for {message.DataFileName} is CANCELLING " +
                                       $"{status.Status} - ignoring Observations in file {message.ObservationsFilePath} " +
                                       $"and marking import as CANCELLED");

                await _importStatusService.UpdateStatus(releaseId, message.DataFileName, IStatus.CANCELLED, 100);

                return;
            }

            var subjectData = await _fileStorageService.GetSubjectData(message.ReleaseId, message.ObservationsFilePath);

            var releaseSubject = GetReleaseSubjectLink(message.ReleaseId, message.SubjectId, context);

            await using var datafileStream = await _fileStorageService.StreamBlob(subjectData.DataBlob);

            var dataFileTable = DataTableUtils.CreateFromStream(datafileStream);

            await using var metaFileStream = await _fileStorageService.StreamBlob(subjectData.MetaBlob);

            var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

            // Use ExecuteAsync (not Execute) with the async delegate: the synchronous
            // Execute overload considers the operation complete as soon as the Task is
            // returned, so the execution strategy's retry/error handling would not cover
            // the awaited work inside the transaction.
            await context.Database.CreateExecutionStrategy().ExecuteAsync(async () =>
            {
                await using var transaction = await context.Database.BeginTransactionAsync();

                await _importerService.ImportObservations(
                    dataFileTable.Columns,
                    dataFileTable.Rows,
                    releaseSubject.Subject,
                    _importerService.GetMeta(metaFileTable, releaseSubject.Subject, context),
                    message.BatchNo,
                    message.RowsPerBatch,
                    context
                    );

                await transaction.CommitAsync();
                await context.Database.CloseConnectionAsync();
            });

            // Single-batch imports reuse the original data file blob, so only delete
            // the batch file when the data was actually split into batches.
            if (message.NumBatches > 1)
            {
                await _fileStorageService.DeleteBlobByPath(message.ObservationsFilePath);
            }

            await CheckComplete(releaseId, message, context);
        }