/// <summary>
/// Imports the Filters and Locations for a subject from the given CSV rows,
/// periodically reporting STAGE_2 progress and stopping early if the import
/// has been finished or aborted elsewhere.
/// </summary>
public async Task ImportFiltersAndLocations(DataColumnCollection cols, DataRowCollection rows, SubjectMeta subjectMeta, StatisticsDbContext context, Guid releaseId, string dataFileName)
{
    // The seeder shares these caches across all subjects, so they must be
    // cleared before importing a new subject's filters and locations.
    _importerFilterService.ClearCache();
    _importerLocationService.ClearCache();

    var columnHeaders = CsvUtil.GetColumnValues(cols);
    var totalRows = rows.Count;
    var currentRow = 1;

    foreach (DataRow row in rows)
    {
        // Every STAGE_2_ROW_CHECK rows, poll the import status so a
        // cancelled/failed import can bail out, and publish progress.
        if (currentRow % STAGE_2_ROW_CHECK == 0)
        {
            var currentStatus = await _importStatusService.GetImportStatus(releaseId, dataFileName);

            if (currentStatus.IsFinishedOrAborting())
            {
                _logger.LogInformation($"Import for {dataFileName} has finished or is being aborted, " +
                                       $"so finishing importing Filters and Locations early");
                return;
            }

            var percentageComplete = (double) currentRow / totalRows * 100;
            await _importStatusService.UpdateStatus(releaseId, dataFileName, IStatus.STAGE_2, percentageComplete);
        }

        CreateFiltersAndLocationsFromCsv(context, CsvUtil.GetRowValues(row), columnHeaders, subjectMeta.Filters);
        currentRow++;
    }
}
/// <summary>
/// Builds the <see cref="DataFileInfo"/> view for a data file, falling back
/// to a minimal representation when the backing blob is missing or appears
/// only partially uploaded (so the user can still delete the file).
/// </summary>
private async Task<DataFileInfo> GetDataFileInfo(Guid releaseId, File dataFile)
{
    // Files should exist in storage, but if not, fall back so deletion is
    // still possible.
    var blobExists = await _blobStorageService.CheckBlobExists(PrivateFilesContainerName, dataFile.Path());

    if (!blobExists)
    {
        return await GetFallbackDataFileInfo(releaseId, dataFile);
    }

    var blob = await _blobStorageService.GetBlob(PrivateFilesContainerName, dataFile.Path());

    // A blob with no user name recorded may be a partial upload with no
    // metadata yet, so again use the fallback representation.
    if (string.IsNullOrEmpty(blob.GetUserName()))
    {
        return await GetFallbackDataFileInfo(releaseId, dataFile);
    }

    var metaFile = await GetAssociatedReleaseFileReference(dataFile, Metadata);
    var importStatus = await _importStatusService.GetImportStatus(dataFile.ReleaseId, dataFile.Filename);

    var name = dataFile.SubjectId.HasValue
        ? await GetSubjectName(dataFile)
        : blob.Name;

    return new DataFileInfo
    {
        Id = dataFile.Id,
        FileName = dataFile.Filename,
        Name = name,
        Path = blob.Path,
        Size = blob.Size,
        MetaFileId = metaFile.Id,
        MetaFileName = blob.GetMetaFileName(),
        ReplacedBy = dataFile.ReplacedById,
        Rows = blob.GetNumberOfRows(),
        UserName = blob.GetUserName(),
        Status = importStatus.Status,
        Created = blob.Created,
        Permissions = await _userService.GetDataFilePermissions(releaseId, blob.FileName)
    };
}
/// <summary>
/// Returns the subjects (id/name pairs) for all fully-imported, non-replacement
/// data files attached to the given release, provided the user can view it.
/// </summary>
public async Task <Either <ActionResult, SubjectsMetaViewModel> > GetSubjects(Guid releaseId)
{
    return (await _contentPersistenceHelper
        .CheckEntityExists <Release>(releaseId)
        .OnSuccess(_userService.CheckCanViewRelease)
        .OnSuccess(release =>
        {
            // All data-type files linked to this release.
            var files = _contentDbContext.ReleaseFiles
                .Include(file => file.File)
                .Where(file => file.ReleaseId == releaseId && file.File.Type == FileType.Data)
                .Select(file => file.File);

            // Exclude files that are replacements in progress
            var filesExcludingReplacements = files
                .Where(file => !file.ReplacingId.HasValue)
                .ToList();

            // Keep only subjects whose import has fully completed.
            // NOTE(review): WhereAsync is a project extension invoked from a
            // synchronous lambda — presumably it blocks on the async
            // predicate; confirm this cannot deadlock in this context.
            var subjectIds = filesExcludingReplacements
                .WhereAsync(
                    async file =>
                    {
                        // Not optimal, ideally we should be able to fetch
                        // the status with the file reference itself.
                        // TODO EES-1231 Move imports table into database
                        var importStatus = await _importStatusService
                            .GetImportStatus(file.ReleaseId, file.Filename);
                        return importStatus.Status == IStatus.COMPLETE;
                    }
                )
                .Select(file => file.SubjectId)
                .ToList();

            // Resolve the subject ids against the statistics database to get
            // the display names.
            var subjects = _statisticsDbContext.ReleaseSubject
                .Include(subject => subject.Subject)
                .Where(subject => subject.ReleaseId == releaseId && subjectIds.Contains(subject.SubjectId))
                .Select(subject => new IdLabel(
                    subject.Subject.Id,
                    subject.Subject.Name))
                .ToList();

            return new SubjectsMetaViewModel
            {
                ReleaseId = releaseId,
                Subjects = subjects
            };
        }));
}
/// <summary>
/// Succeeds the cancel-import requirement only while the import is still
/// cancellable and the user holds the CancelAllFileImports claim.
/// </summary>
protected override async Task HandleRequirementAsync(AuthorizationHandlerContext ctx, CancelSpecificFileImportRequirement requirement, ReleaseFileImportInfo import)
{
    var status = await _importStatusService.GetImportStatus(import.ReleaseId, import.DataFileName);

    // An import that has already finished (or is aborting) can no longer be
    // cancelled, so the requirement is never satisfied in that case.
    var stillCancellable = !status.IsFinishedOrAborting();

    if (stillCancellable && SecurityUtils.HasClaim(ctx.User, SecurityClaimTypes.CancelAllFileImports))
    {
        ctx.Succeed(requirement);
    }
}
/// <summary>
/// Looks up the import status of a named data file on a release the user is
/// allowed to view; reports NOT_FOUND when no such file link exists.
/// </summary>
public async Task<Either<ActionResult, ImportStatus>> GetDataFileImportStatus(Guid releaseId, string dataFileName)
{
    return await _persistenceHelper
        .CheckEntityExists<Release>(releaseId)
        .OnSuccess(_userService.CheckCanViewRelease)
        .OnSuccess(async _ =>
        {
            // Locate the link between this release and the named data file.
            var fileLink = _context
                .ReleaseFiles
                .Include(f => f.File)
                .FirstOrDefault(f => f.ReleaseId == releaseId && f.File.Filename == dataFileName);

            // No link means the file was never uploaded to this release.
            if (fileLink is null)
            {
                return new ImportStatus
                {
                    Status = IStatus.NOT_FOUND
                };
            }

            return await _importStatusService.GetImportStatus(fileLink.File.ReleaseId, dataFileName);
        });
}
/// <summary>
/// Splits the full data file into numbered batch CSV files of
/// <c>message.RowsPerBatch</c> rows each and uploads them, skipping batch
/// files that already exist, reporting STAGE_3 progress, and stopping early
/// if the import is finished or aborting.
/// </summary>
private async Task SplitFiles(
    ImportMessage message,
    SubjectData subjectData,
    DataTable dataFileTable)
{
    var headerList = CsvUtil.GetColumnValues(dataFileTable.Columns);
    var batches = dataFileTable.Rows.OfType<DataRow>().Batch(message.RowsPerBatch);
    var batchCount = 1;
    // +1 presumably accounts for the header row recorded against each upload
    // — TODO confirm against consumers of GetDataFileMetaValues.
    var numRows = dataFileTable.Rows.Count + 1;
    var numBatches = (int) Math.Ceiling((double) dataFileTable.Rows.Count / message.RowsPerBatch);

    var existingBatchFiles = await _fileStorageService.GetBatchFilesForDataFile(
        message.Release.Id, message.DataFileName);

    // Materialise the batch numbers once. The previous deferred
    // AsQueryable().Select(...) query re-ran GetBatchNumberFromBatchFileName
    // over the whole blob listing for the initial Any() AND for every
    // Contains() call inside the loop; a HashSet makes each lookup O(1).
    var existingBatchFileNumbers = existingBatchFiles
        .Select(blobInfo => GetBatchNumberFromBatchFileName(blobInfo.FileName))
        .ToHashSet();

    // TODO: EES-1608 - this flag keeps a track of whether any batch files have been generated to date.
    // It is used in a legacy check to determine whether or not to generate a "no rows" batch file.
    // EES-1608 will investigate what the circumstances are that could lead to a "no rows" batch file
    // situation, and whether this check can actually be entirely removed or not.
    var batchFilesExist = existingBatchFileNumbers.Any();

    foreach (var batch in batches)
    {
        // Bail out between batches if the import was cancelled or failed.
        var currentStatus = await _importStatusService.GetImportStatus(message.Release.Id, message.DataFileName);

        if (currentStatus.IsFinishedOrAborting())
        {
            _logger.LogInformation($"Import for {message.DataFileName} is finished or aborting - " +
                                   $"stopping creating batch files");
            return;
        }

        var batchFileName = $"{message.DataFileName}_{batchCount:000000}";

        // Batch files are idempotent: skip any that a previous (interrupted)
        // run already uploaded.
        if (existingBatchFileNumbers.Contains(batchCount))
        {
            _logger.LogInformation($"Batch file {batchFileName} already exists - not recreating");
            batchCount++;
            continue;
        }

        var batchFilePath = $"{BatchesDir}/{batchFileName}";

        await using var stream = new MemoryStream();
        var writer = new StreamWriter(stream);
        await writer.FlushAsync();

        var table = new DataTable();
        CopyColumns(dataFileTable, table);
        CopyRows(table, batch.ToList(), headerList);

        var percentageComplete = (double) batchCount / numBatches * 100;
        await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName, IStatus.STAGE_3, percentageComplete);

        // If no lines then don't create a batch or message unless it's the last one & there are zero
        // lines in total in which case create a zero lines batch
        if (table.Rows.Count == 0 && (batchCount != numBatches || batchFilesExist))
        {
            batchCount++;
            continue;
        }

        WriteDataTableToStream(table, writer);
        await writer.FlushAsync();
        stream.Seek(0, SeekOrigin.Begin);

        await _fileStorageService.UploadStream(
            message.Release.Id,
            fileType: FileType.Data,
            fileName: batchFilePath,
            stream: stream,
            contentType: "text/csv",
            FileStorageUtils.GetDataFileMetaValues(
                name: subjectData.DataBlob.Name,
                metaFileName: subjectData.DataBlob.GetMetaFileName(),
                userName: subjectData.DataBlob.GetUserName(),
                numberOfRows: numRows
            )
        );

        batchFilesExist = true;
        batchCount++;
    }
}
/// <summary>
/// Queue-triggered entry point that advances a data file import through its
/// pipeline stages, re-queuing the message for the next stage as each one
/// completes, and failing the import on any unhandled exception.
/// Declared <c>async Task</c> (not <c>async void</c>) so the Functions
/// runtime can await completion and observe exceptions; async void makes
/// failures unobservable to the caller and is an anti-pattern outside UI
/// event handlers.
/// </summary>
public async Task ProcessUploads(
    [QueueTrigger("imports-pending")] ImportMessage message,
    ExecutionContext executionContext,
    [Queue("imports-pending")] ICollector<ImportMessage> importStagesMessageQueue,
    [Queue("imports-available")] ICollector<ImportObservationsMessage> importObservationsMessageQueue
)
{
    try
    {
        var status = await _importStatusService.GetImportStatus(message.Release.Id, message.DataFileName);

        _logger.LogInformation($"Processor Function processing import message for " +
                               $"{message.DataFileName} at stage {status.Status}");

        switch (status.Status)
        {
            case IStatus.CANCELLING:
                // A cancellation request arrived between stages - mark the
                // import CANCELLED rather than continuing.
                _logger.LogInformation($"Import for {message.DataFileName} is in the process of being " +
                                       $"cancelled, so not processing to the next import stage - marking as " +
                                       $"CANCELLED");
                await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                    IStatus.CANCELLED, 100);
                break;
            case IStatus.CANCELLED:
                _logger.LogInformation($"Import for {message.DataFileName} is cancelled, so not " +
                                       $"processing any further");
                break;
            case IStatus.QUEUED:
            case IStatus.PROCESSING_ARCHIVE_FILE:
            {
                // An archive (zip) upload must be unpacked before stage 1.
                if (message.ArchiveFileName != "")
                {
                    _logger.LogInformation($"Unpacking archive for {message.DataFileName}");
                    await _processorService.ProcessUnpackingArchive(message);
                }

                await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                    IStatus.STAGE_1);
                importStagesMessageQueue.Add(message);
                break;
            }
            case IStatus.STAGE_1:
                await _processorService.ProcessStage1(message, executionContext);
                await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                    IStatus.STAGE_2);
                importStagesMessageQueue.Add(message);
                break;
            case IStatus.STAGE_2:
                await _processorService.ProcessStage2(message);
                await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                    IStatus.STAGE_3);
                importStagesMessageQueue.Add(message);
                break;
            case IStatus.STAGE_3:
                await _processorService.ProcessStage3(message);
                await _importStatusService.UpdateStatus(message.Release.Id, message.DataFileName,
                    IStatus.STAGE_4);
                importStagesMessageQueue.Add(message);
                break;
            case IStatus.STAGE_4:
                await _processorService.ProcessStage4Messages(message, importObservationsMessageQueue);
                break;
        }
    }
    catch (Exception e)
    {
        // Unwrap to the root cause, record the failure against the import,
        // and log - the message is not re-queued after a failure.
        var ex = GetInnerException(e);

        await _batchService.FailImport(message.Release.Id, message.DataFileName,
            new List<ValidationError>
            {
                new ValidationError(ex.Message)
            });

        _logger.LogError(ex, $"{GetType().Name} function FAILED for : Datafile: " +
                             $"{message.DataFileName} : {ex.Message}");
        _logger.LogError(ex.StackTrace);
    }
}
/// <summary>
/// Imports one batch of Observations into the statistics database inside a
/// transaction, skipping work if the import has already finished and marking
/// it CANCELLED if a cancellation is in progress. Deletes the batch blob
/// afterwards (when the data was split into multiple batches) and then checks
/// whether the overall import is complete.
/// </summary>
public async Task ImportObservations(ImportObservationsMessage message, StatisticsDbContext context)
{
    var releaseId = message.ReleaseId;

    var status = await _importStatusService.GetImportStatus(releaseId, message.DataFileName);

    if (status.IsFinished())
    {
        _logger.LogInformation($"Import for {message.DataFileName} already finished with state " +
                               $"{status.Status} - ignoring Observations in file {message.ObservationsFilePath}");
        return;
    }

    if (status.Status == CANCELLING)
    {
        _logger.LogInformation($"Import for {message.DataFileName} is CANCELLING " +
                               $"{status.Status} - ignoring Observations in file {message.ObservationsFilePath} " +
                               $"and marking import as CANCELLED");
        await _importStatusService.UpdateStatus(releaseId, message.DataFileName, CANCELLED, 100);
        return;
    }

    var subjectData = await _fileStorageService.GetSubjectData(message.ReleaseId, message.ObservationsFilePath);
    var releaseSubject = GetReleaseSubjectLink(message.ReleaseId, message.SubjectId, context);

    await using var datafileStream = await _fileStorageService.StreamBlob(subjectData.DataBlob);
    var dataFileTable = DataTableUtils.CreateFromStream(datafileStream);

    await using var metaFileStream = await _fileStorageService.StreamBlob(subjectData.MetaBlob);
    var metaFileTable = DataTableUtils.CreateFromStream(metaFileStream);

    // ExecuteAsync (not Execute) so the retrying execution strategy actually
    // awaits the async operation. The previous code passed an async lambda to
    // the synchronous Execute, which treats the returned Task as the result
    // value - the strategy then cannot retry transient failures occurring
    // inside the awaited work.
    await context.Database.CreateExecutionStrategy().ExecuteAsync(async () =>
    {
        await using var transaction = await context.Database.BeginTransactionAsync();

        await _importerService.ImportObservations(
            dataFileTable.Columns,
            dataFileTable.Rows,
            releaseSubject.Subject,
            _importerService.GetMeta(metaFileTable, releaseSubject.Subject, context),
            message.BatchNo,
            message.RowsPerBatch,
            context
        );

        await transaction.CommitAsync();
        await context.Database.CloseConnectionAsync();
    });

    // Single-batch imports keep the observations file as the original data
    // file; only intermediate batch files are cleaned up here.
    if (message.NumBatches > 1)
    {
        await _fileStorageService.DeleteBlobByPath(message.ObservationsFilePath);
    }

    await CheckComplete(releaseId, message, context);
}