/// <summary>
/// Splits the rows of <paramref name="dataFileTable"/> into batch CSV files of
/// <c>dataImport.RowsPerBatch</c> rows each and uploads each batch to blob storage,
/// skipping batches that already exist and bailing out early if the import is
/// finished or aborting. Import progress (STAGE_3 percentage) is updated per batch.
/// </summary>
/// <param name="dataImport">The import whose source file is being batched.</param>
/// <param name="dataFileTable">The full data file, already parsed into a <see cref="DataTable"/>.</param>
private async Task SplitFiles(
    DataImport dataImport,
    DataTable dataFileTable)
{
    var colValues = CsvUtil.GetColumnValues(dataFileTable.Columns);
    var batches = dataFileTable.Rows.OfType<DataRow>().Batch(dataImport.RowsPerBatch);
    var batchCount = 1;
    // +1 presumably accounts for the header line when reporting row counts in blob
    // metadata - TODO(review): confirm against GetDataFileMetaValues consumers.
    var numRows = dataFileTable.Rows.Count + 1;
    var numBatches = (int) Math.Ceiling((double) dataFileTable.Rows.Count / dataImport.RowsPerBatch);

    var existingBatchFiles = await _batchService.GetBatchFilesForDataFile(dataImport.File);

    // Materialise the batch numbers once into a set. The previous deferred
    // IQueryable was re-enumerated (re-running GetBatchNumberFromBatchFileName
    // over every blob) on each Contains() call inside the loop below.
    var existingBatchFileNumbers = existingBatchFiles
        .Select(blobInfo => GetBatchNumberFromBatchFileName(blobInfo.FileName))
        .ToHashSet();

    // TODO: EES-1608 - this flag keeps a track of whether any batch files have been generated to date.
    // It is used in a legacy check to determine whether or not to generate a "no rows" batch file.
    // EES-1608 will investigate what the circumstances are that could lead to a "no rows" batch file
    // situation, and whether this check can actually be entirely removed or not.
    var batchFilesExist = existingBatchFileNumbers.Count > 0;

    foreach (var batch in batches)
    {
        // Re-check status each iteration so a long-running split can be cancelled mid-way.
        var currentStatus = await _dataImportService.GetImportStatus(dataImport.Id);

        if (currentStatus.IsFinishedOrAborting())
        {
            _logger.LogInformation(
                $"Import for {dataImport.File.Filename} is finished or aborting - stopping creating batch files");
            return;
        }

        if (existingBatchFileNumbers.Contains(batchCount))
        {
            _logger.LogInformation($"Batch {batchCount} already exists - not recreating");
            batchCount++;
            continue;
        }

        // Materialise the batch once: it is consumed both by CopyRows and by the
        // skip-log message below (the original enumerated it a second time via Count()).
        var batchRows = batch.ToList();

        await using var stream = new MemoryStream();
        var writer = new StreamWriter(stream);

        var table = new DataTable();
        CopyColumns(dataFileTable, table);
        CopyRows(table, batchRows, colValues, dataImport.HasSoleGeographicLevel());

        var percentageComplete = (double) batchCount / numBatches * 100;

        await _dataImportService.UpdateStatus(dataImport.Id, DataImportStatus.STAGE_3, percentageComplete);

        // If no lines then don't create a batch unless it's the last one & there are zero
        // lines in total in which case create a zero lines batch
        if (table.Rows.Count == 0 && (batchCount != numBatches || batchFilesExist))
        {
            _logger.LogInformation($"Skipping batch file for row count {table.Rows.Count} with batchCount {batchCount} and numBatches {numBatches} and batchFilesExist {batchFilesExist} and batch {batchRows.Count}");
            batchCount++;
            continue;
        }

        WriteDataTableToStream(table, writer);
        await writer.FlushAsync();

        // Rewind so the upload reads the CSV from the start.
        stream.Seek(0, SeekOrigin.Begin);

        await _blobStorageService.UploadStream(
            containerName: PrivateReleaseFiles,
            path: dataImport.File.BatchPath(batchCount),
            stream: stream,
            contentType: "text/csv",
            metadata: GetDataFileMetaValues(
                metaFileName: dataImport.MetaFile.Filename,
                numberOfRows: numRows));

        batchFilesExist = true;
        batchCount++;
    }
}