Code example #1
        private async Task SplitFiles(
            DataImport dataImport,
            DataTable dataFileTable)
        {
            var colValues  = CsvUtil.GetColumnValues(dataFileTable.Columns);
            var batches    = dataFileTable.Rows.OfType<DataRow>().Batch(dataImport.RowsPerBatch);
            var batchCount = 1;
            var numRows    = dataFileTable.Rows.Count + 1;
            var numBatches = (int)Math.Ceiling((double)dataFileTable.Rows.Count / dataImport.RowsPerBatch);

            var existingBatchFiles = await _batchService.GetBatchFilesForDataFile(dataImport.File);

            var existingBatchFileNumbers = existingBatchFiles
                                           .AsQueryable()
                                           .Select(blobInfo => GetBatchNumberFromBatchFileName(blobInfo.FileName));

            // TODO: EES-1608 - this flag keeps track of whether any batch files have been generated to date.
            // It is used in a legacy check to determine whether or not to generate a "no rows" batch file.
            // EES-1608 will investigate the circumstances that could lead to a "no rows" batch file
            // situation, and whether this check can be removed entirely.
            var batchFilesExist = existingBatchFileNumbers.Any();

            foreach (var batch in batches)
            {
                var currentStatus = await _dataImportService.GetImportStatus(dataImport.Id);

                if (currentStatus.IsFinishedOrAborting())
                {
                    _logger.LogInformation(
                        $"Import for {dataImport.File.Filename} is finished or aborting - stopping creating batch files");
                    return;
                }

                if (existingBatchFileNumbers.Contains(batchCount))
                {
                    _logger.LogInformation($"Batch {batchCount} already exists - not recreating");
                    batchCount++;
                    continue;
                }

                await using var stream = new MemoryStream();
                var writer = new StreamWriter(stream);
                await writer.FlushAsync();

                var table = new DataTable();
                CopyColumns(dataFileTable, table);
                CopyRows(table, batch.ToList(), colValues, dataImport.HasSoleGeographicLevel());

                var percentageComplete = (double)batchCount / numBatches * 100;

                await _dataImportService.UpdateStatus(dataImport.Id, DataImportStatus.STAGE_3, percentageComplete);

                // If this batch contains no rows, skip it, unless it's the final batch and no
                // batch files have been created at all, in which case write a zero-row batch file
                if (table.Rows.Count == 0 && (batchCount != numBatches || batchFilesExist))
                {
                    _logger.LogInformation($"Skipping batch file for row count {table.Rows.Count} with batchCount {batchCount} and numBatches {numBatches} and batchFilesExist {batchFilesExist} and batch {batch.Count()}");
                    batchCount++;
                    continue;
                }

                WriteDataTableToStream(table, writer);
                await writer.FlushAsync();

                stream.Seek(0, SeekOrigin.Begin);

                await _blobStorageService.UploadStream(
                    containerName: PrivateReleaseFiles,
                    path: dataImport.File.BatchPath(batchCount),
                    stream: stream,
                    contentType: "text/csv",
                    metadata: GetDataFileMetaValues(
                        metaFileName: dataImport.MetaFile.Filename,
                        numberOfRows: numRows));

                batchFilesExist = true;
                batchCount++;
            }
        }
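
The Batch(...) call near the top of this method is an extension method that isn't included in the snippet (the project likely uses MoreLINQ's Batch or a similar helper). As a minimal sketch of what such an extension might look like, assuming it simply chunks a sequence into lists of at most RowsPerBatch items (the class and method names here are illustrative, not the project's actual implementation):

using System;
using System.Collections.Generic;

public static class EnumerableBatchExtensions
{
    // Splits a sequence into consecutive lists of at most `size` items;
    // the final list may contain fewer.
    public static IEnumerable<List<T>> Batch<T>(this IEnumerable<T> source, int size)
    {
        if (size <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(size));
        }

        var bucket = new List<T>(size);

        foreach (var item in source)
        {
            bucket.Add(item);

            if (bucket.Count == size)
            {
                yield return bucket;
                bucket = new List<T>(size);
            }
        }

        // Emit the final partial batch, if any.
        if (bucket.Count > 0)
        {
            yield return bucket;
        }
    }
}

With a shape like this, the calling code's batch.ToList() and batch.Count() both work against whatever each yielded batch exposes as an IEnumerable of DataRow.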