/// <summary>
/// Reads the blob line by line and checks its CSV structure: every line must split into exactly
/// <paramref name="requiredNumberOfColumnsPerLine"/> comma-separated values, every value must be
/// wrapped in double quotes, and the reader must report UTF-8 as its encoding.
/// </summary>
/// <param name="blob">Blob whose content is validated.</param>
/// <param name="requiredNumberOfColumnsPerLine">Exact number of comma-separated values each line must contain.</param>
/// <param name="filetypeDescription">Label for the file type; it starts every per-record error message.</param>
/// <returns>
/// The error messages found (empty when none). If a <c>StorageException</c> is raised while reading and
/// <c>SwallowStorage404</c> returns normally, the errors gathered up to that point are returned.
/// </returns>
private static async Task<IEnumerable<string>> ValidateCsvStructureAsync(ICloudBlob blob, uint requiredNumberOfColumnsPerLine, string filetypeDescription)
{
    var errs = new List<string>();

    try
    {
        using (var blobReader = new StreamReader(await blob.OpenReadAsync(new AccessCondition(), new BlobRequestOptions(), new OperationContext())))
        {
            var fileAttributes = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath);

            // ReadLineAsync returns null at end of stream, which keeps the read asynchronous
            // instead of blocking on EndOfStream/ReadLine inside an async method.
            string line;
            for (var lineNumber = 0; (line = await blobReader.ReadLineAsync()) != null; lineNumber++)
            {
                var recordPrefix = $@"{filetypeDescription} file '{fileAttributes.Filename}' Record {lineNumber}";

                // Plain comma split: a comma inside a quoted value is counted as a separator.
                var fields = line.Split(',');
                if (fields.Length != requiredNumberOfColumnsPerLine)
                {
                    errs.Add($@"{recordPrefix} is malformed. Should have {requiredNumberOfColumnsPerLine} values; has {fields.Length}");
                    continue;
                }

                for (var i = 0; i < fields.Length; i++)
                {
                    var field = fields[i];

                    // each field must be enclosed in double quotes; the length check covers empty
                    // values (which would otherwise throw on indexing) and a lone quote character.
                    if (field.Length < 2 || field[0] != '"' || field[field.Length - 1] != '"')
                    {
                        // The prefix is built per field so it does not accumulate "Field 0 Field 1 ..." across iterations.
                        errs.Add($@"{recordPrefix} Field {i}: value ({field}) is not enclosed in double quotes ("")");
                    }
                }
            }

            // Validate file is UTF-8 encoded
            if (!blobReader.CurrentEncoding.BodyName.Equals("utf-8", StringComparison.OrdinalIgnoreCase))
            {
                errs.Add($@"{blob.Name} is not UTF-8 encoded");
            }
        }
    }
    catch (StorageException storEx)
    {
        SwallowStorage404(storEx);
    }

    return errs;
}
/// <summary>
/// Records the arrival of a new customer file and, once no expected file type is still outstanding,
/// starts validation of the whole batch.
/// </summary>
/// <param name="fileUri">Location of the newly arrived file; parsed with <c>CustomerBlobAttributes.Parse</c>.</param>
public async Task NewFile(string fileUri)
{
    var newCustomerFile = CustomerBlobAttributes.Parse(fileUri);
    _logger.LogInformation($@"Got new file via event: {newCustomerFile.Filename}");

    this.ReceivedFileTypes.Add(newCustomerFile.Filetype);
    _logger.LogTrace($@"Actor '{_id}' got file '{newCustomerFile.Filetype}'");

    // Except() is deferred; materialize it once because the result is read both for the
    // emptiness check and for the log message below.
    var filesStillWaitingFor = Helpers.GetExpectedFilesForCustomer().Except(this.ReceivedFileTypes).ToList();
    if (filesStillWaitingFor.Count > 0)
    {
        _logger.LogInformation($@"Still waiting for more files... Still need {string.Join(", ", filesStillWaitingFor)} for customer {newCustomerFile.CustomerName}, batch {newCustomerFile.BatchPrefix}");
        return;
    }

    _logger.LogInformation(@"Got all the files! Moving on...");

    // call next step in functions with the prefix so it knows what to go grab
    await Helpers.DoValidationAsync($@"{newCustomerFile.ContainerName}/inbound/{newCustomerFile.BatchPrefix}", _logger);
}
/// <summary>
/// Extracts the blob attributes from an Event Grid item, but only for events whose type is
/// "Microsoft.Storage.BlobCreated", whose API is "PutBlob" and whose content type is "text/csv".
/// </summary>
/// <param name="eventGridItem">Event Grid item; read dynamically via <c>eventType</c>, <c>data.api</c>, <c>data.contentType</c> and <c>data.url</c>.</param>
/// <param name="log">Logger that receives any parsing failure.</param>
/// <returns>
/// The parsed attributes, or <c>null</c> when the event does not match the filter, when parsing throws,
/// or when the container name differs from the customer name taken from the file name.
/// </returns>
public static CustomerBlobAttributes ParseEventGridPayload(dynamic eventGridItem, ILogger log)
{
    if (eventGridItem.eventType == @"Microsoft.Storage.BlobCreated"
        && eventGridItem.data.api == @"PutBlob"
        && eventGridItem.data.contentType == @"text/csv")
    {
        try
        {
            var retVal = CustomerBlobAttributes.Parse((string)eventGridItem.data.url);
            if (retVal != null && !retVal.ContainerName.Equals(retVal.CustomerName))
            {
                // Thrown only to be reported by the catch below, which logs it and falls through to return null.
                throw new ArgumentException($@"File '{retVal.Filename}' uploaded to container '{retVal.ContainerName}' doesn't have the right prefix: the first token in the filename ({retVal.CustomerName}) must be the customer name, which should match the container name", nameof(eventGridItem));
            }

            return retVal;
        }
        catch (Exception ex)
        {
            // The exception must be the first argument; passed after the message it is treated as a
            // format argument and never reaches the log entry.
            log.LogError(ex, @"Error parsing Event Grid payload");
        }
    }

    return null;
}
/// <summary>
/// HTTP-triggered entry point (POST, route "Validate"). Validates the CSV structure of every blob listed
/// under the posted prefix whose file type is in the posted "fileTypes" list, marks the batch lock as done,
/// and moves the blobs to the 'valid-set' or 'invalid-set' folder depending on the outcome.
/// </summary>
/// <param name="req">Request whose JSON body supplies "prefix" (full path including the file-set prefix) and "fileTypes".</param>
/// <param name="log">Logger for trace, warning and error output.</param>
/// <returns>
/// 200 OK when the batch validated cleanly, when <c>ShouldProceedAsync</c> declines the batch, or when the lock
/// update raises a <c>StorageException</c>; 400 Bad Request when the body lacks "prefix" or "fileTypes", or when
/// validation errors were found (the response then carries the joined error messages).
/// </returns>
/// <exception cref="InvalidOperationException">The "CustomerBlobStorage" setting cannot be parsed as a storage connection string.</exception>
/// <exception cref="ArgumentOutOfRangeException">A blob selected for processing has a file type with no known column count.</exception>
public static async Task<HttpResponseMessage> Run([HttpTrigger(AuthorizationLevel.Function, @"post", Route = @"Validate")] HttpRequestMessage req, ILogger log)
{
    log.LogTrace(@"ValidateFileSet run.");

    if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out var storageAccount))
    {
        throw new InvalidOperationException(@"Can't create a storage account accessor from app setting connection string, sorry!");
    }

    var payload = JObject.Parse(await req.Content.ReadAsStringAsync());

    // Validate the body before touching the lock table so that a malformed request is rejected
    // with a 400 instead of failing with a NullReferenceException part-way through the batch.
    var prefix = payload["prefix"]?.ToString(); // This is the entire path w/ prefix for the file set
    if (string.IsNullOrEmpty(prefix))
    {
        return req.CreateErrorResponse(HttpStatusCode.BadRequest, @"Request body must contain a non-empty 'prefix' value");
    }

    // Materialized once: the list is consulted for every blob in the loop below.
    var filesToProcess = payload["fileTypes"]?.Values<string>().ToList();
    if (filesToProcess == null)
    {
        return req.CreateErrorResponse(HttpStatusCode.BadRequest, @"Request body must contain a 'fileTypes' list");
    }

    log.LogTrace($@"prefix: {prefix}");

    var filePrefix = prefix.Substring(prefix.LastIndexOf('/') + 1);
    log.LogTrace($@"filePrefix: {filePrefix}");

    var lockTable = await Helpers.GetLockTableAsync();
    if (!await ShouldProceedAsync(lockTable, prefix, filePrefix, log))
    {
        return req.CreateResponse(HttpStatusCode.OK);
    }

    var blobClient = storageAccount.CreateCloudBlobClient();
    var targetBlobs = await blobClient.ListBlobsAsync(WebUtility.UrlDecode(prefix));

    // Customer name: last '-'-separated token of the first '_'-separated token of the file prefix.
    var customerName = filePrefix.Split('_').First().Split('-').Last();

    var errors = new List<string>();

    foreach (var blobDetails in targetBlobs)
    {
        var blob = await blobClient.GetBlobReferenceFromServerAsync(blobDetails.StorageUri.PrimaryUri);
        var fileParts = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath);

        if (!filesToProcess.Contains(fileParts.Filetype, StringComparer.OrdinalIgnoreCase))
        {
            // The verbatim literal spans two lines, so the logged text contains a line break (kept as in the original).
            log.LogTrace($@"{blob.Name} skipped. 
Isn't in the list of file types to process ({string.Join(", ", filesToProcess)}) for bottler '{customerName}'");
            continue;
        }

        var lowerFileType = fileParts.Filetype.ToLowerInvariant();
        log.LogInformation($@"Validating {lowerFileType}...");

        uint numColumns;
        switch (lowerFileType)
        {
            case @"type5":  // salestype
                numColumns = 2;
                break;
            case @"type10": // mixedpack
            case @"type4":  // shipfrom
                numColumns = 3;
                break;
            case @"type1":  // channel
            case @"type2":  // customer
                numColumns = 4;
                break;
            // NOTE(review): DoValidationAsync expects 5 columns for type9 while this method expects 14 - confirm which is correct.
            case @"type9":  // itemdetail
            case @"type3":  // shipto
                numColumns = 14;
                break;
            case @"type6":  // salesdetail
                numColumns = 15;
                break;
            case @"type8":  // product
                numColumns = 21;
                break;
            case @"type7":  // sales
                numColumns = 23;
                break;
            default:
                throw new ArgumentOutOfRangeException(nameof(prefix), $@"Unhandled file type: {fileParts.Filetype}");
        }

        errors.AddRange(await ValidateCsvStructureAsync(blob, numColumns, lowerFileType));
    }

    try
    {
        await LockTableEntity.UpdateAsync(filePrefix, LockTableEntity.BatchState.Done, lockTable);
    }
    catch (StorageException)
    {
        // The blobs are left where they are and any collected errors are not reported in this case.
        log.LogWarning($@"That's weird. The lock for prefix {prefix} wasn't there. Shouldn't happen!");
        return req.CreateResponse(HttpStatusCode.OK);
    }

    if (errors.Any())
    {
        log.LogError($@"Errors found in batch {filePrefix}: {string.Join(@", ", errors)}");

        // move files to 'invalid-set' folder
        await MoveBlobsAsync(log, blobClient, targetBlobs, @"invalid-set");
        return req.CreateErrorResponse(HttpStatusCode.BadRequest, string.Join(@", ", errors));
    }

    // move these files to 'valid-set' folder
    await MoveBlobsAsync(log, blobClient, targetBlobs, @"valid-set");

    log.LogInformation($@"Set {filePrefix} successfully validated and queued for further processing.");
    return req.CreateResponse(HttpStatusCode.OK);
}
/// <summary>
/// Validates the CSV structure of every blob listed under <paramref name="prefix"/> whose file type is among
/// those returned by <c>Helpers.GetExpectedFilesForCustomer</c>, then moves the blobs to the 'valid-set'
/// or 'invalid-set' folder depending on the outcome.
/// </summary>
/// <param name="prefix">Full path including the file-set prefix; the part after the last '/' is used as the file prefix.</param>
/// <param name="logger">Optional logger; when <c>null</c> nothing is logged.</param>
/// <returns><c>true</c> when no validation errors were found; otherwise <c>false</c>.</returns>
/// <exception cref="InvalidOperationException">The "CustomerBlobStorage" setting cannot be parsed as a storage connection string.</exception>
/// <exception cref="ArgumentOutOfRangeException">A blob selected for processing has a file type with no known column count.</exception>
public static async Task<bool> DoValidationAsync(string prefix, ILogger logger = null)
{
    logger?.LogTrace(@"ValidateFileSet run.");

    if (!CloudStorageAccount.TryParse(Environment.GetEnvironmentVariable(@"CustomerBlobStorage"), out var storageAccount))
    {
        throw new InvalidOperationException(@"Can't create a storage account accessor from app setting connection string, sorry!");
    }

    logger?.LogTrace($@"prefix: {prefix}");

    var filePrefix = prefix.Substring(prefix.LastIndexOf('/') + 1);
    logger?.LogTrace($@"filePrefix: {filePrefix}");

    var blobClient = storageAccount.CreateCloudBlobClient();
    var targetBlobs = await blobClient.ListBlobsAsync(WebUtility.UrlDecode(prefix));

    // Customer name: last '-'-separated token of the first '_'-separated token of the file prefix.
    var customerName = filePrefix.Split('_').First().Split('-').Last();

    var errors = new List<string>();
    var expectedFiles = Helpers.GetExpectedFilesForCustomer();

    foreach (var blobDetails in targetBlobs)
    {
        var blob = await blobClient.GetBlobReferenceFromServerAsync(blobDetails.StorageUri.PrimaryUri);
        var fileParts = CustomerBlobAttributes.Parse(blob.Uri.AbsolutePath);

        if (!expectedFiles.Contains(fileParts.Filetype, StringComparer.OrdinalIgnoreCase))
        {
            // The verbatim literal spans two lines, so the logged text contains a line break (kept as in the original).
            logger?.LogTrace($@"{blob.Name} skipped. 
Isn't in the list of file types to process ({string.Join(", ", expectedFiles)}) for customer '{customerName}'");
            continue;
        }

        var lowerFileType = fileParts.Filetype.ToLowerInvariant();

        uint numColumns;
        switch (lowerFileType)
        {
            case @"type5":  // salestype
                numColumns = 2;
                break;
            case @"type10": // mixed
            case @"type4":  // shipfrom
                numColumns = 3;
                break;
            case @"type1":  // channel
            case @"type2":  // customer
                numColumns = 4;
                break;
            // NOTE(review): Run expects 14 columns for type9 while this method expects 5 - confirm which is correct.
            case @"type9":  // itemdetail
                numColumns = 5;
                break;
            case @"type3":  // shipto
                numColumns = 14;
                break;
            case @"type6":  // salesdetail
                numColumns = 15;
                break;
            case @"type8":  // product
                numColumns = 21;
                break;
            case @"type7":  // sales
                numColumns = 23;
                break;
            default:
                throw new ArgumentOutOfRangeException(nameof(prefix), $@"Unhandled file type: {fileParts.Filetype}");
        }

        errors.AddRange(await ValidateCsvStructureAsync(blob, numColumns, lowerFileType));
    }

    if (errors.Any())
    {
        // logger is optional (defaults to null), so it must be null-guarded here as well.
        logger?.LogError($@"Errors found in batch {filePrefix}: {string.Join(@", ", errors)}");

        // move files to 'invalid-set' folder
        await Helpers.MoveBlobsAsync(blobClient, targetBlobs, @"invalid-set", logger);
        return false;
    }

    // move these files to 'valid-set' folder
    await Helpers.MoveBlobsAsync(blobClient, targetBlobs, @"valid-set", logger);

    logger?.LogInformation($@"Set {filePrefix} successfully validated and queued for further processing.");
    return true;
}