/// <summary>
/// Lists the raw log files on the FTP server and, for each file that matches the configured
/// CDN platform and account number, streams it through <c>ProcessLogStream</c> into blob
/// storage. A file is deleted from the FTP server once its upload was committed, or when it
/// is a leftover ".download" file whose original is already present in blob storage.
/// </summary>
/// <returns>A task that completes after every listed raw log file has been visited.</returns>
public override async Task Run()
{
    var ftp = new FtpRawLogClient(LoggerFactory, _configuration.FtpSourceUsername, _configuration.FtpSourcePassword);
    var blobStorage = new CloudBlobRawLogClient(LoggerFactory, _cloudStorageAccount);

    // Fetch the directory listing from the FTP server.
    var logFileUris = await ftp.GetRawLogFileUris(_ftpServerUri);

    // Make sure the destination blob container exists.
    var container = await blobStorage.CreateContainerIfNotExistsAsync(_configuration.AzureCdnCloudStorageContainerName);

    foreach (var logFileUri in logFileUris)
    {
        try
        {
            var logFile = new RawLogFileInfo(logFileUri);

            // Only files for the target CDN platform and account number are handled here.
            // The account comparison is only evaluated when the platform already matches.
            var isTargetFile = _azureCdnPlatform == logFile.AzureCdnPlatform
                && _configuration.AzureCdnAccountNumber.Equals(logFile.AzureCdnAccountNumber, StringComparison.InvariantCultureIgnoreCase);
            if (!isTargetFile)
            {
                continue;
            }

            var alreadyInBlobStorage = false;
            var uploadCommitted = false;
            var sourceUri = logFile.Uri;

            if (!logFile.IsPendingDownload)
            {
                // This is a ".gz" file. If a blob with the same name exists, it is being reprocessed.
                var isReprocessing = await blobStorage.CheckIfBlobExistsAsync(container, logFile.FileName);
                if (isReprocessing)
                {
                    // Remove the ".download" file a previous run may have left on the FTP server,
                    // before renaming the ".gz" file to that name below.
                    var leftoverDownloadUri = new Uri(logFile.Uri + FileExtensions.Download);
                    await ftp.DeleteAsync(leftoverDownloadUri);
                }

                // Rename on the origin so we never hold on to a file that can still be written to.
                var pendingFileName = logFile.FileName + FileExtensions.Download;
                sourceUri = await ftp.RenameAsync(logFile, pendingFileName);
                if (sourceUri == null)
                {
                    // The rename failed: leave the file alone and retry on a later run.
                    continue;
                }
            }
            else
            {
                // This is an already renamed file: either another instance of this job is working
                // on it, or it was left behind by an earlier run. To keep ".gz" reprocessing
                // possible, a ".download" file is only skipped (and deleted) when the original
                // ".gz" name is already present in blob storage.
                var originalNameLength = logFile.FileName.Length - FileExtensions.Download.Length;
                var originalFileName = logFile.FileName.Substring(0, originalNameLength);
                alreadyInBlobStorage = await blobStorage.CheckIfBlobExistsAsync(container, originalFileName);
            }

            if (!alreadyInBlobStorage)
            {
                // Read the raw log from FTP.
                using (var ftpStream = await ftp.OpenReadAsync(sourceUri))
                using (var bufferedStream = new MemoryStream())
                {
                    // Buffer the compressed payload in memory - FTP does not like reading line by line.
                    await ftpStream.CopyToAsync(bufferedStream);
                    bufferedStream.Position = 0;

                    // Decompress from the in-memory copy.
                    using (var gzipInput = new GZipInputStream(bufferedStream))
                    {
                        // The blob name must not carry the ".download" marker.
                        var blobName = logFile.FileName.Replace(".download", string.Empty);

                        using (Logger.BeginScope("Started uploading file '{FileName}' to {BlobUri}.", blobName, logFile.Uri.ToString()))
                        {
                            try
                            {
                                // Deliberately not wrapped in using(): when processing throws,
                                // the blob stream must not be committed.
                                var blobStream = await blobStorage.OpenBlobForWriteAsync(container, logFile, blobName);

                                try
                                {
                                    using (var gzipOutput = new GZipOutputStream(blobStream))
                                    {
                                        // Disposing the gzip wrapper must leave the blob stream open for Commit().
                                        gzipOutput.IsStreamOwner = false;

                                        ProcessLogStream(gzipInput, gzipOutput, blobName);
                                        gzipOutput.Flush();
                                    }

                                    // Commit to blob storage.
                                    blobStream.Commit();
                                    uploadCommitted = true;
                                }
                                catch
                                {
                                    uploadCommitted = false;
                                    throw;
                                }

                                Logger.LogInformation("Finished uploading file.");
                            }
                            catch (Exception uploadFailure)
                            {
                                // A failed upload is logged; the renamed file stays on the FTP server.
                                Logger.LogError(
                                    LogEvents.FailedBlobUpload,
                                    uploadFailure,
                                    LogMessages.FailedBlobUpload,
                                    sourceUri);
                            }
                        }
                    }
                }
            }

            // Remove the renamed file from the origin once its content is safely in blob storage.
            if (alreadyInBlobStorage || uploadCommitted)
            {
                await ftp.DeleteAsync(sourceUri);
            }
        }
        catch (UnknownAzureCdnPlatformException platformFailure)
        {
            // Log the failing file, but move on: the other files should go through just fine.
            Logger.LogWarning(
                LogEvents.UnknownAzureCdnPlatform,
                platformFailure,
                LogMessages.UnknownAzureCdnPlatform);
        }
        catch (InvalidRawLogFileNameException fileNameFailure)
        {
            // Log the failing file, but move on: the other files should go through just fine.
            Logger.LogWarning(
                LogEvents.InvalidRawLogFileName,
                fileNameFailure,
                LogMessages.InvalidRawLogFileName);
        }
    }
}
/// <summary>
/// Lists the raw log files on the FTP server and, for each file that matches the target CDN
/// platform and account number, streams it through <c>ProcessLogStream</c> into blob storage.
/// A file is deleted from the FTP server once its upload was committed, or when it is an
/// already renamed file that is found in blob storage.
/// </summary>
/// <returns>
/// <c>true</c> when the run reached the end of the file list; <c>false</c> when an exception
/// other than the per-file ones handled inside the loop was caught and traced.
/// </returns>
public override async Task <bool> Run()
{
    try
    {
        var ftp = new FtpRawLogClient(JobEventSource.Log, _ftpUsername, _ftpPassword);
        var blobStorage = new CloudBlobRawLogClient(JobEventSource.Log, _cloudStorageAccount);

        // Fetch the directory listing from the FTP server.
        IEnumerable <RawLogFileInfo> logFiles = await ftp.GetRawLogFiles(_ftpServerUri);

        // Make sure the destination blob container exists.
        var container = await blobStorage.CreateContainerIfNotExistsAsync(_cloudStorageContainerName);

        foreach (var logFile in logFiles)
        {
            try
            {
                // Only files for the target CDN platform and account number are handled here.
                // The account comparison is only evaluated when the platform already matches.
                var isTargetFile = _azureCdnPlatform == logFile.AzureCdnPlatform
                    && _azureCdnAccountNumber.Equals(logFile.AzureCdnAccountNumber, StringComparison.InvariantCultureIgnoreCase);
                if (!isTargetFile)
                {
                    continue;
                }

                var alreadyInBlobStorage = false;
                var uploadCommitted = false;
                Uri sourceUri = logFile.Uri;

                if (!logFile.IsPendingDownload)
                {
                    // Rename on the origin so we never hold on to a file that can still be written to.
                    var pendingFileName = logFile.FileName + FileExtensions.Download;
                    sourceUri = await ftp.RenameAsync(logFile, pendingFileName);
                    if (sourceUri == null)
                    {
                        // The rename failed: leave the file alone and retry on a later run.
                        continue;
                    }
                }
                else
                {
                    // Already renamed file: check whether it has been uploaded to blob storage.
                    alreadyInBlobStorage = await blobStorage.CheckIfBlobExistsAsync(container, logFile);
                }

                if (!alreadyInBlobStorage)
                {
                    // Read the raw log from FTP.
                    using (var ftpStream = await ftp.OpenReadAsync(sourceUri))
                    using (var bufferedStream = new MemoryStream())
                    {
                        // Buffer the compressed payload in memory - FTP does not like reading line by line.
                        await ftpStream.CopyToAsync(bufferedStream);
                        bufferedStream.Position = 0;

                        // Decompress from the in-memory copy.
                        using (var gzipInput = new GZipInputStream(bufferedStream))
                        {
                            try
                            {
                                // The blob name must not carry the ".download" marker.
                                var blobName = logFile.FileName.Replace(".download", string.Empty);

                                Trace.TraceInformation("Uploading file '{0}'.", blobName);
                                JobEventSource.Log.BeginningBlobUpload(logFile.Uri.ToString());

                                // Deliberately not wrapped in using(): when processing throws,
                                // the blob stream must not be committed.
                                var blobStream = await blobStorage.OpenBlobForWriteAsync(container, logFile, blobName);

                                try
                                {
                                    using (var gzipOutput = new GZipOutputStream(blobStream))
                                    {
                                        // Disposing the gzip wrapper must leave the blob stream open for Commit().
                                        gzipOutput.IsStreamOwner = false;

                                        ProcessLogStream(gzipInput, gzipOutput);
                                        gzipOutput.Flush();
                                    }

                                    // Commit to blob storage.
                                    blobStream.Commit();
                                    uploadCommitted = true;
                                }
                                catch
                                {
                                    // Drop the reference to the uncommitted stream and let the
                                    // enclosing handler trace the failure.
                                    blobStream = null;
                                    uploadCommitted = false;
                                    throw;
                                }

                                Trace.TraceInformation("Finished uploading file '{0}' to '{1}'.", blobName, logFile.Uri.AbsoluteUri);
                                JobEventSource.Log.FinishingBlobUpload(logFile.Uri.ToString());
                            }
                            catch (Exception uploadFailure)
                            {
                                // A failed upload is traced; the renamed file stays on the FTP server.
                                Trace.TraceError(uploadFailure.ToString());
                                JobEventSource.Log.FailedToUploadFile(logFile.Uri.ToString(), uploadFailure.ToString());
                            }
                        }
                    }
                }

                // Remove the renamed file from the origin once its content is safely in blob storage.
                if (alreadyInBlobStorage || uploadCommitted)
                {
                    await ftp.DeleteAsync(sourceUri);
                }
            }
            catch (UnknownAzureCdnPlatformException platformFailure)
            {
                // Trace, but ignore the failing file: the other files should go through just fine.
                Trace.TraceWarning(platformFailure.ToString());
            }
            catch (InvalidRawLogFileNameException fileNameFailure)
            {
                // Trace, but ignore the failing file: the other files should go through just fine.
                Trace.TraceWarning(fileNameFailure.ToString());
            }
        }

        return true;
    }
    catch (Exception runFailure)
    {
        // Anything that escaped the per-file handling ends the run and is reported as a failure.
        Trace.TraceError(runFailure.ToString());
        return false;
    }
}