Пример #1
0
        /// <summary>
        /// Lists the raw log files on the FTP server and, for each file matching the configured CDN platform
        /// and account number, streams it through <c>ProcessLogStream</c> into a gzip-compressed blob and then
        /// deletes the FTP copy.
        /// </summary>
        /// <remarks>
        /// A ".gz" file is renamed with the ".download" suffix before it is read. A file that already carries
        /// the suffix is deleted without being uploaded again when a blob with the original name exists.
        /// Failures while writing the blob are logged and leave the renamed file on the FTP server.
        /// <see cref="UnknownAzureCdnPlatformException"/> and <see cref="InvalidRawLogFileNameException"/>
        /// are logged as warnings and only skip the current file. Other exceptions raised outside the
        /// blob-writing step are not caught here.
        /// </remarks>
        /// <returns>A task that completes once every listed file has been visited.</returns>
        public override async Task Run()
        {
            var ftpClient   = new FtpRawLogClient(LoggerFactory, _configuration.FtpSourceUsername, _configuration.FtpSourcePassword);
            var azureClient = new CloudBlobRawLogClient(LoggerFactory, _cloudStorageAccount);

            // Collect directory listing.
            var rawLogFileUris = await ftpClient.GetRawLogFileUris(_ftpServerUri);

            // Prepare cloud storage blob container.
            var cloudBlobContainer = await azureClient.CreateContainerIfNotExistsAsync(_configuration.AzureCdnCloudStorageContainerName);

            foreach (var rawLogFileUri in rawLogFileUris)
            {
                try
                {
                    var rawLogFile = new RawLogFileInfo(rawLogFileUri);

                    if (_azureCdnPlatform != rawLogFile.AzureCdnPlatform ||
                        !_configuration.AzureCdnAccountNumber.Equals(rawLogFile.AzureCdnAccountNumber, StringComparison.InvariantCultureIgnoreCase))
                    {
                        // Only process the raw log files matching the target CDN platform and account number.
                        continue;
                    }

                    var skipProcessing  = false;
                    var uploadSucceeded = false;
                    var rawLogUri       = rawLogFile.Uri;

                    // Name of the blob this file maps to. The same value is used for the existence checks
                    // and for the upload, so both always refer to the same blob.
                    var blobFileName = rawLogFile.FileName;

                    // Check if this is an already renamed file:
                    // This would indicate that the file is being processed already (by another instance of this job),
                    // or that the file is being reprocessed (and the ".download" renamed file was left behind).
                    if (rawLogFile.IsPendingDownload)
                    {
                        // In order to support reprocessing ".gz" files,
                        // we only skip processing ".download" files that have been successfully uploaded to blob storage,
                        // which only happens when they have been processed successfully.
                        // Check if the original ".gz" file has already been uploaded to blob storage.
                        // If it already was uploaded to blob storage,
                        // we can skip processing this ".download" file and delete it from the FTP server.
                        // Only the trailing ".download" suffix is removed; the rest of the name is left untouched.
                        blobFileName   = rawLogFile.FileName.Substring(0, rawLogFile.FileName.Length - FileExtensions.Download.Length);
                        skipProcessing = await azureClient.CheckIfBlobExistsAsync(cloudBlobContainer, blobFileName);
                    }
                    else
                    {
                        // We are processing a ".gz" file.
                        // Check if the file has already been uploaded to blob storage: are we reprocessing it?
                        var isReprocessing = await azureClient.CheckIfBlobExistsAsync(cloudBlobContainer, blobFileName);

                        if (isReprocessing)
                        {
                            // As we are reprocessing this ".gz" file,
                            // we should first delete the ".download" file if it already exists on the FTP server.
                            var downloadFileUri = new Uri(rawLogFile.Uri + FileExtensions.Download);
                            await ftpClient.DeleteAsync(downloadFileUri);
                        }

                        // Rename the file on the origin to ensure we're not locking a file that still can be written to.
                        var downloadFileName = rawLogFile.FileName + FileExtensions.Download;
                        rawLogUri = await ftpClient.RenameAsync(rawLogFile, downloadFileName);

                        if (rawLogUri == null)
                        {
                            // Failed to rename the file. Leave it and try again later.
                            continue;
                        }
                    }

                    // Skip already processed ".download" files.
                    if (!skipProcessing)
                    {
                        // open the raw log from FTP
                        using (var rawLogStream = await ftpClient.OpenReadAsync(rawLogUri))
                        using (var rawLogStreamInMemory = new MemoryStream())
                        {
                            // copy the raw, compressed stream to memory - FTP does not like reading line by line
                            await rawLogStream.CopyToAsync(rawLogStreamInMemory);

                            rawLogStreamInMemory.Position = 0;

                            // process the raw, compressed memory stream
                            using (var rawGzipStream = new GZipInputStream(rawLogStreamInMemory))
                            using (Logger.BeginScope("Started uploading file '{FileName}' to {BlobUri}.", blobFileName, rawLogFile.Uri.ToString()))
                            {
                                try
                                {
                                    // open the resulting cleaned blob and stream modified entries
                                    // note the missing using() statement so that we can skip committing if an exception occurs
                                    var resultLogStream = await azureClient.OpenBlobForWriteAsync(cloudBlobContainer, rawLogFile, blobFileName);

                                    using (var resultGzipStream = new GZipOutputStream(resultLogStream))
                                    {
                                        // The gzip wrapper must not close the blob stream; it is committed explicitly below.
                                        resultGzipStream.IsStreamOwner = false;

                                        ProcessLogStream(rawGzipStream, resultGzipStream, blobFileName);

                                        resultGzipStream.Flush();
                                    }

                                    // commit to blob storage
                                    resultLogStream.Commit();

                                    // Only reached when nothing above threw, so the flag stays false on any failure.
                                    uploadSucceeded = true;

                                    Logger.LogInformation("Finished uploading file.");
                                }
                                catch (Exception exception)
                                {
                                    Logger.LogError(
                                        LogEvents.FailedBlobUpload,
                                        exception,
                                        LogMessages.FailedBlobUpload,
                                        rawLogUri);
                                }
                            }
                        }
                    }

                    // Delete the renamed file from the origin.
                    if (skipProcessing || uploadSucceeded)
                    {
                        await ftpClient.DeleteAsync(rawLogUri);
                    }
                }
                catch (UnknownAzureCdnPlatformException exception)
                {
                    // Log the failing file, but ignore it. Other files should go through just fine.
                    Logger.LogWarning(
                        LogEvents.UnknownAzureCdnPlatform,
                        exception,
                        LogMessages.UnknownAzureCdnPlatform);
                }
                catch (InvalidRawLogFileNameException exception)
                {
                    // Log the failing file, but ignore it. Other files should go through just fine.
                    Logger.LogWarning(
                        LogEvents.InvalidRawLogFileName,
                        exception,
                        LogMessages.InvalidRawLogFileName);
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Lists the raw log files on the FTP server and, for each file matching the target CDN platform and
        /// account number, streams it through <c>ProcessLogStream</c> into a gzip-compressed blob and then
        /// deletes the FTP copy.
        /// </summary>
        /// <remarks>
        /// A file that is not yet pending download is renamed with the ".download" suffix before it is read.
        /// A pending file whose blob already exists is deleted without being uploaded again.
        /// Failures while writing a blob are traced and leave the renamed file on the FTP server.
        /// <see cref="UnknownAzureCdnPlatformException"/> and <see cref="InvalidRawLogFileNameException"/>
        /// are traced as warnings and only skip the current file.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when the whole listing was visited; <c>false</c> when any other exception ended the run
        /// early (the exception is traced, not rethrown).
        /// </returns>
        public override async Task<bool> Run()
        {
            try
            {
                var ftpClient   = new FtpRawLogClient(JobEventSource.Log, _ftpUsername, _ftpPassword);
                var azureClient = new CloudBlobRawLogClient(JobEventSource.Log, _cloudStorageAccount);

                // Collect directory listing.
                IEnumerable<RawLogFileInfo> rawLogFiles = await ftpClient.GetRawLogFiles(_ftpServerUri);

                // Prepare cloud storage blob container.
                var cloudBlobContainer = await azureClient.CreateContainerIfNotExistsAsync(_cloudStorageContainerName);

                foreach (var rawLogFile in rawLogFiles)
                {
                    try
                    {
                        // Only process the raw log files matching the target CDN platform and account number.
                        var isTargetFile = _azureCdnPlatform == rawLogFile.AzureCdnPlatform &&
                                           _azureCdnAccountNumber.Equals(rawLogFile.AzureCdnAccountNumber, StringComparison.InvariantCultureIgnoreCase);

                        if (!isTargetFile)
                        {
                            continue;
                        }

                        bool alreadyUploaded = false;
                        bool uploadSucceeded = false;
                        Uri  rawLogUri       = rawLogFile.Uri;

                        // Check if this is an already renamed file.
                        if (rawLogFile.IsPendingDownload)
                        {
                            // Check if the file has already been uploaded to blob storage.
                            alreadyUploaded = await azureClient.CheckIfBlobExistsAsync(cloudBlobContainer, rawLogFile);
                        }
                        else
                        {
                            // Rename the file on the origin to ensure we're not locking a file that still can be written to.
                            rawLogUri = await ftpClient.RenameAsync(rawLogFile, rawLogFile.FileName + FileExtensions.Download);

                            if (rawLogUri == null)
                            {
                                // Failed to rename the file. Leave it and try again later.
                                continue;
                            }
                        }

                        if (!alreadyUploaded)
                        {
                            // open the raw log from FTP
                            using (var rawLogStream = await ftpClient.OpenReadAsync(rawLogUri))
                            using (var rawLogStreamInMemory = new MemoryStream())
                            {
                                // copy the raw, compressed stream to memory - FTP does not like reading line by line
                                await rawLogStream.CopyToAsync(rawLogStreamInMemory);

                                rawLogStreamInMemory.Position = 0;

                                // process the raw, compressed memory stream
                                using (var rawGzipStream = new GZipInputStream(rawLogStreamInMemory))
                                {
                                    try
                                    {
                                        // Trim only a trailing ".download" suffix (the one appended by the rename above);
                                        // any other occurrence of that text inside the name is part of the real file name.
                                        var fileName = rawLogFile.FileName;
                                        if (fileName.EndsWith(FileExtensions.Download, StringComparison.Ordinal))
                                        {
                                            fileName = fileName.Substring(0, fileName.Length - FileExtensions.Download.Length);
                                        }

                                        Trace.TraceInformation("Uploading file '{0}'.", fileName);
                                        JobEventSource.Log.BeginningBlobUpload(rawLogFile.Uri.ToString());

                                        // open the resulting cleaned blob and stream modified entries
                                        // note the missing using() statement so that we can skip committing if an exception occurs
                                        var resultLogStream = await azureClient.OpenBlobForWriteAsync(cloudBlobContainer, rawLogFile, fileName);

                                        using (var resultGzipStream = new GZipOutputStream(resultLogStream))
                                        {
                                            // The gzip wrapper must not close the blob stream; it is committed explicitly below.
                                            resultGzipStream.IsStreamOwner = false;

                                            ProcessLogStream(rawGzipStream, resultGzipStream);

                                            resultGzipStream.Flush();
                                        }

                                        // commit to blob storage
                                        resultLogStream.Commit();

                                        // Only reached when nothing above threw, so the flag stays false on any failure.
                                        uploadSucceeded = true;

                                        Trace.TraceInformation("Finished uploading file '{0}' to '{1}'.", fileName, rawLogFile.Uri.AbsoluteUri);
                                        JobEventSource.Log.FinishingBlobUpload(rawLogFile.Uri.ToString());
                                    }
                                    catch (Exception exception)
                                    {
                                        Trace.TraceError(exception.ToString());
                                        JobEventSource.Log.FailedToUploadFile(rawLogFile.Uri.ToString(), exception.ToString());
                                    }
                                }
                            }
                        }

                        // Delete the renamed file from the origin.
                        if (alreadyUploaded || uploadSucceeded)
                        {
                            await ftpClient.DeleteAsync(rawLogUri);
                        }
                    }
                    catch (UnknownAzureCdnPlatformException exception)
                    {
                        // Trace, but ignore the failing file. Other files should go through just fine.
                        Trace.TraceWarning(exception.ToString());
                    }
                    catch (InvalidRawLogFileNameException exception)
                    {
                        // Trace, but ignore the failing file. Other files should go through just fine.
                        Trace.TraceWarning(exception.ToString());
                    }
                }

                return true;
            }
            catch (Exception exception)
            {
                Trace.TraceError(exception.ToString());
            }

            return false;
        }