/// <summary>
/// Publishes a message to the configured SNS topic, but only when a topic is
/// set and the SEND_SNS environment variable parses to true. Publish failures
/// are logged and never propagated to the caller.
/// </summary>
/// <param name="message">The notification body to publish</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>A task that completes once the publish attempt has finished</returns>
private static async Task SNSNotify(string message, ILambdaContext context)
{
    // TryParse leaves the flag false for a missing, empty or unparsable value
    bool isEnabled;
    Boolean.TryParse(Environment.GetEnvironmentVariable("SEND_SNS"), out isEnabled);

    if (!isEnabled || String.IsNullOrEmpty(_SNSTopic))
    {
        return;
    }

    try
    {
        PublishResponse publishResult = await _SNSClient.PublishAsync(_SNSTopic, message, _Subject);

        if (publishResult.HttpStatusCode == HttpStatusCode.OK)
        {
            return;
        }

        context.LogError($"Failed to send SNS notification with status code {(int)publishResult.HttpStatusCode}.");
    }
    catch (Exception publishError)
    {
        context.LogError("Failed to send SNS notification.", publishError);
    }
}
/// <summary>
/// Handles the CREATE stack operation by creating an Elastic Transcoder pipeline
/// from the request's resource properties.
/// </summary>
/// <param name="request">The custom resource request whose ResourceProperties describe the pipeline</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>
/// A SUCCESS response carrying the pipeline Name, Arn and Id when the service answers with a 2xx
/// status; otherwise a FAILED response. Exceptions are logged and converted to FAILED responses.
/// </returns>
public override async Task<CustomResourceResponse> CreateAsync(CustomResourceRequest request, ILambdaContext context)
{
    try
    {
        context.LogInfo("Attempting to create a pipeline.");

        // Round-trip the resource properties through JSON to obtain a typed SDK request
        string propertiesJson = JsonConvert.SerializeObject(request.ResourceProperties);
        CreatePipelineRequest createRequest = JsonConvert.DeserializeObject<CreatePipelineRequest>(propertiesJson);

        CreatePipelineResponse created = await this._ETClient.CreatePipelineAsync(createRequest);

        int statusCode = (int)created.HttpStatusCode;
        bool isSuccessStatus = statusCode >= 200 && statusCode <= 299;

        if (isSuccessStatus)
        {
            return new CustomResourceResponse(
                CustomResourceResponse.RequestStatus.SUCCESS,
                $"See the details in CloudWatch Log Stream: {context.LogStreamName}.",
                created.Pipeline.Id,
                request.StackId,
                request.RequestId,
                request.LogicalResourceId,
                false,
                new Dictionary<string, object>()
                {
                    { "Name", created.Pipeline.Name },
                    { "Arn", created.Pipeline.Arn },
                    { "Id", created.Pipeline.Id }
                }
            );
        }

        return new CustomResourceResponse(
            CustomResourceResponse.RequestStatus.FAILED,
            $"Received HTTP status code {statusCode}.",
            request
        );
    }
    catch (AmazonElasticTranscoderException serviceError)
    {
        context.LogError(serviceError);

        // No pipeline exists, so a random GUID is supplied as the resource id
        return new CustomResourceResponse(
            CustomResourceResponse.RequestStatus.FAILED,
            serviceError.Message,
            Guid.NewGuid().ToString(),
            request.StackId,
            request.RequestId,
            request.LogicalResourceId
        );
    }
    catch (Exception unexpected)
    {
        context.LogError(unexpected);

        return new CustomResourceResponse(
            CustomResourceResponse.RequestStatus.FAILED,
            unexpected.Message,
            Guid.NewGuid().ToString(),
            request.StackId,
            request.RequestId,
            request.LogicalResourceId
        );
    }
}
/// <summary>
/// Gets the contents of the retry file as a list of non-empty, trimmed lines.
/// </summary>
/// <param name="bucket">The bucket holding the retry file</param>
/// <param name="key">The object key of the retry file</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>
/// One entry per non-blank line of the file. An empty list is returned when the
/// file is empty or when it could not be read (the failure is logged).
/// </returns>
private static async Task<List<string>> GetRetryFileAsync(string bucket, string key, ILambdaContext context)
{
    try
    {
        GetObjectRequest GetRequest = new GetObjectRequest()
        {
            BucketName = bucket,
            Key = key
        };

        // Dispose the response itself (not only its stream) and read asynchronously.
        // No Length check is needed: empty content simply yields no lines, and
        // Length is not guaranteed to be supported on a network-backed stream.
        using (GetObjectResponse Response = await _S3Client.GetObjectAsync(GetRequest))
        {
            using (StreamReader Reader = new StreamReader(Response.ResponseStream))
            {
                string Content = await Reader.ReadToEndAsync();

                return Content
                    .Split('\n')
                    .Select(x => x.Trim())
                    .Where(x => !String.IsNullOrEmpty(x))
                    .ToList();
            }
        }
    }
    catch (Exception e)
    {
        context.LogError("It's likely the retry file does not exist.", e);
    }

    return new List<string>();
}
/// <summary>
/// Entrypoint for the Lambda function when triggered directly by S3. Each record's
/// object is passed to Copy using the configured destination bucket and prefix pattern.
/// A failure on one record is logged and reported, then processing continues with the next.
/// </summary>
/// <param name="request">The S3 event containing the records to process</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>A task that completes when every record has been attempted</returns>
public async Task ExecS3(S3Event request, ILambdaContext context)
{
    // Both helpers log and send a failure notification themselves when the value is missing
    string DestinationBucket = await GetDestinationBucket(context);

    if (String.IsNullOrEmpty(DestinationBucket))
    {
        return;
    }

    string PrefixPattern = await GetPrefixPattern(context);

    if (String.IsNullOrEmpty(PrefixPattern))
    {
        return;
    }

    // NOTE(review): the DELETE_SOURCE environment variable used to be parsed here but the
    // value was never used, and Copy takes no parameter for it. Wire it through Copy if
    // a copy-without-delete mode is required.

    foreach (S3EventNotificationRecord Record in request.Records)
    {
        try
        {
            string Key = Record.S3.Object.Key;
            string Bucket = Record.S3.Bucket.Name;

            await Copy(Bucket, Key, DestinationBucket, PrefixPattern, context);
        }
        catch (AggregateException e)
        {
            context.LogError(e);

            // InnerException is null for an AggregateException without inner exceptions
            await SendFailureSNS(e.InnerException ?? e, context);
        }
        catch (Exception e)
        {
            context.LogError(e);
            await SendFailureSNS(e, context);
        }
    }
}
/// <summary>
/// Publishes a message to the configured SNS topic. Nothing is sent when no topic
/// is configured. Publish failures are logged and never propagated to the caller.
/// </summary>
/// <param name="message">The notification body to publish</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>A task that completes once the publish attempt has finished</returns>
private static async Task SNSNotify(string message, ILambdaContext context)
{
    if (String.IsNullOrEmpty(_SNSTopic))
    {
        return;
    }

    try
    {
        PublishResponse publishResult = await _SNSClient.PublishAsync(_SNSTopic, message, _Subject);

        if (publishResult.HttpStatusCode == HttpStatusCode.OK)
        {
            return;
        }

        context.LogError($"Failed to send SNS notification with status code {(int)publishResult.HttpStatusCode}.");
    }
    catch (Exception publishError)
    {
        context.LogError("Failed to send SNS notification.", publishError);
    }
}
/// <summary>
/// Reads the PREFIX_PATTERN environment variable. When it is missing or empty the
/// problem is logged and a failure notification is sent.
/// </summary>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>The configured value, which is null or empty when the variable is not set</returns>
private static async Task<string> GetPrefixPattern(ILambdaContext context)
{
    string configured = Environment.GetEnvironmentVariable("PREFIX_PATTERN");

    if (!String.IsNullOrEmpty(configured))
    {
        return configured;
    }

    string problem = "The environment variable PREFIX_PATTERN was not set.";
    context.LogError(problem);
    await SendFailureSNS(problem, context);

    // Callers detect the failure by checking for a null or empty result
    return configured;
}
/// <summary>
/// Reads the S3_BUCKET environment variable that names the destination bucket. When it
/// is missing or empty the problem is logged and a failure notification is sent.
/// </summary>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>The configured value, which is null or empty when the variable is not set</returns>
private static async Task<string> GetDestinationBucket(ILambdaContext context)
{
    string configured = Environment.GetEnvironmentVariable("S3_BUCKET");

    if (!String.IsNullOrEmpty(configured))
    {
        return configured;
    }

    string problem = "The environment variable S3_BUCKET for the destination was not set.";
    context.LogError(problem);
    await SendFailureSNS(problem, context);

    // Callers detect the failure by checking for a null or empty result
    return configured;
}
/// <summary>
/// Entrypoint for the Lambda function. Logs the incoming request, delegates it to the
/// custom resource handler and logs whether the handler reported success.
/// </summary>
/// <param name="request">The custom resource request</param>
/// <param name="context">The ILambdaContext object</param>
/// <returns>A task that completes when the handler has finished</returns>
public async Task Execute(CustomResourceRequest request, ILambdaContext context)
{
    string requestJson = JsonConvert.SerializeObject(request);
    context.LogInfo($"Received request:\n{requestJson}");

    CustomResourceResult outcome = await this._Handler.ExecuteAsync(request, context);

    if (!outcome.IsSuccess)
    {
        context.LogError("Custom resource handler failed to run successfully.");
        return;
    }

    context.LogInfo("Successfully ran custom resource handler.");
}
/// <summary>
/// Initiates a lambda function for each service that we want to get from the price list api.
/// Services are invoked one after another with the Event invocation type.
/// </summary>
/// <param name="ev">The SNS event that triggered this function; it is only logged</param>
/// <param name="context">The Lambda context used for logging and forwarded as client context</param>
/// <returns>A task that completes when every kickoff request has been sent</returns>
/// <remarks>
/// When an invocation fails the error is logged, an SNS notification is sent and the
/// original exception is rethrown, so remaining services are not started.
/// </remarks>
public async Task LaunchWorkersAsync(SNSEvent ev, ILambdaContext context)
{
    _context = context;

    context.LogInfo(JsonConvert.SerializeObject(ev));

    IEnumerable<string> services = Constants.ReservableServices;

    // Since Amazon EC2 has savings plans now, calculating RIs doesn't really add much
    // value, so only do them if they're specifically opted in
    if (!Boolean.TryParse(System.Environment.GetEnvironmentVariable("ComputeEC2"), out bool doEC2) || !doEC2)
    {
        services = services.Where(x => x != Constants.AmazonEC2);
    }

    foreach (string service in services)
    {
        try
        {
            InvokeRequest Req = new InvokeRequest()
            {
                // EC2 is dispatched to its own function, every other service shares one
                FunctionName = (service == Constants.AmazonEC2)
                    ? System.Environment.GetEnvironmentVariable("EC2FunctionName")
                    : System.Environment.GetEnvironmentVariable("FunctionName"),
                Payload = $"{{\"service\":\"{service}\"}}",
                InvocationType = InvocationType.Event,
                ClientContext = JsonConvert.SerializeObject(context.ClientContext, Formatting.None),
            };

            InvokeResponse lambdaResponse = await lambdaClient.InvokeAsync(Req);

            context.LogInfo($"Completed kickoff for {service} with http status {(int)lambdaResponse.StatusCode}.");
        }
        catch (Exception e)
        {
            context.LogError(e);
            string message = $"[ERROR] {DateTime.Now} {{{context.AwsRequestId}}} : There was a problem creating a lambda invocation request for service {service} - {e.Message}";
            await SNSNotify(message, context);

            // Rethrow without resetting the stack trace
            throw;
        }
    }

    context.LogInfo("All kickoff requests completed.");
}
/// <summary>
/// Gets the last read query execution id stored in an s3 object.
/// </summary>
/// <param name="bucket">The bucket holding the marker object</param>
/// <param name="key">The key of the marker object</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>
/// The full text content of the object, or an empty string when the object could
/// not be read (the failure is logged).
/// </returns>
private static async Task<string> GetLastQueryExecutionIdAsync(string bucket, string key, ILambdaContext context)
{
    string LastQueryExecutionId = String.Empty;

    try
    {
        using (Stream Content = await _S3Client.GetObjectStreamAsync(bucket, key, null))
        {
            using (StreamReader Reader = new StreamReader(Content))
            {
                // Read asynchronously so the network read does not block inside an async method
                LastQueryExecutionId = await Reader.ReadToEndAsync();
            }
        }
    }
    catch (Exception e)
    {
        context.LogError(e);
    }

    return LastQueryExecutionId;
}
/// <summary>
/// Updates the contents of the marker file with the latest query execution id.
/// </summary>
/// <param name="bucket">The bucket holding the marker object</param>
/// <param name="key">The key of the marker object</param>
/// <param name="queryExecutionId">The id written as the plain text body of the object</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>A task that completes when the upload has been attempted</returns>
/// <remarks>Upload failures are logged and reported through SNS; they are not rethrown.</remarks>
private static async Task SetLastQueryExecutionIdAsync(string bucket, string key, string queryExecutionId, ILambdaContext context)
{
    try
    {
        PutObjectRequest Request = new PutObjectRequest()
        {
            BucketName = bucket,
            Key = key,
            ContentType = "text/plain",
            ContentBody = queryExecutionId
        };

        await _S3Client.PutObjectAsync(Request);
    }
    catch (Exception e)
    {
        // C# interpolation uses {x}; a leading '$' would be emitted literally into the URI
        string Message = $"Failed to upload last query execution marker {queryExecutionId} to s3://{bucket}/{key}.";
        context.LogError(Message, e);
        await SNSNotify(e, Message, context);
    }
}
/// <summary>
/// Transfers an object to the destination bucket under a key built from the prefix
/// pattern, carrying the source object's tags over to the destination.
/// </summary>
/// <param name="sourceBucket">The bucket containing the source object</param>
/// <param name="sourceKey">The key of the source object</param>
/// <param name="destinationBucket">The bucket receiving the object</param>
/// <param name="prefixPattern">
/// A composite format string; placeholders {0}, {1}, ... are replaced with the
/// forward-slash separated parts of the source key
/// </param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>The response from the copy operation; a non-OK status is logged, not thrown</returns>
private static async Task<CopyObjectResponse> Copy(string sourceBucket, string sourceKey, string destinationBucket, string prefixPattern, ILambdaContext context)
{
    // The S3 key prefixes are separated with a forward slash
    string[] Parts = sourceKey.Split('/');

    // The key parts are supplied as the format arguments
    string DestinationKey = String.Format(prefixPattern, Parts);
    string DestinationUri = $"s3://{destinationBucket}/{DestinationKey}";

    context.LogInfo($"Using destination: {DestinationUri}");

    GetObjectTaggingRequest TagRequest = new GetObjectTaggingRequest()
    {
        BucketName = sourceBucket,
        Key = sourceKey
    };

    GetObjectTaggingResponse TagResponse = await _S3Client.GetObjectTaggingAsync(TagRequest);

    CopyObjectRequest CopyRequest = new CopyObjectRequest()
    {
        DestinationBucket = destinationBucket,
        SourceBucket = sourceBucket,
        SourceKey = sourceKey,
        DestinationKey = DestinationKey,
        TagSet = TagResponse.Tagging
    };

    // NOTE(review): the second argument is always true; given the "moved" log message below
    // it presumably requests deletion of the source after the copy - confirm against the helper
    CopyObjectResponse Response = await _S3Client.CopyOrMoveObjectAsync(CopyRequest, true);

    if (Response.HttpStatusCode == HttpStatusCode.OK)
    {
        context.LogInfo($"Successfully moved s3://{sourceBucket}/{sourceKey} to {DestinationUri}.");
    }
    else
    {
        // C# interpolation uses {x}; the previous "${...}" emitted a literal '$' before the status code
        context.LogError($"Unsuccessful copy of s3://{sourceBucket}/{sourceKey} to {DestinationUri} : {(int)Response.HttpStatusCode}");
    }

    return Response;
}
/// <summary>
/// Sets the contents of the file in S3 that contain the list of Ids that need to be retried because they weren't finished
/// </summary>
/// <param name="bucket">
/// The bucket to write to; when null or empty the value of the RETRY_BUCKET environment variable is used
/// </param>
/// <param name="key">
/// The object key to write to; when null or empty the value of the RETRY_KEY environment variable is used
/// </param>
/// <param name="ids">The ids to store, written one per line</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>The put object response, or null when the upload failed (the failure is logged and reported through SNS)</returns>
private static async Task<PutObjectResponse> SetRetryFileAsync(string bucket, string key, List<string> ids, ILambdaContext context)
{
    // Declared outside the try block so the failure message reports the location actually used
    string TargetBucket = bucket;
    string TargetKey = key;

    try
    {
        // Honor the supplied location; previously the arguments were ignored and only
        // the environment variables were consulted
        if (String.IsNullOrEmpty(TargetBucket))
        {
            TargetBucket = Environment.GetEnvironmentVariable(RETRY_BUCKET);
        }

        if (String.IsNullOrEmpty(TargetKey))
        {
            TargetKey = Environment.GetEnvironmentVariable(RETRY_KEY);
        }

        PutObjectRequest PutRequest = new PutObjectRequest()
        {
            BucketName = TargetBucket,
            Key = TargetKey,
            ContentBody = String.Join("\n", ids),
            ContentType = "text/plain"
        };

        return await _S3Client.PutObjectAsync(PutRequest);
    }
    catch (Exception e)
    {
        // C# interpolation uses {x}; a leading '$' would be emitted literally into the URI
        string Message = $"Failed to set the retry file at s3://{TargetBucket}/{TargetKey}.";
        context.LogError(Message, e);
        await SNSNotify(e, Message, context);
        return null;
    }
}
/// <summary>
/// Logs an exception at ERROR level by forwarding it, together with an empty
/// message, to the overload that takes a message and an exception.
/// </summary>
/// <param name="context">The ILambdaContext</param>
/// <param name="ex">The exception to log</param>
/// <returns>Whatever the message-and-exception overload returns</returns>
public static ILambdaContext LogError(this ILambdaContext context, Exception ex)
    => context.LogError(String.Empty, ex);
/// <summary>
/// Converts the price list data from csv into our formatted csv. Rows are grouped by
/// SKU, each group is converted into reserved instance pricing terms, and the terms
/// are written to the supplied writer.
/// </summary>
/// <param name="csv">The price list csv content; the stream is disposed by this method</param>
/// <param name="writer">The writer that receives the converted records</param>
/// <returns>A task that completes when all records have been written</returns>
/// <remarks>
/// A failure converting one SKU group is logged and reported, and the group is skipped.
/// Any other failure is logged, reported and rethrown.
/// </remarks>
private async Task GetFromCsv(Stream csv, CsvWriter writer)
{
    // Find the beginning of the header line
    // and remove the version data, etc from the csv
    // NOTE(review): presumably IndexOf returns a negative value when the marker is
    // missing, which would make the Position assignment throw - confirm against the helper
    long startIndex = csv.IndexOf(skuStringBytes);
    csv.Position = startIndex;

    Dictionary<string, List<CsvRowItem>> rows = new Dictionary<string, List<CsvRowItem>>();

    try
    {
        using (StreamReader streamReader = new StreamReader(csv))
        {
            // Make all of the headers lowercase so we don't have to worry about
            // case sensitivity later
            var config = new CsvConfiguration(CultureInfo.InvariantCulture)
            {
                PrepareHeaderForMatch = args => args.Header.ToLower()
            };

            using (CsvReader reader = new CsvReader(streamReader, config))
            {
                reader.Read(); // Advance to the next row, which is the header row
                reader.ReadHeader(); // Read the headers

                while (reader.Read()) // Read all lines in the CSV
                {
                    // Will return null if it's a record we're not concerned about
                    CsvRowItem row = CsvRowItem.Build(reader);

                    if (row == null)
                    {
                        continue;
                    }

                    // Single lookup instead of ContainsKey + Add + indexer
                    List<CsvRowItem> skuRows;

                    if (!rows.TryGetValue(row.Sku, out skuRows))
                    {
                        skuRows = new List<CsvRowItem>();
                        rows.Add(row.Sku, skuRows);
                    }

                    skuRows.Add(row);
                }
            }
        }

        List<ReservedInstancePricingTerm> terms = new List<ReservedInstancePricingTerm>();

        foreach (KeyValuePair<string, List<CsvRowItem>> item in rows)
        {
            try
            {
                // Force the list to be enumerated to throw exceptions here
                // and catch them, for example if we can't find an on demand
                // pricing term in this set
                List<ReservedInstancePricingTerm> row = ReservedInstancePricingTerm.BuildFromCsv(item).ToList();
                terms.AddRange(row);
            }
            catch (Exception e)
            {
                _context.LogError(e);
                await SNSNotify(e, _context);

                // Don't throw, at least populate with data that has all
                // required info
            }
        }

        writer.WriteRecords<ReservedInstancePricingTerm>(terms);
    }
    catch (Exception e)
    {
        _context.LogError(e);
        await SNSNotify(e, _context);

        // Rethrow without resetting the stack trace
        throw;
    }
}
/// <summary>
/// Executes the lambda function to get the price list data for the requested service,
/// converts it to csv and uploads the result to S3.
/// </summary>
/// <param name="req">The request naming the service whose price list is processed</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>A task that completes when the csv has been uploaded</returns>
/// <exception cref="Exception">
/// Thrown when no service is supplied. Any failure while retrieving, converting or
/// uploading the data is logged, reported through SNS and rethrown.
/// </exception>
/// <remarks>
/// Reads the BUCKET, DELIMITER and PRICELIST_FORMAT environment variables. The format
/// defaults to "csv" and the delimiter to the class default when they are not set.
/// </remarks>
public async Task RunForServiceAsync(ServiceRequest req, ILambdaContext context)
{
    _context = context;

    if (req == null || String.IsNullOrEmpty(req.Service))
    {
        string message = "No service was provided in the service request.";
        context.LogError(message);
        await SNSNotify(message, context);
        throw new Exception(message);
    }

    // Get the product price data for the service
    context.LogInfo($"Getting product data for {req.Service}");

    string bucket = System.Environment.GetEnvironmentVariable("BUCKET");
    string delimiter = System.Environment.GetEnvironmentVariable("DELIMITER");
    string inputFormat = System.Environment.GetEnvironmentVariable("PRICELIST_FORMAT");

    if (String.IsNullOrEmpty(inputFormat))
    {
        inputFormat = "csv";
    }

    inputFormat = inputFormat.ToLower().Trim();

    context.LogInfo($"Using price list format: {inputFormat}");

    if (String.IsNullOrEmpty(delimiter))
    {
        delimiter = defaultDelimiter;
    }

    // This holds the disposable stream and writer objects
    // that need to be disposed at the end
    List<IDisposable> disposables = new List<IDisposable>();

    try
    {
        // Will hold the stream of price data content that the
        // transfer utility will send
        MemoryStream memoryStreamOut = new MemoryStream();
        disposables.Add(memoryStreamOut);

        // Provided to the csv writer to write to the memory stream
        TextWriter streamWriter = new StreamWriter(memoryStreamOut);
        disposables.Add(streamWriter);

        // The csv writer to write the price data objects
        var config = new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            Delimiter = delimiter
        };

        CsvWriter csvWriter = new CsvWriter(streamWriter, config);
        disposables.Add(csvWriter);

        // Write the header to the csv
        csvWriter.WriteHeader<ReservedInstancePricingTerm>();
        csvWriter.NextRecord();

        // Create the product request with the right format
        GetProductRequest productRequest = new GetProductRequest(req.Service)
        {
            Format = inputFormat.Equals("json", StringComparison.OrdinalIgnoreCase) ? Format.JSON : Format.CSV
        };

        context.LogInfo("Getting price list offer file.");

        // Retrieve the finished get product price data response
        GetProductResponse response = await priceListClient.GetProductAsync(productRequest);
        string service;

        try
        {
            service = response.ServiceCode;

            context.LogInfo("Parsing price list data.");

            // Fill the output stream
            await this.FillOutputStreamWriter(response.Content, csvWriter, productRequest.Format);

            // Make sure everything is written out since we don't dispose
            // of these till later, if the textwriter isn't flushed
            // you will lose content from the csv file
            csvWriter.Flush();
            streamWriter.Flush();
        }
        finally
        {
            // Release the price list response before the upload starts, and also
            // when parsing fails so it is not leaked
            if (response != null)
            {
                response.Dispose();
            }
        }

        await UploadCsvToS3(memoryStreamOut, bucket, service, context);

        context.LogInfo("Completed upload");
    }
    catch (Exception e)
    {
        context.LogError(e);
        string message = $"[ERROR] {DateTime.Now} {{{context.AwsRequestId}}} : There was a problem executing lambda for service {req.Service} - {e.Message}\n{e.StackTrace}";
        await SNSNotify(message, context);

        // Rethrow without resetting the stack trace
        throw;
    }
    finally
    {
        // Dispose all of the streams and writers used to
        // write the CSV content, we need to dispose of these here
        // so the memory stream doesn't get closed by disposing
        // of the writers too early, which will cause the transfer utility
        // to fail the upload. Dispose in reverse creation order so each
        // writer is closed before the stream it wraps.
        for (int i = disposables.Count - 1; i >= 0; i--)
        {
            try
            {
                disposables[i].Dispose();
            }
            catch
            {
                // Best-effort cleanup; a disposal failure must not mask the real outcome
            }
        }

        // Make sure memory is cleaned up
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}
/// <summary>
/// Dispatches a custom resource request to the create, update or delete handler that
/// matches its request type and then submits the handler's response. While this
/// method can be overridden, you will probably not need to.
/// </summary>
/// <param name="request">The custom resource request</param>
/// <param name="context">The ILambdaContext object</param>
/// <returns>The result of submitting the response; its details are logged when it is not successful</returns>
/// <exception cref="ArgumentNullException">Thrown when request or context is null</exception>
/// <exception cref="ArgumentException">Thrown when the request type is not a known stack operation</exception>
public virtual async Task<CustomResourceResult> ExecuteAsync(CustomResourceRequest request, ILambdaContext context)
{
    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    if (context == null)
    {
        throw new ArgumentNullException(nameof(context));
    }

    CustomResourceResponse handlerResponse;

    switch (request.RequestType)
    {
        case CustomResourceRequest.StackOperation.CREATE:
            handlerResponse = await this.CreateAsync(request, context);
            break;

        case CustomResourceRequest.StackOperation.DELETE:
            handlerResponse = await this.DeleteAsync(request, context);
            break;

        case CustomResourceRequest.StackOperation.UPDATE:
            handlerResponse = await this.UpdateAsync(request, context);
            break;

        default:
            throw new ArgumentException($"Unknown stack operation: {request.RequestType}.");
    }

    CustomResourceResult submission = await helper.PutCustomResourceResponseAsync(request, handlerResponse);

    if (submission.IsSuccess)
    {
        return submission;
    }

    // Record everything available about the failed submission
    context.LogError(JsonConvert.SerializeObject(submission.Response));

    if (submission.S3Response != null)
    {
        context.LogError(JsonConvert.SerializeObject(submission.S3Response));
    }

    if (submission.Exception != null)
    {
        context.LogError(submission.Exception);
    }

    return submission;
}
/// <summary>
/// Processes a single manifest file and all of the report keys it contains. The report
/// files are copied to the destination bucket first, then every other object under
/// the same destination prefix is deleted.
/// </summary>
/// <param name="item">The S3 event record that points at the manifest file</param>
/// <param name="destinationBucket">The bucket the report files are copied to and cleaned up in</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>
/// The parsed manifest when every step succeeded. Null when the event type is not an
/// object-created Put or Post, when the manifest lists no report keys, or when a copy,
/// list or delete step failed; each of those cases is logged and reported through SNS.
/// </returns>
/// <remarks>Failures while downloading or parsing the manifest are not caught here.</remarks>
private static async Task<Manifest> ProcessItemAsync(S3EventNotificationRecord item, string destinationBucket, ILambdaContext context)
{
    context.LogInfo(JsonConvert.SerializeObject(item));

    // Make sure the event was when a new object was created
    if (item.EventName != EventType.ObjectCreatedPut && item.EventName != EventType.ObjectCreatedPost)
    {
        string Message = $"This Lambda function was triggered by a non ObjectCreated Put or Post event, {item.EventName}, for object {item.S3.Object.Key}; check the CloudFormation template configuration and S3 Event setup.";
        context.LogWarning(Message);
        await SNSNotify(Message, context);
        return null;
    }

    // Get the manifest file contents
    GetObjectRequest Request = new GetObjectRequest()
    {
        BucketName = item.S3.Bucket.Name,
        Key = item.S3.Object.Key
    };

    string Body = "";

    using (GetObjectResponse Response = await _S3Client.GetObjectAsync(Request))
    {
        using (Stream ResponseStream = Response.ResponseStream)
        {
            using (StreamReader Reader = new StreamReader(ResponseStream))
            {
                Body = await Reader.ReadToEndAsync();
            }
        }
    }

    Manifest ManifestFile = Manifest.Build(Body);
    string Prefix = GetDestinationPrefix(ManifestFile);

    // Build the destination key map to link source key to destination key
    Dictionary<string, string> DestinationKeyMap = ManifestFile.ReportKeys.ToDictionary(x => x, x => $"{Prefix}/{Path.GetFileName(x)}");

    // If there are no destination keys
    // then there is nothing to do, return
    if (!DestinationKeyMap.Any())
    {
        string Message = $"No destination keys produced for s3://{Request.BucketName}/{Request.Key}";
        context.LogWarning(Message);
        await SNSNotify(Message, context);
        return null;
    }

    // Copy all of the files over first to replace existing files, this way there
    // is no period of time where a file may not exist and break an active query
    List<Task<CopyResponse>> CopyTasks = new List<Task<CopyResponse>>();

    // Initiate a copy object task for each key. The destinationBucket parameter is used
    // for the copy as well as for the later list and delete, so all steps target the same bucket.
    foreach (KeyValuePair<string, string> KeySet in DestinationKeyMap)
    {
        try
        {
            context.LogInfo($"Copying CUR from s3://{item.S3.Bucket.Name}/{KeySet.Key} to s3://{destinationBucket}/{KeySet.Value}");
            CopyTasks.Add(CopyObjectAsync(KeySet.Key, KeySet.Value, item.S3.Bucket.Name, destinationBucket));
        }
        catch (Exception e)
        {
            string Message = $"Failed to add a copy object task to the queue for s3://{item.S3.Bucket.Name}/{KeySet.Key} to s3://{destinationBucket}/{KeySet.Value}.";
            context.LogError(Message, e);
            await SNSNotify(Message, context);
            return null;
        }
    }

    // Process the copy object results
    foreach (Task<CopyResponse> Response in CopyTasks.Interleaved())
    {
        try
        {
            CopyResponse Result = await Response;

            if (Result.IsError)
            {
                string Message = $"Failed to copy s3://{Result.SourceBucket}/{Result.SourceKey} to s3://{Result.DestinationBucket}/{Result.DestinationKey}.";
                context.LogError(Message, Result.Exception);
                await SNSNotify(Message, context);
                return null;
            }
            else
            {
                if (Result.Response.HttpStatusCode != HttpStatusCode.OK)
                {
                    string Message = $"Failed to copy s3://{Result.SourceBucket}/{Result.SourceKey} to s3://{Result.DestinationBucket}/{Result.DestinationKey} with http code {(int)Result.Response.HttpStatusCode}.";
                    context.LogError(Message);
                    await SNSNotify(Message, context);
                    return null;
                }
                else
                {
                    context.LogInfo($"Successfully copied CUR from s3://{Result.SourceBucket}/{Result.SourceKey} to s3://{Result.DestinationBucket}/{Result.DestinationKey}.");
                }
            }
        }
        catch (Exception e)
        {
            string Message = $"Internal error processing the copy async task.";
            context.LogError(Message, e);
            await SNSNotify(Message, context);
            return null;
        }
    }

    // Delete all of the keys in the destination that are not the files we just copied over
    List<KeyVersion> KeysToDelete;

    try
    {
        // Hash the copied keys once so each listed object is checked in constant time
        HashSet<string> CopiedKeys = new HashSet<string>(DestinationKeyMap.Values);

        // Find all keys under the same prefix, and that aren't one of the keys of the files that have been copied
        KeysToDelete = await ListAllObjectsAsync(destinationBucket, Prefix, x => x.Where(y => !CopiedKeys.Contains(y.Key)));
    }
    catch (Exception e)
    {
        context.LogError(e);
        await SNSNotify($"{e.Message}\n{e.StackTrace}", context);
        return null;
    }

    // Delete the old CUR files in the destination bucket
    try
    {
        if (KeysToDelete != null && KeysToDelete.Any())
        {
            int DeletedCount = await DeleteObjectsAsync(KeysToDelete, destinationBucket);

            if (DeletedCount != KeysToDelete.Count)
            {
                string Message = $"Unable to delete all objects, expected to delete {KeysToDelete.Count} but only deleted {DeletedCount}.";
                context.LogError(Message);
                await SNSNotify(Message, context);
                return null;
            }
            else
            {
                context.LogInfo($"Successfully deleted {DeletedCount} objects.");
            }
        }
    }
    catch (Exception e)
    {
        string Message = "Unable to delete all old CUR files.";
        context.LogError(Message, e);
        await SNSNotify(Message, context);
        return null;
    }

    return ManifestFile;
}
/// <summary>
/// Entrypoint for the Lambda function when S3 notifications are delivered through SNS.
/// Each SNS message is deserialized into a set of S3 records and every record's object
/// is passed to Copy using the configured destination bucket and prefix pattern.
/// </summary>
/// <param name="request">The SNS event whose messages carry S3 notifications</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>A task that completes when every record has been attempted</returns>
/// <remarks>
/// A failure on one record or message is logged and reported, then processing continues.
/// </remarks>
public async Task ExecSNS(SNSEvent request, ILambdaContext context)
{
    // Both helpers log and send a failure notification themselves when the value is missing
    string DestinationBucket = await GetDestinationBucket(context);

    if (String.IsNullOrEmpty(DestinationBucket))
    {
        return;
    }

    string PrefixPattern = await GetPrefixPattern(context);

    if (String.IsNullOrEmpty(PrefixPattern))
    {
        return;
    }

    // NOTE(review): the DELETE_SOURCE environment variable used to be parsed here but the
    // value was never used, and Copy takes no parameter for it. Wire it through Copy if
    // a copy-without-delete mode is required.

    foreach (SNSRecord Record in request.Records)
    {
        try
        {
            string Message = Record.Sns.Message;

            if (S3TestMessage.IsTestMessage(Message))
            {
                context.LogInfo($"Processing test event from SNS: {Message}");

                // NOTE(review): this ends the whole invocation, so any records after a
                // test message are skipped - confirm that is intended
                return;
            }

            SNSS3RecordSet RecordSet = JsonConvert.DeserializeObject<SNSS3RecordSet>(Message);

            foreach (SNSS3Record S3Record in RecordSet.Records)
            {
                try
                {
                    string Key = S3Record.S3.Object.Key;
                    string Bucket = S3Record.S3.Bucket.Name;

                    await Copy(Bucket, Key, DestinationBucket, PrefixPattern, context);
                }
                catch (AggregateException e)
                {
                    context.LogError(e);

                    // InnerException is null for an AggregateException without inner exceptions
                    await SendFailureSNS(e.InnerException ?? e, context);
                }
                catch (Exception e)
                {
                    context.LogError(e);
                    await SendFailureSNS(e, context);
                }
            }
        }
        catch (AggregateException e)
        {
            context.LogError(e);
            await SendFailureSNS(e.InnerException ?? e, context);
        }
        catch (Exception e)
        {
            context.LogError(e);
            await SendFailureSNS(e, context);
        }
    }
}
/// <summary>
/// If a Glue database name and job name are configured, starts the Glue job for the
/// given table after the files have been copied.
/// </summary>
/// <param name="table">The table name passed to the job as the --table argument</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>A task that completes when the job start has been attempted</returns>
/// <exception cref="ArgumentNullException">Thrown when table is null or empty</exception>
/// <remarks>
/// Missing configuration and failures to start the job are logged and reported through
/// SNS; they are not thrown.
/// </remarks>
private static async Task RunGlueJob(string table, ILambdaContext context)
{
    if (String.IsNullOrEmpty(table))
    {
        throw new ArgumentNullException(nameof(table));
    }

    if (String.IsNullOrEmpty(_GlueDatabaseName))
    {
        // This branch runs when the name is missing, so the message must say "not provided"
        string Message = "The Glue database name was not provided. Not running job.";
        context.LogWarning(Message);
        await SNSNotify(Message, context);
        return;
    }

    if (String.IsNullOrEmpty(_GlueJobName))
    {
        string Message = "The Glue job name for the job was not provided as an environment variable. Not running job.";
        context.LogWarning(Message);
        await SNSNotify(Message, context);
        return;
    }

    context.LogInfo($"Running glue job on table {table} in database {_GlueDatabaseName}.");

    try
    {
        StartJobRunRequest Request = new StartJobRunRequest()
        {
            JobName = _GlueJobName,
            Timeout = 1440, // 24 Hours
            Arguments = new Dictionary<string, string>()
            {
                { "--table", table },
                { "--database", _GlueDatabaseName }
            }
        };

        // The destination bucket argument is optional
        if (!String.IsNullOrEmpty(_GlueDestinationBucket))
        {
            Request.Arguments.Add("--destination_bucket", _GlueDestinationBucket);
        }

        StartJobRunResponse Response = await _GlueClient.StartJobRunAsync(Request);

        if (Response.HttpStatusCode != HttpStatusCode.OK)
        {
            // C# interpolation uses {x}; the previous "${...}" emitted a literal '$' before the status code
            string Message = $"Failed to start job with status code {(int)Response.HttpStatusCode}";
            context.LogError(Message);
            await SNSNotify(Message, context);
        }
        else
        {
            context.LogInfo($"Successfully started job {Response.JobRunId}");
        }
    }
    catch (Exception e)
    {
        string Message = "Failed to start Glue job.";
        context.LogError(Message, e);
        await SNSNotify(Message + $" {e.Message}", context);
    }
}
/// <summary>
/// Creates or updates a glue table for the new CUR files. This makes sure any changes in the columns are captured
/// and applied to the table. This will end up creating a new table for each billing period.
/// </summary>
/// <param name="manifest">The CUR manifest supplying the billing period, content type, compression and columns</param>
/// <param name="context">The Lambda context used for logging</param>
/// <returns>
/// The table name (the billing period start formatted as yyyy-MM-dd) when the table was
/// created or updated. An empty string when no database name is configured, the content
/// type is neither csv nor parquet, the database could not be created, or the table
/// request returned a non-OK status.
/// </returns>
/// <remarks>
/// The database is created when it does not exist. Exceptions from the table calls other
/// than "entity not found" on the lookup are not caught here.
/// </remarks>
private static async Task<string> CreateOrUpdateGlueTable(Manifest manifest, ILambdaContext context)
{
    if (String.IsNullOrEmpty(_GlueDatabaseName))
    {
        string Message = "No Glue database name defined, cannot create a table.";
        context.LogWarning(Message);
        await SNSNotify(Message, context);
        return String.Empty;
    }

    string Date = manifest.BillingPeriod.Start.ToString("yyyy-MM-dd");

    // The format is the part of the content type after the last slash. Invariant casing
    // is used because the value is a machine identifier, not display text.
    string Format = manifest.ContentType.Substring(manifest.ContentType.LastIndexOf('/') + 1).ToLowerInvariant();

    Dictionary<string, string> Parameters;
    StorageDescriptor Descriptor;

    switch (Format)
    {
        case "csv":
            {
                Parameters = new Dictionary<string, string>()
                {
                    { "EXTERNAL", "TRUE" },
                    { "skip.header.line.count", "1" },
                    { "columnsOrdered", "true" },
                    { "compressionType", manifest.Compression.ToString().ToLowerInvariant() },
                    { "classification", Format }
                };

                Descriptor = new StorageDescriptor()
                {
                    // Every csv column is typed as string and named category/name
                    Columns = manifest.Columns.Select(x => new Amazon.Glue.Model.Column() { Name = $"{x.Category}/{x.Name}", Type = "string" }).ToList(),
                    InputFormat = "org.apache.hadoop.mapred.TextInputFormat",
                    OutputFormat = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
                    Location = $"s3://{_DestinationBucket}/{GetDestinationPrefix(manifest)}",
                    SerdeInfo = new SerDeInfo()
                    {
                        Name = "OpenCSVSerde",
                        SerializationLibrary = "org.apache.hadoop.hive.serde2.OpenCSVSerde",
                        Parameters = new Dictionary<string, string>()
                        {
                            { "escapeChar", "\\" },
                            { "quoteChar", "\"" },
                            { "separatorChar", "," }
                        }
                    }
                };

                break;
            }
        case "parquet":
            {
                Parameters = new Dictionary<string, string>()
                {
                    { "EXTERNAL", "TRUE" },
                    { "compressionType", manifest.Compression.ToString().ToLowerInvariant() },
                    { "classification", Format }
                };

                Descriptor = new StorageDescriptor()
                {
                    // Parquet columns keep the manifest type, falling back to string when none is given
                    Columns = manifest.Columns.Select(x => new Amazon.Glue.Model.Column() { Name = x.Name, Type = (!String.IsNullOrEmpty(x.Type) ? x.Type.ToLowerInvariant() : "string") }).ToList(),
                    InputFormat = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat",
                    OutputFormat = "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat",
                    Location = $"s3://{_DestinationBucket}/{GetDestinationPrefix(manifest)}",
                    SerdeInfo = new SerDeInfo()
                    {
                        Name = "ParquetHiveSerDe",
                        SerializationLibrary = "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
                        Parameters = new Dictionary<string, string>()
                        {
                            { "serialization.format", "1" }
                        }
                    }
                };

                break;
            }
        default:
            {
                // C# interpolation uses {x}; the previous "${...}" emitted a literal '$' before the content type
                string Message = $"Failed to create or update the database {_GlueDatabaseName} table. Unknown format type {manifest.ContentType}.";

                // Log as well as notify, like every other failure path in this method
                context.LogError(Message);
                await SNSNotify(Message, context);
                return String.Empty;
            }
    }

    // The updated table input for this particular CUR
    TableInput TblInput = new TableInput()
    {
        Description = Date,
        Name = Date,
        TableType = "EXTERNAL_TABLE",
        Parameters = Parameters,
        StorageDescriptor = Descriptor
    };

    // Make sure the database exists
    GetDatabaseRequest GetDb = new GetDatabaseRequest()
    {
        Name = _GlueDatabaseName
    };

    try
    {
        await _GlueClient.GetDatabaseAsync(GetDb);
        context.LogInfo($"Database {_GlueDatabaseName} already exists.");
    }
    catch (EntityNotFoundException)
    {
        try
        {
            CreateDatabaseRequest DbRequest = new CreateDatabaseRequest()
            {
                DatabaseInput = new DatabaseInput()
                {
                    Name = _GlueDatabaseName
                }
            };

            CreateDatabaseResponse Response = await _GlueClient.CreateDatabaseAsync(DbRequest);

            if (Response.HttpStatusCode == HttpStatusCode.OK)
            {
                context.LogInfo($"Successfully CREATED database {_GlueDatabaseName}.");
            }
            else
            {
                // A non-OK status is only logged; the table step below is still attempted
                context.LogError($"Failed to CREATE database with status code {(int)Response.HttpStatusCode}.");
            }
        }
        catch (Exception ex)
        {
            string Message = $"Failed to create the database {_GlueDatabaseName}.";
            context.LogError(Message, ex);
            await SNSNotify(Message + $" {ex.Message}", context);
            return String.Empty;
        }
    }

    // Make sure the table exists
    GetTableRequest GetTable = new GetTableRequest()
    {
        DatabaseName = _GlueDatabaseName,
        Name = Date
    };

    try
    {
        // Only used as an existence check; it throws EntityNotFoundException when the table is missing
        await _GlueClient.GetTableAsync(GetTable);

        UpdateTableRequest UpdateReq = new UpdateTableRequest()
        {
            TableInput = TblInput,
            DatabaseName = _GlueDatabaseName
        };

        UpdateTableResponse Response = await _GlueClient.UpdateTableAsync(UpdateReq);

        if (Response.HttpStatusCode == HttpStatusCode.OK)
        {
            context.LogInfo($"Successfully UPDATED table {TblInput.Name} in database {_GlueDatabaseName}.");
            return TblInput.Name;
        }
        else
        {
            string Message = $"Failed to UPDATE table with status code {(int)Response.HttpStatusCode}.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            return String.Empty;
        }
    }
    catch (EntityNotFoundException) // This means the table does not exist
    {
        CreateTableRequest CreateReq = new CreateTableRequest()
        {
            TableInput = TblInput,
            DatabaseName = _GlueDatabaseName
        };

        CreateTableResponse Response = await _GlueClient.CreateTableAsync(CreateReq);

        if (Response.HttpStatusCode == HttpStatusCode.OK)
        {
            context.LogInfo($"Successfully CREATED table {TblInput.Name} in database {_GlueDatabaseName}.");
            return TblInput.Name;
        }
        else
        {
            string Message = $"Failed to CREATE table with status code {(int)Response.HttpStatusCode}.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            return String.Empty;
        }
    }
}
/// <summary>
/// Lambda entry point. Starts a processing task for every S3 event record whose key is
/// a valid manifest file and, as each task completes, creates or updates the Glue table
/// for the resulting manifest and then runs the Glue job for that table.
/// </summary>
/// <param name="s3Event">The S3 event notification delivered to the function.</param>
/// <param name="context">The Lambda context used for logging and passed to the helpers.</param>
/// <returns>A task that completes once every record has been handled.</returns>
public async Task Exec(S3Event s3Event, ILambdaContext context)
{
    context.LogInfo($"Recevied S3 Event : {JsonConvert.SerializeObject(s3Event)}");

    // The destination bucket is the only required setting; without it nothing can be processed
    if (String.IsNullOrEmpty(_DestinationBucket))
    {
        string MissingBucket = "The environment variable DESTINATION_S3_BUCKET was not set.";
        context.LogError(MissingBucket);
        await SNSNotify(MissingBucket, context);
        return;
    }

    // One in-flight task per manifest file found in the event
    List<Task<Manifest>> Pending = new List<Task<Manifest>>();

    foreach (S3EventNotificationRecord Record in s3Event.Records)
    {
        if (!ValidManifestFile(Record.S3.Object.Key))
        {
            context.LogInfo($"The object s3://{Record.S3.Bucket.Name}/{Record.S3.Object.Key} is not a top level manifest file");
            continue;
        }

        Pending.Add(ProcessItemAsync(Record, _DestinationBucket, context));
    }

    // Handle the tasks in the order provided by Interleaved()
    foreach (Task<Manifest> Completed in Pending.Interleaved())
    {
        try
        {
            Manifest Parsed = await Completed;

            if (Parsed == null)
            {
                string NoResult = "A task did not return successfully";
                context.LogWarning(NoResult);
                await SNSNotify(NoResult, context);
                continue;
            }

            // Create or update the Glue data catalog table for this CUR
            string GlueTable = await CreateOrUpdateGlueTable(Parsed, context);

            if (String.IsNullOrEmpty(GlueTable))
            {
                string NoTable = "The CreateOrUpdateGlueTable method returned an empty string for the table name, indicating either the DB or Table could not be created.";
                context.LogWarning(NoTable);
                await SNSNotify(NoTable, context);
                continue;
            }

            // A table name came back, so hand it to the Glue job runner
            await RunGlueJob(GlueTable, context);
        }
        catch (Exception Error)
        {
            string Failure = "A process item async task failed with an exception.";
            context.LogError(Failure, Error);
            await SNSNotify($"{Failure} {Error.Message}", context);
        }
    }

    context.LogInfo("Function completed.");
}
/// <summary>
/// Entry point for the scheduled invocation. Pages through the Athena query execution ids
/// until the id stored by the previous run is reached (or there are no more pages), writes
/// the executions that succeeded or were cancelled to S3, adds the ids of running or queued
/// executions to the retry file, and finally stores the first id of the first page as the
/// new marker for the next run.
/// </summary>
/// <param name="request">The CloudWatch scheduled event that triggered the function; it is only logged.</param>
/// <param name="context">The Lambda context used for logging and passed to the helpers.</param>
/// <returns>A task that completes when processing has finished or was aborted after an error notification.</returns>
public async Task ExecAsync(CloudWatchScheduledEvent request, ILambdaContext context)
{
    context.LogInfo($"Received scheduled event:\n{JsonConvert.SerializeObject(request)}");

    // The list request for the query execution Ids, reused for every page
    ListQueryExecutionsRequest ListRequest = new ListQueryExecutionsRequest();

    // Retrieve the last query execution id that was processed, i.e. the most recent one
    // the last time it ran
    string LastReadQueryExecutionId = await GetLastQueryExecutionIdAsync(
        Environment.GetEnvironmentVariable(MARKER_BUCKET),
        Environment.GetEnvironmentVariable(MARKER_KEY),
        context
    );

    context.LogInfo($"Previous run last processed query execution id: {LastReadQueryExecutionId}.");

    // Track whether we're done in the do/while loop
    bool Finished = false;

    // Track whether this is the first time through the loop so we
    // can grab the first execution id
    bool FirstLoop = true;

    // This will be considered the most recent query, grab it here
    // and we'll write it at the end when everything's done and we're sure this all succeeded
    string NewLastQueryExecutionId = String.Empty;

    // This will count the number of finished queries written to S3 in total
    int Counter = 0;

    do
    {
        // Get the next page of query execution ids
        ListQueryExecutionsResponse ListResponse = await _AthenaClient.ListQueryExecutionsAsync(ListRequest);

        // Only inspect the status code once we know there is a response object
        if (ListResponse != null && ListResponse.HttpStatusCode != HttpStatusCode.OK)
        {
            string Message = $"The list request did not return a success status code: {(int)ListResponse.HttpStatusCode}.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            return;
        }

        // If the list response is null or doesn't have query execution ids, stop processing
        if (ListResponse == null || ListResponse.QueryExecutionIds == null || !ListResponse.QueryExecutionIds.Any())
        {
            context.LogWarning("The list response was null or the query execution Ids were null or empty.");
            break;
        }

        // If it's the first loop, remember the first id as the marker for the next run
        if (FirstLoop)
        {
            NewLastQueryExecutionId = ListResponse.QueryExecutionIds.First();
            context.LogInfo($"The new last processed query execution id will be: {NewLastQueryExecutionId}.");
            FirstLoop = false;

            if (LastReadQueryExecutionId == NewLastQueryExecutionId)
            {
                context.LogInfo("No new query execution ids.");
                break;
            }
        }

        // Batch get the query executions based on ids
        BatchGetQueryExecutionRequest BatchRequest = new BatchGetQueryExecutionRequest()
        {
            QueryExecutionIds = ListResponse.QueryExecutionIds
        };

        // If any of the ids match the last read id, then we're done listing ids since
        // we've gotten back to the start of the last run
        if (ListResponse.QueryExecutionIds.Any(x => x.Equals(LastReadQueryExecutionId)))
        {
            // Take everything until we reach the last read id
            BatchRequest.QueryExecutionIds = BatchRequest.QueryExecutionIds.TakeWhile(x => !x.Equals(LastReadQueryExecutionId)).ToList();
            Finished = true;
        }

        // Make sure there were ids in the request
        if (BatchRequest.QueryExecutionIds.Any())
        {
            // Get query execution details
            BatchGetQueryExecutionResponse BatchResponse = await _AthenaClient.BatchGetQueryExecutionAsync(BatchRequest);

            if (BatchResponse == null)
            {
                string Message = "The batch response was null, this shouldn't happen.";
                context.LogError(Message);
                await SNSNotify(Message, context);
                return;
            }

            // Make sure we received a good status code
            if (BatchResponse.HttpStatusCode != HttpStatusCode.OK)
            {
                string Message = $"The batch request did not return a success status code: {(int)BatchResponse.HttpStatusCode}.";
                context.LogError(Message);
                await SNSNotify(Message, context);
                return;
            }

            // Make sure we actually received data back
            if (BatchResponse.QueryExecutions == null || !BatchResponse.QueryExecutions.Any())
            {
                string Message = "The batch response did not contain any query executions.";
                context.LogError(Message);
                await SNSNotify(Message, context);
            }
            else
            {
                // These are all the transformed records; materialized once because they are filtered twice below
                List<AthenaQueryMetric> Records = BatchResponse.QueryExecutions.Select(x => AthenaQueryMetric.Build(x)).ToList();

                // These are the queries that either succeeded or were cancelled and are done
                List<AthenaQueryMetric> FinishedQueries = Records.Where(x => x.Status == QueryExecutionState.SUCCEEDED.Value || x.Status == QueryExecutionState.CANCELLED.Value).ToList();

                // These are the queries that are still running or are queued
                List<string> NotFinishedQueries = Records.Where(x => x.Status == QueryExecutionState.RUNNING.Value || x.Status == QueryExecutionState.QUEUED.Value).Select(x => x.QueryExecutionId).ToList();

                // This block updates the retry list stored in S3
                if (NotFinishedQueries.Any())
                {
                    context.LogInfo("Adding to the not finished queries list.");

                    PutObjectResponse Response = await UpdateRetryFileAsync(
                        Environment.GetEnvironmentVariable(RETRY_BUCKET),
                        Environment.GetEnvironmentVariable(RETRY_KEY),
                        NotFinishedQueries,
                        context
                    );

                    if (Response.HttpStatusCode != HttpStatusCode.OK)
                    {
                        string Message = $"Failed to upload retry file with status code: {(int)Response.HttpStatusCode}. Request Id: {Response.ResponseMetadata.RequestId}.";
                        context.LogError(Message);
                        await SNSNotify(Message, context);
                    }
                }

                // Deliberately an if/else rather than a "continue": a continue in a do/while jumps
                // straight to the loop condition and would skip the NextToken update below, which
                // either ends paging early or requests the same page again
                if (!FinishedQueries.Any())
                {
                    context.LogInfo("No successful queries found in this list.");
                }
                else
                {
                    // Add the finished queries to the total count
                    Counter += FinishedQueries.Count;

                    // Write the finished query data to S3
                    await WriteDataAsync(FinishedQueries, Environment.GetEnvironmentVariable(RESULT_BUCKET), Environment.GetEnvironmentVariable(OUTPUT_FORMAT), context);
                }
            }
        }

        // Always advance (or clear) the paging token so the loop condition sees the current page's state
        if (!String.IsNullOrEmpty(ListResponse.NextToken))
        {
            ListRequest.NextToken = ListResponse.NextToken;
        }
        else
        {
            ListRequest.NextToken = String.Empty;
        }
    } while (!String.IsNullOrEmpty(ListRequest.NextToken) && !Finished);

    context.LogInfo($"Finished pulling query execution data and writing to S3. Wrote {Counter} records.");

    // Only update the new last query id if it's not empty, which might happen if there are no Ids in the first list
    // response, or if the new is the same as the old, meaning we didn't process any new queries
    if (!String.IsNullOrEmpty(NewLastQueryExecutionId) && NewLastQueryExecutionId != LastReadQueryExecutionId)
    {
        await SetLastQueryExecutionIdAsync(
            Environment.GetEnvironmentVariable(MARKER_BUCKET),
            Environment.GetEnvironmentVariable(MARKER_KEY),
            NewLastQueryExecutionId,
            context
        );

        context.LogInfo($"Completed updating marker to {NewLastQueryExecutionId}.");
    }
    else
    {
        context.LogInfo("No new query executions, not updating marker.");
    }

    context.LogInfo("Function complete.");
}
/// <summary>
/// Processes the query execution ids stored in the retry file. The ids are looked up in
/// chunks of 50; executions that have succeeded or were cancelled are written to S3, ids
/// that are still running or queued are kept, and executions in any other state are neither
/// written nor kept. The retry file is rewritten only when fewer ids remain than were read.
/// </summary>
/// <param name="request">The CloudWatch scheduled event that triggered the function; it is only logged.</param>
/// <param name="context">The Lambda context used for logging and passed to the helpers.</param>
/// <returns>A task that completes when the retry pass has finished or was aborted after an error notification.</returns>
public async Task RetryAsync(CloudWatchScheduledEvent request, ILambdaContext context)
{
    context.LogInfo($"Received scheduled event for retries:\n{JsonConvert.SerializeObject(request)}");

    List<string> RetryIds = await GetRetryFileAsync(Environment.GetEnvironmentVariable(RETRY_BUCKET), Environment.GetEnvironmentVariable(RETRY_KEY), context);

    // The list is null-checked below, so it must not be dereferenced unguarded here
    int RetryCount = (RetryIds == null) ? 0 : RetryIds.Count;
    context.LogInfo($"Found {RetryCount} ids to retry.");

    if (RetryIds == null || !RetryIds.Any() || RetryIds.All(x => String.IsNullOrEmpty(x)))
    {
        context.LogInfo("No ids in the retry file.");
        return;
    }

    // Ids that are still running or queued and need to stay in the retry file
    List<string> RemainingIds = new List<string>();

    // Number of finished query records written to S3 during this pass
    int Counter = 0;

    foreach (List<string> Chunk in ChunkList<string>(RetryIds, 50))
    {
        BatchGetQueryExecutionRequest BatchRequest = new BatchGetQueryExecutionRequest()
        {
            QueryExecutionIds = Chunk
        };

        BatchGetQueryExecutionResponse BatchResponse = await _AthenaClient.BatchGetQueryExecutionAsync(BatchRequest);

        if (BatchResponse == null)
        {
            string Message = "The batch response was null, this shouldn't happen.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            return;
        }

        // Make sure we received a good status code
        if (BatchResponse.HttpStatusCode != HttpStatusCode.OK)
        {
            string Message = $"The batch request did not return a success status code: {(int)BatchResponse.HttpStatusCode}.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            return;
        }

        // Make sure we actually received data back
        if (BatchResponse.QueryExecutions == null || !BatchResponse.QueryExecutions.Any())
        {
            string Message = "The batch response did not contain any query executions.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            continue;
        }

        // These are all the transformed records; materialized once because they are filtered twice below
        List<AthenaQueryMetric> Records = BatchResponse.QueryExecutions.Select(x => AthenaQueryMetric.Build(x)).ToList();

        // These are the queries that either succeeded or were cancelled and are done
        List<AthenaQueryMetric> FinishedQueries = Records.Where(x => x.Status == QueryExecutionState.SUCCEEDED.Value || x.Status == QueryExecutionState.CANCELLED.Value).ToList();

        // These are the queries that are still running or are queued
        List<string> NotFinishedQueries = Records.Where(x => x.Status == QueryExecutionState.RUNNING.Value || x.Status == QueryExecutionState.QUEUED.Value).Select(x => x.QueryExecutionId).ToList();

        if (NotFinishedQueries.Any())
        {
            RemainingIds.AddRange(NotFinishedQueries);
        }

        if (!FinishedQueries.Any())
        {
            // Nothing to write for this chunk
            context.LogInfo("No successful queries found in this list.");
        }
        else
        {
            Counter += FinishedQueries.Count;

            await WriteDataAsync(
                FinishedQueries,
                Environment.GetEnvironmentVariable(RESULT_BUCKET),
                Environment.GetEnvironmentVariable(OUTPUT_FORMAT),
                context
            );
        }
    }

    context.LogInfo($"Finished pulling query execution data and writing to S3. Wrote {Counter} records.");

    // Only rewrite the retry file when at least one id dropped out of it
    if (RemainingIds.Count < RetryIds.Count)
    {
        context.LogInfo("Updating retry file.");
        await SetRetryFileAsync(Environment.GetEnvironmentVariable(RETRY_BUCKET), Environment.GetEnvironmentVariable(RETRY_KEY), RemainingIds, context);
        context.LogInfo("Finished updating retry file.");
    }
    else
    {
        context.LogInfo("No updates need to made to the retry file.");
    }
}
/// <summary>
/// Uploads the finished query data to S3. The records are grouped by billing period and
/// each group is written as a gzip-compressed CSV object under
/// data/billingperiod={period}/ in the target bucket, containing only that group's records.
/// Failures while writing or uploading a group are logged and sent as a notification;
/// they are not rethrown.
/// </summary>
/// <param name="finishedQueries">The query metrics to write.</param>
/// <param name="bucket">The name of the S3 bucket that receives the data files.</param>
/// <param name="format">The output format; "parquet" selects the parquet branch, any other value (including null) is treated as "csv".</param>
/// <param name="context">The Lambda context used for logging and passed to the notification helper.</param>
/// <returns>A task that completes when every group has been processed.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="finishedQueries"/> or <paramref name="context"/> is null,
/// or when <paramref name="bucket"/> is null or empty.
/// </exception>
private static async Task WriteDataAsync(IEnumerable<AthenaQueryMetric> finishedQueries, string bucket, string format, ILambdaContext context)
{
    if (finishedQueries == null)
    {
        throw new ArgumentNullException(nameof(finishedQueries));
    }

    if (String.IsNullOrEmpty(bucket))
    {
        throw new ArgumentNullException(nameof(bucket));
    }

    if (context == null)
    {
        throw new ArgumentNullException(nameof(context));
    }

    foreach (IGrouping<string, AthenaQueryMetric> Group in finishedQueries.GroupBy(x => x.BillingPeriod))
    {
        // Only this billing period's records belong in this object; materialize them once since
        // they are used for both the content and the object key
        List<AthenaQueryMetric> GroupRecords = Group.ToList();

        // Maintains all of the disposables that need to be disposed of at the end, but
        // not before the streams have been completely read and uploaded, otherwise, it causes
        // a race condition if we use a using block where the streams will close before the
        // transfer utility has finished the upload
        List<IDisposable> Disposables = new List<IDisposable>();

        // The memory stream the compressed stream will be written into
        MemoryStream MStreamOut = new MemoryStream();
        Disposables.Add(MStreamOut);

        try
        {
            switch (format)
            {
                default:
                case "csv":
                    {
                        // The Gzip Stream only writes its file footer 10 byte data when the stream is closed
                        // Calling dispose via the using block flushes and closes the stream first causing the
                        // the footer data to be written out to the memory stream. The third parameter "true"
                        // allows the memorystream to still access the gzip stream data, otherwise when trying to
                        // upload the stream via the transfer utility, it will cause an exception that the stream
                        // is closed
                        using (GZipStream Gzip = new GZipStream(MStreamOut, CompressionLevel.Optimal, true))
                        {
                            TextWriter TWriter = new StreamWriter(Gzip);
                            CsvWriter Writer = new CsvWriter(TWriter);

                            Writer.Configuration.RegisterClassMap<AthenaQueryMetricCsvMapping>();

                            Disposables.Add(Writer);
                            Disposables.Add(TWriter);

                            Writer.WriteHeader<AthenaQueryMetric>();
                            Writer.NextRecord(); // Advance the writer to the next line before
                                                 // writing the records

                            Writer.WriteRecords<AthenaQueryMetric>(GroupRecords);

                            // Make sure to flush all of the data to the stream
                            Writer.Flush();
                            TWriter.Flush();
                        }

                        break;
                    }
                case "parquet":
                    {
                        // NOTE(review): the serialization call is commented out, so this branch writes
                        // nothing into the stream, yet the upload below still runs with a .csv.gz key
                        // and a text/csv content type - confirm whether parquet should be rejected instead
                        Schema PSchema = SchemaReflector.Reflect<AthenaQueryMetric>();

                        //ParquetConvert.Serialize<AthenaQueryMetric>(finishedQueries, MStreamOut, PSchema);
                        break;
                    }
            }

            // Make the transfer utility request to post the query data csv content
            TransferUtilityUploadRequest Request = new TransferUtilityUploadRequest()
            {
                BucketName = bucket,
                Key = $"data/billingperiod={Group.Key}/{GroupRecords.First().QueryExecutionId}_{GroupRecords.Last().QueryExecutionId}.csv.gz",
                InputStream = MStreamOut,
                AutoResetStreamPosition = true,
                AutoCloseStream = true,
                ContentType = "text/csv"
            };

            using (TransferUtility XferUtil = new TransferUtility(_S3Client))
            {
                try
                {
                    context.LogInfo($"Starting file upload of {MStreamOut.Length} bytes: {Request.Key}.");

                    // Make the upload
                    await XferUtil.UploadAsync(Request);

                    context.LogInfo($"Finished upload of {Request.Key}.");
                }
                catch (Exception e)
                {
                    string Message = $"Failed to upload data file to s3://{Request.BucketName}/{Request.Key}.";
                    context.LogError(Message, e);
                    await SNSNotify(e, Message, context);
                }
            }
        }
        catch (Exception e)
        {
            context.LogError(e);
            await SNSNotify(e, context);
        }
        finally
        {
            // Dispose all of the streams and writers used to
            // write the CSV content, we need to dispose of these here
            // so the memory stream doesn't get closed by disposing
            // of the writers too early, which will cause the transfer utility
            // to fail the upload
            foreach (IDisposable Item in Disposables)
            {
                try
                {
                    Item.Dispose();
                }
                catch
                {
                    // Best-effort cleanup: a failure while disposing one item must not
                    // prevent the remaining items from being disposed
                }
            }

            // Make sure memory is cleaned up
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }
    }
}