/// <summary>
/// Re-checks the query execution Ids stored in the retry file, i.e. the ones that were still
/// running or queued the last time they were inspected. Executions that have since succeeded
/// or been cancelled are written to the result bucket, executions that are still running or
/// queued are kept in the retry file, and executions in any other state are neither written
/// nor kept.
/// </summary>
/// <param name="request">The scheduled event that triggered this run; it is only serialized into the log.</param>
/// <param name="context">The Lambda context, used for logging and passed through to the helper calls.</param>
/// <returns>A task that completes when the retry pass is done.</returns>
public async Task RetryAsync(CloudWatchScheduledEvent request, ILambdaContext context)
{
    context.LogInfo($"Received scheduled event for retries:\n{JsonConvert.SerializeObject(request)}");

    List<string> RetryIds = await GetRetryFileAsync(
        Environment.GetEnvironmentVariable(RETRY_BUCKET),
        Environment.GetEnvironmentVariable(RETRY_KEY),
        context
    );

    // The retry list may be null (it is null-checked below), so don't dereference it just to log the count
    int RetryCount = RetryIds?.Count ?? 0;
    context.LogInfo($"Found {RetryCount} ids to retry.");

    // Nothing to do when the list is missing, empty, or only holds blank entries
    if (RetryIds == null || !RetryIds.Any() || RetryIds.All(x => String.IsNullOrEmpty(x)))
    {
        context.LogInfo("No ids in the retry file.");
        return;
    }

    // The ids that are still not finished after this pass and must stay in the retry file
    List<string> RemainingIds = new List<string>();

    // The number of finished queries written to S3 in total
    int Counter = 0;

    // Process the ids in chunks of 50 per batch request
    // NOTE(review): 50 is presumably the BatchGetQueryExecution id limit - confirm against the Athena API reference
    foreach (List<string> Chunk in ChunkList<string>(RetryIds, 50))
    {
        BatchGetQueryExecutionRequest BatchRequest = new BatchGetQueryExecutionRequest()
        {
            QueryExecutionIds = Chunk
        };

        BatchGetQueryExecutionResponse BatchResponse = await _AthenaClient.BatchGetQueryExecutionAsync(BatchRequest);

        if (BatchResponse == null)
        {
            string Message = $"The batch response was null, this shouldn't happen.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            return;
        }

        // Make sure we received a good status code
        if (BatchResponse.HttpStatusCode != HttpStatusCode.OK)
        {
            string Message = $"The batch request did not return a success status code: {(int)BatchResponse.HttpStatusCode}.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            return;
        }

        // Make sure we actually received data back
        // NOTE(review): the ids of a chunk that comes back empty are not carried over to
        // RemainingIds, so they drop out of the retry file if it gets rewritten below
        if (BatchResponse.QueryExecutions == null || !BatchResponse.QueryExecutions.Any())
        {
            string Message = $"The batch response did not contain any query executions.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            continue;
        }

        // These are all the transformed records, materialized once because they are filtered twice below
        List<AthenaQueryMetric> Records = BatchResponse.QueryExecutions
            .Select(x => AthenaQueryMetric.Build(x))
            .ToList();

        // These are the queries that either succeeded or were cancelled and are done
        List<AthenaQueryMetric> FinishedQueries = Records
            .Where(x => x.Status == QueryExecutionState.SUCCEEDED.Value || x.Status == QueryExecutionState.CANCELLED.Value)
            .ToList();

        // These are the queries that are still running or are queued
        List<string> NotFinishedQueries = Records
            .Where(x => x.Status == QueryExecutionState.RUNNING.Value || x.Status == QueryExecutionState.QUEUED.Value)
            .Select(x => x.QueryExecutionId)
            .ToList();

        if (NotFinishedQueries.Any())
        {
            RemainingIds.AddRange(NotFinishedQueries);
        }

        if (FinishedQueries.Any())
        {
            Counter += FinishedQueries.Count;

            await WriteDataAsync(
                FinishedQueries,
                Environment.GetEnvironmentVariable(RESULT_BUCKET),
                Environment.GetEnvironmentVariable(OUTPUT_FORMAT),
                context
            );
        }
        else
        {
            // Nothing to write for this chunk
            context.LogInfo("No successful queries found in this list.");
        }
    }

    context.LogInfo($"Finished pulling query execution data and writing to S3. Wrote {Counter} records.");

    // Only rewrite the retry file when at least one id dropped out of it
    if (RemainingIds.Count < RetryIds.Count)
    {
        context.LogInfo("Updating retry file.");
        await SetRetryFileAsync(
            Environment.GetEnvironmentVariable(RETRY_BUCKET),
            Environment.GetEnvironmentVariable(RETRY_KEY),
            RemainingIds,
            context
        );
        context.LogInfo("Finished updating retry file.");
    }
    else
    {
        context.LogInfo("No updates need to made to the retry file.");
    }
}
/// <summary>
/// Scheduled Lambda entry point that pages through the Athena query execution Ids until it
/// reaches the Id recorded by the previous run. For each page it fetches the execution details,
/// writes the executions that succeeded or were cancelled to the result bucket, and adds the
/// ones that are still running or queued to the retry file. When paging completes, the first
/// Id of the first page is stored as the new marker for the next run.
/// </summary>
/// <param name="request">The scheduled event that triggered this run; it is only serialized into the log.</param>
/// <param name="context">The Lambda context, used for logging and passed through to the helper calls.</param>
/// <returns>A task that completes when the run is done.</returns>
public async Task ExecAsync(CloudWatchScheduledEvent request, ILambdaContext context)
{
    context.LogInfo($"Received scheduled event:\n{JsonConvert.SerializeObject(request)}");

    // The list request for the query execution Ids
    ListQueryExecutionsRequest ListRequest = new ListQueryExecutionsRequest();

    // Retrieve the last query execution id that was processed, i.e. the most recent one
    // the last time it ran
    string LastReadQueryExecutionId = await GetLastQueryExecutionIdAsync(
        Environment.GetEnvironmentVariable(MARKER_BUCKET),
        Environment.GetEnvironmentVariable(MARKER_KEY),
        context
    );

    context.LogInfo($"Previous run last processed query execution id: {LastReadQueryExecutionId}.");

    // Track whether we're done in the do/while loop
    bool Finished = false;

    // Track whether this is the first time through the loop so we
    // can grab the first execution id
    bool FirstLoop = true;

    // This will be considered the most recent query, grab it here
    // and we'll write it at the end when everything's done and we're sure this all succeeded
    string NewLastQueryExecutionId = String.Empty;

    // This will count the number of successful queries written to S3 in total
    int Counter = 0;

    do
    {
        // Get the next page of query execution ids
        ListQueryExecutionsResponse ListResponse = await _AthenaClient.ListQueryExecutionsAsync(ListRequest);

        // Guard the status check against a null response so the null case reaches the warning below
        if (ListResponse != null && ListResponse.HttpStatusCode != HttpStatusCode.OK)
        {
            string Message = $"The list request did not return a success status code: {(int)ListResponse.HttpStatusCode}.";
            context.LogError(Message);
            await SNSNotify(Message, context);
            return;
        }

        // If the list response is null or doesn't have query execution ids, stop processing
        if (ListResponse == null || ListResponse.QueryExecutionIds == null || !ListResponse.QueryExecutionIds.Any())
        {
            context.LogWarning("The list response was null or the query execution Ids were null or empty.");
            break;
        }

        // If it's the first loop
        if (FirstLoop)
        {
            NewLastQueryExecutionId = ListResponse.QueryExecutionIds.First();
            context.LogInfo($"The new last processed query execution id will be: {NewLastQueryExecutionId}.");
            FirstLoop = false;

            if (LastReadQueryExecutionId == NewLastQueryExecutionId)
            {
                context.LogInfo("No new query execution ids.");
                break;
            }
        }

        // Batch get the query executions based on ids
        BatchGetQueryExecutionRequest BatchRequest = new BatchGetQueryExecutionRequest()
        {
            QueryExecutionIds = ListResponse.QueryExecutionIds
        };

        // If any of the ids match the last read id, then we're done listing ids since
        // we've gotten back to the start of the last run
        if (ListResponse.QueryExecutionIds.Any(x => x.Equals(LastReadQueryExecutionId)))
        {
            // Take everything until we reach the last read id
            BatchRequest.QueryExecutionIds = BatchRequest.QueryExecutionIds
                .TakeWhile(x => !x.Equals(LastReadQueryExecutionId))
                .ToList();
            Finished = true;
        }

        // Make sure there were ids in the request
        if (BatchRequest.QueryExecutionIds.Any())
        {
            // Get query execution details
            BatchGetQueryExecutionResponse BatchResponse = await _AthenaClient.BatchGetQueryExecutionAsync(BatchRequest);

            if (BatchResponse == null)
            {
                string Message = $"The batch response was null, this shouldn't happen.";
                context.LogError(Message);
                await SNSNotify(Message, context);
                return;
            }

            // Make sure we received a good status code
            if (BatchResponse.HttpStatusCode != HttpStatusCode.OK)
            {
                string Message = $"The batch request did not return a success status code: {(int)BatchResponse.HttpStatusCode}.";
                context.LogError(Message);
                await SNSNotify(Message, context);
                return;
            }

            // Make sure we actually received data back
            if (BatchResponse.QueryExecutions == null || !BatchResponse.QueryExecutions.Any())
            {
                string Message = $"The batch response did not contain any query executions.";
                context.LogError(Message);
                await SNSNotify(Message, context);
            }
            else
            {
                // These are all the transformed records, materialized once because they are filtered twice below
                List<AthenaQueryMetric> Records = BatchResponse.QueryExecutions
                    .Select(x => AthenaQueryMetric.Build(x))
                    .ToList();

                // These are the queries that either succeeded or were cancelled and are done
                List<AthenaQueryMetric> FinishedQueries = Records
                    .Where(x => x.Status == QueryExecutionState.SUCCEEDED.Value || x.Status == QueryExecutionState.CANCELLED.Value)
                    .ToList();

                // These are the queries that are still running or are queued
                List<string> NotFinishedQueries = Records
                    .Where(x => x.Status == QueryExecutionState.RUNNING.Value || x.Status == QueryExecutionState.QUEUED.Value)
                    .Select(x => x.QueryExecutionId)
                    .ToList();

                // This block updates the retry list stored in S3
                if (NotFinishedQueries.Any())
                {
                    context.LogInfo("Adding to the not finished queries list.");

                    PutObjectResponse Response = await UpdateRetryFileAsync(
                        Environment.GetEnvironmentVariable(RETRY_BUCKET),
                        Environment.GetEnvironmentVariable(RETRY_KEY),
                        NotFinishedQueries,
                        context
                    );

                    if (Response.HttpStatusCode != HttpStatusCode.OK)
                    {
                        string Message = $"Failed to upload retry file with status code: {(int)Response.HttpStatusCode}. Request Id: {Response.ResponseMetadata.RequestId}.";
                        context.LogError(Message);
                        await SNSNotify(Message, context);
                    }
                }

                // Deliberately an if/else rather than a "continue": inside a do/while, "continue"
                // jumps straight to the loop condition and would skip the NextToken update below,
                // either ending the paging early or requesting the same page again
                if (FinishedQueries.Any())
                {
                    // Add the finished queries to the total count
                    Counter += FinishedQueries.Count;

                    // Write the finished query data to S3
                    await WriteDataAsync(
                        FinishedQueries,
                        Environment.GetEnvironmentVariable(RESULT_BUCKET),
                        Environment.GetEnvironmentVariable(OUTPUT_FORMAT),
                        context
                    );
                }
                else
                {
                    // Nothing to write for this page
                    context.LogInfo("No successful queries found in this list.");
                }
            }
        }

        // Always advance (or clear) the paging token so the loop condition sees the current page's state
        if (!String.IsNullOrEmpty(ListResponse.NextToken))
        {
            ListRequest.NextToken = ListResponse.NextToken;
        }
        else
        {
            ListRequest.NextToken = String.Empty;
        }

    } while (!String.IsNullOrEmpty(ListRequest.NextToken) && !Finished);

    context.LogInfo($"Finished pulling query execution data and writing to S3. Wrote {Counter} records.");

    // Only update the new last query id if it's not empty, which might happen if there are no Ids in the first list
    // response, or if the new is the same as the old, meaning we didn't process any new queries
    if (!String.IsNullOrEmpty(NewLastQueryExecutionId) && NewLastQueryExecutionId != LastReadQueryExecutionId)
    {
        await SetLastQueryExecutionIdAsync(
            Environment.GetEnvironmentVariable(MARKER_BUCKET),
            Environment.GetEnvironmentVariable(MARKER_KEY),
            NewLastQueryExecutionId,
            context
        );

        context.LogInfo($"Completed updating marker to {NewLastQueryExecutionId}.");
    }
    else
    {
        context.LogInfo($"No new query executions, not updating marker.");
    }

    context.LogInfo("Function complete.");
}