public override void Stop()
{
    _isStopped = true;
    Throttle.SetSuccess();
    _buffer.Stop();
    base.Stop();
}
/// <inheritdoc/>
protected override async Task<PutMetricDataResponse> SendRequestAsync(PutMetricDataRequest putMetricDataRequest)
{
    // Failover to the secondary region if the failover sink is enabled.
    if (_failoverSinkEnabled)
    {
        _ = FailOverToSecondaryRegion(_throttle);
    }

    PutMetricDataResponse response;
    try
    {
        response = await CloudWatchClient.PutMetricDataAsync(putMetricDataRequest);
        _throttle.SetSuccess();
    }
    catch (Exception)
    {
        _throttle.SetError();
        throw;
    }
    return response;
}
private async Task GetSequenceTokenAsync(string logGroup, string logStreamName, bool createStreamIfNotExists, CancellationToken stopToken)
{
    var request = new DescribeLogStreamsRequest
    {
        LogGroupName = logGroup,
        LogStreamNamePrefix = logStreamName
    };

    try
    {
        var describeLogsStreamsResponse = await _cloudWatchLogsClient.DescribeLogStreamsAsync(request, stopToken);
        var logStream = describeLogsStreamsResponse.LogStreams
            .FirstOrDefault(s => s.LogStreamName.Equals(logStreamName, StringComparison.CurrentCultureIgnoreCase));

        if (logStream == null)
        {
            if (createStreamIfNotExists)
            {
                await CreateLogStreamAsync(logGroup, logStreamName, stopToken);
            }
        }
        else
        {
            _sequenceToken = logStream.UploadSequenceToken;
        }

        _throttle.SetSuccess();
    }
    catch (ResourceNotFoundException rex)
    {
        // Create the log group if it doesn't exist.
        if (rex.Message.Contains("log group does not exist", StringComparison.OrdinalIgnoreCase))
        {
            _logger?.LogInformation("Log group {0} does not exist. Creating it.", logGroup);
            await CreateLogGroupAsync(logGroup, stopToken);
            if (createStreamIfNotExists)
            {
                await CreateLogStreamAsync(logGroup, logStreamName, stopToken);
            }
            return;
        }

        // Any other ResourceNotFoundException is unexpected; surface it to the caller
        // instead of swallowing it silently.
        throw;
    }
}
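// For reference, a minimal sketch of the CreateLogStreamAsync helper invoked above.
// It assumes the helper wraps the SDK call directly; the production version may add
// retry and throttle bookkeeping.
private async Task CreateLogStreamAsync(string logGroup, string logStreamName, CancellationToken stopToken)
{
    _logger?.LogInformation("Creating log stream {0} in log group {1}.", logStreamName, logGroup);
    await _cloudWatchLogsClient.CreateLogStreamAsync(new CreateLogStreamRequest
    {
        LogGroupName = logGroup,
        LogStreamName = logStreamName
    }, stopToken);

    // A newly created stream has no sequence token, so the next PutLogEvents call can pass null.
    _sequenceToken = null;
}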
public void TestThrottleStates()
{
    Throttle throttle = new Throttle(new TokenBucket(1000, 2000));

    throttle.SetError();
    throttle.SetError();
    Assert.Equal(2, throttle.ConsecutiveErrorCount);

    throttle.SetSuccess();
    Assert.Equal(0, throttle.ConsecutiveErrorCount);
}
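// A minimal sketch (not the production class) of the Throttle surface this test exercises:
// SetError() increments ConsecutiveErrorCount and SetSuccess() resets it to zero. The
// TokenBucket arguments above are assumed to be bucket capacity and refill rate; the
// actual rate-limiting behavior is omitted here.
public class ThrottleSketch
{
    public int ConsecutiveErrorCount { get; private set; }

    public void SetError() => ConsecutiveErrorCount++;

    public void SetSuccess() => ConsecutiveErrorCount = 0;
}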
protected override async Task OnNextAsync(List<Envelope<InputLogEvent>> records, long batchBytes)
{
    // If the sink has been stopped, don't keep trying to write to the file.
    if (_isStopped)
    {
        return;
    }

    // Acquire the fileLock semaphore so that only one caller can run this method at a time.
    // This does not lock the file itself; we rely on the OS to manage file locking when we open the file stream.
    await _fileLock.WaitAsync();
    try
    {
        // Create a stream writer on a new FileStream pointing to the target output file.
        // This fails if the OS already holds a lock on the file, which triggers the throttling behavior.
        // That lets us reproduce throttling at the sink level, and also helps debug issues outside of the IDE.
        using (var sw = new StreamWriter(new FileStream(FilePath, FileMode.Append, FileAccess.Write, FileShare.Read)))
        {
            foreach (var record in records)
            {
                await sw.WriteLineAsync(record.Data.Message);
            }
            await sw.FlushAsync();
        }

        await SaveBookmarks(records);
        Throttle.SetSuccess();
    }
    catch (Exception)
    {
        // If the sink has been stopped, don't requeue; just bail out.
        if (_isStopped)
        {
            return;
        }

        // This is the magic that reproduces the throttling behavior: mark the error on the
        // throttle and requeue the events, using the same logic as the CloudWatchLogs sink.
        Throttle.SetError();
        _buffer.Requeue(records, Throttle.ConsecutiveErrorCount < _maxAttempts);
    }
    finally
    {
        // Release the semaphore. This doesn't release any locks on the file.
        _fileLock.Release();
    }
}
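// For context, the file-lock semaphore used above is assumed to be a single-entry
// SemaphoreSlim declared on the sink, e.g.:
private readonly SemaphoreSlim _fileLock = new SemaphoreSlim(1, 1);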
/// <inheritdoc/>
public TAWSClient FailbackToPrimaryRegion(Throttle throttle)
{
    _logger?.LogDebug($"FailoverSink id {Id} is trying to fail back to the primary region.");

    // Set up a client with the primary region.
    var client = _failoverSinkRegionStrategy.GetPrimaryRegionClient();
    if (client is not null)
    {
        // Reset the throttle.
        throttle.SetSuccess();
        _logger?.LogInformation($"FailoverSink id {Id} failed back successfully to primary region {_failoverSinkRegionStrategy.GetCurrentRegion().Region.SystemName}.");
        return client;
    }

    _logger?.LogDebug($"FailoverSink id {Id} fail back to the primary region was unsuccessful; the primary region is currently down or in use.");
    return null;
}
/// <inheritdoc/>
public TAWSClient FailOverToSecondaryRegion(Throttle throttle)
{
    // Stop the timer if there are no errors.
    if (throttle.ConsecutiveErrorCount == 0)
    {
        _secondaryRegionFailoverTimer.Stop();
        _secondaryRegionFailoverActivated = false;
    }
    // Start the timer on the first error.
    else if (throttle.ConsecutiveErrorCount > 0 && !_secondaryRegionFailoverTimer.Enabled)
    {
        _secondaryRegionFailoverTimer.Start();
    }

    // Fail over to a secondary region, if one is available, when either the maximum
    // consecutive error count is reached or the failover timer has fired.
    if (throttle.ConsecutiveErrorCount >= _maxErrorsCountBeforeFailover || _secondaryRegionFailoverActivated)
    {
        _logger?.LogWarning($"FailoverSink id {Id} reached {throttle.ConsecutiveErrorCount} consecutive errors; trying to fail over to a secondary region.");

        // Set up a client with a secondary region.
        var client = _failoverSinkRegionStrategy.GetSecondaryRegionClient();
        if (client is not null)
        {
            // Reset the throttle.
            throttle.SetSuccess();
            _logger?.LogInformation($"FailoverSink id {Id} failed over successfully to secondary region {_failoverSinkRegionStrategy.GetCurrentRegion().Region.SystemName} after reaching the consecutive error limit.");
            return client;
        }

        _logger?.LogError($"FailoverSink id {Id} fail over to a secondary region was unsuccessful; all secondary regions appear to be down.");
    }

    return null;
}
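// Hedged usage sketch: a sink's send path can consult the failover helper before each
// request and swap in the secondary-region client when one is returned. The _client
// field name here is illustrative, not taken from the production sinks.
var failoverClient = FailOverToSecondaryRegion(_throttle);
if (failoverClient is not null)
{
    // Route subsequent requests to the secondary region.
    _client = failoverClient;
}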
protected override async Task OnNextAsync(List<Envelope<InputLogEvent>> records, long batchBytes)
{
    if (records == null || records.Count == 0)
    {
        return;
    }

    try
    {
        _logger?.LogDebug($"CloudWatchLogsSink client {this.Id} sending {records.Count} records {batchBytes} bytes.");

        DateTime timestamp = records[0].Timestamp;
        string logStreamName = ResolveTimestampInLogStreamName(timestamp);
        if (string.IsNullOrEmpty(_sequenceToken))
        {
            await GetSequenceTokenAsync(logStreamName);
        }

        var request = new PutLogEventsRequest
        {
            LogGroupName = _logGroupName,
            LogStreamName = logStreamName,
            SequenceToken = _sequenceToken,
            LogEvents = records
                .Select(e => e.Data)
                .OrderBy(e => e.Timestamp) // Sort here in case messages are from multiple streams and are not merged in order.
                .ToList()
        };

        bool attemptedCreatingLogStream = false;
        int invalidSequenceTokenCount = 0;
        while (true)
        {
            DateTime utcNow = DateTime.UtcNow;
            _clientLatency = (long)records.Average(r => (utcNow - r.Timestamp).TotalMilliseconds);

            long elapsedMilliseconds = Utility.GetElapsedMilliseconds();
            try
            {
                PutLogEventsResponse response = await _client.PutLogEventsAsync(request);
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetSuccess();
                _sequenceToken = response.NextSequenceToken;

                RejectedLogEventsInfo rejectedLogEventsInfo = response.RejectedLogEventsInfo;
                _recordsAttempted += records.Count;
                _bytesAttempted += batchBytes;
                if (rejectedLogEventsInfo != null)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.Append($"CloudWatchLogsSink client {this.Id} some of the logs were rejected.");
                    sb.Append($" ExpiredLogEventEndIndex {rejectedLogEventsInfo.ExpiredLogEventEndIndex}");
                    sb.Append($" TooNewLogEventStartIndex {rejectedLogEventsInfo.TooNewLogEventStartIndex}");
                    sb.Append($" TooOldLogEventEndIndex {rejectedLogEventsInfo.TooOldLogEventEndIndex}");
                    _logger?.LogError(sb.ToString());

                    long recordCount = records.Count - rejectedLogEventsInfo.ExpiredLogEventEndIndex - rejectedLogEventsInfo.TooOldLogEventEndIndex;
                    // Only subtract the too-new tail when the service actually reported one.
                    if (rejectedLogEventsInfo.TooNewLogEventStartIndex > 0)
                    {
                        recordCount -= records.Count - rejectedLogEventsInfo.TooNewLogEventStartIndex;
                    }
                    _recordsSuccess += recordCount;
                    _recordsFailedNonrecoverable += (records.Count - recordCount);
                }
                else
                {
                    _recordsSuccess += records.Count;
                    _logger?.LogDebug($"CloudWatchLogsSink client {this.Id} successfully sent {records.Count} records {batchBytes} bytes.");
                }

                this.SaveBookmarks(records);
                break;
            }
            catch (ResourceNotFoundException)
            {
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetError();
                // The log stream does not exist. Create it once; if creation was already
                // attempted, treat the error as nonrecoverable.
                if (attemptedCreatingLogStream)
                {
                    _nonrecoverableServiceErrors++;
                    _recordsFailedNonrecoverable += records.Count;
                    throw;
                }

                _recoverableServiceErrors++;
                _recordsFailedRecoverable += records.Count;
                attemptedCreatingLogStream = true;
                await CreateLogStreamAsync(logStreamName);
            }
            catch (AmazonCloudWatchLogsException ex)
            {
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetError();
                if (ex is InvalidSequenceTokenException || ex is ServiceUnavailableException)
                {
                    if (ex is InvalidSequenceTokenException invalidSequenceTokenException)
                    {
                        invalidSequenceTokenCount++;
                        _sequenceToken = invalidSequenceTokenException.GetExpectedSequenceToken();
                        // Sometimes we get a sequence token that is literally the string "null".
                        // This is obviously invalid.
                        if ("null".Equals(_sequenceToken))
                        {
                            _sequenceToken = null;
                        }
                        else if (_sequenceToken != null && invalidSequenceTokenCount < 2)
                        {
                            // Put the corrected token on the request and retry immediately,
                            // so that the sequence token does not become invalid again.
                            request.SequenceToken = _sequenceToken;
                            continue;
                        }
                    }

                    if (_buffer.Requeue(records, _throttle.ConsecutiveErrorCount < _maxAttempts))
                    {
                        _logger?.LogWarning($"CloudWatchLogsSink client {this.Id} attempt={_throttle.ConsecutiveErrorCount} exception={ex.Message}. Will retry.");
                        _recoverableServiceErrors++;
                        _recordsFailedRecoverable += records.Count;
                        break;
                    }
                }

                _recordsFailedNonrecoverable += records.Count;
                _nonrecoverableServiceErrors++;
                throw;
            }
            catch (Exception)
            {
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetError();
                _recordsFailedNonrecoverable += records.Count;
                _nonrecoverableServiceErrors++;
                throw;
            }
        }
    }
    catch (Exception ex)
    {
        _logger?.LogError($"CloudWatchLogsSink client {this.Id} exception (attempt={_throttle.ConsecutiveErrorCount}): {ex.ToMinimized()}");
    }

    PublishMetrics(MetricsConstants.CLOUDWATCHLOG_PREFIX);
}
protected override async Task OnNextAsync(List<Envelope<InputLogEvent>> records, long batchBytes)
{
    if (records == null || records.Count == 0)
    {
        return;
    }

    try
    {
        _logger?.LogDebug("CloudWatchLogsSink client {0} sending {1} records {2} bytes.", this.Id, records.Count, batchBytes);

        var logStreamName = this.ResolveTimestampInLogStreamName(records[0].Timestamp);

        // If the sequence token is null, try to get it.
        // If the log stream doesn't exist, create it (by specifying "true" in the second parameter).
        // This should be the only place where a log stream is created.
        // This method ensures that both the log group and the stream exist,
        // so there is no need to handle a ResourceNotFound exception later.
        if (string.IsNullOrEmpty(_sequenceToken))
        {
            await this.GetSequenceTokenAsync(logStreamName, true);
        }

        var request = new PutLogEventsRequest
        {
            LogGroupName = _logGroupName,
            LogStreamName = logStreamName,
            SequenceToken = _sequenceToken,
            LogEvents = records
                .Select(e => e.Data)
                .OrderBy(e => e.Timestamp) // Sort here in case messages are from multiple streams and are not merged in order.
                .ToList()
        };

        int invalidSequenceTokenCount = 0;
        while (true)
        {
            var utcNow = DateTime.UtcNow;
            _clientLatency = (long)records.Average(r => (utcNow - r.Timestamp).TotalMilliseconds);

            long elapsedMilliseconds = Utility.GetElapsedMilliseconds();
            try
            {
                var response = await _client.PutLogEventsAsync(request);
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetSuccess();
                _sequenceToken = response.NextSequenceToken;
                _recordsAttempted += records.Count;
                _bytesAttempted += batchBytes;

                var rejectedLogEventsInfo = response.RejectedLogEventsInfo;
                long recordCount = records.Count;
                if (rejectedLogEventsInfo != null)
                {
                    // Don't do the expensive string building unless we know the logger isn't null.
                    if (_logger != null)
                    {
                        var sb = new StringBuilder()
                            .AppendFormat("CloudWatchLogsSink client {0} encountered some rejected logs.", this.Id)
                            .AppendFormat(" ExpiredLogEventEndIndex {0}", rejectedLogEventsInfo.ExpiredLogEventEndIndex)
                            .AppendFormat(" TooNewLogEventStartIndex {0}", rejectedLogEventsInfo.TooNewLogEventStartIndex)
                            .AppendFormat(" TooOldLogEventEndIndex {0}", rejectedLogEventsInfo.TooOldLogEventEndIndex);
                        _logger.LogError(sb.ToString());
                    }

                    recordCount = records.Count - rejectedLogEventsInfo.ExpiredLogEventEndIndex - rejectedLogEventsInfo.TooOldLogEventEndIndex;
                    // Only subtract the too-new tail when the service actually reported one.
                    if (rejectedLogEventsInfo.TooNewLogEventStartIndex > 0)
                    {
                        recordCount -= records.Count - rejectedLogEventsInfo.TooNewLogEventStartIndex;
                    }
                    _recordsFailedNonrecoverable += (records.Count - recordCount);
                }

                _logger?.LogDebug("CloudWatchLogsSink client {0} successfully sent {1} records {2} bytes.", this.Id, recordCount, batchBytes);
                // Count only the records the service actually accepted.
                _recordsSuccess += recordCount;
                this.SaveBookmarks(records);
                break;
            }
            catch (AmazonCloudWatchLogsException ex)
            {
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetError();

                // InvalidSequenceTokenExceptions are thrown when a PutLogEvents call doesn't carry
                // a valid sequence token. This is usually recoverable, so we try twice before requeuing events.
                if (ex is InvalidSequenceTokenException invalidSequenceTokenException && invalidSequenceTokenCount < 2)
                {
                    // Increment the invalid sequence token counter, to limit the "instant retries" that we attempt.
                    invalidSequenceTokenCount++;

                    // The exception from CloudWatch contains the expected sequence token, so we try to parse it out.
                    _sequenceToken = invalidSequenceTokenException.GetExpectedSequenceToken();

                    // Sometimes we get a sequence token with a string value of "null".
                    // This is invalid, so we fetch the token again and retry immediately.
                    // If credentials have expired or this request is being throttled,
                    // the wrapper try/catch will capture it and the error will be logged.
                    if (AWSConstants.NullString.Equals(_sequenceToken))
                    {
                        _sequenceToken = null;
                        await this.GetSequenceTokenAsync(logStreamName, false);
                    }

                    // Reset the sequence token in the request and immediately retry (without requeuing),
                    // so that the sequence token does not become invalid again.
                    request.SequenceToken = _sequenceToken;
                    continue;
                }

                // Requeue if one of the following is true:
                // - An invalid sequence token was used (more than twice in a row)
                // - The service was unavailable (transient error or service outage)
                // - The security token in the credentials has expired (previously this was treated as unrecoverable)
                if (this.IsRecoverableException(ex))
                {
                    // Try to requeue the records into the buffer.
                    // This means the events in the buffer are now out of order :(
                    // There's nothing we can do about that short of rewriting all the buffering logic.
                    // Out-of-order events aren't that bad, because the destination service stores them
                    // by their event time anyway. However, this can affect the persistent bookmarking
                    // behavior, since bookmarks are updated based on the position/eventId in the last
                    // batch sent, not what's currently in the buffer.
                    if (_buffer.Requeue(records, _throttle.ConsecutiveErrorCount < _maxAttempts))
                    {
                        _logger?.LogWarning("CloudWatchLogsSink client {0} attempt={1} exception={2}. Will retry.", this.Id, _throttle.ConsecutiveErrorCount, ex.Message);
                        _recoverableServiceErrors++;
                        _recordsFailedRecoverable += records.Count;
                        break;
                    }
                }

                _recordsFailedNonrecoverable += records.Count;
                _nonrecoverableServiceErrors++;
                throw;
            }
            catch (Exception)
            {
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetError();
                _recordsFailedNonrecoverable += records.Count;
                _nonrecoverableServiceErrors++;
                throw;
            }
        }
    }
    catch (Exception ex)
    {
        _logger?.LogError("CloudWatchLogsSink client {0} exception (attempt={1}): {2}", this.Id, _throttle.ConsecutiveErrorCount, ex.ToMinimized());
    }

    this.PublishMetrics(MetricsConstants.CLOUDWATCHLOG_PREFIX);
}
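// Hedged sketch of the GetExpectedSequenceToken helper used above. Recent SDK versions
// expose an ExpectedSequenceToken property on InvalidSequenceTokenException; an extension
// like this might simply return it, or fall back to parsing the error message. The exact
// production implementation may differ, and the message marker below is an assumption.
public static class InvalidSequenceTokenExceptionExtensions
{
    public static string GetExpectedSequenceToken(this InvalidSequenceTokenException ex)
    {
        if (!string.IsNullOrEmpty(ex.ExpectedSequenceToken))
        {
            return ex.ExpectedSequenceToken;
        }

        const string marker = "The next expected sequenceToken is: ";
        var index = ex.Message.IndexOf(marker, StringComparison.Ordinal);
        return index < 0 ? null : ex.Message.Substring(index + marker.Length).Trim();
    }
}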
/// <summary>
/// Send the most recent batch of logs to the configured S3 bucket.
/// </summary>
/// <param name="records">List of envelope-wrapped logs.</param>
public async Task SendBatchAsync(List<Envelope<string>> records)
{
    try
    {
        _logger?.LogDebug("S3Sink id {0} sending {1} logs", Id, records.Count);

        // Create the PutObject request to send to S3.
        var request = new PutObjectRequest
        {
            BucketName = _bucketName,
            Key = await GenerateS3ObjectKey(_filePath, _fileName)
        };

        // Turn the list of records into a stream that S3 can consume and that will register as a zipped file.
        var memStream = new MemoryStream();
        using (var archive = new ZipArchive(memStream, ZipArchiveMode.Create, true))
        {
            var logFile = archive.CreateEntry("AlarmLogs.txt", CompressionLevel.Optimal);
            using (var entryStream = logFile.Open())
            using (var bufferStream = new MemoryStream())
            {
                foreach (Envelope<string> record in records)
                {
                    var byteArray = Encoding.UTF8.GetBytes(record.Data + "\n");
                    bufferStream.Write(byteArray, 0, byteArray.Length);
                }
                bufferStream.Seek(0, SeekOrigin.Begin);
                bufferStream.CopyTo(entryStream);
            }
        }

        var batchBytes = memStream.Length;
        // Rewind the stream so the SDK uploads the archive from the beginning;
        // after the ZipArchive is disposed, the position is at the end of the stream.
        memStream.Seek(0, SeekOrigin.Begin);
        request.InputStream = memStream;

        // Send the file to S3.
        while (true)
        {
            long elapsedMilliseconds = Utility.GetElapsedMilliseconds();
            try
            {
                var response = await S3Client.PutObjectAsync(request);
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetSuccess();
                _recordsAttempted += records.Count;
                _bytesAttempted += batchBytes;
                _logger?.LogDebug("S3Sink id {0} successfully sent {1} logs, compressed to {2} bytes", Id, records.Count, batchBytes);
                _recordsSuccess += records.Count;
                await SaveBookmarks(records);
                break;
            }
            catch (AmazonS3Exception ex)
            {
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetError();

                // Retry the PutObject request if possible.
                if (ex.Retryable != null)
                {
                    if (_buffer.Requeue(records, _throttle.ConsecutiveErrorCount < _maxAttempts))
                    {
                        _logger?.LogWarning("S3Sink id {0} attempt={1} exception={2}. Will retry.", Id, _throttle.ConsecutiveErrorCount, ex.Message);
                        _recoverableServiceErrors++;
                        _recordsFailedRecoverable += records.Count;
                        break;
                    }
                }

                _recordsFailedNonrecoverable += records.Count;
                _nonrecoverableServiceErrors++;
                throw;
            }
            catch (Exception)
            {
                _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                _throttle.SetError();
                _recordsFailedNonrecoverable += records.Count;
                _nonrecoverableServiceErrors++;
                throw;
            }
        }
    }
    catch (Exception ex)
    {
        _logger?.LogError("S3Sink id {0} exception (attempt={1}): {2}", Id, _throttle.ConsecutiveErrorCount, ex.ToMinimized());
    }

    PublishMetrics(MetricsConstants.S3_PREFIX);
}
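// Illustrative sketch of the GenerateS3ObjectKey helper awaited above. The production
// implementation is not shown in this section; a typical version prefixes the configured
// path and timestamps the file name so successive batches don't overwrite each other.
private Task<string> GenerateS3ObjectKey(string filePath, string fileName)
{
    var utcNow = DateTime.UtcNow;
    var key = $"{filePath.TrimEnd('/')}/{utcNow:yyyy/MM/dd}/{utcNow:yyyyMMdd-HHmmssfff}-{fileName}.zip";
    return Task.FromResult(key);
}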