public void TestThrottleStates() { Throttle throttle = new Throttle(new TokenBucket(1000, 2000)); throttle.SetError(); throttle.SetError(); Assert.Equal(2, throttle.ConsecutiveErrorCount); throttle.SetSuccess(); Assert.Equal(0, throttle.ConsecutiveErrorCount); }
/// <inheritdoc/> protected override async Task <PutMetricDataResponse> SendRequestAsync(PutMetricDataRequest putMetricDataRequest) { // Failover if (_failoverSinkEnabled) { // Failover to Secondary Region _ = FailOverToSecondaryRegion(_throttle); } PutMetricDataResponse response; try { response = await CloudWatchClient.PutMetricDataAsync(putMetricDataRequest); _throttle.SetSuccess(); } catch (Exception) { _throttle.SetError(); throw; } return(response); }
private async Task ProcessingTask(CancellationToken stopToken) { var events = new List <Envelope <InputLogEvent> >(); while (!stopToken.IsCancellationRequested) { try { if (events.Count == 0) { await _queue.GetNextBatchAsync(events, _bufferIntervalMs, stopToken); events.Sort((r1, r2) => r1.Timestamp.CompareTo(r2.Timestamp)); } RemoveEmptyRecords(events); await ThrottleAsync(events, stopToken); await SendBatchAsync(events, stopToken); PublishMetrics(MetricsConstants.CLOUDWATCHLOG_PREFIX); } catch (OperationCanceledException) when(stopToken.IsCancellationRequested) { break; } catch (Exception ex) { _throttle.SetError(); if (IsRecoverableException(ex)) { _logger.LogWarning("Encountered recoverable error of type {0}", ex.GetType()); Interlocked.Increment(ref _recoverableServiceErrors); Interlocked.Add(ref _recordsFailedRecoverable, events.Count); if (_throttle.ConsecutiveErrorCount > _maxAttempts && _queue.HasSecondaryQueue()) { // Re-queue into the secondary queue when we've reached attempt limits // This means that records will be out of order _logger.LogDebug("Requeueing to secondary queue"); await _queue.PushSecondaryAsync(events); events.Clear(); } continue; } _logger.LogError(ex, "Error sending events (attempt={0})", _throttle.ConsecutiveErrorCount); Interlocked.Increment(ref _nonrecoverableServiceErrors); Interlocked.Add(ref _recordsFailedNonrecoverable, events.Count); events.Clear(); continue; } } }
protected override async Task OnNextAsync(List <Envelope <InputLogEvent> > records, long batchBytes) { // If the sink has been stopped, don't keep trying to write to the file. if (_isStopped) { return; } // Obtain the fileLock semaphore to ensure that only a single process can call this method at any given time. // This does not lock the file itself; we rely on the OS to manage file locking when we open the file stream. await _fileLock.WaitAsync(); try { // Create a stream writer on a new FileStream object pointing to the target output file. // This method will fail if the OS already has a lock on the file, which will trigger throttling behavior. // This allows us to effectively reproduce throttling at the sink level, and also helps debug issues outside of the IDE. using (var sw = new StreamWriter(new FileStream(FilePath, FileMode.Append, FileAccess.Write, FileShare.Read))) { foreach (var record in records) { await sw.WriteLineAsync(record.Data.Message); } await sw.FlushAsync(); } await SaveBookmarks(records); Throttle.SetSuccess(); } catch (Exception) { // If the sink has been stopped, don't keep trying to write to the file. if (_isStopped) { return; } // This is the magic that reproduces the throttling behavior. // We call the SetError method on the throttle object and requeue the events. // This uses the same logic as the CloudWatchLogs sink. Throttle.SetError(); _buffer.Requeue(records, Throttle.ConsecutiveErrorCount < _maxAttempts); } finally { // Release the semaphore. This doesn't release any locks on the file. _fileLock.Release(); } }
protected override async Task OnNextAsync(List <Envelope <InputLogEvent> > records, long batchBytes) { if (records == null || records.Count == 0) { return; } try { _logger?.LogDebug($"CloudWatchLogsSink client {this.Id} sending {records.Count} records {batchBytes} bytes."); DateTime timestamp = records[0].Timestamp; string logStreamName = ResolveTimestampInLogStreamName(timestamp); if (string.IsNullOrEmpty(_sequenceToken)) { await GetSequenceTokenAsync(logStreamName); } var request = new PutLogEventsRequest { LogGroupName = _logGroupName, LogStreamName = logStreamName, SequenceToken = _sequenceToken, LogEvents = records .Select(e => e.Data) .OrderBy(e => e.Timestamp) //Added sort here in case messages are from multiple streams and they are not merged in order .ToList() }; bool attemptedCreatingLogStream = false; int invalidSequenceTokenCount = 0; while (true) { DateTime utcNow = DateTime.UtcNow; _clientLatency = (long)records.Average(r => (utcNow - r.Timestamp).TotalMilliseconds); long elapsedMilliseconds = Utility.GetElapsedMilliseconds(); try { PutLogEventsResponse response = await _client.PutLogEventsAsync(request); _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetSuccess(); _sequenceToken = response.NextSequenceToken; RejectedLogEventsInfo rejectedLogEventsInfo = response.RejectedLogEventsInfo; _recordsAttempted += records.Count; _bytesAttempted += batchBytes; if (rejectedLogEventsInfo != null) { StringBuilder sb = new StringBuilder(); sb.Append($"CloudWatchLogsSink client {this.Id} some of the logs where rejected."); sb.Append($" ExpiredLogEventEndIndex {rejectedLogEventsInfo.ExpiredLogEventEndIndex}"); sb.Append($" TooNewLogEventStartIndex {rejectedLogEventsInfo.TooNewLogEventStartIndex}"); sb.Append($" TooOldLogEventEndIndex {rejectedLogEventsInfo.TooOldLogEventEndIndex}"); _logger?.LogError(sb.ToString()); long recordCount = records.Count - rejectedLogEventsInfo.ExpiredLogEventEndIndex - rejectedLogEventsInfo.TooOldLogEventEndIndex; if (rejectedLogEventsInfo.TooOldLogEventEndIndex > 0) { recordCount -= records.Count - rejectedLogEventsInfo.TooNewLogEventStartIndex; } _recordsSuccess += recordCount; _recordsFailedNonrecoverable += (records.Count - recordCount); } else { _recordsSuccess += records.Count; _logger?.LogDebug($"CloudWatchLogsSink client {this.Id} successfully sent {records.Count} records {batchBytes} bytes."); } this.SaveBookmarks(records); break; } catch (ResourceNotFoundException) { _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetError(); //Logstream does not exist if (attemptedCreatingLogStream) { _nonrecoverableServiceErrors++; _recordsFailedNonrecoverable += records.Count; throw; } else { _recoverableServiceErrors++; _recordsFailedRecoverable += records.Count; await CreateLogStreamAsync(logStreamName); } } catch (AmazonCloudWatchLogsException ex) { _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetError(); if (ex is InvalidSequenceTokenException || ex is ServiceUnavailableException) { if (ex is InvalidSequenceTokenException invalidSequenceTokenException) { invalidSequenceTokenCount++; _sequenceToken = invalidSequenceTokenException.GetExpectedSequenceToken(); //Sometimes we get a sequence token just say "null". This is obviously invalid if ("null".Equals(_sequenceToken)) { _sequenceToken = null; } else if (_sequenceToken != null && invalidSequenceTokenCount < 2) { continue; //Immediately try so that the sequence token does not become invalid again } } if (_buffer.Requeue(records, _throttle.ConsecutiveErrorCount < _maxAttempts)) { _logger?.LogWarning($"CloudWatchLogsSink client {this.Id} attempt={_throttle.ConsecutiveErrorCount} exception={ex.Message}. Will retry."); _recoverableServiceErrors++; _recordsFailedRecoverable += records.Count; break; } } _recordsFailedNonrecoverable += records.Count; _nonrecoverableServiceErrors++; throw; } catch (Exception) { _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetError(); _recordsFailedNonrecoverable += records.Count; _nonrecoverableServiceErrors++; throw; } } } catch (Exception ex) { _logger?.LogError($"CloudWatchLogsSink client {this.Id} exception (attempt={_throttle.ConsecutiveErrorCount}): {ex.ToMinimized()}"); } PublishMetrics(MetricsConstants.CLOUDWATCHLOG_PREFIX); }
protected override async Task OnNextAsync(List <Envelope <InputLogEvent> > records, long batchBytes) { if (records == null || records.Count == 0) { return; } try { _logger?.LogDebug("CloudWatchLogsSink client {0} sending {1} records {2} bytes.", this.Id, records.Count, batchBytes); var logStreamName = this.ResolveTimestampInLogStreamName(records[0].Timestamp); // If the sequence token is null, try to get it. // If the log stream doesn't exist, create it (by specifying "true" in the second parameter). // This should be the only place where a log stream is created. // This method will ensure that both the log group and stream exists, // so there is no need to handle a ResourceNotFound exception later. if (string.IsNullOrEmpty(_sequenceToken)) { await this.GetSequenceTokenAsync(logStreamName, true); } var request = new PutLogEventsRequest { LogGroupName = _logGroupName, LogStreamName = logStreamName, SequenceToken = _sequenceToken, LogEvents = records .Select(e => e.Data) .OrderBy(e => e.Timestamp) // Added sort here in case messages are from multiple streams and they are not merged in order .ToList() }; int invalidSequenceTokenCount = 0; while (true) { var utcNow = DateTime.UtcNow; _clientLatency = (long)records.Average(r => (utcNow - r.Timestamp).TotalMilliseconds); long elapsedMilliseconds = Utility.GetElapsedMilliseconds(); try { var response = await _client.PutLogEventsAsync(request); _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetSuccess(); _sequenceToken = response.NextSequenceToken; _recordsAttempted += records.Count; _bytesAttempted += batchBytes; var rejectedLogEventsInfo = response.RejectedLogEventsInfo; if (rejectedLogEventsInfo != null) { // Don't do the expensive string building unless we know the logger isn't null. if (_logger != null) { var sb = new StringBuilder() .AppendFormat("CloudWatchLogsSink client {0} encountered some rejected logs.", this.Id) .AppendFormat(" ExpiredLogEventEndIndex {0}", rejectedLogEventsInfo.ExpiredLogEventEndIndex) .AppendFormat(" TooNewLogEventStartIndex {0}", rejectedLogEventsInfo.TooNewLogEventStartIndex) .AppendFormat(" TooOldLogEventEndIndex {0}", rejectedLogEventsInfo.TooOldLogEventEndIndex); _logger.LogError(sb.ToString()); } var recordCount = records.Count - rejectedLogEventsInfo.ExpiredLogEventEndIndex - rejectedLogEventsInfo.TooOldLogEventEndIndex; if (rejectedLogEventsInfo.TooOldLogEventEndIndex > 0) { recordCount -= records.Count - rejectedLogEventsInfo.TooNewLogEventStartIndex; } _recordsFailedNonrecoverable += (records.Count - recordCount); } _logger?.LogDebug("CloudWatchLogsSink client {0} successfully sent {1} records {2} bytes.", this.Id, records.Count, batchBytes); _recordsSuccess += records.Count; this.SaveBookmarks(records); break; } catch (AmazonCloudWatchLogsException ex) { _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetError(); // InvalidSequenceTokenExceptions are thrown when a PutLogEvents call doesn't have a valid sequence token. // This is usually recoverable, so we'll try twice before requeuing events. if (ex is InvalidSequenceTokenException invalidSequenceTokenException && invalidSequenceTokenCount < 2) { // Increment the invalid sequence token counter, to limit the "instant retries" that we attempt. invalidSequenceTokenCount++; // The exception from CloudWatch contains the sequence token, so we'll try to parse it out. _sequenceToken = invalidSequenceTokenException.GetExpectedSequenceToken(); // Sometimes we get a sequence token with a string value of "null". // This is invalid so we'll fetch it again and retry immediately. // If credentials have expired or this request is being throttled, // the wrapper try/catch will capture it and data will if (AWSConstants.NullString.Equals(_sequenceToken)) { _sequenceToken = null; await this.GetSequenceTokenAsync(logStreamName, false); } // Reset the sequence token in the request and immediately retry (without requeuing), // so that the sequence token does not become invalid again. request.SequenceToken = _sequenceToken; continue; } // Retry if one of the following was true: // - The exception was thrown because an invalid sequence token was used (more than twice in a row) // - The service was unavailable (transient error or service outage) // - The security token in the credentials has expired (previously this was handled as an unrecoverable error) if (this.IsRecoverableException(ex)) { // Try to requeue the records into the buffer. // This will mean that the events in the buffer are now out of order :( // There's nothing we can do about that short of rewriting all the buffering logic. // Having out of order events isn't that bad, because the service that we're uploading // to will be storing them based on their event time anyway. However, this can affect // the persistent bookmarking behavior, since bookmarks are updated based on the // position/eventId in the last batch sent, not what's currently in the buffer. if (_buffer.Requeue(records, _throttle.ConsecutiveErrorCount < _maxAttempts)) { _logger?.LogWarning("CloudWatchLogsSink client {0} attempt={1} exception={2}. Will retry.", this.Id, _throttle.ConsecutiveErrorCount, ex.Message); _recoverableServiceErrors++; _recordsFailedRecoverable += records.Count; break; } } _recordsFailedNonrecoverable += records.Count; _nonrecoverableServiceErrors++; throw; } catch (Exception) { _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetError(); _recordsFailedNonrecoverable += records.Count; _nonrecoverableServiceErrors++; throw; } } } catch (Exception ex) { _logger?.LogError("CloudWatchLogsSink client {0} exception (attempt={1}): {2}", this.Id, _throttle.ConsecutiveErrorCount, ex.ToMinimized()); } this.PublishMetrics(MetricsConstants.CLOUDWATCHLOG_PREFIX); }
/// <summary> /// Send the most recent batch of logs to the configured S3 bucket. /// </summary> /// <param name="records">List of envelope-wrapped logs.</param> public async Task SendBatchAsync(List <Envelope <string> > records) { try { _logger?.LogDebug("S3Sink id {0} sending {1} logs", Id, records.Count); // Create the PutObject request to send to S3 var request = new PutObjectRequest { BucketName = _bucketName, Key = await GenerateS3ObjectKey(_filePath, _fileName) }; // Turn the list of records into a stream that can be consumed by S3 and will register as a zipped file var memStream = new MemoryStream(); using (var archive = new ZipArchive(memStream, ZipArchiveMode.Create, true)) { var logFile = archive.CreateEntry("AlarmLogs.txt", CompressionLevel.Optimal); using (var entryStream = logFile.Open()) using (var compressedStream = new MemoryStream()) { foreach (Envelope <string> record in records) { var byteArray = Encoding.UTF8.GetBytes(record.Data + "\n"); compressedStream.Write(byteArray, 0, byteArray.Length); } compressedStream.Seek(0, SeekOrigin.Begin); compressedStream.CopyTo(entryStream); } } var batchBytes = memStream.Length; request.InputStream = memStream; // Send the file to S3 while (true) { var utcNow = DateTime.UtcNow; long elapsedMilliseconds = Utility.GetElapsedMilliseconds(); try { var response = await S3Client.PutObjectAsync(request); _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetSuccess(); _recordsAttempted += records.Count; _bytesAttempted += batchBytes; _logger?.LogDebug("S3Sink id {0} successfully sent {1} logs, compressed to {2} bytes", Id, records.Count, batchBytes); _recordsSuccess += records.Count; await SaveBookmarks(records); break; } catch (AmazonS3Exception ex) { _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetError(); // Retry PutObjectRequest if possibe if (ex.Retryable != null) { if (_buffer.Requeue(records, _throttle.ConsecutiveErrorCount < _maxAttempts)) { _logger?.LogWarning("S3Sink id {0} attempt={1} exception={2}. Will retry.", Id, _throttle.ConsecutiveErrorCount, ex.Message); _recoverableServiceErrors++; _recordsFailedRecoverable += records.Count; break; } } _recordsFailedNonrecoverable += records.Count; _nonrecoverableServiceErrors++; throw; } catch (Exception) { _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds; _throttle.SetError(); _recordsFailedNonrecoverable += records.Count; _nonrecoverableServiceErrors++; throw; } } } catch (Exception ex) { _logger?.LogError("S3Sink id {0} exception (attempt={1}): {2}", Id, _throttle.ConsecutiveErrorCount, ex.ToMinimized()); } PublishMetrics(MetricsConstants.S3_PREFIX); }