예제 #1
0
        public void TestThrottleStates()
        {
            Throttle throttle = new Throttle(new TokenBucket(1000, 2000));

            throttle.SetError();
            throttle.SetError();
            Assert.Equal(2, throttle.ConsecutiveErrorCount);
            throttle.SetSuccess();
            Assert.Equal(0, throttle.ConsecutiveErrorCount);
        }
        /// <inheritdoc/>
        protected override async Task <PutMetricDataResponse> SendRequestAsync(PutMetricDataRequest putMetricDataRequest)
        {
            // Failover
            if (_failoverSinkEnabled)
            {
                // Failover to Secondary Region
                _ = FailOverToSecondaryRegion(_throttle);
            }

            PutMetricDataResponse response;

            try
            {
                response = await CloudWatchClient.PutMetricDataAsync(putMetricDataRequest);

                _throttle.SetSuccess();
            }
            catch (Exception)
            {
                _throttle.SetError();
                throw;
            }

            return(response);
        }
예제 #3
0
        private async Task ProcessingTask(CancellationToken stopToken)
        {
            var events = new List <Envelope <InputLogEvent> >();

            while (!stopToken.IsCancellationRequested)
            {
                try
                {
                    if (events.Count == 0)
                    {
                        await _queue.GetNextBatchAsync(events, _bufferIntervalMs, stopToken);

                        events.Sort((r1, r2) => r1.Timestamp.CompareTo(r2.Timestamp));
                    }

                    RemoveEmptyRecords(events);

                    await ThrottleAsync(events, stopToken);

                    await SendBatchAsync(events, stopToken);

                    PublishMetrics(MetricsConstants.CLOUDWATCHLOG_PREFIX);
                }
                catch (OperationCanceledException) when(stopToken.IsCancellationRequested)
                {
                    break;
                }
                catch (Exception ex)
                {
                    _throttle.SetError();

                    if (IsRecoverableException(ex))
                    {
                        _logger.LogWarning("Encountered recoverable error of type {0}", ex.GetType());
                        Interlocked.Increment(ref _recoverableServiceErrors);
                        Interlocked.Add(ref _recordsFailedRecoverable, events.Count);

                        if (_throttle.ConsecutiveErrorCount > _maxAttempts && _queue.HasSecondaryQueue())
                        {
                            // Re-queue into the secondary queue when we've reached attempt limits
                            // This means that records will be out of order
                            _logger.LogDebug("Requeueing to secondary queue");
                            await _queue.PushSecondaryAsync(events);

                            events.Clear();
                        }
                        continue;
                    }

                    _logger.LogError(ex, "Error sending events (attempt={0})", _throttle.ConsecutiveErrorCount);
                    Interlocked.Increment(ref _nonrecoverableServiceErrors);
                    Interlocked.Add(ref _recordsFailedNonrecoverable, events.Count);
                    events.Clear();
                    continue;
                }
            }
        }
        protected override async Task OnNextAsync(List <Envelope <InputLogEvent> > records, long batchBytes)
        {
            // If the sink has been stopped, don't keep trying to write to the file.
            if (_isStopped)
            {
                return;
            }

            // Obtain the fileLock semaphore to ensure that only a single process can call this method at any given time.
            // This does not lock the file itself; we rely on the OS to manage file locking when we open the file stream.
            await _fileLock.WaitAsync();

            try
            {
                // Create a stream writer on a new FileStream object pointing to the target output file.
                // This method will fail if the OS already has a lock on the file, which will trigger throttling behavior.
                // This allows us to effectively reproduce throttling at the sink level, and also helps debug issues outside of the IDE.
                using (var sw = new StreamWriter(new FileStream(FilePath, FileMode.Append, FileAccess.Write, FileShare.Read)))
                {
                    foreach (var record in records)
                    {
                        await sw.WriteLineAsync(record.Data.Message);
                    }

                    await sw.FlushAsync();
                }

                await   SaveBookmarks(records);

                Throttle.SetSuccess();
            }
            catch (Exception)
            {
                // If the sink has been stopped, don't keep trying to write to the file.
                if (_isStopped)
                {
                    return;
                }

                // This is the magic that reproduces the throttling behavior.
                // We call the SetError method on the throttle object and requeue the events.
                // This uses the same logic as the CloudWatchLogs sink.
                Throttle.SetError();
                _buffer.Requeue(records, Throttle.ConsecutiveErrorCount < _maxAttempts);
            }
            finally
            {
                // Release the semaphore. This doesn't release any locks on the file.
                _fileLock.Release();
            }
        }
        protected override async Task OnNextAsync(List <Envelope <InputLogEvent> > records, long batchBytes)
        {
            if (records == null || records.Count == 0)
            {
                return;
            }

            try
            {
                _logger?.LogDebug($"CloudWatchLogsSink client {this.Id} sending {records.Count} records {batchBytes} bytes.");
                DateTime timestamp     = records[0].Timestamp;
                string   logStreamName = ResolveTimestampInLogStreamName(timestamp);
                if (string.IsNullOrEmpty(_sequenceToken))
                {
                    await GetSequenceTokenAsync(logStreamName);
                }

                var request = new PutLogEventsRequest
                {
                    LogGroupName  = _logGroupName,
                    LogStreamName = logStreamName,
                    SequenceToken = _sequenceToken,
                    LogEvents     = records
                                    .Select(e => e.Data)
                                    .OrderBy(e => e.Timestamp) //Added sort here in case messages are from multiple streams and they are not merged in order
                                    .ToList()
                };

                bool attemptedCreatingLogStream = false;
                int  invalidSequenceTokenCount  = 0;
                while (true)
                {
                    DateTime utcNow = DateTime.UtcNow;
                    _clientLatency = (long)records.Average(r => (utcNow - r.Timestamp).TotalMilliseconds);

                    long elapsedMilliseconds = Utility.GetElapsedMilliseconds();
                    try
                    {
                        PutLogEventsResponse response = await _client.PutLogEventsAsync(request);

                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetSuccess();
                        _sequenceToken = response.NextSequenceToken;
                        RejectedLogEventsInfo rejectedLogEventsInfo = response.RejectedLogEventsInfo;
                        _recordsAttempted += records.Count;
                        _bytesAttempted   += batchBytes;
                        if (rejectedLogEventsInfo != null)
                        {
                            StringBuilder sb = new StringBuilder();
                            sb.Append($"CloudWatchLogsSink client {this.Id} some of the logs where rejected.");
                            sb.Append($" ExpiredLogEventEndIndex {rejectedLogEventsInfo.ExpiredLogEventEndIndex}");
                            sb.Append($" TooNewLogEventStartIndex {rejectedLogEventsInfo.TooNewLogEventStartIndex}");
                            sb.Append($" TooOldLogEventEndIndex {rejectedLogEventsInfo.TooOldLogEventEndIndex}");
                            _logger?.LogError(sb.ToString());
                            long recordCount = records.Count - rejectedLogEventsInfo.ExpiredLogEventEndIndex - rejectedLogEventsInfo.TooOldLogEventEndIndex;
                            if (rejectedLogEventsInfo.TooOldLogEventEndIndex > 0)
                            {
                                recordCount -= records.Count - rejectedLogEventsInfo.TooNewLogEventStartIndex;
                            }
                            _recordsSuccess += recordCount;
                            _recordsFailedNonrecoverable += (records.Count - recordCount);
                        }
                        else
                        {
                            _recordsSuccess += records.Count;
                            _logger?.LogDebug($"CloudWatchLogsSink client {this.Id} successfully sent {records.Count} records {batchBytes} bytes.");
                        }

                        this.SaveBookmarks(records);

                        break;
                    }
                    catch (ResourceNotFoundException)
                    {
                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetError();
                        //Logstream does not exist
                        if (attemptedCreatingLogStream)
                        {
                            _nonrecoverableServiceErrors++;
                            _recordsFailedNonrecoverable += records.Count;
                            throw;
                        }
                        else
                        {
                            _recoverableServiceErrors++;
                            _recordsFailedRecoverable += records.Count;
                            await CreateLogStreamAsync(logStreamName);
                        }
                    }
                    catch (AmazonCloudWatchLogsException ex)
                    {
                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetError();
                        if (ex is InvalidSequenceTokenException || ex is ServiceUnavailableException)
                        {
                            if (ex is InvalidSequenceTokenException invalidSequenceTokenException)
                            {
                                invalidSequenceTokenCount++;
                                _sequenceToken = invalidSequenceTokenException.GetExpectedSequenceToken();
                                //Sometimes we get a sequence token just say "null". This is obviously invalid
                                if ("null".Equals(_sequenceToken))
                                {
                                    _sequenceToken = null;
                                }
                                else if (_sequenceToken != null && invalidSequenceTokenCount < 2)
                                {
                                    continue; //Immediately try so that the sequence token does not become invalid again
                                }
                            }
                            if (_buffer.Requeue(records, _throttle.ConsecutiveErrorCount < _maxAttempts))
                            {
                                _logger?.LogWarning($"CloudWatchLogsSink client {this.Id} attempt={_throttle.ConsecutiveErrorCount} exception={ex.Message}. Will retry.");
                                _recoverableServiceErrors++;
                                _recordsFailedRecoverable += records.Count;
                                break;
                            }
                        }
                        _recordsFailedNonrecoverable += records.Count;
                        _nonrecoverableServiceErrors++;
                        throw;
                    }
                    catch (Exception)
                    {
                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetError();
                        _recordsFailedNonrecoverable += records.Count;
                        _nonrecoverableServiceErrors++;
                        throw;
                    }
                }
            }
            catch (Exception ex)
            {
                _logger?.LogError($"CloudWatchLogsSink client {this.Id} exception (attempt={_throttle.ConsecutiveErrorCount}): {ex.ToMinimized()}");
            }

            PublishMetrics(MetricsConstants.CLOUDWATCHLOG_PREFIX);
        }
        protected override async Task OnNextAsync(List <Envelope <InputLogEvent> > records, long batchBytes)
        {
            if (records == null || records.Count == 0)
            {
                return;
            }

            try
            {
                _logger?.LogDebug("CloudWatchLogsSink client {0} sending {1} records {2} bytes.", this.Id, records.Count, batchBytes);

                var logStreamName = this.ResolveTimestampInLogStreamName(records[0].Timestamp);

                // If the sequence token is null, try to get it.
                // If the log stream doesn't exist, create it (by specifying "true" in the second parameter).
                // This should be the only place where a log stream is created.
                // This method will ensure that both the log group and stream exists,
                // so there is no need to handle a ResourceNotFound exception later.
                if (string.IsNullOrEmpty(_sequenceToken))
                {
                    await this.GetSequenceTokenAsync(logStreamName, true);
                }

                var request = new PutLogEventsRequest
                {
                    LogGroupName  = _logGroupName,
                    LogStreamName = logStreamName,
                    SequenceToken = _sequenceToken,
                    LogEvents     = records
                                    .Select(e => e.Data)
                                    .OrderBy(e => e.Timestamp) // Added sort here in case messages are from multiple streams and they are not merged in order
                                    .ToList()
                };

                int invalidSequenceTokenCount = 0;
                while (true)
                {
                    var utcNow = DateTime.UtcNow;
                    _clientLatency = (long)records.Average(r => (utcNow - r.Timestamp).TotalMilliseconds);

                    long elapsedMilliseconds = Utility.GetElapsedMilliseconds();
                    try
                    {
                        var response = await _client.PutLogEventsAsync(request);

                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetSuccess();
                        _sequenceToken     = response.NextSequenceToken;
                        _recordsAttempted += records.Count;
                        _bytesAttempted   += batchBytes;

                        var rejectedLogEventsInfo = response.RejectedLogEventsInfo;
                        if (rejectedLogEventsInfo != null)
                        {
                            // Don't do the expensive string building unless we know the logger isn't null.
                            if (_logger != null)
                            {
                                var sb = new StringBuilder()
                                         .AppendFormat("CloudWatchLogsSink client {0} encountered some rejected logs.", this.Id)
                                         .AppendFormat(" ExpiredLogEventEndIndex {0}", rejectedLogEventsInfo.ExpiredLogEventEndIndex)
                                         .AppendFormat(" TooNewLogEventStartIndex {0}", rejectedLogEventsInfo.TooNewLogEventStartIndex)
                                         .AppendFormat(" TooOldLogEventEndIndex {0}", rejectedLogEventsInfo.TooOldLogEventEndIndex);
                                _logger.LogError(sb.ToString());
                            }

                            var recordCount = records.Count - rejectedLogEventsInfo.ExpiredLogEventEndIndex - rejectedLogEventsInfo.TooOldLogEventEndIndex;
                            if (rejectedLogEventsInfo.TooOldLogEventEndIndex > 0)
                            {
                                recordCount -= records.Count - rejectedLogEventsInfo.TooNewLogEventStartIndex;
                            }

                            _recordsFailedNonrecoverable += (records.Count - recordCount);
                        }

                        _logger?.LogDebug("CloudWatchLogsSink client {0} successfully sent {1} records {2} bytes.", this.Id, records.Count, batchBytes);
                        _recordsSuccess += records.Count;
                        this.SaveBookmarks(records);

                        break;
                    }
                    catch (AmazonCloudWatchLogsException ex)
                    {
                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetError();

                        // InvalidSequenceTokenExceptions are thrown when a PutLogEvents call doesn't have a valid sequence token.
                        // This is usually recoverable, so we'll try twice before requeuing events.
                        if (ex is InvalidSequenceTokenException invalidSequenceTokenException && invalidSequenceTokenCount < 2)
                        {
                            // Increment the invalid sequence token counter, to limit the "instant retries" that we attempt.
                            invalidSequenceTokenCount++;

                            // The exception from CloudWatch contains the sequence token, so we'll try to parse it out.
                            _sequenceToken = invalidSequenceTokenException.GetExpectedSequenceToken();

                            // Sometimes we get a sequence token with a string value of "null".
                            // This is invalid so we'll fetch it again and retry immediately.
                            // If credentials have expired or this request is being throttled,
                            // the wrapper try/catch will capture it and data will
                            if (AWSConstants.NullString.Equals(_sequenceToken))
                            {
                                _sequenceToken = null;
                                await this.GetSequenceTokenAsync(logStreamName, false);
                            }

                            // Reset the sequence token in the request and immediately retry (without requeuing),
                            // so that the sequence token does not become invalid again.
                            request.SequenceToken = _sequenceToken;
                            continue;
                        }

                        // Retry if one of the following was true:
                        // - The exception was thrown because an invalid sequence token was used (more than twice in a row)
                        // - The service was unavailable (transient error or service outage)
                        // - The security token in the credentials has expired (previously this was handled as an unrecoverable error)
                        if (this.IsRecoverableException(ex))
                        {
                            // Try to requeue the records into the buffer.
                            // This will mean that the events in the buffer are now out of order :(
                            // There's nothing we can do about that short of rewriting all the buffering logic.
                            // Having out of order events isn't that bad, because the service that we're uploading
                            // to will be storing them based on their event time anyway. However, this can affect
                            // the persistent bookmarking behavior, since bookmarks are updated based on the
                            // position/eventId in the last batch sent, not what's currently in the buffer.
                            if (_buffer.Requeue(records, _throttle.ConsecutiveErrorCount < _maxAttempts))
                            {
                                _logger?.LogWarning("CloudWatchLogsSink client {0} attempt={1} exception={2}. Will retry.", this.Id, _throttle.ConsecutiveErrorCount, ex.Message);
                                _recoverableServiceErrors++;
                                _recordsFailedRecoverable += records.Count;
                                break;
                            }
                        }

                        _recordsFailedNonrecoverable += records.Count;
                        _nonrecoverableServiceErrors++;
                        throw;
                    }
                    catch (Exception)
                    {
                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetError();
                        _recordsFailedNonrecoverable += records.Count;
                        _nonrecoverableServiceErrors++;
                        throw;
                    }
                }
            }
            catch (Exception ex)
            {
                _logger?.LogError("CloudWatchLogsSink client {0} exception (attempt={1}): {2}", this.Id, _throttle.ConsecutiveErrorCount, ex.ToMinimized());
            }

            this.PublishMetrics(MetricsConstants.CLOUDWATCHLOG_PREFIX);
        }
예제 #7
0
        /// <summary>
        /// Send the most recent batch of logs to the configured S3 bucket.
        /// </summary>
        /// <param name="records">List of envelope-wrapped logs.</param>
        public async Task SendBatchAsync(List <Envelope <string> > records)
        {
            try
            {
                _logger?.LogDebug("S3Sink id {0} sending {1} logs", Id, records.Count);

                // Create the PutObject request to send to S3
                var request = new PutObjectRequest
                {
                    BucketName = _bucketName,
                    Key        = await GenerateS3ObjectKey(_filePath, _fileName)
                };

                // Turn the list of records into a stream that can be consumed by S3 and will register as a zipped file
                var memStream = new MemoryStream();
                using (var archive = new ZipArchive(memStream, ZipArchiveMode.Create, true))
                {
                    var logFile = archive.CreateEntry("AlarmLogs.txt", CompressionLevel.Optimal);
                    using (var entryStream = logFile.Open())
                        using (var compressedStream = new MemoryStream())
                        {
                            foreach (Envelope <string> record in records)
                            {
                                var byteArray = Encoding.UTF8.GetBytes(record.Data + "\n");
                                compressedStream.Write(byteArray, 0, byteArray.Length);
                            }
                            compressedStream.Seek(0, SeekOrigin.Begin);
                            compressedStream.CopyTo(entryStream);
                        }
                }
                var batchBytes = memStream.Length;
                request.InputStream = memStream;

                // Send the file to S3
                while (true)
                {
                    var utcNow = DateTime.UtcNow;

                    long elapsedMilliseconds = Utility.GetElapsedMilliseconds();
                    try
                    {
                        var response = await S3Client.PutObjectAsync(request);

                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetSuccess();
                        _recordsAttempted += records.Count;
                        _bytesAttempted   += batchBytes;

                        _logger?.LogDebug("S3Sink id {0} successfully sent {1} logs, compressed to {2} bytes",
                                          Id, records.Count, batchBytes);
                        _recordsSuccess += records.Count;
                        await SaveBookmarks(records);

                        break;
                    }
                    catch (AmazonS3Exception ex)
                    {
                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetError();

                        // Retry PutObjectRequest if possibe
                        if (ex.Retryable != null)
                        {
                            if (_buffer.Requeue(records, _throttle.ConsecutiveErrorCount < _maxAttempts))
                            {
                                _logger?.LogWarning("S3Sink id {0} attempt={1} exception={2}. Will retry.", Id, _throttle.ConsecutiveErrorCount, ex.Message);
                                _recoverableServiceErrors++;
                                _recordsFailedRecoverable += records.Count;
                                break;
                            }
                        }

                        _recordsFailedNonrecoverable += records.Count;
                        _nonrecoverableServiceErrors++;
                        throw;
                    }
                    catch (Exception)
                    {
                        _latency = Utility.GetElapsedMilliseconds() - elapsedMilliseconds;
                        _throttle.SetError();
                        _recordsFailedNonrecoverable += records.Count;
                        _nonrecoverableServiceErrors++;
                        throw;
                    }
                }
            }
            catch (Exception ex)
            {
                _logger?.LogError("S3Sink id {0} exception (attempt={1}): {2}", Id, _throttle.ConsecutiveErrorCount, ex.ToMinimized());
            }

            PublishMetrics(MetricsConstants.S3_PREFIX);
        }