/* Note about encoding.
 *   RFC 5424 states that most of a syslog message should be encoded as plain ASCII,
 *   except for the values of parameters in the StructuredData section; these are allowed
 *   to be in Unicode/UTF-8.
 *   Since any valid ASCII text is also valid UTF-8 text, we use UTF-8 for reading the payload,
 *   so we can correctly read the entire mix.
 *   The recommendation in the RFC is therefore primarily for writers (log producers):
 *   stay with ASCII most of the time, with occasional values in UTF-8.
 *   We, on the other hand, as a reader, are 'forgiving' and read the entire text as a UTF-8 message.
 *
 * About BOM: the RFC states that the [Message] portion of the payload (the tail part) can start
 *   with the BOM (byte order mark) to indicate Unicode content.
 *   We strip the BOM off when we parse the payload, as it causes trouble if left in the string:
 *   from past experience, it is invisible, the debugger does not show it, but it can break some
 *   string operations.
 */

/// <summary>
/// Log-file version of the parser loop. While <c>_running</c> is set, it dequeues batches of
/// log-file entries from <c>_logFileBuffer</c>, converts each one with <c>TransformToRFC</c>,
/// parses it, and hands every successfully parsed entry to <c>OnEntryReceived</c> and then
/// either to <c>Broadcast</c> or, when <c>Filter</c> rejects it, marks it as ignored.
/// </summary>
/// <param name="data">Not used by this method.</param>
private void RunParserLoop(object data)
{
    // Yield once per this many entries handled by THIS loop. A local counter is used instead of
    // the shared _syslogMessageCount: that field does not advance for non-syslog entries (which
    // made the loop yield on every entry while it sat on a multiple of 10) and is updated
    // concurrently through Interlocked, so a plain read of it is not a reliable cadence.
    const int YieldInterval = 10;
    var entriesSinceYield = 0;

    while (_running)
    {
        // Nothing queued: back off briefly instead of spinning.
        if (_logFileBuffer.Count == 0)
        {
            Thread.Sleep(20);
        }

        try
        {
            Interlocked.Increment(ref _activeParserLoopCount);
            var logfileEntries = _logFileBuffer.DequeueMany(MessageBatchSize);
            foreach (var entry in logfileEntries)
            {
                Interlocked.Increment(ref _logFileEntryCount);

                // Reject null/empty entries before doing any transformation or allocation;
                // there is nothing to parse in them.
                if (!string.IsNullOrEmpty(entry))
                {
                    var text = TransformToRFC(entry);
                    var ctx = new ParserContext(text);

                    // If TryParse returns false, the text is not syslog at all.
                    if (_parser.TryParse(ctx))
                    {
                        Interlocked.Increment(ref _syslogMessageCount);
                        var serverEntry = new ServerSyslogEntry()
                        {
                            Payload = entry,
                            Entry = ctx.Entry,
                            ParseErrorMessages = ctx.ErrorMessages
                        };
                        OnEntryReceived(serverEntry);

                        // Run the filter; a filter returning false means "skip this entry".
                        if (Filter != null && !Filter(serverEntry))
                        {
                            Interlocked.Increment(ref _skippedMessageCount);
                            serverEntry.Ignore = true;
                        }
                        else
                        {
                            Broadcast(serverEntry);
                        }
                    }
                }

                // Play nice - yield the CPU regularly; this is a very CPU-heavy loop with no IO.
                if (++entriesSinceYield >= YieldInterval)
                {
                    entriesSinceYield = 0;
                    Thread.Yield();
                }
            }
        }
        catch (Exception ex)
        {
            // Errors raised while shutting down are not reported.
            if (!_running)
            {
                return;
            }
            OnError(ex);
        }
        finally
        {
            Interlocked.Decrement(ref _activeParserLoopCount);
        }
    }
} // method
/// <summary>
/// Prefixes a raw entry with the default priority, parses the result as syslog and, when
/// parsing succeeds, converts the entry to a dictionary and broadcasts it on the event stream.
/// Entries that fail to parse are dropped without further action.
/// </summary>
/// <param name="entry">Raw entry text; it is also stored unchanged as the payload.</param>
private void Parse(string entry)
{
    // Give the CPU a break every so often.
    // NOTE(review): this method only resets _syslogEntryCount and never advances it;
    // presumably a caller increments it - confirm.
    var dueForYield = _syslogEntryCount % 10 == 0;
    if (dueForYield)
    {
        _syslogEntryCount = 0;
        Thread.Yield();
    }

    // Transform to RFC form by prepending the default priority, then parse with Microsoft.Syslog.
    var context = new ParserContext($"<{_defaultPriority}> {entry}");
    if (!_syslogParser.TryParse(context))
    {
        return;
    }

    var parsedEntry = new ServerSyslogEntry()
    {
        Payload = entry,
        Entry = context.Entry,
        ParseErrorMessages = context.ErrorMessages
    };
    _eventStream.Broadcast(SyslogEntryToDictionaryConverter.Convert(parsedEntry));
}
/* Note about encoding.
 *   RFC 5424 states that most of a syslog message should be encoded as plain ASCII,
 *   except for the values of parameters in the StructuredData section; these are allowed
 *   to be in Unicode/UTF-8.
 *   Since any valid ASCII text is also valid UTF-8 text, we use UTF-8 for reading the payload,
 *   so we can correctly read the entire mix.
 *   The recommendation in the RFC is therefore primarily for writers (log producers):
 *   stay with ASCII most of the time, with occasional values in UTF-8.
 *   We, on the other hand, as a reader, are 'forgiving' and read the entire text as a UTF-8 message.
 *
 * About BOM: the RFC states that the [Message] portion of the payload (the tail part) can start
 *   with the BOM (byte order mark) to indicate Unicode content.
 *   We strip the BOM off when we parse the payload, as it causes trouble if left in the string:
 *   from past experience, it is invisible, the debugger does not show it, but it can break some
 *   string operations.
 */

/// <summary>
/// UDP version of the parser loop. While <c>_running</c> is set, it dequeues batches of packets
/// from <c>_udpBuffer</c>, decodes each packet's data as UTF-8, parses it, and hands every
/// successfully parsed entry to <c>OnEntryReceived</c> and then either to <c>Broadcast</c> or,
/// when <c>Filter</c> rejects it, marks it as ignored.
/// </summary>
/// <param name="data">Not used by this method.</param>
private void RunParserLoop(object data)
{
    // Yield once per this many packets handled by THIS loop. A local counter is used instead of
    // the shared _syslogMessageCount: that field does not advance for non-syslog packets (which
    // made the loop yield on every packet while it sat on a multiple of 10) and is updated
    // concurrently through Interlocked, so a plain read of it is not a reliable cadence.
    const int YieldInterval = 10;
    var packetsSinceYield = 0;

    while (_running)
    {
        // Nothing queued: back off briefly instead of spinning.
        if (_udpBuffer.Count == 0)
        {
            Thread.Sleep(20);
        }

        try
        {
            Interlocked.Increment(ref _activeParserLoopCount);
            var udpPackets = _udpBuffer.DequeueMany(MessageBatchSize);
            foreach (var packet in udpPackets)
            {
                Interlocked.Increment(ref _udpPacketCount);

                // The whole payload is read as UTF-8; see the encoding note preceding this method.
                var text = Encoding.UTF8.GetString(packet.Data);
                var ctx = new ParserContext(text);

                // If TryParse returns false, the text is not syslog at all.
                if (_parser.TryParse(ctx))
                {
                    Interlocked.Increment(ref _syslogMessageCount);
                    var serverEntry = new ServerSyslogEntry()
                    {
                        UdpPacket = packet,
                        Payload = text,
                        Entry = ctx.Entry,
                        ParseErrorMessages = ctx.ErrorMessages
                    };
                    OnEntryReceived(serverEntry);

                    // Run the filter; a filter returning false means "skip this entry".
                    if (Filter != null && !Filter(serverEntry))
                    {
                        Interlocked.Increment(ref _skippedMessageCount);
                        serverEntry.Ignore = true;
                    }
                    else
                    {
                        Broadcast(serverEntry);
                    }
                }

                // Play nice - yield the CPU regularly; this is a very CPU-heavy loop with no IO.
                if (++packetsSinceYield >= YieldInterval)
                {
                    packetsSinceYield = 0;
                    Thread.Yield();
                }
            }
        }
        catch (Exception ex)
        {
            // Errors raised while shutting down are not reported.
            if (!_running)
            {
                return;
            }
            OnError(ex);
        }
        finally
        {
            Interlocked.Decrement(ref _activeParserLoopCount);
        }
    }
} // method