/// <summary>
/// Consumes one line from the reader and turns it into a JSON document by applying the first matching regex
/// (via FindAndApplyRegexMatch). The "ts" and "ts_offset" fields, when present, are rewritten into the
/// standardized internal formats, blacklisted values are stripped, and the result is passed through InsertMetadata.
/// </summary>
/// <param name="reader">Text reader positioned at the next log line to consume.</param>
/// <returns>
/// The parsed record as a JObject, or null when the line is null/whitespace or no fields could be extracted from it.
/// </returns>
public override JObject ParseLogDocument(TextReader reader)
{
    var rawLine = ReadLine(reader);
    if (String.IsNullOrWhiteSpace(rawLine))
    {
        return null;
    }

    IDictionary<string, object> captured = FindAndApplyRegexMatch(rawLine);

    // Nothing was extracted from the line, so there is no document to build.
    if (captured.Count == 0)
    {
        return null;
    }

    // Normalize the timestamp into the internal common format.
    object rawTimestamp;
    if (captured.TryGetValue("ts", out rawTimestamp))
    {
        captured["ts"] = TimestampStandardizer.Standardize(rawTimestamp.ToString());
    }

    // Normalize the timezone/offset into the internal common format.
    object rawOffset;
    if (captured.TryGetValue("ts_offset", out rawOffset))
    {
        captured["ts_offset"] = TimeZoneStandardizer.StandardizeTimeZone(rawOffset.ToString());
    }

    // Build the JSON document, drop any property whose value is blacklisted, then attach metadata.
    var document = captured.ConvertToJObject();
    var filtered = document.RemovePropertiesWithValue(defaultBlacklistedValues);
    return InsertMetadata(filtered);
}
/// <summary>
/// Rewrites the "ts" property of a JObject in place with the output of TimestampStandardizer.Standardize.
/// </summary>
/// <param name="json">The JSON object whose "ts" property should be standardized.</param>
/// <returns>The same <paramref name="json"/> instance; it is returned untouched when it has no "ts" property.</returns>
protected static JObject ReplaceRawTimestampWithStandardizedTimestamp(JObject json)
{
    JToken tsToken = json["ts"];
    if (tsToken == null)
    {
        // No timestamp property present; nothing to standardize.
        return json;
    }

    // A JSON Date token is first materialized as a .NET DateTime and formatted with the shared JSON date
    // format string; any other token type is used via its plain string representation.
    string rawTimestamp = tsToken.Type == JTokenType.Date
        ? ((DateTime)tsToken.ToObject(typeof(DateTime))).ToString(jsonDateFormatString, CultureInfo.InvariantCulture)
        : tsToken.ToString();

    tsToken.Replace(new JValue(TimestampStandardizer.Standardize(rawTimestamp)));
    return json;
}
/// <summary>
/// Multiline regex parser: reads a first line, then keeps reading and appending lines until one is recognized
/// by IsNewLogLine as the start of a new log record (or the reader runs out of lines). Everything collected is
/// parsed as a single document. The delimiting line is left in bufferedLine for the next call.
/// </summary>
/// <param name="reader">Text reader positioned at the next log line to consume.</param>
/// <returns>
/// The parsed record as a JObject, or null when the first line is null/whitespace or no fields could be
/// extracted from the collected text.
/// </returns>
public override JObject ParseLogDocument(TextReader reader)
{
    var sb = new StringBuilder();

    // Count the first line of the document before reading it; bail out if there is nothing usable to read.
    LineCounter.Increment();
    var line = ReadLine(reader);
    if (String.IsNullOrWhiteSpace(line))
    {
        return null;
    }
    sb.Append(line);

    // Keep reading and appending lines until we hit one that starts a new log record.
    bool nextLineIsMatch = false;
    int nonDocumentLines = 0;
    while (!nextLineIsMatch)
    {
        bufferedLine = ReadLine(reader);

        // No more lines could be read; parse what we have collected so far.
        if (bufferedLine == null)
        {
            break;
        }

        nextLineIsMatch = IsNewLogLine(bufferedLine);
        if (!nextLineIsMatch)
        {
            // Continuation line: fold it into the current document and clear the buffer.
            sb.Append('\n').Append(bufferedLine);
            nonDocumentLines++;
            bufferedLine = null;
        }
    }

    // Capture groups into dictionary.
    IDictionary<string, object> fields = FindAndApplyRegexMatch(sb.ToString());

    // Give up if we didn't parse any data out of the collected text.
    if (fields.Count == 0)
    {
        // The continuation lines were still consumed from the reader, so they must be counted even though
        // no document is produced; otherwise the line counter falls behind for every later document.
        LineCounter.IncrementBy(nonDocumentLines);
        return null;
    }

    // Convert timestamp to internal common format.
    object rawTimestamp;
    if (fields.TryGetValue("ts", out rawTimestamp))
    {
        fields["ts"] = TimestampStandardizer.Standardize(rawTimestamp.ToString());
    }

    // Convert timezone/offset to internal common format.
    object rawOffset;
    if (fields.TryGetValue("ts_offset", out rawOffset))
    {
        fields["ts_offset"] = TimeZoneStandardizer.StandardizeTimeZone(rawOffset.ToString());
    }

    // Convert dictionary to JSON, strip any properties with blacklisted values, and attach metadata.
    // This runs before the counter is advanced past the continuation lines below.
    var json = InsertMetadata(fields.ConvertToJObject().RemovePropertiesWithValue(defaultBlacklistedValues));

    // Update LineCounter to "skip" the continuation lines that were folded into this document.
    LineCounter.IncrementBy(nonDocumentLines);

    return json;
}