/// <summary>
        /// Reads a line and parses it into a JSON document using a collection of possible regex matches.  Inserts line number/filename/workername properties and standardizes timestamp.
        /// </summary>
        /// <param name="reader">Cursor to some sort of string data reader pointed at log file.</param>
        /// <returns>JObject containing parsed record with all properties with blacklisted values removed and standardized timestamp.</returns>
        public override JObject ParseLogDocument(TextReader reader)
        {
            // Nothing to parse when ReadLine yields null, an empty string or only whitespace.
            var line = ReadLine(reader);
            if (String.IsNullOrWhiteSpace(line))
            {
                return null;
            }

            // Give up if we didn't parse any data out of the line.
            IDictionary<string, object> fields = FindAndApplyRegexMatch(line);
            if (fields.Count == 0)
            {
                return null;
            }

            // Convert timestamp to internal common format.
            // TryGetValue fetches the value in a single lookup instead of ContainsKey followed by the indexer.
            object rawTimestamp;
            if (fields.TryGetValue("ts", out rawTimestamp))
            {
                fields["ts"] = TimestampStandardizer.Standardize(rawTimestamp.ToString());
            }

            // Convert timezone/offset to internal common format.
            object rawOffset;
            if (fields.TryGetValue("ts_offset", out rawOffset))
            {
                fields["ts_offset"] = TimeZoneStandardizer.StandardizeTimeZone(rawOffset.ToString());
            }

            // Convert dictionary to JSON and strip any properties with values on the blacklist,
            // then return whatever InsertMetadata produces for that document.
            var json = fields.ConvertToJObject().RemovePropertiesWithValue(defaultBlacklistedValues);
            return InsertMetadata(json);
        }
        /// <summary>
        /// Replaces the raw timestamp in a JObject with a "standardized" version.
        /// </summary>
        /// <param name="json">The JSON object to do the timestamp replacement in.</param>
        protected static JObject ReplaceRawTimestampWithStandardizedTimestamp(JObject json)
        {
            JToken rawToken = json["ts"];

            // No "ts" property present: hand the document back untouched.
            if (rawToken == null)
            {
                return json;
            }

            // A JSON Date token is converted to a .NET DateTime first and rendered with
            // jsonDateFormatString under the invariant culture; any other token type
            // contributes its ToString() text as-is.
            string rawText = rawToken.Type == JTokenType.Date
                ? rawToken.ToObject<DateTime>().ToString(jsonDateFormatString, CultureInfo.InvariantCulture)
                : rawToken.ToString();

            // Swap the original token in place for the standardized value and return the same JObject.
            var standardized = TimestampStandardizer.Standardize(rawText);
            rawToken.Replace(new JValue(standardized));

            return json;
        }
Esempio n. 3
0
        /// <summary>
        /// The basic strategy of the multiline regex parser is to read and append lines until we hit a line that matches one of our LineDelimiterRegexes, then parse everything we've collected into a single document.
        /// The delimiting line will be buffered for the next time ParseLogDocument is called.
        /// </summary>
        public override JObject ParseLogDocument(TextReader reader)
        {
            var sb = new StringBuilder();

            // Read a line (from the buffer, if it exists); bail out if we can't.
            LineCounter.Increment();
            var line = ReadLine(reader);

            if (String.IsNullOrWhiteSpace(line))
            {
                return null;
            }
            sb.Append(line);

            // Keep reading & appending more lines until we hit one that matches a known delimiter pattern.
            int nonDocumentLines = 0;

            while (true)
            {
                bufferedLine = ReadLine(reader);

                // If we failed to read a line, we need to break out.
                if (bufferedLine == null)
                {
                    break;
                }

                // A line that starts a new log record ends this document; it stays in
                // bufferedLine so it is still available after this method returns.
                if (IsNewLogLine(bufferedLine))
                {
                    break;
                }

                // Otherwise it is a continuation of the current record.
                sb.Append('\n').Append(bufferedLine);
                nonDocumentLines++;
                bufferedLine = null;
            }

            // Capture groups into dictionary.
            IDictionary<string, object> fields = FindAndApplyRegexMatch(sb.ToString());

            // Give up if we didn't parse any data out of the collected lines.
            if (fields.Count == 0)
            {
                // The continuation lines were consumed from the reader even though nothing
                // was parsed; count them here too, otherwise the counter falls behind the
                // reader and later documents get drifting line numbers.
                LineCounter.IncrementBy(nonDocumentLines);
                return null;
            }

            // Convert timestamp to internal common format.
            // TryGetValue fetches the value in a single lookup instead of ContainsKey followed by the indexer.
            object rawTimestamp;
            if (fields.TryGetValue("ts", out rawTimestamp))
            {
                fields["ts"] = TimestampStandardizer.Standardize(rawTimestamp.ToString());
            }

            // Convert timezone/offset to internal common format.
            object rawOffset;
            if (fields.TryGetValue("ts_offset", out rawOffset))
            {
                fields["ts_offset"] = TimeZoneStandardizer.StandardizeTimeZone(rawOffset.ToString());
            }

            // Convert dictionary to JSON and strip any properties with values on the blacklist.
            // NOTE(review): metadata is inserted before the counter is advanced, presumably so the
            // recorded line number is that of the record's first line - confirm against InsertMetadata.
            var json = InsertMetadata(fields.ConvertToJObject().RemovePropertiesWithValue(defaultBlacklistedValues));

            // Update LineCounter to "skip" any multilines we read.
            LineCounter.IncrementBy(nonDocumentLines);

            return json;
        }