/// <summary>
/// Serializes every item of <paramref name="stream" /> as one text line and writes the lines to
/// one or more segment streams obtained from <c>CreateSegment(label, index)</c>.
/// </summary>
/// <param name="label">Label forwarded to <c>CreateSegment</c> for every segment that is opened.</param>
/// <param name="stream">The items to serialize, one line per item.</param>
/// <param name="maxWorkingMemoryBytes">
/// Roll-over threshold. When zero, everything goes into a single segment. Otherwise a new segment is
/// started after a write once the current segment's base stream length is no longer below this value.
/// </param>
/// <param name="logger">Not used by this method.</param>
/// <param name="metrics">Host passed to <c>BuiltInMetrics</c> to record line lengths; also read back for the mean line length.</param>
/// <param name="cancellationToken">Checked before each item is written; cancels the segmentation when signalled.</param>
/// <returns>
/// The number of records written, the mean line length reported by <c>BuiltInMetrics</c> plus one, and
/// the number of roll-overs performed (that is, the index of the last segment opened).
/// </returns>
/// <exception cref="OperationCanceledException">The <paramref name="cancellationToken" /> was signalled.</exception>
public SegmentStats Segment(string label, IEnumerable<T> stream, int maxWorkingMemoryBytes = 0, ISafeLogger logger = null, IMetricsHost metrics = null, CancellationToken cancellationToken = default)
{
    var count = 0L;
    var segments = 0;
    var sw = new StreamWriter(CreateSegment(label, segments));
    try
    {
        foreach (var item in stream)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var line = _serialize(item);
            BuiltInMetrics.LineLength<T>(metrics, line.Length);
            sw.WriteLine(line);
            count++;

            // NOTE(review): BaseStream.Length only reflects what the writer has already pushed to the
            // underlying stream; text still held in the writer's buffer is not counted here.
            if (maxWorkingMemoryBytes == 0 || sw.BaseStream.Length < maxWorkingMemoryBytes)
            {
                continue;
            }

            sw.Flush();
            sw.Close();

            // Forget the closed writer before opening the next segment: if CreateSegment throws, the
            // finally block must not touch a disposed writer and mask the original exception.
            sw = null;
            segments++;
            sw = new StreamWriter(CreateSegment(label, segments));
        }
    }
    finally
    {
        // Close() flushes pending text and releases the stream even when the flush itself fails.
        sw?.Close();
    }

    return new SegmentStats
    {
        RecordCount = count,
        RecordLength = (int)BuiltInMetrics.GetMeanLineLength<T>(metrics) + 1,
        SegmentCount = segments
    };
}
// Derived from MimeKit's MimeParser
/// <summary>
/// Scans <paramref name="stream" /> for line feeds in read-ahead sized chunks, counting the lines
/// found and reporting each one through <paramref name="onNewLine" /> when a callback is supplied.
/// </summary>
/// <param name="stream">The stream to scan.</param>
/// <param name="encoding">Supplies the preamble passed to <c>ReadPreamble</c>; also handed to the callback.</param>
/// <param name="workingBuffer">
/// Scratch buffer that is pinned for the duration of the scan. A sentinel line feed is written at the
/// end of the buffered data, so the buffer is assumed to have spare room past that point (TODO confirm
/// the padding against <c>ReadAhead</c> and the callers).
/// </param>
/// <param name="onNewLine">
/// Optional callback receiving the line count, a flag that is true when the line is cut off by the
/// end of the buffered data, a pointer to the first byte, the length in bytes and the encoding.
/// </param>
/// <param name="logger">Not used by this method.</param>
/// <param name="metrics">Host passed to <c>BuiltInMetrics.BytesPerSecond</c> with each scanned length.</param>
/// <param name="cancellationToken">Forwarded to <c>ReadPreamble</c> and <c>ReadAhead</c>.</param>
/// <returns>
/// The number of line-feed terminated lines seen, or 1 when the scan reaches the end of the buffered
/// data before any line feed has been counted.
/// </returns>
/// <exception cref="FormatException"><c>ReadPreamble</c> reported failure.</exception>
private static long ReadOrCountLines(Stream stream, Encoding encoding, byte[] workingBuffer, NewLine onNewLine, ISafeLogger logger, IMetricsHost metrics, CancellationToken cancellationToken)
{
    var lineCount = 0L;
    var initialPosition = stream.CanSeek ? stream.Position : 0L;
    var from = Constants.ReadAheadSize;
    var to = Constants.ReadAheadSize;
    var endOfStream = false;
    var bom = encoding.GetPreambleBuffer();

    unsafe
    {
        fixed (byte* buffer = workingBuffer)
        {
            // Rewind to the position captured on entry if the stream has moved since.
            if (stream.CanSeek && stream.Position != initialPosition)
            {
                stream.Seek(initialPosition, SeekOrigin.Begin);
            }

            var preambleOk = ReadPreamble(stream, bom, buffer, workingBuffer, ref from, ref to, ref endOfStream, cancellationToken);
            if (!preambleOk)
            {
                throw new FormatException(ErrorStrings.UnexpectedEndOfStream);
            }

            while (ReadAhead(stream, workingBuffer, Constants.ReadAheadSize, 2, ref from, ref to, ref endOfStream, cancellationToken) > 0)
            {
                var cursor = buffer + from;
                var limit = buffer + to;
                var consumed = from;

                // Sentinel: guarantees every scan below stops at the end of the buffered data.
                *limit = (byte)'\n';

                while (cursor < limit)
                {
                    // Next 4-byte aligned slot at or after the current line start.
                    var alignedSlot = buffer + ((consumed + 3) & ~3);
                    var lineStart = cursor;

                    // Temporarily plant a line feed at the aligned slot so the byte-wise scan stops
                    // there at the latest, then put the original byte back.
                    var saved = *alignedSlot;
                    *alignedSlot = Constants.LineFeed;
                    while (*cursor != Constants.LineFeed)
                    {
                        cursor++;
                    }
                    *alignedSlot = saved;

                    if (cursor == alignedSlot && saved != Constants.LineFeed)
                    {
                        // No real line feed before the aligned slot: test four bytes at a time for a
                        // 0x0A byte, then pinpoint it byte-wise inside the matching word.
                        var word = (uint*)cursor;
                        uint probe;
                        do
                        {
                            probe = *word++ ^ 0x0A0A0A0A;
                            probe = (probe - 0x01010101) & ~probe & 0x80808080;
                        } while (probe == 0);

                        cursor = (byte*)(word - 1);
                        while (*cursor != Constants.LineFeed)
                        {
                            cursor++;
                        }
                    }

                    var lineLength = (int)(cursor - lineStart);
                    BuiltInMetrics.BytesPerSecond(metrics, lineLength);

                    if (cursor >= limit)
                    {
                        if (lineCount == 0 && cursor == limit)
                        {
                            // Nothing counted so far and the buffered data is exhausted:
                            // report what was scanned as one line and stop.
                            onNewLine?.Invoke(lineCount, false, lineStart, lineLength, encoding);
                            return 1;
                        }

                        // line spans across the read-ahead buffer
                        onNewLine?.Invoke(lineCount, true, lineStart, lineLength, encoding);
                    }
                    else
                    {
                        // A real line feed inside the buffered data: include it in the reported
                        // length and step past it.
                        lineLength++;
                        cursor++;
                        lineCount++;
                        onNewLine?.Invoke(lineCount, false, lineStart, lineLength, encoding);
                    }

                    consumed += lineLength;
                }

                from = consumed;
            }
        }
    }

    return lineCount;
}