Esempio n. 1
0
        /// <summary>
        /// Serializes every item of <paramref name="stream" /> to one line of text and writes the lines to
        /// consecutive segments obtained from <c>CreateSegment</c>. When a size limit is given, the current
        /// segment is closed and a new one is started as soon as the segment's reported length reaches it.
        /// </summary>
        /// <param name="label">Label passed to <c>CreateSegment</c> for every segment that is opened.</param>
        /// <param name="stream">The items to serialize; enumerated exactly once.</param>
        /// <param name="maxWorkingMemoryBytes">
        /// Segment size threshold in bytes. <c>0</c> disables rollover, so everything goes to segment 0.
        /// </param>
        /// <param name="logger">Not used by this method.</param>
        /// <param name="metrics">Metrics host that receives the length of every serialized line.</param>
        /// <param name="cancellationToken">Checked before each item is processed.</param>
        /// <returns>
        /// The number of records written, the mean line length reported by the metrics plus one, and the
        /// number of rollovers performed (which is also the index of the last segment that was opened).
        /// </returns>
        /// <exception cref="OperationCanceledException">
        /// <paramref name="cancellationToken" /> was cancelled; segments written so far are left in place.
        /// </exception>
        public SegmentStats Segment(string label, IEnumerable <T> stream, int maxWorkingMemoryBytes = 0,
                                    ISafeLogger logger = null, IMetricsHost metrics = null, CancellationToken cancellationToken = default)
        {
            var count    = 0L;
            var segments = 0;
            var sw       = new StreamWriter(CreateSegment(label, segments));

            try
            {
                foreach (var item in stream)
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    var line = _serialize(item);
                    BuiltInMetrics.LineLength <T>(metrics, line.Length);

                    sw.WriteLine(line);
                    count++;

                    // NOTE(review): the length is read without flushing the writer first, so characters
                    // still held in the writer's buffer are presumably not counted yet - confirm that a
                    // slightly late rollover is acceptable.
                    if (maxWorkingMemoryBytes == 0 || sw.BaseStream.Length < maxWorkingMemoryBytes)
                    {
                        continue;
                    }

                    // Detach the writer before disposing it: if opening the next segment fails, the
                    // finally block must not touch an already-closed writer and hide the real exception.
                    var finished = sw;
                    sw = null;
                    finished.Dispose(); // flushes pending text and closes the underlying stream

                    segments++;
                    sw = new StreamWriter(CreateSegment(label, segments));
                }
            }
            finally
            {
                // Null only when a rollover closed the previous segment and the next one failed to open.
                sw?.Dispose();
            }

            return(new SegmentStats
            {
                RecordCount = count,
                RecordLength = (int)BuiltInMetrics.GetMeanLineLength <T>(metrics) + 1,
                SegmentCount = segments
            });
        }
Esempio n. 2
0
        // Derived from MimeKit's MimeParser
        /// <summary>
        /// Scans <paramref name="stream" /> for line feed bytes using <paramref name="workingBuffer" /> as a
        /// read-ahead buffer, invokes <paramref name="onNewLine" /> (when not null) for every line or partial
        /// line found, and returns the number of lines counted.
        /// </summary>
        /// <param name="stream">The stream to read; its position is captured up front when it is seekable.</param>
        /// <param name="encoding">Supplies the preamble to look for and is forwarded to the callback.</param>
        /// <param name="workingBuffer">
        /// Buffer that is pinned and scanned in place. The scan writes a sentinel byte at index <c>to</c>, so
        /// the buffer is assumed to have at least one spare byte past the data that was read - TODO confirm.
        /// </param>
        /// <param name="onNewLine">Optional callback receiving the line number, a flag, the start pointer, the length and the encoding.</param>
        /// <param name="logger">Not referenced in this method body.</param>
        /// <param name="metrics">Receives the byte length of every scanned run via <c>BuiltInMetrics.BytesPerSecond</c>.</param>
        /// <param name="cancellationToken">Forwarded to <c>ReadPreamble</c> and <c>ReadAhead</c>.</param>
        /// <returns>
        /// The number of line feeds found, or 1 when the scan reaches the end of the buffered data before any
        /// line feed has been counted.
        /// </returns>
        /// <exception cref="FormatException"><c>ReadPreamble</c> returned false.</exception>
        private static long ReadOrCountLines(Stream stream, Encoding encoding, byte[] workingBuffer, NewLine onNewLine,
                                             ISafeLogger logger, IMetricsHost metrics, CancellationToken cancellationToken)
        {
            var count       = 0L;
            var offset      = stream.CanSeek ? stream.Position : 0L;
            // from/to delimit the unread bytes in workingBuffer; both start at Constants.ReadAheadSize.
            // Presumably the space in front is reserved for data carried over by ReadAhead - TODO confirm.
            var from        = Constants.ReadAheadSize;
            var to          = Constants.ReadAheadSize;
            var endOfStream = false;

            var preamble = encoding.GetPreambleBuffer();

            unsafe
            {
                // Pin the managed buffer so raw pointers into it stay valid during the scan.
                fixed(byte *buffer = workingBuffer)
                {
                    // offset was captured from stream.Position above, so this only seeks if the position
                    // has changed since then.
                    if (stream.CanSeek && stream.Position != offset)
                    {
                        stream.Seek(offset, SeekOrigin.Begin);
                    }

                    if (!ReadPreamble(stream, preamble, buffer, workingBuffer, ref from, ref to, ref endOfStream,
                                      cancellationToken))
                    {
                        throw new FormatException(ErrorStrings.UnexpectedEndOfStream);
                    }

                    do
                    {
                        // ReadAhead updates from/to/endOfStream by reference; a non-positive result ends the scan.
                        if (ReadAhead(stream, workingBuffer, Constants.ReadAheadSize, 2, ref from, ref to,
                                      ref endOfStream, cancellationToken) <= 0)
                        {
                            break;
                        }

                        var position   = buffer + from;
                        var end        = buffer + to;
                        var startIndex = from;

                        // Sentinel: a line feed just past the valid data guarantees the unbounded scans
                        // below always terminate.
                        *end = (byte)'\n';

                        while (position < end)
                        {
                            // Round the current index up to the next multiple of 4.
                            var alignment = (startIndex + 3) & ~3;
                            var aligned   = buffer + alignment;
                            var start     = position;
                            var c         = *aligned;

                            // Temporarily plant a line feed at the aligned byte so the byte-wise scan stops
                            // at the alignment boundary at the latest, then restore the original byte.
                            *aligned = Constants.LineFeed;
                            while (*position != Constants.LineFeed)
                            {
                                position++;
                            }

                            *aligned = c;

                            // Stopped on the planted byte rather than a real line feed: continue four bytes
                            // at a time from the aligned address.
                            if (position == aligned && c != Constants.LineFeed)
                            {
                                var  dword = (uint *)position;
                                uint mask;
                                do
                                {
                                    // XOR turns every 0x0A byte into 0x00; the second expression is the
                                    // classic "word contains a zero byte" test, so mask becomes non-zero
                                    // once the word holds a line feed.
                                    mask = *dword++ ^ 0x0A0A0A0A;
                                    mask = (mask - 0x01010101) & ~mask & 0x80808080;
                                } while (mask == 0);

                                // Step back to the word that matched and locate the exact byte.
                                position = (byte *)(dword - 1);
                                while (*position != Constants.LineFeed)
                                {
                                    position++;
                                }
                            }

                            // Bytes between the start of this run and the line feed (exclusive).
                            var length = (int)(position - start);

                            BuiltInMetrics.BytesPerSecond(metrics, length);

                            if (position < end)
                            {
                                // A real line feed inside the data: include it in the line and count the line.
                                length++;
                                position++;
                                count++;

                                onNewLine?.Invoke(count, false, start, length, encoding);
                            }
                            else if (count == 0 && position == end)
                            {
                                // Reached the sentinel before any line was counted: report the bytes seen
                                // and return 1.
                                // NOTE(review): endOfStream is not consulted here - confirm that input whose
                                // first buffered chunk has no line feed should always count as one line.
                                onNewLine?.Invoke(count, false, start, length, encoding);
                                return(1);
                            }
                            else
                            {
                                // line spans across the read-ahead buffer
                                // (reported with the second argument set to true and count not incremented)
                                onNewLine?.Invoke(count, true, start, length, encoding);
                            }

                            startIndex += length;
                        }

                        // Everything up to startIndex has been reported; the next ReadAhead continues from there.
                        from = startIndex;
                    } while (true);
                }
            }

            return(count);
        }