/// <summary>Returns the file address of the line start found by scanning backward from 'filepos'</summary>
/// <param name="file">The file to search</param>
/// <param name="filepos">The file position to search backward from</param>
/// <param name="fileend">The current known length of the file (forwarded to 'Buffer')</param>
/// <param name="row_delim">The bytes that identify an end of line</param>
/// <param name="encoding">The text file encoding (forwarded to 'Buffer')</param>
/// <param name="buf">Scratch buffer; its length is the maximum number of bytes requested</param>
/// <returns>The address of the line start, or 0 when the start of the file is assumed to be the line start</returns>
/// <exception cref="NoLinesException">No row delimiter was found in the block and the number of bytes read does not equal 'filepos'</exception>
private static long FindLineStart(IFileSource file, long filepos, long fileend, byte[] row_delim, Encoding encoding, byte[] buf)
{
    // Position the stream, then pull a block of data into 'buf' (requested in the backward direction)
    file.Stream.Seek(filepos, SeekOrigin.Begin);
    bool at_eof;
    int count = Buffer(file, buf.Length, fileend, encoding, true, buf, out at_eof);

    // Nothing was read: treat the first character in the file as the start of a line
    if (count == 0)
    {
        return 0;
    }

    // Search backward from the last byte read for the start of a line
    int line_start = Misc.FindNextDelim(buf, count - 1, count, row_delim, true);
    if (line_start == -1)
    {
        // No delimiter in this block. If the number of bytes read equals 'filepos',
        // assume the first character in the file is the start of a line.
        if (filepos != count)
        {
            throw new NoLinesException(count);
        }
        return 0;
    }

    // Found: convert the buffer index into a file address using the current stream position
    return file.Stream.Position + line_start;
}
/// <summary>Populate this line from a buffer</summary>
/// <param name="addr">The address of the start of the line (stored in LineStartAddr)</param>
/// <param name="buf">The buffer containing the encoded bytes of the line</param>
/// <param name="start">The index in 'buf' of the first byte of the line</param>
/// <param name="length">The number of bytes in 'buf' that belong to the line</param>
/// <param name="encoding">The encoding used to convert the bytes to text</param>
/// <param name="col_delim">The bytes that separate columns. An empty array means the line is a single column</param>
/// <param name="highlights">The highlights handed to each column that is created</param>
/// <param name="transforms">Transforms applied, in enumeration order, to the row text</param>
public void Read(long addr, byte[] buf, int start, int length, Encoding encoding, byte[] col_delim, List<Highlight> highlights, IEnumerable<Transform> transforms)
{
    LineStartAddr = addr;

    // Convert the buffer to text.
    // Trailing '\r' or '\n' characters are deliberately not trimmed; if the log has
    // unusual newlines at the end of each row, that is left visible.
    RowText = encoding.GetString(buf, start, length);

    // Apply any transforms
    foreach (var tx in transforms)
    {
        RowText = tx.Txfm(RowText);
    }

    // Split the line into columns
    Column.Clear();
    if (col_delim.Length == 0) // Single column
    {
        Column.Add(new Col(RowText, highlights));
    }
    else // Multiple columns
    {
        // NOTE(review): columns are decoded from the raw bytes in 'buf', so the transforms
        // applied to RowText above are not reflected in the column text - confirm this is intended.

        // The line occupies [start, end) in 'buf'. The scan limit must be the absolute end index,
        // not 'length', otherwise the split is only correct when 'start' is zero.
        int end = start + length;
        int e, s = start;
        do
        {
            // Returns one past the delimiter, or 'end' when there are no more delimiters
            e = Misc.FindNextDelim(buf, s, end, col_delim, false);

            // Exclude the delimiter bytes from the column text, except for the final column
            var col_text = encoding.GetString(buf, s, e - s - (e != end ? col_delim.Length : 0));
            Column.Add(new Col(col_text, highlights));
            s = e;
        }
        while (e != end);
    }
}
/// <summary>Scan the file from 'filepos', passing each detected line to 'add_line', until 'length' bytes have been scanned, no more data can be read, 'add_line' returns false, or 'progress' returns false</summary>
/// <param name="file">The file to scan</param>
/// <param name="filepos">The position in the file to start scanning from</param>
/// <param name="fileend">The current known length of the file</param>
/// <param name="backward">The direction to scan. True scans toward the start of the file</param>
/// <param name="length">The number of bytes to scan over</param>
/// <param name="add_line">Callback function called with each detected line. Returning false stops the scan</param>
/// <param name="encoding">The text file encoding</param>
/// <param name="row_delim">The bytes that identify an end of line</param>
/// <param name="buf">A buffer to use when buffering file data</param>
/// <param name="progress">Callback function to report progress and allow the find to abort. May be null</param>
/// <exception cref="NoLinesException">A full buffer of data was read without detecting any line start</exception>
private static void FindLines(IFileSource file, long filepos, long fileend, bool backward, long length, AddLineFunc add_line, Encoding encoding, byte[] row_delim, byte[] buf, ProgressFunc progress)
{
    // 'scanned' is the number of bytes consumed so far as whole lines,
    // 'read_addr' is the file position that the next block is read from.
    long scanned = 0, read_addr = filepos;
    for (;;)
    {
        // Progress update. A false return from the callback aborts the scan.
        if (progress != null && !progress(scanned, length))
        {
            return;
        }

        // Seek to the start position
        file.Stream.Seek(read_addr, SeekOrigin.Begin);

        // Buffer the contents of the file in 'buf'.
        long remaining = length - scanned;
        bool eof;
        int read = Buffer(file, remaining, fileend, encoding, backward, buf, out eof);
        if (read == 0)
        {
            break;
        }

        // Set iterator limits.
        // 'i' is where to start scanning from
        // 'iend' is the end of the range to scan (the value of 'i' that terminates the scan loop)
        // 'lasti' is the start of the last line found
        // 'base_addr' is the file offset from which buf was read
        int i = backward ? read - 1 : 0;
        int iend = backward ? -1 : read;
        int lasti = backward ? read : 0;
        long base_addr = backward ? file.Stream.Position : file.Stream.Position - read;

        // If we're searching backwards and 'i' is at the end of a line,
        // we don't want to count that as the first found line so adjust 'i'.
        // If not however, then 'i' is partway through a line or at the end
        // of a file without a row delimiter at the end and we want to include
        // this (possibly partial) line.
        // NOTE(review): in the backward case 'lasti' starts at 'read', so the first range
        // produced below still ends at 'read - row_delim.Length' even when the buffer does not
        // end with a row delimiter. Confirm the trailing bytes of that partial line are not dropped.
        if (backward && IsRowDelim(buf, read - row_delim.Length, row_delim))
        {
            i -= row_delim.Length;
        }

        // Scan the buffer for lines. The loop ends when FindNextDelim returns 'iend'.
        for (i = Misc.FindNextDelim(buf, i, read, row_delim, backward); i != iend; i = Misc.FindNextDelim(buf, i, read, row_delim, backward))
        {
            // 'i' points to the start of a line,
            // 'lasti' points to the start of the last line we found
            // Get the range in buf containing the line (the row delimiter bytes are excluded)
            RangeI line = backward ? new RangeI(i, lasti - row_delim.Length) : new RangeI(lasti, i - row_delim.Length);

            // Pass the detected line to the callback
            if (!add_line(line, base_addr, fileend, buf, encoding))
            {
                return;
            }
            lasti = i;

            // When scanning backward, step 'i' back over the row delimiter that precedes
            // this line so that the next search does not find the same delimiter again.
            if (backward)
            {
                i -= row_delim.Length + 1;
            }
        }

        // From 'lasti' to the end (or start in the backwards case) of the buffer represents
        // a (possibly partial) line. If we read a full buffer load last time, then we'll go
        // round again trying to read another buffer load, starting from 'lasti'.
        if (read == buf.Length)
        {
            // Make sure we're always making progress.
            // Only the bytes covered by whole lines count as scanned.
            long scan_increment = backward ? (read - lasti) : lasti;
            if (scan_increment == 0) // No lines detected in this block
            {
                throw new NoLinesException(read);
            }
            scanned += scan_increment;
            read_addr = filepos + (backward ? -scanned : +scanned);
        }
        // Otherwise, we've read to the end (or start) of the file, or to the limit 'length'.
        // What's left in the buffer may be a partial line.
        else
        {
            // 'i' points to 'iend',
            // 'lasti' points to the start of the last line we found
            // Get the range in buf containing the line. In the forward case the trailing
            // row delimiter is only excluded if the buffer actually ends with one.
            RangeI line = backward ? new RangeI(i + 1, lasti - row_delim.Length) : new RangeI(lasti, i - (IsRowDelim(buf, i - row_delim.Length, row_delim) ? row_delim.Length : 0));

            // ReSharper disable RedundantJumpStatement
            // Pass the detected line to the callback
            if (!add_line(line, base_addr, fileend, buf, encoding))
            {
                return;
            }
            // ReSharper restore RedundantJumpStatement
            break;
        }
    }
}