예제 #1
0
        /// <summary>Runs a pattern search over the lines of 'd.file', beginning at byte offset 'start'.
        /// Nothing is returned directly: each line that passes the filters and matches 'pat' is handed
        /// to 'on_found' as a file byte range, and the value 'on_found' returns becomes the result of
        /// the line callback given to 'FindLines'. 'd.file' is disposed when the search ends.</summary>
        /// <param name="pat">The pattern that each candidate line is tested against</param>
        /// <param name="start">The byte offset in the file to begin searching from</param>
        /// <param name="backward">True to search from 'start' toward offset 0, false to search toward 'd.fileend'</param>
        /// <param name="d">The data needed to read, parse, and filter lines</param>
        /// <param name="on_found">Called with the file byte range of each matching line</param>
        private static void DoFind(Pattern pat, long start, bool backward, BLIData d, Func <RangeI, bool> on_found)
        {
            using (d.file)
            {
                // A single 'Line' instance is reused for every candidate line
                var parsed = new Line();

                // Line callback handed to 'FindLines'
                AddLineFunc match_line = (rng, base_addr, fend, bytes, enc) =>
                {
                    // Blank lines are passed over when they are being ignored
                    var is_ignored_blank = rng.Empty && d.ignore_blanks;
                    if (!is_ignored_blank)
                    {
                        // Parse the line out of the buffer
                        parsed.Read(base_addr + rng.Beg, bytes, (int)rng.Beg, (int)rng.Size, d.encoding, d.col_delim, null, d.transforms);

                        // Only lines that survive the filters and match the pattern are reported
                        if (PassesFilters(parsed.RowText, d.filters) && pat.IsMatch(parsed.RowText))
                        {
                            return(on_found(new RangeI(base_addr + rng.Beg, base_addr + rng.End)));
                        }
                    }

                    // Not a match, keep going
                    return(true);
                };

                // The number of bytes between 'start' and the end of the file in the search direction
                long span;
                if (backward)
                {
                    span = start;
                }
                else
                {
                    span = d.fileend - start;
                }

                // Search the lines of the file
                var scratch = new byte[d.max_line_length];
                FindLines(d.file, start, d.fileend, backward, span, match_line, d.encoding, d.row_delim, scratch, d.progress);
            }
        }
예제 #2
0
        /// <summary>Writes the lines found within 'ranges' of the input file 'd.file' to 'outp'.
        /// Lines that fail the filters (and blank lines, when they are being ignored) are left out.</summary>
        /// <param name="d">A copy of the data needed to do the export</param>
        /// <param name="ranges">Byte ranges within the input file to export</param>
        /// <param name="row_delimiter">The text written to the output after each exported row</param>
        /// <param name="col_delimiter">The text written to the output between the columns of a row</param>
        /// <param name="outp">The output stream to write the exported result to</param>
        private static void DoExport(BLIData d, IEnumerable <RangeI> ranges, string row_delimiter, string col_delimiter, StreamWriter outp)
        {
            // A single 'Line' instance is reused for every exported line
            var parsed = new Line();

            // Line callback: always returns true so that the whole range is scanned
            AddLineFunc write_line = (span, base_addr, fend, bytes, enc) =>
            {
                var is_ignored_blank = span.Empty && d.ignore_blanks;
                if (!is_ignored_blank)
                {
                    // Parse the line out of the buffer
                    parsed.Read(base_addr + span.Beg, bytes, (int)span.Beg, (int)span.Size, d.encoding, d.col_delim, null, d.transforms);

                    // Only lines that pass the filters are written to the output
                    if (PassesFilters(parsed.RowText, d.filters))
                    {
                        outp.Write(string.Join(col_delimiter, parsed.Column));
                        outp.Write(row_delimiter);
                    }
                }
                return(true);
            };

            var scratch = new byte[d.max_line_length];
            foreach (var requested in ranges)
            {
                // Limit the requested range to the current extent of the file
                var beg = Math_.Clamp(requested.Beg, 0, d.file.Stream.Length);
                var end = Math_.Clamp(requested.End, 0, d.file.Stream.Length);
                var r   = new RangeI(beg, end);

                // Move the start of the range to a line start (grows the range if necessary)
                r.Beg = FindLineStart(d.file, r.Beg, r.End, d.row_delim, d.encoding, scratch);

                // Read the lines in the range, writing each one to the export file
                FindLines(d.file, r.Beg, r.End, false, r.Size, write_line, d.encoding, d.row_delim, scratch, d.progress);
            }
        }
예제 #3
0
        /// <summary>Scan 'src' from 'filepos', passing each whole line found to 'add_line', until 'length' bytes
        /// have been scanned, no more data can be buffered, or a callback asks to stop.</summary>
        /// <param name="src">The stream to scan</param>
        /// <param name="filepos">The position in the stream to start scanning from</param>
        /// <param name="fileend">The current known length of the file (forwarded to 'Buffer' and 'add_line')</param>
        /// <param name="backward">True to scan toward the start of the stream, false to scan toward the end</param>
        /// <param name="length">The number of bytes to scan over</param>
        /// <param name="add_line">Called with each detected line as a range within 'buf'. Returning false stops the scan</param>
        /// <param name="encoding">The text encoding (forwarded to 'Buffer' and 'add_line')</param>
        /// <param name="line_end">The bytes that identify an end of line</param>
        /// <param name="buf">A buffer to use when buffering file data</param>
        /// <param name="progress">Called at the start of each buffer load with (scanned, length). Returning false stops the scan.
        /// Must not be null, it is invoked without a null check</param>
        /// <returns>False if 'progress' or 'add_line' returned false, otherwise true</returns>
        /// <exception cref="NoLinesException">Thrown when a full buffer load does not advance the scan (no line was detected in it)</exception>
        private static bool FindLines(Stream src, long filepos, long fileend, bool backward, long length, AddLineFunc add_line, Encoding encoding, byte[] line_end, byte[] buf, ProgressFunc progress)
        {
            // 'scanned' is the number of bytes consumed as whole lines so far,
            // 'read_addr' is the stream position the next buffer load is read from.
            long scanned = 0, read_addr = filepos;

            for (; ;)
            {
                // Progress update
                if (!progress(scanned, length))
                {
                    return(false);
                }

                // Seek to the start position
                src.Seek(read_addr, SeekOrigin.Begin);

                // Buffer the contents of the file in 'buf'.
                // NOTE(review): 'eof' is not used after this call.
                var remaining = length - scanned;
                var read      = Buffer(src, remaining, fileend, encoding, backward, buf, out var eof);
                if (read == 0)
                {
                    break;
                }

                // Set iterator limits.
                // 'i' is where to start scanning from
                // 'iend' is the end of the range to scan
                // 'lasti' is the start of the last line found
                // 'base_addr' is the file offset from which buf was read
                // NOTE(review): the 'base_addr' expression assumes 'Buffer' leaves the stream positioned at the
                // start of the buffered data when reading backward, and at the end of it when reading forward - confirm.
                var i         = backward ? read - 1 : 0;
                var iend      = backward ? -1 : read;
                var lasti     = backward ? read : 0;
                var base_addr = backward ? src.Position : src.Position - read;

                // If we're searching backwards and 'i' is at the end of a line,
                // we don't want to count that as the first found line so adjust 'i'.
                // If not however, then 'i' is partway through a line or at the end
                // of a file without a 'line_end' at the end and we want to include
                // this (possibly partial) line.
                if (backward && IsLineEnd(buf, read - line_end.Length, line_end))
                {
                    i -= line_end.Length;
                }

                // Scan the buffer for lines. The loop ends when 'FindNextDelim' returns 'iend'.
                for (i = Tools.FindNextDelim(buf, i, read, line_end, backward); i != iend; i = Tools.FindNextDelim(buf, i, read, line_end, backward))
                {
                    // 'i' points to the start of a line,
                    // 'lasti' points to the start of the last line we found
                    // Get the range in buf containing the line (excluding the line end bytes)
                    var line = backward
                                                ? new RangeI(i, lasti - line_end.Length)
                                                : new RangeI(lasti, i - line_end.Length);

                    // Pass the detected line to the callback
                    if (!add_line(line, base_addr, fileend, buf, encoding))
                    {
                        return(false);
                    }

                    // Remember this line start. When scanning backward, step 'i' back
                    // before the line end that precedes this line so the next search
                    // starts within the previous line.
                    lasti = i;
                    if (backward)
                    {
                        i -= line_end.Length + 1;
                    }
                }

                // From 'lasti' to the end (or start in the backwards case) of the buffer represents
                // a (possibly partial) line. If we read a full buffer load last time, then we'll go
                // round again trying to read another buffer load, starting from 'lasti'.
                if (read == buf.Length)
                {
                    // Make sure we're always making progress
                    var scan_increment = backward ? (read - lasti) : lasti;
                    if (scan_increment == 0)                     // No lines detected in this block
                    {
                        throw new NoLinesException(read);
                    }

                    // Only the bytes belonging to whole lines are counted as scanned,
                    // so the partial line is read again in the next buffer load.
                    scanned  += scan_increment;
                    read_addr = filepos + (backward ? -scanned : +scanned);
                }
                // Otherwise, we've read to the end (or start) of the file, or to the limit 'length'.
                // What's left in the buffer may be a partial line.
                else
                {
                    // 'i' points to 'iend',
                    // 'lasti' points to the start of the last line we found
                    // Get the range in buf containing the line. In the forward case the
                    // trailing line end bytes are excluded only if the buffer ends with them.
                    var line = backward
                                                ? new RangeI(i + 1, lasti - line_end.Length)
                                                : new RangeI(lasti, i - (IsLineEnd(buf, i - line_end.Length, line_end) ? line_end.Length : 0));

                    // Pass the detected line to the callback
                    if (!add_line(line, base_addr, fileend, buf, encoding))
                    {
                        return(false);
                    }

                    break;
                }
            }
            return(true);
        }
예제 #4
0
        /// <summary>Scan the file from 'filepos', passing each whole line found to 'add_line', until 'length' bytes
        /// have been scanned, no more data can be buffered, or a callback asks to stop.</summary>
        /// <param name="file">The file to scan</param>
        /// <param name="filepos">The position in the file to start scanning from</param>
        /// <param name="fileend">The current known length of the file</param>
        /// <param name="backward">The direction to scan. True scans toward the start of the file</param>
        /// <param name="length">The number of bytes to scan over</param>
        /// <param name="add_line">Callback function called with each detected line, as a range within 'buf'. Returning false stops the scan</param>
        /// <param name="encoding">The text file encoding</param>
        /// <param name="row_delim">The bytes that identify an end of line</param>
        /// <param name="buf">A buffer to use when buffering file data</param>
        /// <param name="progress">Callback function to report progress and allow the find to abort. May be null, in which case no progress is reported</param>
        /// <exception cref="NoLinesException">Thrown when a full buffer load does not advance the scan (no line was detected in it)</exception>
        private static void FindLines(IFileSource file, long filepos, long fileend, bool backward, long length, AddLineFunc add_line, Encoding encoding, byte[] row_delim, byte[] buf, ProgressFunc progress)
        {
            // 'scanned' is the number of bytes consumed as whole lines so far,
            // 'read_addr' is the file position the next buffer load is read from.
            long scanned = 0, read_addr = filepos;

            for (;;)
            {
                // Progress update. A false result from 'progress' aborts the scan.
                if (progress != null && !progress(scanned, length))
                {
                    return;
                }

                // Seek to the start position
                file.Stream.Seek(read_addr, SeekOrigin.Begin);

                // Buffer the contents of the file in 'buf'.
                // NOTE(review): 'eof' is not used after this call.
                long remaining = length - scanned; bool eof;
                int  read = Buffer(file, remaining, fileend, encoding, backward, buf, out eof);
                if (read == 0)
                {
                    break;
                }

                // Set iterator limits.
                // 'i' is where to start scanning from
                // 'iend' is the end of the range to scan
                // 'lasti' is the start of the last line found
                // 'base_addr' is the file offset from which buf was read
                // NOTE(review): the 'base_addr' expression assumes 'Buffer' leaves the stream positioned at the
                // start of the buffered data when reading backward, and at the end of it when reading forward - confirm.
                int  i         = backward ? read - 1 : 0;
                int  iend      = backward ? -1 : read;
                int  lasti     = backward ? read : 0;
                long base_addr = backward ? file.Stream.Position : file.Stream.Position - read;

                // If we're searching backwards and 'i' is at the end of a line,
                // we don't want to count that as the first found line so adjust 'i'.
                // If not however, then 'i' is partway through a line or at the end
                // of a file without a row delimiter at the end and we want to include
                // this (possibly partial) line.
                if (backward && IsRowDelim(buf, read - row_delim.Length, row_delim))
                {
                    i -= row_delim.Length;
                }

                // Scan the buffer for lines. The loop ends when 'FindNextDelim' returns 'iend'.
                for (i = Misc.FindNextDelim(buf, i, read, row_delim, backward); i != iend; i = Misc.FindNextDelim(buf, i, read, row_delim, backward))
                {
                    // 'i' points to the start of a line,
                    // 'lasti' points to the start of the last line we found
                    // Get the range in buf containing the line (excluding the row delimiter bytes)
                    RangeI line = backward
                                                ? new RangeI(i, lasti - row_delim.Length)
                                                : new RangeI(lasti, i - row_delim.Length);

                    // Pass the detected line to the callback
                    if (!add_line(line, base_addr, fileend, buf, encoding))
                    {
                        return;
                    }

                    // Remember this line start. When scanning backward, step 'i' back
                    // before the row delimiter that precedes this line so the next search
                    // starts within the previous line.
                    lasti = i;
                    if (backward)
                    {
                        i -= row_delim.Length + 1;
                    }
                }

                // From 'lasti' to the end (or start in the backwards case) of the buffer represents
                // a (possibly partial) line. If we read a full buffer load last time, then we'll go
                // round again trying to read another buffer load, starting from 'lasti'.
                if (read == buf.Length)
                {
                    // Make sure we're always making progress
                    long scan_increment = backward ? (read - lasti) : lasti;
                    if (scan_increment == 0)                     // No lines detected in this block
                    {
                        throw new NoLinesException(read);
                    }

                    // Only the bytes belonging to whole lines are counted as scanned,
                    // so the partial line is read again in the next buffer load.
                    scanned  += scan_increment;
                    read_addr = filepos + (backward ? -scanned : +scanned);
                }
                // Otherwise, we've read to the end (or start) of the file, or to the limit 'length'.
                // What's left in the buffer may be a partial line.
                else
                {
                    // 'i' points to 'iend',
                    // 'lasti' points to the start of the last line we found
                    // Get the range in buf containing the line. In the forward case the
                    // trailing row delimiter is excluded only if the buffer ends with one.
                    RangeI line = backward
                                                ? new RangeI(i + 1, lasti - row_delim.Length)
                                                : new RangeI(lasti, i - (IsRowDelim(buf, i - row_delim.Length, row_delim) ? row_delim.Length : 0));

                    // ReSharper disable RedundantJumpStatement
                    // Pass the detected line to the callback
                    if (!add_line(line, base_addr, fileend, buf, encoding))
                    {
                        return;
                    }
                    // ReSharper restore RedundantJumpStatement

                    break;
                }
            }
        }
예제 #5
0
        /// <summary>The grunt work of building the new line index. Collects the byte ranges of the lines
        /// around 'd.filepos' and reports them through 'on_complete'. 'd.file' is disposed before returning.</summary>
        /// <param name="d">The data describing the build (file, position, cache sizes, filters, etc).
        /// Note: 'd.filepos' is updated to the start of the line that contains it</param>
        /// <param name="on_complete">Called with (d, scanned byte range, line index, null) on success, or with
        /// (d, RangeI.Zero, null, exception) if an exception was thrown. Not called when one of the explicit
        /// 'BuildCancelled' checks finds that the build has been cancelled</param>
        private static void BuildLineIndexAsync(BLIData d, Action <BLIData, RangeI, List <RangeI>, Exception> on_complete)
        {
            // This method runs in a background thread
            // All we're doing here is loading data around 'd.filepos' so that there are an equal number
            // of lines on either side. This can be optimised however because the existing range of
            // cached data probably overlaps the range we want loaded.
            try
            {
                Log.Write(ELogLevel.Info, "BLIAsync", $"build started. (id {d.build_issue}, reload {d.reload})");
                if (BuildCancelled(d.build_issue))
                {
                    return;
                }
                using (d.file)
                {
                    // A temporary buffer for reading sections of the file
                    var buf = new byte[d.max_line_length];

                    // Seek to the first line that starts immediately before 'filepos'
                    d.filepos = FindLineStart(d.file, d.filepos, d.fileend, d.row_delim, d.encoding, buf);
                    if (BuildCancelled(d.build_issue))
                    {
                        return;
                    }

                    // Determine the range to scan and the number of lines in each direction
                    var scan_backward = (d.fileend - d.filepos) > (d.filepos - 0);                     // scan in the most bound direction first
                    var scan_range    = CalcBufferRange(d.filepos, d.fileend, d.file_buffer_size);
                    var line_range    = CalcLineRange(d.line_cache_count);
                    var bwd_lines     = line_range.Begi;
                    var fwd_lines     = line_range.Endi;

                    // Incremental loading - only load what isn't already cached.
                    // If the 'filepos' is left of the cache centre, try to extend in the left direction first.
                    // If the scan range in that direction is empty, try extending at the other end. The
                    // aim is to try to get d.line_index_count as close to d.line_cache_count as possible
                    // without loading data that is already cached.
                    #region Incremental loading
                    if (!d.reload && !d.cached_whole_line_range.Empty)
                    {
                        // Determine the direction the cached range is moving based on where 'filepos' is relative
                        // to the current cache centre and which range contains a valid area to be scanned.
                        // With incremental scans we can only update one side of the cache because the returned line index has to
                        // be a contiguous block of lines. This means one of 'bwd_lines' or 'fwd_lines' must be zero.
                        // 'Lrange' is the part of the scan range before the cached data, 'Rrange' is the part after it.
                        var Lrange = new RangeI(scan_range.Beg, d.cached_whole_line_range.Beg);
                        var Rrange = new RangeI(d.cached_whole_line_range.End, scan_range.End);
                        var dir    =
                            (!Lrange.Empty && !Rrange.Empty) ? Math.Sign(2 * d.filepos_line_index - d.line_cache_count) :
                            (!Lrange.Empty) ? -1 :
                            (!Rrange.Empty) ? +1 :
                            0;

                        // Determine the number of lines to scan, based on direction
                        if (dir < 0)
                        {
                            // Extend the cache backward. The backward line count is reduced by
                            // 'd.filepos_line_index' (limited to the range [0, bwd_lines]).
                            scan_backward = true;
                            scan_range    = Lrange;
                            bwd_lines    -= Math_.Clamp(d.filepos_line_index - 0, 0, bwd_lines);
                            fwd_lines     = 0;
                        }
                        else if (dir > 0)
                        {
                            // Extend the cache forward. The forward line count is reduced by the number of
                            // line index entries after 'd.filepos_line_index' (limited to the range [0, fwd_lines]).
                            scan_backward = false;
                            scan_range    = Rrange;
                            bwd_lines     = 0;
                            fwd_lines    -= Math_.Clamp(d.line_index_count - d.filepos_line_index - 1, 0, fwd_lines);
                        }
                        else if (dir == 0)
                        {
                            // Nothing to scan in either direction
                            bwd_lines  = 0;
                            fwd_lines  = 0;
                            scan_range = RangeI.Zero;
                        }
                    }
                    #endregion

                    Debug.Assert(bwd_lines + fwd_lines <= d.line_cache_count);

                    // Build the collection of line byte ranges to add to the cache
                    var line_index = new List <RangeI>();
                    if (bwd_lines != 0 || fwd_lines != 0)
                    {
                        // Line index buffers for collecting the results
                        var fwd_line_buf = new List <RangeI>();
                        var bwd_line_buf = new List <RangeI>();

                        // Data used in the 'add_line' callback. Updated for forward and backward passes
                        var lbd = new LineBufferData
                        {
                            line_buf   = null,                           // pointer to either 'fwd_line_buf' or 'bwd_line_buf'
                            line_limit = 0,                              // Caps the number of lines read for each of the forward and backward searches
                        };

                        // Callback for adding line byte ranges to a line buffer.
                        // Returns false once the combined number of collected lines reaches 'lbd.line_limit'.
                        AddLineFunc add_line = (line, baddr, fend, bf, enc) =>
                        {
                            if (line.Empty && d.ignore_blanks)
                            {
                                return(true);
                            }

                            // Test 'text' against each filter to see if it's included
                            // Note: not caching this string because we want to read immediate data
                            // from the file to pick up file changes.
                            // Note: the text is decoded from the captured 'buf', which is the same
                            // buffer that is passed to 'FindLines' below.
                            string text = d.encoding.GetString(buf, (int)line.Beg, (int)line.Size);
                            if (!PassesFilters(text, d.filters))
                            {
                                return(true);
                            }

                            // Convert the byte range to a file range
                            line = line.Shift(baddr);
                            Debug.Assert(new RangeI(0, d.fileend).Contains(line));
                            lbd.line_buf.Add(line);
                            Debug.Assert(lbd.line_buf.Count <= lbd.line_limit);
                            return((fwd_line_buf.Count + bwd_line_buf.Count) < lbd.line_limit);
                        };

                        // Callback for updating progress. Progress is reported as lines collected
                        // out of the current line limit, the byte counts given by the scan are not used.
                        ProgressFunc progress = (scanned, length) =>
                        {
                            int numer = fwd_line_buf.Count + bwd_line_buf.Count, denom = lbd.line_limit;
                            return(d.progress(numer, denom) && !BuildCancelled(d.build_issue));
                        };

                        // Scan twice, starting in the direction of the smallest range so that any
                        // unused cache space is used by the search in the other direction
                        var scan_from = Math_.Clamp(d.filepos, scan_range.Beg, scan_range.End);
                        for (int a = 0; a != 2; ++a, scan_backward = !scan_backward)
                        {
                            if (BuildCancelled(d.build_issue))
                            {
                                return;
                            }

                            // 'line_limit' accumulates over the two passes, so lines that
                            // the first pass did not find can be used by the second pass.
                            lbd.line_buf    = scan_backward ? bwd_line_buf : fwd_line_buf;
                            lbd.line_limit += scan_backward ? bwd_lines : fwd_lines;
                            if ((bwd_line_buf.Count + fwd_line_buf.Count) < lbd.line_limit)
                            {
                                var length = scan_backward ? scan_from - scan_range.Beg : scan_range.End - scan_from;
                                FindLines(d.file, scan_from, d.fileend, scan_backward, length, add_line, d.encoding, d.row_delim, buf, progress);
                            }
                        }

                        // Scanning backward adds lines to the line index in reverse order.
                        bwd_line_buf.Reverse();

                        // 'line_index' should be a contiguous block of byte offset ranges for
                        // the lines around 'd.filepos'. If 'd.reload' is false, then the line
                        // index will only contain byte offset ranges that are not currently cached.
                        line_index.Capacity = bwd_line_buf.Count + fwd_line_buf.Count;
                        line_index.AddRange(bwd_line_buf);
                        line_index.AddRange(fwd_line_buf);
                    }

                    // Job done
                    // NOTE(review): this call is inside the 'try', so an exception thrown by
                    // 'on_complete' results in 'on_complete' being called a second time from the 'catch'.
                    on_complete(d, scan_range, line_index, null);
                }
            }
            catch (Exception ex)
            {
                // Report the failure to the caller rather than letting it escape this method
                on_complete(d, RangeI.Zero, null, ex);
            }
        }