Example #1
        private PTable(string filename,
                       Guid id,
                       int bufferSize        = 8096,
                       int maxReadingThreads = TFConsts.ReadIndexReaderCount,
                       int depth             = 16)
        {
            if (!File.Exists(filename))
            {
                throw new CorruptIndexException(new PTableNotFoundException(filename));
            }

            _id   = id;
            _size = new FileInfo(filename).Length - PTableHeader.Size - MD5Size;

            _filename = filename;
            File.SetAttributes(_filename, FileAttributes.ReadOnly);
            File.SetAttributes(_filename, FileAttributes.Temporary);
            File.SetAttributes(_filename, FileAttributes.NotContentIndexed);

            _bufferSize        = bufferSize;
            _maxReadingThreads = maxReadingThreads;
            _buffer            = new byte[16];
            _bufferPtr         = GCHandle.Alloc(_buffer, GCHandleType.Pinned);

            using (var stream = File.OpenRead(filename))
                using (var reader = new BinaryReader(stream))
                {
                    PTableHeader.FromStream(reader);
                }

            for (int i = 0; i < _maxReadingThreads; i++)
            {
                var s = new FileStream(_filename, FileMode.Open, FileAccess.Read, FileShare.Read, 16, FileOptions.RandomAccess);
                _streams.Enqueue(s);
            }

            try
            {
                _midpoints = PopulateCache(depth);
            }
            catch (PossibleToHandleOutOfMemoryException)
            {
                Log.Info("Was unable to create midpoints for ptable. Performance hit possible OOM Exception.");
            }
        }
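The constructor above pre-opens one random-access FileStream per reader thread and queues them for reuse, so concurrent lookups never share a stream position. A minimal standalone sketch of that pooling idea, using only BCL types; the class name, buffer size and failure behaviour are illustrative placeholders, not part of the original code:

using System;
using System.Collections.Concurrent;
using System.IO;

// Minimal sketch of the reader-stream pool built in the constructor above.
sealed class ReaderStreamPool : IDisposable
{
    private readonly ConcurrentQueue<FileStream> _streams = new ConcurrentQueue<FileStream>();

    public ReaderStreamPool(string filename, int readerCount)
    {
        for (int i = 0; i < readerCount; i++)
        {
            // FileOptions.RandomAccess hints that reads will jump around (binary search).
            _streams.Enqueue(new FileStream(filename, FileMode.Open, FileAccess.Read,
                                            FileShare.Read, 4096, FileOptions.RandomAccess));
        }
    }

    public FileStream Acquire() =>
        _streams.TryDequeue(out var s) ? s : throw new InvalidOperationException("No free reader streams.");

    public void Release(FileStream stream) => _streams.Enqueue(stream);

    public void Dispose()
    {
        while (_streams.TryDequeue(out var s))
            s.Dispose();
    }
}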
Example #2
        public static PTable FromMemtable(IMemTable table, string filename, int cacheDepth = 16)
        {
            if (table == null)
            {
                throw new ArgumentNullException("table");
            }
            if (filename == null)
            {
                throw new ArgumentNullException("filename");
            }

            Log.Trace("Started dumping MemTable [{0}] into PTable...", table.Id);
            using (var f = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None, 8096,
                                          FileOptions.SequentialScan))
            {
                f.SetLength(PTableHeader.Size + (table.Count << 4) + MD5Size); // EXACT SIZE
                f.Seek(0, SeekOrigin.Begin);

                var md5    = MD5.Create();
                var buffer = new byte[16];
                using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                    using (var b = new BufferedStream(cs, 65536))
                    {
                        // WRITE HEADER
                        var headerBytes = new PTableHeader(Version).AsByteArray();
                        cs.Write(headerBytes, 0, headerBytes.Length);

                        // WRITE INDEX ENTRIES
                        foreach (var record in table.IterateAllInOrder())
                        {
                            var x = record;
                            AppendRecordTo(b, x.Bytes, buffer);
                        }
                        b.Flush();
                        cs.FlushFinalBlock();

                        // WRITE MD5
                        var hash = md5.Hash;
                        f.Write(hash, 0, hash.Length);
                    }
                f.Close();
                Log.Trace("Done dumping MemTable [{0}].", table.Id);
            }
            return(new PTable(filename, table.Id, depth: cacheDepth));
        }
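FromMemtable layers a BufferedStream over a CryptoStream so the MD5 digest is computed while the data is being written, and the hash is appended to the raw FileStream only after FlushFinalBlock, so the trailer itself is not hashed. A self-contained sketch of that write pipeline, assuming a placeholder path, header and payload:

using System.IO;
using System.Security.Cryptography;

static class Md5TrailerSketch
{
    // Sketch of the FileStream -> CryptoStream(MD5) -> BufferedStream pipeline used above.
    public static void WriteWithTrailingMd5(string path, byte[] header, byte[] payload)
    {
        using (var fs = new FileStream(path, FileMode.Create, FileAccess.ReadWrite, FileShare.None))
        using (var md5 = MD5.Create())
        using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
        using (var bs = new BufferedStream(cs, 65536))
        {
            cs.Write(header, 0, header.Length);   // bypasses the buffer but still feeds the hash
            bs.Write(payload, 0, payload.Length); // bulk data goes through the buffer
            bs.Flush();
            cs.FlushFinalBlock();                 // finalizes the MD5 computation

            var hash = md5.Hash;                  // written directly to the file, so not hashed itself
            fs.Write(hash, 0, hash.Length);
        }
    }
}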
Example #3
        public static PTable FromMemtable(IMemTable table, string filename, int cacheDepth = 16)
        {
            Ensure.NotNull(table, "table");
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var indexEntrySize = table.Version == PTableVersions.Index32Bit ? PTable.IndexEntry32Size : PTable.IndexEntry64Size;

            var sw = Stopwatch.StartNew();

            using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
                                           DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                fs.SetLength(PTableHeader.Size + indexEntrySize * (long)table.Count + MD5Size); // EXACT SIZE
                fs.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                    using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
                        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                        {
                            // WRITE HEADER
                            var headerBytes = new PTableHeader(table.Version).AsByteArray();
                            cs.Write(headerBytes, 0, headerBytes.Length);

                            // WRITE INDEX ENTRIES
                            var buffer = new byte[indexEntrySize];
                            foreach (var record in table.IterateAllInOrder())
                            {
                                var rec = record;
                                AppendRecordTo(bs, buffer, table.Version, rec, indexEntrySize);
                            }
                            bs.Flush();
                            cs.FlushFinalBlock();

                            // WRITE MD5
                            var hash = md5.Hash;
                            fs.Write(hash, 0, hash.Length);
                        }
            }
            Log.Trace("Dumped MemTable [{0}, {1} entries] in {2}.", table.Id, table.Count, sw.Elapsed);
            return(new PTable(filename, table.Id, depth: cacheDepth));
        }
        public static PTable FromMemtable(IMemTable table, string filename, int cacheDepth = 16)
        {
            Ensure.NotNull(table, "table");
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            //Log.Trace("Started dumping MemTable [{0}] into PTable...", table.Id);
            var sw = Stopwatch.StartNew();
            using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
                                           DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                fs.SetLength(PTableHeader.Size + IndexEntrySize * (long)table.Count + MD5Size); // EXACT SIZE
                fs.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
                using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                {
                    // WRITE HEADER
                    var headerBytes = new PTableHeader(Version).AsByteArray();
                    cs.Write(headerBytes, 0, headerBytes.Length);

                    // WRITE INDEX ENTRIES
                    var buffer = new byte[IndexEntrySize];
                    foreach (var record in table.IterateAllInOrder())
                    {
                        var rec = record;
                        AppendRecordTo(bs, rec.Bytes, buffer);
                    }
                    bs.Flush();
                    cs.FlushFinalBlock();

                    // WRITE MD5
                    var hash = md5.Hash;
                    fs.Write(hash, 0, hash.Length);
                }
            }
            Log.Trace("Dumped MemTable [{0}, {1} entries] in {2}.", table.Id, table.Count, sw.Elapsed);
            return new PTable(filename, table.Id, depth: cacheDepth);
        }
        public static void CreatePTableFile(string filename, long ptableSize, int indexEntrySize, int cacheDepth = 16)
        {
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var sw = Stopwatch.StartNew();
            var tableId = Guid.NewGuid();
            using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
                                           DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                fs.SetLength((long)ptableSize);
                fs.Seek(0, SeekOrigin.Begin);

                var recordCount = (long)((ptableSize - PTableHeader.Size - PTable.MD5Size) / (long)indexEntrySize);
                using (var md5 = MD5.Create())
                using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
                using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                {
                    // WRITE HEADER
                    var headerBytes = new PTableHeader(Version).AsByteArray();
                    cs.Write(headerBytes, 0, headerBytes.Length);

                    // WRITE INDEX ENTRIES
                    var buffer = new byte[indexEntrySize];
                    for (long i = 0; i < recordCount; i++)
                    {
                        bs.Write(buffer, 0, indexEntrySize);
                    }
                    bs.Flush();
                    cs.FlushFinalBlock();

                    // WRITE MD5
                    var hash = md5.Hash;
                    fs.Write(hash, 0, hash.Length);
                }
            }
            Console.WriteLine("Created PTable File[{0}, size of {1}] in {2}.", tableId, ptableSize, sw.Elapsed);
        }
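The writers above preallocate the file at its exact final size, so the entry count and the total size are two views of the same arithmetic. A tiny illustration of that accounting; the constants are placeholders standing in for PTableHeader.Size, the per-version index entry size and MD5Size, which are defined elsewhere in the class:

static class PTableSizeSketch
{
    // Placeholder values; the real sizes come from PTableHeader.Size,
    // the index version and MD5Size.
    private const long HeaderSize = 128;
    private const long EntrySize  = 16;
    private const long Md5Size    = 16;

    public static long ExactFileSize(long entryCount) =>
        HeaderSize + EntrySize * entryCount + Md5Size;

    public static long EntryCountFromFileSize(long fileSize) =>
        (fileSize - HeaderSize - Md5Size) / EntrySize;
}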
Example #6
        private PTable(string filename,
                       Guid id,
                       int initialReaders = ESConsts.PTableInitialReaderCount,
                       int maxReaders     = ESConsts.PTableMaxReaderCount,
                       int depth          = 16)
        {
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.NotEmptyGuid(id, "id");
            Ensure.Positive(maxReaders, "maxReaders");
            Ensure.Nonnegative(depth, "depth");

            if (!File.Exists(filename))
            {
                throw new CorruptIndexException(new PTableNotFoundException(filename));
            }

            _id       = id;
            _filename = filename;

            var sw = Stopwatch.StartNew();

            Log.Trace("Loading PTable '{0}' started...", Filename);

            _size = new FileInfo(_filename).Length - PTableHeader.Size - MD5Size;
            File.SetAttributes(_filename, FileAttributes.ReadOnly | FileAttributes.NotContentIndexed);

            _workItems = new ObjectPool <WorkItem>(string.Format("PTable {0} work items", _id),
                                                   initialReaders,
                                                   maxReaders,
                                                   () => new WorkItem(filename, DefaultBufferSize),
                                                   workItem => workItem.Dispose(),
                                                   pool => OnAllWorkItemsDisposed());

            var readerWorkItem = GetWorkItem();

            try
            {
                readerWorkItem.Stream.Seek(0, SeekOrigin.Begin);
                var header = PTableHeader.FromStream(readerWorkItem.Stream);
                if (header.Version != Version)
                {
                    throw new CorruptIndexException(new WrongFileVersionException(_filename, header.Version, Version));
                }
            }
            catch (Exception)
            {
                Dispose();
                throw;
            }
            finally
            {
                ReturnWorkItem(readerWorkItem);
            }

            try
            {
                _midpoints = CacheMidpoints(depth);
            }
            catch (PossibleToHandleOutOfMemoryException)
            {
                Log.Error("Was unable to create midpoints for PTable '{0}' ({1} entries, depth {2} requested). "
                          + "Performance hit possible. OOM Exception.", Filename, Count, depth);
            }
            Log.Trace("Loading PTable '{0}' ({1} entries, cache depth {2}) done in {3}.", Filename, Count, depth, sw.Elapsed);
        }
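Compared with Example #1, the raw stream queue is replaced here by an ObjectPool<WorkItem> built from a factory delegate, a per-item dispose delegate and a pool-drained callback. The sketch below is a simplified stand-in showing that factory/get/return/dispose shape, not the actual EventStore ObjectPool API:

using System;
using System.Collections.Concurrent;

// Simplified stand-in for the ObjectPool<WorkItem> usage above; the real pool
// also tracks initial/max counts and an "all items disposed" callback.
sealed class SimplePool<T> : IDisposable where T : class
{
    private readonly ConcurrentBag<T> _items = new ConcurrentBag<T>();
    private readonly Func<T> _factory;
    private readonly Action<T> _dispose;

    public SimplePool(int initialCount, Func<T> factory, Action<T> dispose)
    {
        _factory = factory;
        _dispose = dispose;
        for (int i = 0; i < initialCount; i++)
            _items.Add(factory());
    }

    public T Get() => _items.TryTake(out var item) ? item : _factory();

    public void Return(T item) => _items.Add(item);

    public void Dispose()
    {
        while (_items.TryTake(out var item))
            _dispose(item);
    }
}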
Example #7
        private static PTable MergeTo2 <TStreamId>(IList <PTable> tables, long numIndexEntries, int indexEntrySize,
                                                   string outputFile,
                                                   Func <TStreamId, ulong, ulong> upgradeHash, Func <IndexEntry, bool> existsAt,
                                                   Func <IndexEntry, Tuple <TStreamId, bool> > readRecord,
                                                   byte version, int initialReaders, int maxReaders,
                                                   int cacheDepth, bool skipIndexVerify,
                                                   bool useBloomFilter, int lruCacheSize)
        {
            Log.Debug("PTables merge started (specialized for <= 2 tables).");
            var watch = Stopwatch.StartNew();

            var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
            var enumerators = tables
                              .Select(table => new EnumerableTable <TStreamId>(version, table, upgradeHash, existsAt, readRecord)).ToList();

            try {
                long dumpedEntryCount = 0;
                using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                              DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
                    f.SetLength(fileSizeUpToIndexEntries);
                    f.Seek(0, SeekOrigin.Begin);

                    using (var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count)))
                        using (var md5 = MD5.Create())
                            using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                                using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                                    // WRITE HEADER
                                    var headerBytes = new PTableHeader(version).AsByteArray();
                                    cs.Write(headerBytes, 0, headerBytes.Length);

                                    // WRITE INDEX ENTRIES
                                    var  buffer                = new byte[indexEntrySize];
                                    long indexEntry            = 0L;
                                    var  requiredMidpointCount =
                                        GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                                    using var midpoints = new UnmanagedMemoryAppendOnlyList <Midpoint>((int)requiredMidpointCount + MidpointsOverflowSafetyNet);

                                    var        enum1      = enumerators[0];
                                    var        enum2      = enumerators[1];
                                    bool       available1 = enum1.MoveNext();
                                    bool       available2 = enum2.MoveNext();
                                    IndexEntry current;
                                    ulong?     previousHash = null;
                                    while (available1 || available2)
                                    {
                                        var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version,
                                                                    enum1.Current.Position);
                                        var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version,
                                                                    enum2.Current.Position);

                                        if (available1 && (!available2 || entry1.CompareTo(entry2) > 0))
                                        {
                                            current    = entry1;
                                            available1 = enum1.MoveNext();
                                        }
                                        else
                                        {
                                            current    = entry2;
                                            available2 = enum2.MoveNext();
                                        }

                                        AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                                        if (version >= PTableVersions.IndexV4 &&
                                            IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount))
                                        {
                                            midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version),
                                                                       indexEntry));
                                        }

                                        // WRITE BLOOM FILTER ENTRY
                                        if (bloomFilter != null && current.Stream != previousHash)
                                        {
                                            // upgradeHash has already ensured the hash is in the right format for the target
                                            var streamHash = current.Stream;
                                            bloomFilter.Add(GetSpan(ref streamHash));
                                            previousHash = current.Stream;
                                        }

                                        indexEntry++;
                                        dumpedEntryCount++;
                                    }

                                    //WRITE MIDPOINTS
                                    if (version >= PTableVersions.IndexV4)
                                    {
                                        if (dumpedEntryCount != numIndexEntries)
                                        {
                                            //if index entries have been removed, compute the midpoints again
                                            numIndexEntries       = dumpedEntryCount;
                                            requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                                            ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
                                                             requiredMidpointCount, midpoints);
                                        }

                                        WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries,
                                                         requiredMidpointCount, midpoints);
                                    }

                                    bloomFilter?.Flush();
                                    bs.Flush();
                                    cs.FlushFinalBlock();

                                    f.SetLength(f.Position + MD5Size);

                                    // WRITE MD5
                                    var hash = md5.Hash;
                                    f.Write(hash, 0, hash.Length);
                                    f.FlushToDisk();
                                }
                }

                Log.Debug(
                    "PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
                    watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
                return(new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth, skipIndexVerify, useBloomFilter, lruCacheSize));
            } finally {
                foreach (var enumerator in enumerators)
                {
                    enumerator.Dispose();
                }
            }
        }
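The two-table branch merges two already-sorted enumerators by always emitting whichever current entry compares greater (index entries are iterated in descending order). A minimal sketch of that two-way merge over plain descending int sequences; ints stand in for IndexEntry:

using System.Collections.Generic;

static class TwoWayMergeSketch
{
    // Two-way merge of two descending-sorted sequences, mirroring the
    // enum1/enum2 loop above (illustrative; ints stand in for IndexEntry).
    public static IEnumerable<int> MergeDescending(IEnumerable<int> a, IEnumerable<int> b)
    {
        using var e1 = a.GetEnumerator();
        using var e2 = b.GetEnumerator();
        bool available1 = e1.MoveNext();
        bool available2 = e2.MoveNext();

        while (available1 || available2)
        {
            if (available1 && (!available2 || e1.Current.CompareTo(e2.Current) > 0))
            {
                yield return e1.Current;
                available1 = e1.MoveNext();
            }
            else
            {
                yield return e2.Current;
                available2 = e2.MoveNext();
            }
        }
    }
}

Merging { 9, 5, 1 } with { 8, 3 } this way yields 9, 8, 5, 3, 1.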
Example #8
        public static PTable FromMemtable(IMemTable table, string filename, int initialReaders, int maxReaders,
                                          int cacheDepth       = 16,
                                          bool skipIndexVerify = false,
                                          bool useBloomFilter  = true,
                                          int lruCacheSize     = 1_000_000)
        {
            Ensure.NotNull(table, "table");
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            int  indexEntrySize   = GetIndexEntrySize(table.Version);
            long dumpedEntryCount = 0;

            var sw = Stopwatch.StartNew();

            using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
                                           DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
                var fileSize = GetFileSizeUpToIndexEntries(table.Count, table.Version);
                fs.SetLength(fileSize);
                fs.Seek(0, SeekOrigin.Begin);

                using (var bloomFilter = ConstructBloomFilter(useBloomFilter, filename, table.Count))
                    using (var md5 = MD5.Create())
                        using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
                            using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                                // WRITE HEADER
                                var headerBytes = new PTableHeader(table.Version).AsByteArray();
                                cs.Write(headerBytes, 0, headerBytes.Length);

                                // WRITE INDEX ENTRIES
                                var buffer  = new byte[indexEntrySize];
                                var records = table.IterateAllInOrder();
                                var requiredMidpointCount = GetRequiredMidpointCountCached(table.Count, table.Version, cacheDepth);
                                using var midpoints = new UnmanagedMemoryAppendOnlyList <Midpoint>((int)requiredMidpointCount + MidpointsOverflowSafetyNet);

                                long  indexEntry   = 0L;
                                ulong?previousHash = null;
                                foreach (var rec in records)
                                {
                                    AppendRecordTo(bs, buffer, table.Version, rec, indexEntrySize);
                                    dumpedEntryCount += 1;
                                    if (table.Version >= PTableVersions.IndexV4 &&
                                        IsMidpointIndex(indexEntry, table.Count, requiredMidpointCount))
                                    {
                                        midpoints.Add(new Midpoint(new IndexEntryKey(rec.Stream, rec.Version), indexEntry));
                                    }

                                    // WRITE BLOOM FILTER ENTRY
                                    if (bloomFilter != null && rec.Stream != previousHash)
                                    {
                                        // we are creating a PTable of the same version as the Memtable. therefore the hash is the right format
                                        var streamHash = rec.Stream;
                                        bloomFilter.Add(GetSpan(ref streamHash));
                                        previousHash = rec.Stream;
                                    }

                                    indexEntry++;
                                }

                                //WRITE MIDPOINTS
                                if (table.Version >= PTableVersions.IndexV4)
                                {
                                    var numIndexEntries = table.Count;
                                    if (dumpedEntryCount != numIndexEntries)
                                    {
                                        //if index entries have been removed, compute the midpoints again
                                        numIndexEntries       = dumpedEntryCount;
                                        requiredMidpointCount =
                                            GetRequiredMidpointCount(numIndexEntries, table.Version, cacheDepth);
                                        ComputeMidpoints(bs, fs, table.Version, indexEntrySize, numIndexEntries,
                                                         requiredMidpointCount, midpoints);
                                    }

                                    WriteMidpointsTo(bs, fs, table.Version, indexEntrySize, buffer, dumpedEntryCount,
                                                     numIndexEntries, requiredMidpointCount, midpoints);
                                }

                                bloomFilter?.Flush();
                                bs.Flush();
                                cs.FlushFinalBlock();

                                // WRITE MD5
                                var hash = md5.Hash;
                                fs.SetLength(fs.Position + MD5Size);
                                fs.Write(hash, 0, hash.Length);
                                fs.FlushToDisk();
                            }
            }

            Log.Debug("Dumped MemTable [{id}, {table} entries] in {elapsed}.", table.Id, table.Count, sw.Elapsed);
            return(new PTable(filename, table.Id, initialReaders, maxReaders, cacheDepth, skipIndexVerify, useBloomFilter, lruCacheSize));
        }
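The V4 writers record midpoints so lookups can binary-search a small cached subset before touching the full entry list; which indexes count as midpoints is decided by IsMidpointIndex, which is not shown in these examples. As an illustrative stand-in (not EventStore's exact formula), spreading K midpoints evenly over N entries can be sketched like this:

using System.Collections.Generic;

static class MidpointSketch
{
    // Illustrative only: K roughly evenly spaced midpoint positions over N entries,
    // always including the first and the last entry. Not the real IsMidpointIndex logic.
    public static IEnumerable<long> MidpointPositions(long numEntries, long midpointCount)
    {
        if (numEntries <= 0)
            yield break;
        if (midpointCount <= 1 || numEntries == 1)
        {
            yield return 0;
            yield break;
        }

        long previous = -1;
        for (long k = 0; k < midpointCount; k++)
        {
            long position = (long)((double)k * (numEntries - 1) / (midpointCount - 1));
            if (position != previous) // skip duplicates when midpointCount exceeds numEntries
            {
                yield return position;
                previous = position;
            }
        }
    }
}

For example, MidpointPositions(1000, 5) yields 0, 249, 499, 749, 999.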
Example #9
        private PTable(string filename,
                       Guid id,
                       int initialReaders,
                       int maxReaders,
                       int depth            = 16,
                       bool skipIndexVerify = false)
        {
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.NotEmptyGuid(id, "id");
            Ensure.Positive(maxReaders, "maxReaders");
            Ensure.Nonnegative(depth, "depth");

            if (!File.Exists(filename))
            {
                throw new CorruptIndexException(new PTableNotFoundException(filename));
            }

            _id       = id;
            _filename = filename;

            Log.Verbose("Loading " + (skipIndexVerify ? "" : "and Verification ") + "of PTable '{pTable}' started...",
                        Path.GetFileName(Filename));
            var sw = Stopwatch.StartNew();

            _size = new FileInfo(_filename).Length;

            File.SetAttributes(_filename, FileAttributes.ReadOnly | FileAttributes.NotContentIndexed);

            _workItems = new ObjectPool <WorkItem>(string.Format("PTable {0} work items", _id),
                                                   initialReaders,
                                                   maxReaders,
                                                   () => new WorkItem(filename, DefaultBufferSize),
                                                   workItem => workItem.Dispose(),
                                                   pool => OnAllWorkItemsDisposed());

            var readerWorkItem = GetWorkItem();

            try {
                readerWorkItem.Stream.Seek(0, SeekOrigin.Begin);
                var header = PTableHeader.FromStream(readerWorkItem.Stream);
                if ((header.Version != PTableVersions.IndexV1) &&
                    (header.Version != PTableVersions.IndexV2) &&
                    (header.Version != PTableVersions.IndexV3) &&
                    (header.Version != PTableVersions.IndexV4))
                {
                    throw new CorruptIndexException(new WrongFileVersionException(_filename, header.Version, Version));
                }
                _version = header.Version;

                if (_version == PTableVersions.IndexV1)
                {
                    _indexEntrySize = IndexEntryV1Size;
                    _indexKeySize   = IndexKeyV1Size;
                }

                if (_version == PTableVersions.IndexV2)
                {
                    _indexEntrySize = IndexEntryV2Size;
                    _indexKeySize   = IndexKeyV2Size;
                }

                if (_version == PTableVersions.IndexV3)
                {
                    _indexEntrySize = IndexEntryV3Size;
                    _indexKeySize   = IndexKeyV3Size;
                }

                if (_version >= PTableVersions.IndexV4)
                {
                    //read the PTable footer
                    var previousPosition = readerWorkItem.Stream.Position;
                    readerWorkItem.Stream.Seek(readerWorkItem.Stream.Length - MD5Size - PTableFooter.GetSize(_version),
                                               SeekOrigin.Begin);
                    var footer = PTableFooter.FromStream(readerWorkItem.Stream);
                    if (footer.Version != header.Version)
                    {
                        throw new CorruptIndexException(
                                  String.Format("PTable header/footer version mismatch: {0}/{1}", header.Version,
                                                footer.Version), new InvalidFileException("Invalid PTable file."));
                    }

                    if (_version == PTableVersions.IndexV4)
                    {
                        _indexEntrySize = IndexEntryV4Size;
                        _indexKeySize   = IndexKeyV4Size;
                    }
                    else
                    {
                        throw new InvalidOperationException("Unknown PTable version: " + _version);
                    }

                    _midpointsCached    = footer.NumMidpointsCached;
                    _midpointsCacheSize = _midpointsCached * _indexEntrySize;
                    readerWorkItem.Stream.Seek(previousPosition, SeekOrigin.Begin);
                }

                long indexEntriesTotalSize = (_size - PTableHeader.Size - _midpointsCacheSize -
                                              PTableFooter.GetSize(_version) - MD5Size);

                if (indexEntriesTotalSize < 0)
                {
                    throw new CorruptIndexException(String.Format(
                                                        "Total size of index entries < 0: {0}. _size: {1}, header size: {2}, _midpointsCacheSize: {3}, footer size: {4}, md5 size: {5}",
                                                        indexEntriesTotalSize, _size, PTableHeader.Size, _midpointsCacheSize,
                                                        PTableFooter.GetSize(_version), MD5Size));
                }
                else if (indexEntriesTotalSize % _indexEntrySize != 0)
                {
                    throw new CorruptIndexException(String.Format(
                                                        "Total size of index entries: {0} is not divisible by index entry size: {1}",
                                                        indexEntriesTotalSize, _indexEntrySize));
                }

                _count = indexEntriesTotalSize / _indexEntrySize;

                if (_version >= PTableVersions.IndexV4 && _count > 0 && _midpointsCached > 0 && _midpointsCached < 2)
                {
                    //if there is at least 1 index entry with version>=4 and there are cached midpoints, there should always be at least 2 midpoints cached
                    throw new CorruptIndexException(String.Format(
                                                        "Less than 2 midpoints cached in PTable. Index entries: {0}, Midpoints cached: {1}", _count,
                                                        _midpointsCached));
                }
                else if (_count >= 2 && _midpointsCached > _count)
                {
                    //if there are at least 2 index entries, midpoints count should be at most the number of index entries
                    throw new CorruptIndexException(String.Format(
                                                        "More midpoints cached in PTable than index entries. Midpoints: {0} , Index entries: {1}",
                                                        _midpointsCached, _count));
                }

                if (Count == 0)
                {
                    _minEntry = new IndexEntryKey(ulong.MaxValue, long.MaxValue);
                    _maxEntry = new IndexEntryKey(ulong.MinValue, long.MinValue);
                }
                else
                {
                    var minEntry = ReadEntry(_indexEntrySize, Count - 1, readerWorkItem, _version);
                    _minEntry = new IndexEntryKey(minEntry.Stream, minEntry.Version);
                    var maxEntry = ReadEntry(_indexEntrySize, 0, readerWorkItem, _version);
                    _maxEntry = new IndexEntryKey(maxEntry.Stream, maxEntry.Version);
                }
            } catch (Exception) {
                Dispose();
                throw;
            } finally {
                ReturnWorkItem(readerWorkItem);
            }

            int calcdepth = 0;

            try {
                calcdepth  = GetDepth(_count * _indexEntrySize, depth);
                _midpoints = CacheMidpointsAndVerifyHash(calcdepth, skipIndexVerify);
            } catch (PossibleToHandleOutOfMemoryException) {
                Log.Error(
                    "Unable to create midpoints for PTable '{pTable}' ({count} entries, depth {depth} requested). "
                    + "Performance hit will occur. OOM Exception.", Path.GetFileName(Filename), Count, depth);
            }

            Log.Verbose(
                "Loading PTable (Version: {version}) '{pTable}' ({count} entries, cache depth {depth}) done in {elapsed}.",
                _version, Path.GetFileName(Filename), Count, calcdepth, sw.Elapsed);
        }
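This constructor derives the entry count from the file layout and rejects files whose entry section is negative or not an exact multiple of the entry size. A compact sketch of that validation, with the section sizes passed in explicitly instead of being read from the header and footer, and InvalidDataException standing in for CorruptIndexException:

using System.IO;

static class EntryCountSketch
{
    // Sketch of the size validation above; all sizes are parameters here
    // rather than values read from the PTable header/footer.
    public static long ComputeEntryCount(long fileSize, long headerSize, long midpointsCacheSize,
                                         long footerSize, long md5Size, int indexEntrySize)
    {
        long indexEntriesTotalSize = fileSize - headerSize - midpointsCacheSize - footerSize - md5Size;

        if (indexEntriesTotalSize < 0)
            throw new InvalidDataException("Index entry section has negative size: " + indexEntriesTotalSize);
        if (indexEntriesTotalSize % indexEntrySize != 0)
            throw new InvalidDataException(
                $"Index entry section size {indexEntriesTotalSize} is not a multiple of entry size {indexEntrySize}.");

        return indexEntriesTotalSize / indexEntrySize;
    }
}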
Example #10
        public static PTable MergeTo <TStreamId>(IList <PTable> tables, string outputFile, Func <TStreamId, ulong, ulong> upgradeHash,
                                                 Func <IndexEntry, bool> existsAt, Func <IndexEntry, Tuple <TStreamId, bool> > readRecord, byte version,
                                                 int initialReaders, int maxReaders,
                                                 int cacheDepth       = 16,
                                                 bool skipIndexVerify = false,
                                                 bool useBloomFilter  = true,
                                                 int lruCacheSize     = 1_000_000)
        {
            Ensure.NotNull(tables, "tables");
            Ensure.NotNullOrEmpty(outputFile, "outputFile");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var indexEntrySize = GetIndexEntrySize(version);

            long numIndexEntries = 0;

            for (var i = 0; i < tables.Count; i++)
            {
                numIndexEntries += tables[i].Count;
            }

            var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);

            if (tables.Count == 2)
            {
                return(MergeTo2(tables, numIndexEntries, indexEntrySize, outputFile, upgradeHash, existsAt, readRecord,
                                version, initialReaders, maxReaders, cacheDepth, skipIndexVerify, useBloomFilter, lruCacheSize));        // special case
            }
            Log.Debug("PTables merge started.");
            var watch = Stopwatch.StartNew();

            var enumerators = tables
                              .Select(table => new EnumerableTable <TStreamId>(version, table, upgradeHash, existsAt, readRecord)).ToList();

            try {
                for (int i = 0; i < enumerators.Count; i++)
                {
                    if (!enumerators[i].MoveNext())
                    {
                        enumerators[i].Dispose();
                        enumerators.RemoveAt(i);
                        i--;
                    }
                }

                long dumpedEntryCount = 0;
                using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                              DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
                    f.SetLength(fileSizeUpToIndexEntries);
                    f.Seek(0, SeekOrigin.Begin);

                    using (var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count)))
                        using (var md5 = MD5.Create())
                            using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                                using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                                    // WRITE HEADER
                                    var headerBytes = new PTableHeader(version).AsByteArray();
                                    cs.Write(headerBytes, 0, headerBytes.Length);

                                    var  buffer                = new byte[indexEntrySize];
                                    long indexEntry            = 0L;
                                    var  requiredMidpointCount =
                                        GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                                    using var midpoints = new UnmanagedMemoryAppendOnlyList <Midpoint>((int)requiredMidpointCount + MidpointsOverflowSafetyNet);

                                    // WRITE INDEX ENTRIES
                                    ulong?previousHash = null;
                                    while (enumerators.Count > 0)
                                    {
                                        var idx     = GetMaxOf(enumerators);
                                        var current = enumerators[idx].Current;
                                        AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                                        if (version >= PTableVersions.IndexV4 &&
                                            IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount))
                                        {
                                            midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version),
                                                                       indexEntry));
                                        }

                                        // WRITE BLOOM FILTER ENTRY
                                        if (bloomFilter != null && current.Stream != previousHash)
                                        {
                                            // upgradeHash has already ensured the hash is in the right format for the target
                                            var streamHash = current.Stream;
                                            bloomFilter.Add(GetSpan(ref streamHash));
                                            previousHash = current.Stream;
                                        }

                                        indexEntry++;
                                        dumpedEntryCount++;

                                        if (!enumerators[idx].MoveNext())
                                        {
                                            enumerators[idx].Dispose();
                                            enumerators.RemoveAt(idx);
                                        }
                                    }

                                    //WRITE MIDPOINTS
                                    if (version >= PTableVersions.IndexV4)
                                    {
                                        if (dumpedEntryCount != numIndexEntries)
                                        {
                                            //if index entries have been removed, compute the midpoints again
                                            numIndexEntries       = dumpedEntryCount;
                                            requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                                            ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
                                                             requiredMidpointCount, midpoints);
                                        }

                                        WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries,
                                                         requiredMidpointCount, midpoints);
                                    }

                                    bloomFilter?.Flush();
                                    bs.Flush();
                                    cs.FlushFinalBlock();

                                    f.FlushToDisk();
                                    f.SetLength(f.Position + MD5Size);

                                    // WRITE MD5
                                    var hash = md5.Hash;
                                    f.Write(hash, 0, hash.Length);
                                    f.FlushToDisk();
                                }
                }

                Log.Debug(
                    "PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
                    watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
                return(new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth, skipIndexVerify, useBloomFilter, lruCacheSize));
            } finally {
                foreach (var enumerableTable in enumerators)
                {
                    enumerableTable.Dispose();
                }
            }
        }
        public static PTable MergeTo(IList<PTable> tables,
                                     string outputFile,
                                     Func<IndexEntry, bool> isHashCollision,
                                     int cacheDepth = 16)
        {
            Ensure.NotNull(tables, "tables");
            Ensure.NotNullOrEmpty(outputFile, "outputFile");
            Ensure.NotNull(isHashCollision, "isHashCollision");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var enumerators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
            var fileSize = GetFileSize(tables); // approximate file size
            if (enumerators.Count == 2)
                return MergeTo2(enumerators, fileSize, outputFile, isHashCollision, cacheDepth); // special case

            Log.Trace("PTables merge started.");
            var watch = Stopwatch.StartNew();

            for (int i = 0; i < enumerators.Count; i++)
            {
                if (!enumerators[i].MoveNext())
                {
                    enumerators[i].Dispose();
                    enumerators.RemoveAt(i);
                    i--;
                }
            }

            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, 1024 * 1024, FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                using (var bs = new BufferedStream(cs, 65536))
                {
                    // WRITE HEADER
                    var headerBytes = new PTableHeader(Version).AsByteArray();
                    cs.Write(headerBytes, 0, headerBytes.Length);

                    uint lastDeleted = uint.MaxValue;
                    var buffer = new byte[IndexEntrySize];
                    // WRITE INDEX ENTRIES
                    while (enumerators.Count > 0)
                    {
                        var idx = GetMaxOf(enumerators);
                        var current = enumerators[idx].Current;
                        if (current.Version == EventNumber.DeletedStream && !isHashCollision(current))
                        {
                            lastDeleted = current.Stream;
                            AppendRecordTo(bs, current.Bytes, buffer);
                        }
                        else
                        {
                            if (lastDeleted != current.Stream || current.Version == 0) // we keep 0th event for hash collision detection
                                AppendRecordTo(bs, current.Bytes, buffer);
                        }
                        if (!enumerators[idx].MoveNext())
                        {
                            enumerators[idx].Dispose();
                            enumerators.RemoveAt(idx);
                        }
                    }
                    bs.Flush();
                    cs.FlushFinalBlock();

                    f.SetLength(f.Position + MD5Size);

                    // WRITE MD5
                    var hash = md5.Hash;
                    f.Write(hash, 0, hash.Length);
                }
            }
            Log.Trace("PTables merge finished in " + watch.Elapsed);
            return new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth);
        }
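The general merge loop repeatedly asks GetMaxOf for the enumerator whose current entry is largest, writes that entry, and drops an enumerator once it is exhausted. A minimal k-way version of that loop over descending int sequences; GetMaxOf itself is not shown in these examples, so the inner selection below is an illustrative equivalent:

using System.Collections.Generic;

static class KWayMergeSketch
{
    // K-way merge of descending-sorted sequences, mirroring the GetMaxOf loop above
    // (illustrative; ints stand in for IndexEntry).
    public static IEnumerable<int> MergeAllDescending(IEnumerable<IEnumerable<int>> sources)
    {
        var enumerators = new List<IEnumerator<int>>();
        foreach (var source in sources)
        {
            var e = source.GetEnumerator();
            if (e.MoveNext())
                enumerators.Add(e);
            else
                e.Dispose();
        }

        try
        {
            while (enumerators.Count > 0)
            {
                // Index of the enumerator with the largest current element (the GetMaxOf role).
                int idx = 0;
                for (int i = 1; i < enumerators.Count; i++)
                    if (enumerators[i].Current.CompareTo(enumerators[idx].Current) > 0)
                        idx = i;

                yield return enumerators[idx].Current;

                if (!enumerators[idx].MoveNext())
                {
                    enumerators[idx].Dispose();
                    enumerators.RemoveAt(idx);
                }
            }
        }
        finally
        {
            foreach (var e in enumerators)
                e.Dispose();
        }
    }
}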
Example #12
        public static PTable MergeTo(IList <PTable> tables, string outputFile, Func <string, ulong, ulong> upgradeHash, Func <IndexEntry, Tuple <string, bool> > readRecord, byte version, int cacheDepth = 16)
        {
            Ensure.NotNull(tables, "tables");
            Ensure.NotNullOrEmpty(outputFile, "outputFile");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var indexEntrySize = version == PTableVersions.Index32Bit ? PTable.IndexEntry32Size : IndexEntry64Size;

            var fileSize = GetFileSize(tables, indexEntrySize); // approximate file size

            if (tables.Count == 2)
            {
                return(MergeTo2(tables, fileSize, indexEntrySize, outputFile, upgradeHash, readRecord, version, cacheDepth)); // special case
            }
            Log.Trace("PTables merge started.");
            var watch = Stopwatch.StartNew();

            var enumerators = tables.Select(table => new EnumerablePTable(table, table.IterateAllInOrder().GetEnumerator())).ToList();

            for (int i = 0; i < enumerators.Count; i++)
            {
                if (!enumerators[i].MoveNext())
                {
                    enumerators[i].Dispose();
                    enumerators.RemoveAt(i);
                    i--;
                }
            }

            long dumpedEntryCount = 0;

            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                          DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                    using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                        {
                            // WRITE HEADER
                            var headerBytes = new PTableHeader(version).AsByteArray();
                            cs.Write(headerBytes, 0, headerBytes.Length);

                            var buffer = new byte[indexEntrySize];
                            // WRITE INDEX ENTRIES
                            while (enumerators.Count > 0)
                            {
                                var idx     = GetMaxOf(enumerators, version, upgradeHash, readRecord);
                                var current = enumerators[idx].Current;
                                var item    = readRecord(current); //Possibly doing another read if the entry was read in GetMaxOf
                                if (item.Item2)
                                {
                                    if (version == PTableVersions.Index64Bit && enumerators[idx].Table.Version == PTableVersions.Index32Bit)
                                    {
                                        current.Stream = upgradeHash(item.Item1, current.Stream);
                                    }
                                    AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                                    dumpedEntryCount += 1;
                                }
                                if (!enumerators[idx].MoveNext())
                                {
                                    enumerators[idx].Dispose();
                                    enumerators.RemoveAt(idx);
                                }
                            }
                            bs.Flush();
                            cs.FlushFinalBlock();

                            f.FlushToDisk();
                            f.SetLength(f.Position + MD5Size);

                            // WRITE MD5
                            var hash = md5.Hash;
                            f.Write(hash, 0, hash.Length);
                            f.FlushToDisk();
                        }
            }
            Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).",
                      watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
            return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
        }
        private static PTable MergeTo2(List<IEnumerator<IndexEntry>> enumerators,
                                       long fileSize,
                                       string outputFile,
                                       Func<IndexEntry, bool> isHashCollision,
                                       int cacheDepth)
        {
            Log.Trace("PTables merge started (specialized for <= 2 tables).");
            var watch = Stopwatch.StartNew();

            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, 1024 * 1024, FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                using (var bs = new BufferedStream(cs, 65536))
                {
                    // WRITE HEADER
                    var headerBytes = new PTableHeader(Version).AsByteArray();
                    cs.Write(headerBytes, 0, headerBytes.Length);

                    // WRITE INDEX ENTRIES
                    uint lastDeleted = uint.MaxValue;
                    var buffer = new byte[IndexEntrySize];
                    var enum1 = enumerators[0];
                    var enum2 = enumerators[1];
                    bool available1 = enum1.MoveNext();
                    bool available2 = enum2.MoveNext();
                    IndexEntry current;
                    while (available1 || available2)
                    {
                        if (available1 && (!available2 || enum1.Current.CompareTo(enum2.Current) > 0))
                        {
                            current = enum1.Current;
                            available1 = enum1.MoveNext();
                        }
                        else
                        {
                            current = enum2.Current;
                            available2 = enum2.MoveNext();
                        }

                        if (current.Version == EventNumber.DeletedStream && !isHashCollision(current))
                        {
                            lastDeleted = current.Stream;
                            AppendRecordTo(bs, current.Bytes, buffer);
                        }
                        else
                        {
                            if (lastDeleted != current.Stream || current.Version == 0) // we keep 0th event for hash collision detection
                                AppendRecordTo(bs, current.Bytes, buffer);
                        }
                    }
                    bs.Flush();
                    cs.FlushFinalBlock();

                    f.SetLength(f.Position + MD5Size);

                    // WRITE MD5
                    var hash = md5.Hash;
                    f.Write(hash, 0, hash.Length);
                }
            }
            Log.Trace("PTables merge finished in {0}.", watch.Elapsed);
            return new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth);
        }
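The older merge variants apply a scavenge filter while merging: once a stream's delete marker is written, subsequent entries for that stream are dropped, except event 0, which is kept for hash collision detection. A sketch of that filter as a standalone pass over (stream, version) pairs in descending order; the deleted-stream marker value and the collision callback are parameters here, standing in for EventNumber.DeletedStream and isHashCollision:

using System;
using System.Collections.Generic;

static class DeletedStreamFilterSketch
{
    // Mirrors the lastDeleted logic above: keep the delete marker, then drop later
    // entries of that stream except version 0 (kept for hash collision detection).
    public static IEnumerable<(uint Stream, long Version)> FilterDeleted(
        IEnumerable<(uint Stream, long Version)> entriesDescending,
        long deletedStreamVersion,
        Func<(uint Stream, long Version), bool> isHashCollision)
    {
        uint lastDeleted = uint.MaxValue;
        foreach (var entry in entriesDescending)
        {
            if (entry.Version == deletedStreamVersion && !isHashCollision(entry))
            {
                lastDeleted = entry.Stream;
                yield return entry;
            }
            else if (lastDeleted != entry.Stream || entry.Version == 0)
            {
                yield return entry;
            }
        }
    }
}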
Example #14
        private static PTable MergeTo2(IList <PTable> tables, long fileSize, int indexEntrySize, string outputFile,
                                       Func <string, ulong, ulong> upgradeHash, Func <IndexEntry, Tuple <string, bool> > readRecord,
                                       byte version, int cacheDepth)
        {
            Log.Trace("PTables merge started (specialized for <= 2 tables).");
            var watch = Stopwatch.StartNew();

            var  enumerators      = tables.Select(table => new EnumerablePTable(table, table.IterateAllInOrder().GetEnumerator())).ToList();
            long dumpedEntryCount = 0;

            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                          DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                    using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                        {
                            // WRITE HEADER
                            var headerBytes = new PTableHeader(version).AsByteArray();
                            cs.Write(headerBytes, 0, headerBytes.Length);

                            // WRITE INDEX ENTRIES
                            var        buffer     = new byte[indexEntrySize];
                            var        enum1      = enumerators[0];
                            var        enum2      = enumerators[1];
                            bool       available1 = enum1.MoveNext();
                            bool       available2 = enum2.MoveNext();
                            IndexEntry current;
                            bool       restart;
                            do
                            {
                                restart = false;
                                while (available1 || available2)
                                {
                                    var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version, enum1.Current.Position);
                                    var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version, enum2.Current.Position);
                                    if (version == PTableVersions.Index64Bit && enumerators[0].Table.Version == PTableVersions.Index32Bit)
                                    {
                                        var res = readRecord(entry1);
                                        if (!res.Item2)
                                        {
                                            available1 = enum1.MoveNext();
                                            restart    = true;
                                            break;
                                        }
                                        entry1.Stream = upgradeHash(res.Item1, entry1.Stream);
                                    }
                                    if (version == PTableVersions.Index64Bit && enumerators[1].Table.Version == PTableVersions.Index32Bit)
                                    {
                                        var res = readRecord(entry2);
                                        if (!res.Item2)
                                        {
                                            available2 = enum2.MoveNext();
                                            restart    = true;
                                            break;
                                        }
                                        entry2.Stream = upgradeHash(res.Item1, entry2.Stream);
                                    }

                                    if (available1 && (!available2 || entry1.CompareTo(entry2) > 0))
                                    {
                                        current    = entry1;
                                        available1 = enum1.MoveNext();
                                    }
                                    else
                                    {
                                        current    = entry2;
                                        available2 = enum2.MoveNext();
                                    }

                                    // Note: this may read the record a second time if it was already read during the hash upgrade above
                                    var item = readRecord(current);
                                    if (item.Item2)
                                    {
                                        AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                                        dumpedEntryCount += 1;
                                    }
                                }
                            } while (restart);
                            bs.Flush();
                            cs.FlushFinalBlock();

                            f.SetLength(f.Position + MD5Size);

                            // WRITE MD5
                            var hash = md5.Hash;
                            f.Write(hash, 0, hash.Length);
                            f.FlushToDisk();
                        }
            }
            Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).",
                      watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
            return(new PTable(outputFile, Guid.NewGuid(), version, depth: cacheDepth));
        }
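        // A minimal illustrative sketch, not taken from the original PTable source: the writers in these
        // listings all target the same on-disk layout -- [PTableHeader][N fixed-size index entries]
        // [midpoints in V4+][MD5] -- which is why they finish with SetLength(Position + MD5Size) before
        // writing the hash. Assuming the constants referenced elsewhere in these examples
        // (PTableHeader.Size, MD5Size), the size up to the index entries works out roughly as:
        private static long ApproximateSizeUpToIndexEntries(long entryCount, int indexEntrySize)
        {
            // header, then the fixed-size entries; midpoints (V4+) and the 16-byte MD5 are appended afterwards
            return PTableHeader.Size + entryCount * indexEntrySize;
        }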
        public static PTable FromMemtable(IMemTable table, string filename, int cacheDepth = 16, bool skipIndexVerify = false)
        {
            Ensure.NotNull(table, "table");
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            int  indexEntrySize   = GetIndexEntrySize(table.Version);
            long dumpedEntryCount = 0;

            var sw = Stopwatch.StartNew();

            using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
                                           DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                var fileSize = GetFileSizeUpToIndexEntries(table.Count, table.Version);
                fs.SetLength(fileSize);
                fs.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                    using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
                        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                        {
                            // WRITE HEADER
                            var headerBytes = new PTableHeader(table.Version).AsByteArray();
                            cs.Write(headerBytes, 0, headerBytes.Length);

                            // WRITE INDEX ENTRIES
                            var             buffer                = new byte[indexEntrySize];
                            var             records               = table.IterateAllInOrder();
                            List <Midpoint> midpoints             = new List <Midpoint>();
                            var             requiredMidpointCount = GetRequiredMidpointCountCached(table.Count, table.Version, cacheDepth);

                            long indexEntry = 0L;
                            foreach (var rec in records)
                            {
                                AppendRecordTo(bs, buffer, table.Version, rec, indexEntrySize);
                                dumpedEntryCount += 1;
                                if (table.Version >= PTableVersions.IndexV4 && IsMidpointIndex(indexEntry, table.Count, requiredMidpointCount))
                                {
                                    midpoints.Add(new Midpoint(new IndexEntryKey(rec.Stream, rec.Version), indexEntry));
                                }
                                indexEntry++;
                            }

                            //WRITE MIDPOINTS
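                            // Midpoints are a sparse sample of (key, entry index) pairs appended after the
                            // entries in V4+ files; readers can binary-search this small in-memory table
                            // before touching the file, and cacheDepth controls how many are kept.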
                            if (table.Version >= PTableVersions.IndexV4)
                            {
                                var numIndexEntries = table.Count;
                                if (dumpedEntryCount != numIndexEntries)
                                {
                                    //if index entries have been removed, compute the midpoints again
                                    numIndexEntries       = dumpedEntryCount;
                                    requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, table.Version, cacheDepth);
                                    midpoints             = ComputeMidpoints(bs, fs, table.Version, indexEntrySize, numIndexEntries, requiredMidpointCount, midpoints);
                                }

                                WriteMidpointsTo(bs, fs, table.Version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries, requiredMidpointCount, midpoints);
                            }

                            bs.Flush();
                            cs.FlushFinalBlock();

                            // WRITE MD5
                            var hash = md5.Hash;
                            fs.SetLength(fs.Position + MD5Size);
                            fs.Write(hash, 0, hash.Length);
                            fs.FlushToDisk();
                        }
            }
            Log.Trace("Dumped MemTable [{id}, {table} entries] in {elapsed}.", table.Id, table.Count, sw.Elapsed);
            return(new PTable(filename, table.Id, depth: cacheDepth, skipIndexVerify: skipIndexVerify));
        }
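        // A hedged usage sketch (not part of the listing above): `memTable` stands for any filled
        // IMemTable implementation and the file name is arbitrary; both are placeholders.
        public static void DumpMemTableExample(IMemTable memTable)
        {
            var ptable = FromMemtable(memTable, "index/ptable-000001.dat", cacheDepth: 16);
            try
            {
                Log.Trace("PTable now holds {0} entries at version {1}.", ptable.Count, memTable.Version);
            }
            finally
            {
                ptable.Dispose(); // assumed cleanup; releases the readers created when the PTable was opened
            }
        }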
        public static PTable Scavenged(PTable table, string outputFile, Func <string, ulong, ulong> upgradeHash,
                                       Func <IndexEntry, bool> existsAt, Func <IndexEntry, Tuple <string, bool> > readRecord, byte version, out long spaceSaved,
                                       int cacheDepth = 16, bool skipIndexVerify = false, CancellationToken ct = default(CancellationToken))
        {
            Ensure.NotNull(table, "table");
            Ensure.NotNullOrEmpty(outputFile, "outputFile");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var indexEntrySize  = GetIndexEntrySize(version);
            var numIndexEntries = table.Count;

            var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);

            Log.Trace("PTables scavenge started with {numIndexEntries} entries.", numIndexEntries);
            var  watch     = Stopwatch.StartNew();
            long keptCount = 0L;
            long droppedCount;

            try
            {
                using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                              DefaultSequentialBufferSize, FileOptions.SequentialScan))
                {
                    f.SetLength(fileSizeUpToIndexEntries);
                    f.Seek(0, SeekOrigin.Begin);

                    using (var md5 = MD5.Create())
                        using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                            using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                            {
                                // WRITE HEADER
                                var headerBytes = new PTableHeader(version).AsByteArray();
                                cs.Write(headerBytes, 0, headerBytes.Length);

                                // WRITE SCAVENGED INDEX ENTRIES
                                var buffer = new byte[indexEntrySize];
                                using (var enumerator = new EnumerableTable(version, table, upgradeHash, existsAt, readRecord))
                                {
                                    while (enumerator.MoveNext())
                                    {
                                        ct.ThrowIfCancellationRequested();
                                        if (existsAt(enumerator.Current))
                                        {
                                            AppendRecordTo(bs, buffer, version, enumerator.Current, indexEntrySize);
                                            keptCount++;
                                        }
                                    }
                                }

                                // We calculate this as the EnumerableTable can silently drop entries too.
                                droppedCount = numIndexEntries - keptCount;

                                var forceKeep = version > table.Version;

                                if (droppedCount == 0 && !forceKeep)
                                {
                                    Log.Trace(
                                        "PTable scavenge finished in {elapsed}. No entries removed so not keeping scavenged table.",
                                        watch.Elapsed);

                                    try
                                    {
                                        bs.Close();
                                        File.Delete(outputFile);
                                    }
                                    catch (Exception ex)
                                    {
                                        Log.ErrorException(ex, "Unable to delete unwanted scavenged PTable: {outputFile}", outputFile);
                                    }

                                    spaceSaved = 0;
                                    return(null);
                                }

                                if (droppedCount == 0 && forceKeep)
                                {
                                    Log.Trace("Keeping scavenged index even though it isn't smaller; version upgraded.");
                                }

                                //CALCULATE AND WRITE MIDPOINTS
                                if (version >= PTableVersions.IndexV4)
                                {
                                    var requiredMidpointCount = GetRequiredMidpointCount(keptCount, version, cacheDepth);
                                    var midpoints             = ComputeMidpoints(bs, f, version, indexEntrySize, keptCount, requiredMidpointCount, new List <Midpoint>(), ct);
                                    WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, keptCount, keptCount, requiredMidpointCount, midpoints);
                                }

                                bs.Flush();
                                cs.FlushFinalBlock();

                                f.FlushToDisk();
                                f.SetLength(f.Position + MD5Size);

                                // WRITE MD5
                                var hash = md5.Hash;
                                f.Write(hash, 0, hash.Length);
                                f.FlushToDisk();
                            }
                }

                Log.Trace("PTable scavenge finished in {elapsed} ({droppedCount} entries removed, {keptCount} remaining).", watch.Elapsed,
                          droppedCount, keptCount);
                var scavengedTable = new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth,
                                                skipIndexVerify: skipIndexVerify);
                spaceSaved = table._size - scavengedTable._size;
                return(scavengedTable);
            }
            catch (Exception)
            {
                try
                {
                    File.Delete(outputFile);
                }
                catch (Exception ex)
                {
                    Log.ErrorException(ex, "Unable to delete unwanted scavenged PTable: {outputFile}", outputFile);
                }
                throw;
            }
        }
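        // A hedged usage sketch (not from the listing above): the three delegates only show the expected
        // shapes -- real implementations come from the read index / transaction log layer -- and
        // `oldTable` is a placeholder for an already-loaded PTable.
        public static PTable ScavengeExample(PTable oldTable)
        {
            Func<string, ulong, ulong> upgradeHash = (streamId, oldHash) => oldHash;                        // placeholder: no hash upgrade
            Func<IndexEntry, bool> existsAt = entry => true;                                                // placeholder: keep every entry
            Func<IndexEntry, Tuple<string, bool>> readRecord = entry => Tuple.Create("$placeholder", true); // placeholder

            long spaceSaved;
            var scavenged = Scavenged(oldTable, "index/ptable-scavenged.dat",
                                      upgradeHash, existsAt, readRecord,
                                      PTableVersions.IndexV4, out spaceSaved);
            if (scavenged == null)
                Log.Trace("Nothing was removed; the scavenged file was deleted."); // see the early-return path above
            return scavenged;
        }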
        private static PTable MergeTo2(IList<PTable> tables, long fileSize, string outputFile,
                                       Func<IndexEntry, bool> recordExistsAt, int cacheDepth)
        {
            Log.Trace("PTables merge started (specialized for <= 2 tables).");
            var watch = Stopwatch.StartNew();

            var enumerators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
            long dumpedEntryCount = 0;
            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                          DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                {
                    // WRITE HEADER
                    var headerBytes = new PTableHeader(Version).AsByteArray();
                    cs.Write(headerBytes, 0, headerBytes.Length);

                    // WRITE INDEX ENTRIES
                    var buffer = new byte[IndexEntrySize];
                    var enum1 = enumerators[0];
                    var enum2 = enumerators[1];
                    bool available1 = enum1.MoveNext();
                    bool available2 = enum2.MoveNext();
                    IndexEntry current;
                    while (available1 || available2)
                    {
                        if (available1 && (!available2 || enum1.Current.CompareTo(enum2.Current) > 0))
                        {
                            current = enum1.Current;
                            available1 = enum1.MoveNext();
                        }
                        else
                        {
                            current = enum2.Current;
                            available2 = enum2.MoveNext();
                        }

                        if (recordExistsAt(current))
                        {
                            AppendRecordTo(bs, current.Bytes, buffer);
                            dumpedEntryCount += 1;
                        }
                    }
                    bs.Flush();
                    cs.FlushFinalBlock();

                    f.SetLength(f.Position + MD5Size);

                    // WRITE MD5
                    var hash = md5.Hash;
                    f.Write(hash, 0, hash.Length);
                    f.FlushToDisk();
                }
            }
            Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).",
                      watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
            return new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth);
        }
Example #18
0
        public static PTable MergeTo(IList <PTable> tables,
                                     string outputFile,
                                     Func <IndexEntry, bool> isHashCollision,
                                     int cacheDepth = 16)
        {
            Ensure.NotNull(tables, "tables");
            Ensure.NotNullOrEmpty(outputFile, "outputFile");
            Ensure.NotNull(isHashCollision, "isHashCollision");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var fileSize = GetFileSize(tables); // approximate file size

            if (tables.Count == 2)
            {
                return(MergeTo2(tables, fileSize, outputFile, isHashCollision, cacheDepth)); // special case
            }
            Log.Trace("PTables merge started.");
            var watch = Stopwatch.StartNew();

            var enumerators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();

            for (int i = 0; i < enumerators.Count; i++)
            {
                if (!enumerators[i].MoveNext())
                {
                    enumerators[i].Dispose();
                    enumerators.RemoveAt(i);
                    i--;
                }
            }

            long dumpedEntryCount = 0;

            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                          DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                    using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                        {
                            // WRITE HEADER
                            var headerBytes = new PTableHeader(Version).AsByteArray();
                            cs.Write(headerBytes, 0, headerBytes.Length);

                            uint lastDeleted = uint.MaxValue;
                            var  buffer      = new byte[IndexEntrySize];
                            // WRITE INDEX ENTRIES
                            while (enumerators.Count > 0)
                            {
                                var idx     = GetMaxOf(enumerators);
                                var current = enumerators[idx].Current;
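                                // A Version of EventNumber.DeletedStream marks a stream tombstone: write it, remember
                                // the hash, and drop the older entries of that stream that follow in descending order,
                                // except event 0, which is kept so hash collisions can still be detected.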
                                if (current.Version == EventNumber.DeletedStream && !isHashCollision(current))
                                {
                                    lastDeleted = current.Stream;
                                    AppendRecordTo(bs, current.Bytes, buffer);
                                    dumpedEntryCount += 1;
                                }
                                else
                                {
                                    if (lastDeleted != current.Stream || current.Version == 0) // we keep 0th event for hash collision detection
                                    {
                                        AppendRecordTo(bs, current.Bytes, buffer);
                                        dumpedEntryCount += 1;
                                    }
                                }
                                if (!enumerators[idx].MoveNext())
                                {
                                    enumerators[idx].Dispose();
                                    enumerators.RemoveAt(idx);
                                }
                            }
                            bs.Flush();
                            cs.FlushFinalBlock();

                            f.FlushToDisk();
                            f.SetLength(f.Position + MD5Size);

                            // WRITE MD5
                            var hash = md5.Hash;
                            f.Write(hash, 0, hash.Length);
                            f.FlushToDisk();
                        }
            }
            Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).",
                      watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
            return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
        }
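        // GetMaxOf is not shown in these listings. A plausible sketch, consistent with how the k-way merge
        // above uses it (entries are emitted in descending order), is given below -- this is an assumption,
        // not the original helper.
        private static int GetMaxOfSketch(List<IEnumerator<IndexEntry>> enumerators)
        {
            // return the index of the enumerator whose Current entry compares greatest
            int maxIdx = 0;
            for (int i = 1; i < enumerators.Count; i++)
            {
                if (enumerators[i].Current.CompareTo(enumerators[maxIdx].Current) > 0)
                    maxIdx = i;
            }
            return maxIdx;
        }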
Example #19
0
        private PTable(string filename,
                       Guid id,
                       int initialReaders = ESConsts.PTableInitialReaderCount,
                       int maxReaders     = ESConsts.PTableMaxReaderCount,
                       int depth          = 16)
        {
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.NotEmptyGuid(id, "id");
            Ensure.Positive(maxReaders, "maxReaders");
            Ensure.Nonnegative(depth, "depth");

            if (!File.Exists(filename))
            {
                throw new CorruptIndexException(new PTableNotFoundException(filename));
            }

            _id       = id;
            _filename = filename;

            Log.Trace("Loading and Verification of PTable '{0}' started...", Path.GetFileName(Filename));
            var sw = Stopwatch.StartNew();

            _size = new FileInfo(_filename).Length;

            File.SetAttributes(_filename, FileAttributes.ReadOnly | FileAttributes.NotContentIndexed);

            _workItems = new ObjectPool <WorkItem>(string.Format("PTable {0} work items", _id),
                                                   initialReaders,
                                                   maxReaders,
                                                   () => new WorkItem(filename, DefaultBufferSize),
                                                   workItem => workItem.Dispose(),
                                                   pool => OnAllWorkItemsDisposed());

            var readerWorkItem = GetWorkItem();

            try
            {
                readerWorkItem.Stream.Seek(0, SeekOrigin.Begin);
                var header = PTableHeader.FromStream(readerWorkItem.Stream);
                if ((header.Version != PTableVersions.Index32Bit) &&
                    (header.Version != PTableVersions.Index64Bit))
                {
                    throw new CorruptIndexException(new WrongFileVersionException(_filename, header.Version, Version));
                }
                _version = header.Version;

                if (_version == PTableVersions.Index32Bit)
                {
                    _indexEntrySize = IndexEntry32Size;
                    _indexKeySize   = IndexKey32Size;
                }
                if (_version == PTableVersions.Index64Bit)
                {
                    _indexEntrySize = IndexEntry64Size;
                    _indexKeySize   = IndexKey64Size;
                }
                _count = ((_size - PTableHeader.Size - MD5Size) / _indexEntrySize);

                if (Count == 0)
                {
                    _minEntry = new IndexEntryKey(ulong.MaxValue, int.MaxValue);
                    _maxEntry = new IndexEntryKey(ulong.MinValue, int.MinValue);
                }
                else
                {
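                    // Entries are stored in descending key order, so the minimum key sits in the
                    // last slot (Count - 1) and the maximum key in slot 0.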
                    var minEntry = ReadEntry(_indexEntrySize, Count - 1, readerWorkItem, _version);
                    _minEntry = new IndexEntryKey(minEntry.Stream, minEntry.Version);
                    var maxEntry = ReadEntry(_indexEntrySize, 0, readerWorkItem, _version);
                    _maxEntry = new IndexEntryKey(maxEntry.Stream, maxEntry.Version);
                }
            }
            catch (Exception)
            {
                Dispose();
                throw;
            }
            finally
            {
                ReturnWorkItem(readerWorkItem);
            }
            int calcdepth = 0;

            try
            {
                calcdepth  = GetDepth(_size, depth);
                _midpoints = CacheMidpointsAndVerifyHash(calcdepth);
            }
            catch (PossibleToHandleOutOfMemoryException)
            {
                Log.Error("Unable to create midpoints for PTable '{0}' ({1} entries, depth {2} requested). "
                          + "Performance hit will occur. OOM Exception.", Path.GetFileName(Filename), Count, depth);
            }
            Log.Trace("Loading PTable (Version: {0}) '{1}' ({2} entries, cache depth {3}) done in {4}.",
                      _version, Path.GetFileName(Filename), Count, calcdepth, sw.Elapsed);
        }
Example #20
0
        private static PTable MergeTo2(List <IEnumerator <IndexEntry> > enumerators,
                                       long fileSize,
                                       string outputFile,
                                       Func <IndexEntry, bool> isHashCollision,
                                       int cacheDepth)
        {
            Log.Trace("PTables merge started (specialized for <= 2 tables).");
            var  watch       = Stopwatch.StartNew();
            var  bytes       = new byte[16];
            uint lastdeleted = uint.MaxValue;
            var  md5         = MD5.Create();

            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, 1000000, FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                    using (var b = new BufferedStream(cs, 65536))
                    {
                        // WRITE HEADER
                        var headerBytes = new PTableHeader(Version).AsByteArray();
                        cs.Write(headerBytes, 0, headerBytes.Length);

                        // WRITE INDEX ENTRIES
                        var        enum1      = enumerators[0];
                        var        enum2      = enumerators[1];
                        bool       available1 = enum1.MoveNext();
                        bool       available2 = enum2.MoveNext();
                        IndexEntry current;
                        while (available1 || available2)
                        {
                            if (available1 && (!available2 || enum1.Current.CompareTo(enum2.Current) > 0))
                            {
                                current    = enum1.Current;
                                available1 = enum1.MoveNext();
                            }
                            else
                            {
                                current    = enum2.Current;
                                available2 = enum2.MoveNext();
                            }

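                            // int.MaxValue in the Version field marks a deleted stream: keep the tombstone and,
                            // because entries arrive in descending order, drop every older entry of that stream
                            // (this older variant does not preserve event 0 for collision detection).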
                            if (current.Version == int.MaxValue && !isHashCollision(current))
                            {
                                lastdeleted = current.Stream;
                                AppendRecordTo(b, current.Bytes, bytes);
                            }
                            else
                            {
                                if (lastdeleted != current.Stream)
                                {
                                    AppendRecordTo(b, current.Bytes, bytes);
                                }
                            }
                        }
                        f.SetLength(f.Position + MD5Size);
                        b.Flush();
                        cs.FlushFinalBlock();

                        // WRITE MD5
                        var hash = md5.Hash;
                        f.Write(hash, 0, hash.Length);
                    }
            }
            Log.Trace("PTables merge finished in " + watch.Elapsed);
            return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
        }
Example #21
0
        public static PTable MergeTo(ICollection <PTable> tables,
                                     string outputFile,
                                     Func <IndexEntry, bool> isHashCollision,
                                     int cacheDepth = 16)
        {
            var enumerators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
            var fileSize    = GetFileSize(tables); // approximate file size

            if (enumerators.Count == 2)
            {
                return(MergeTo2(enumerators, fileSize, outputFile, isHashCollision, cacheDepth));
            }
            Log.Trace("PTables merge started.");
            var watch = Stopwatch.StartNew();
            for (int i = 0; i < enumerators.Count; i++)
            {
                if (!enumerators[i].MoveNext())
                {
                    enumerators[i].Dispose();
                    enumerators.RemoveAt(i);
                    i--;
                }
            }
            var  bytes       = new byte[16];
            uint lastdeleted = uint.MaxValue;
            var  md5         = MD5.Create();

            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, 1024 * 1024,
                                          FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                    using (var b = new BufferedStream(cs, 65536))
                    {
                        // WRITE HEADER
                        var headerBytes = new PTableHeader(Version).AsByteArray();
                        cs.Write(headerBytes, 0, headerBytes.Length);

                        // WRITE INDEX ENTRIES
                        while (enumerators.Count > 0)
                        {
                            var idx     = GetMaxOf(enumerators);
                            var current = enumerators[idx].Current;
                            if (current.Version == int.MaxValue && !isHashCollision(current))
                            {
                                lastdeleted = current.Stream;
                                AppendRecordTo(b, current.Bytes, bytes);
                            }
                            else
                            {
                                if (lastdeleted != current.Stream)
                                {
                                    AppendRecordTo(b, current.Bytes, bytes);
                                }
                            }
                            if (!enumerators[idx].MoveNext())
                            {
                                enumerators[idx].Dispose();
                                enumerators.RemoveAt(idx);
                            }
                        }
                        f.SetLength(f.Position + MD5Size);
                        b.Flush();
                        cs.FlushFinalBlock();

                        // WRITE MD5
                        var hash = md5.Hash;
                        f.Write(hash, 0, hash.Length);
                    }
            }
            Log.Trace("PTables merge finished in " + watch.Elapsed);
            return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
        }
        private static PTable MergeTo2(IList <PTable> tables, long numIndexEntries, int indexEntrySize, string outputFile,
                                       Func <string, ulong, ulong> upgradeHash, Func <IndexEntry, bool> existsAt, Func <IndexEntry, Tuple <string, bool> > readRecord,
                                       byte version, int cacheDepth, bool skipIndexVerify)
        {
            Log.Trace("PTables merge started (specialized for <= 2 tables).");
            var watch = Stopwatch.StartNew();

            var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
            var enumerators = tables.Select(table => new EnumerableTable(version, table, upgradeHash, existsAt, readRecord)).ToList();

            try
            {
                long dumpedEntryCount = 0;
                using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                              DefaultSequentialBufferSize, FileOptions.SequentialScan))
                {
                    f.SetLength(fileSizeUpToIndexEntries);
                    f.Seek(0, SeekOrigin.Begin);

                    using (var md5 = MD5.Create())
                        using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                            using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                            {
                                // WRITE HEADER
                                var headerBytes = new PTableHeader(version).AsByteArray();
                                cs.Write(headerBytes, 0, headerBytes.Length);

                                // WRITE INDEX ENTRIES
                                var             buffer                = new byte[indexEntrySize];
                                long            indexEntry            = 0L;
                                List <Midpoint> midpoints             = new List <Midpoint>();
                                var             requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                                var             enum1      = enumerators[0];
                                var             enum2      = enumerators[1];
                                bool            available1 = enum1.MoveNext();
                                bool            available2 = enum2.MoveNext();
                                IndexEntry      current;
                                while (available1 || available2)
                                {
                                    var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version, enum1.Current.Position);
                                    var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version, enum2.Current.Position);

                                    if (available1 && (!available2 || entry1.CompareTo(entry2) > 0))
                                    {
                                        current    = entry1;
                                        available1 = enum1.MoveNext();
                                    }
                                    else
                                    {
                                        current    = entry2;
                                        available2 = enum2.MoveNext();
                                    }

                                    AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                                    if (version >= PTableVersions.IndexV4 && IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount))
                                    {
                                        midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version), indexEntry));
                                    }
                                    indexEntry++;
                                    dumpedEntryCount++;
                                }

                                //WRITE MIDPOINTS
                                if (version >= PTableVersions.IndexV4)
                                {
                                    if (dumpedEntryCount != numIndexEntries)
                                    {
                                        //if index entries have been removed, compute the midpoints again
                                        numIndexEntries       = dumpedEntryCount;
                                        requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                                        midpoints             = ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries, requiredMidpointCount, midpoints);
                                    }
                                    WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries, requiredMidpointCount, midpoints);
                                }

                                bs.Flush();
                                cs.FlushFinalBlock();

                                f.SetLength(f.Position + MD5Size);

                                // WRITE MD5
                                var hash = md5.Hash;
                                f.Write(hash, 0, hash.Length);
                                f.FlushToDisk();
                            }
                }
                Log.Trace("PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
                          watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
                return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth, skipIndexVerify: skipIndexVerify));
            }
            finally
            {
                foreach (var enumerator in enumerators)
                {
                    enumerator.Dispose();
                }
            }
        }
        public static PTable MergeTo(IList<PTable> tables, string outputFile, Func<IndexEntry, bool> recordExistsAt, int cacheDepth = 16)
        {
            Ensure.NotNull(tables, "tables");
            Ensure.NotNullOrEmpty(outputFile, "outputFile");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var fileSize = GetFileSize(tables); // approximate file size
            if (tables.Count == 2)
                return MergeTo2(tables, fileSize, outputFile, recordExistsAt, cacheDepth); // special case

            Log.Trace("PTables merge started.");
            var watch = Stopwatch.StartNew();

            var enumerators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
            for (int i = 0; i < enumerators.Count; i++)
            {
                if (!enumerators[i].MoveNext())
                {
                    enumerators[i].Dispose();
                    enumerators.RemoveAt(i);
                    i--;
                }
            }

            long dumpedEntryCount = 0;
            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                          DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                {
                    // WRITE HEADER
                    var headerBytes = new PTableHeader(Version).AsByteArray();
                    cs.Write(headerBytes, 0, headerBytes.Length);

                    var buffer = new byte[IndexEntrySize];
                    // WRITE INDEX ENTRIES
                    while (enumerators.Count > 0)
                    {
                        var idx = GetMaxOf(enumerators);
                        var current = enumerators[idx].Current;
                        if (recordExistsAt(current))
                        {
                            AppendRecordTo(bs, current.Bytes, buffer);
                            dumpedEntryCount += 1;
                        }
                        if (!enumerators[idx].MoveNext())
                        {
                            enumerators[idx].Dispose();
                            enumerators.RemoveAt(idx);
                        }
                    }
                    bs.Flush();
                    cs.FlushFinalBlock();

                    f.FlushToDisk();
                    f.SetLength(f.Position + MD5Size);

                    // WRITE MD5
                    var hash = md5.Hash;
                    f.Write(hash, 0, hash.Length);
                    f.FlushToDisk();
                }
            }
            Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).",
                      watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
            return new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth);
        }
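        // A hedged usage sketch (not part of the listing above): `tableA`, `tableB` and `tableC` are
        // placeholders for PTables already loaded from disk, and the predicate simply keeps every entry.
        public static PTable MergeExample(PTable tableA, PTable tableB, PTable tableC)
        {
            var tables = new List<PTable> { tableA, tableB, tableC };
            var merged = MergeTo(tables, "index/ptable-merged.dat",
                                 recordExistsAt: entry => true, // placeholder predicate
                                 cacheDepth: 16);
            foreach (var old in tables)
                old.Dispose(); // assumed cleanup; the source tables are no longer needed after the merge
            return merged;
        }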
        public static PTable MergeTo(IList <PTable> tables, string outputFile, Func <string, ulong, ulong> upgradeHash, Func <IndexEntry, bool> existsAt, Func <IndexEntry, Tuple <string, bool> > readRecord, byte version, int cacheDepth = 16, bool skipIndexVerify = false)
        {
            Ensure.NotNull(tables, "tables");
            Ensure.NotNullOrEmpty(outputFile, "outputFile");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var indexEntrySize = GetIndexEntrySize(version);

            long numIndexEntries = 0;

            for (var i = 0; i < tables.Count; i++)
            {
                numIndexEntries += tables[i].Count;
            }

            var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);

            if (tables.Count == 2)
            {
                return(MergeTo2(tables, numIndexEntries, indexEntrySize, outputFile, upgradeHash, existsAt, readRecord, version, cacheDepth, skipIndexVerify)); // special case
            }
            Log.Trace("PTables merge started.");
            var watch = Stopwatch.StartNew();

            var enumerators = tables.Select(table => new EnumerableTable(version, table, upgradeHash, existsAt, readRecord)).ToList();

            try
            {
                for (int i = 0; i < enumerators.Count; i++)
                {
                    if (!enumerators[i].MoveNext())
                    {
                        enumerators[i].Dispose();
                        enumerators.RemoveAt(i);
                        i--;
                    }
                }

                long dumpedEntryCount = 0;
                using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                              DefaultSequentialBufferSize, FileOptions.SequentialScan))
                {
                    f.SetLength(fileSizeUpToIndexEntries);
                    f.Seek(0, SeekOrigin.Begin);

                    using (var md5 = MD5.Create())
                        using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                            using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                            {
                                // WRITE HEADER
                                var headerBytes = new PTableHeader(version).AsByteArray();
                                cs.Write(headerBytes, 0, headerBytes.Length);

                                var             buffer                = new byte[indexEntrySize];
                                long            indexEntry            = 0L;
                                List <Midpoint> midpoints             = new List <Midpoint>();
                                var             requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                                // WRITE INDEX ENTRIES
                                while (enumerators.Count > 0)
                                {
                                    var idx     = GetMaxOf(enumerators);
                                    var current = enumerators[idx].Current;
                                    AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                                    if (version >= PTableVersions.IndexV4 && IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount))
                                    {
                                        midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version), indexEntry));
                                    }
                                    indexEntry++;
                                    dumpedEntryCount++;

                                    if (!enumerators[idx].MoveNext())
                                    {
                                        enumerators[idx].Dispose();
                                        enumerators.RemoveAt(idx);
                                    }
                                }

                                //WRITE MIDPOINTS
                                if (version >= PTableVersions.IndexV4)
                                {
                                    if (dumpedEntryCount != numIndexEntries)
                                    {
                                        //if index entries have been removed, compute the midpoints again
                                        numIndexEntries       = dumpedEntryCount;
                                        requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                                        midpoints             = ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries, requiredMidpointCount, midpoints);
                                    }
                                    WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries, requiredMidpointCount, midpoints);
                                }

                                bs.Flush();
                                cs.FlushFinalBlock();

                                f.FlushToDisk();
                                f.SetLength(f.Position + MD5Size);

                                // WRITE MD5
                                var hash = md5.Hash;
                                f.Write(hash, 0, hash.Length);
                                f.FlushToDisk();
                            }
                }
                Log.Trace("PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
                          watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
                return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth, skipIndexVerify: skipIndexVerify));
            }
            finally
            {
                foreach (var enumerableTable in enumerators)
                {
                    enumerableTable.Dispose();
                }
            }
        }
Example #25
0
        private static PTable MergeTo2(IList <PTable> tables, long fileSize, string outputFile,
                                       Func <IndexEntry, bool> recordExistsAt, int cacheDepth)
        {
            Log.Trace("PTables merge started (specialized for <= 2 tables).");
            var watch = Stopwatch.StartNew();

            var  enumerators      = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
            long dumpedEntryCount = 0;

            using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                          DefaultSequentialBufferSize, FileOptions.SequentialScan))
            {
                f.SetLength(fileSize);
                f.Seek(0, SeekOrigin.Begin);

                using (var md5 = MD5.Create())
                    using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                        {
                            // WRITE HEADER
                            var headerBytes = new PTableHeader(Version).AsByteArray();
                            cs.Write(headerBytes, 0, headerBytes.Length);

                            // WRITE INDEX ENTRIES
                            var        buffer     = new byte[IndexEntrySize];
                            var        enum1      = enumerators[0];
                            var        enum2      = enumerators[1];
                            bool       available1 = enum1.MoveNext();
                            bool       available2 = enum2.MoveNext();
                            IndexEntry current;
                            while (available1 || available2)
                            {
                                if (available1 && (!available2 || enum1.Current.CompareTo(enum2.Current) > 0))
                                {
                                    current    = enum1.Current;
                                    available1 = enum1.MoveNext();
                                }
                                else
                                {
                                    current    = enum2.Current;
                                    available2 = enum2.MoveNext();
                                }

                                if (recordExistsAt(current))
                                {
                                    AppendRecordTo(bs, current.Bytes, buffer);
                                    dumpedEntryCount += 1;
                                }
                            }
                            bs.Flush();
                            cs.FlushFinalBlock();

                            f.SetLength(f.Position + MD5Size);

                            // WRITE MD5
                            var hash = md5.Hash;
                            f.Write(hash, 0, hash.Length);
                            f.FlushToDisk();
                        }
            }
            Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).",
                      watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
            return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
        }