Example #1
0
        /// <summary>
        /// Constructing an <c>UnmanagedMemoryAppendOnlyList</c> with a zero or negative
        /// constructor argument must raise <see cref="ArgumentException"/>.
        /// </summary>
        public void throw_argument_exception()
        {
            // same values, same order as three hand-written cases: 0, -1, -2
            var rejectedArguments = new[] { 0, -1, -2 };

            foreach (var rejected in rejectedArguments)
            {
                Assert.Throws <ArgumentException>(() => {
                    var unused = new UnmanagedMemoryAppendOnlyList <int>(rejected);
                });
            }
        }
        /// <summary>
        /// Appends the cached midpoint records (when the counts are consistent) followed by the
        /// PTable footer to the file being written.
        /// </summary>
        /// <param name="bs">Buffered stream through which records and the footer are written.</param>
        /// <param name="fs">Underlying file stream; its length is extended before each section is written.</param>
        /// <param name="version">PTable version, forwarded to the size helpers and the footer.</param>
        /// <param name="indexEntrySize">Size of one index entry, forwarded to AppendMidpointRecordTo.</param>
        /// <param name="buffer">Scratch buffer forwarded to AppendMidpointRecordTo.</param>
        /// <param name="dumpedEntryCount">Number of index entries actually written.</param>
        /// <param name="numIndexEntries">Number of index entries the table is expected to hold.</param>
        /// <param name="requiredMidpointCount">Number of midpoints that should have been collected.</param>
        /// <param name="midpoints">Collected midpoints; a second one is appended in the single-entry case.</param>
        private static void WriteMidpointsTo(BufferedStream bs, FileStream fs, byte version, int indexEntrySize,
                                             byte[] buffer, long dumpedEntryCount, long numIndexEntries, long requiredMidpointCount,
                                             UnmanagedMemoryAppendOnlyList <Midpoint> midpoints)
        {
            // A table with a single index entry still needs two midpoints: duplicate the only one.
            if (numIndexEntries == 1 && midpoints.Count == 1)
            {
                var only = midpoints[0];
                midpoints.Add(new Midpoint(only.Key, only.ItemIndex));
            }

            // Midpoints are only cached when both the entry count and the midpoint count line up;
            // a mismatch means something is wrong and the footer will then record zero midpoints.
            bool countsMatch = dumpedEntryCount == numIndexEntries && requiredMidpointCount == midpoints.Count;
            int cachedMidpointCount = 0;

            if (!countsMatch)
            {
                Log.Debug(
                    "Not caching index midpoints to PTable due to count mismatch. Table entries: {numIndexEntries} / Dumped entries: {dumpedEntryCount}, Required midpoint count: {requiredMidpointCount} /  Actual midpoint count: {midpoints}",
                    numIndexEntries, dumpedEntryCount, requiredMidpointCount, midpoints.Count);
            }
            else
            {
                // Push buffered index entries down first so fs.Position is accurate.
                bs.Flush();
                fs.SetLength(GetFileSizeUpToMidpointEntries(fs.Position, midpoints.Count, version));

                int next = 0;
                while (next < midpoints.Count)
                {
                    AppendMidpointRecordTo(bs, buffer, version, midpoints[next], indexEntrySize);
                    next++;
                }

                cachedMidpointCount = midpoints.Count;
                Log.Debug("Cached {midpointsWritten} index midpoints to PTable", cachedMidpointCount);
            }

            // Footer: flush, extend the file by the footer size, then write and flush the footer bytes.
            bs.Flush();
            long positionBeforeFooter = fs.Position;
            fs.SetLength(positionBeforeFooter + PTableFooter.GetSize(version));

            var footer = new PTableFooter(version, (uint)cachedMidpointCount);
            var serializedFooter = footer.AsByteArray();
            bs.Write(serializedFooter, 0, serializedFooter.Length);
            bs.Flush();
        }
Example #3
0
        /// <summary>
        /// Dumps every entry of <paramref name="table"/> into a new PTable file at
        /// <paramref name="filename"/> and returns a PTable opened on that file.
        /// </summary>
        /// <remarks>
        /// Bytes are written in this order: header, index entries, then (only when the table version is
        /// IndexV4 or later) the midpoint entries and footer emitted by WriteMidpointsTo, and finally the
        /// MD5 hash of everything that went through the crypto stream. The hash itself is written straight
        /// to the file stream, so it is not part of the hashed data.
        /// </remarks>
        /// <param name="table">Source memtable; checked with Ensure.NotNull.</param>
        /// <param name="filename">Destination path; checked with Ensure.NotNullOrEmpty.</param>
        /// <param name="initialReaders">Forwarded to the PTable constructor.</param>
        /// <param name="maxReaders">Forwarded to the PTable constructor.</param>
        /// <param name="cacheDepth">Checked with Ensure.Nonnegative; used to compute the required midpoint count.</param>
        /// <param name="skipIndexVerify">Forwarded to the PTable constructor.</param>
        /// <param name="useBloomFilter">Passed to ConstructBloomFilter and forwarded to the PTable constructor.</param>
        /// <param name="lruCacheSize">Forwarded to the PTable constructor.</param>
        /// <returns>A PTable constructed over the freshly written file, carrying the memtable's Id.</returns>
        public static PTable FromMemtable(IMemTable table, string filename, int initialReaders, int maxReaders,
                                          int cacheDepth       = 16,
                                          bool skipIndexVerify = false,
                                          bool useBloomFilter  = true,
                                          int lruCacheSize     = 1_000_000)
        {
            Ensure.NotNull(table, "table");
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            int  indexEntrySize   = GetIndexEntrySize(table.Version);
            long dumpedEntryCount = 0;

            var sw = Stopwatch.StartNew();

            // FileMode.Create: an existing file at this path is overwritten.
            using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
                                           DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
                // Pre-size the file to the value computed by GetFileSizeUpToIndexEntries, then rewind to the start.
                var fileSize = GetFileSizeUpToIndexEntries(table.Count, table.Version);
                fs.SetLength(fileSize);
                fs.Seek(0, SeekOrigin.Begin);

                // Stream stack: bs (buffering) -> cs (feeds md5 while writing) -> fs (file).
                // bloomFilter is null-checked below, so ConstructBloomFilter may return null
                // (presumably when useBloomFilter is false - TODO confirm).
                using (var bloomFilter = ConstructBloomFilter(useBloomFilter, filename, table.Count))
                    using (var md5 = MD5.Create())
                        using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
                            using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                                // WRITE HEADER
                                // Written directly to the crypto stream; bs has nothing buffered yet,
                                // so the byte order in the file is unaffected.
                                var headerBytes = new PTableHeader(table.Version).AsByteArray();
                                cs.Write(headerBytes, 0, headerBytes.Length);

                                // WRITE INDEX ENTRIES
                                var buffer  = new byte[indexEntrySize];
                                var records = table.IterateAllInOrder();
                                var requiredMidpointCount = GetRequiredMidpointCountCached(table.Count, table.Version, cacheDepth);
                                // Capacity is the expected midpoint count plus MidpointsOverflowSafetyNet spare slots.
                                using var midpoints = new UnmanagedMemoryAppendOnlyList <Midpoint>((int)requiredMidpointCount + MidpointsOverflowSafetyNet);

                                long  indexEntry   = 0L;
                                ulong?previousHash = null;
                                foreach (var rec in records)
                                {
                                    AppendRecordTo(bs, buffer, table.Version, rec, indexEntrySize);
                                    dumpedEntryCount += 1;
                                    // Midpoints are only collected for IndexV4 and later.
                                    if (table.Version >= PTableVersions.IndexV4 &&
                                        IsMidpointIndex(indexEntry, table.Count, requiredMidpointCount))
                                    {
                                        midpoints.Add(new Midpoint(new IndexEntryKey(rec.Stream, rec.Version), indexEntry));
                                    }

                                    // WRITE BLOOM FILTER ENTRY
                                    // A run of consecutive records with the same stream hash is added only once.
                                    if (bloomFilter != null && rec.Stream != previousHash)
                                    {
                                        // we are creating a PTable of the same version as the Memtable. therefore the hash is the right format
                                        var streamHash = rec.Stream;
                                        bloomFilter.Add(GetSpan(ref streamHash));
                                        previousHash = rec.Stream;
                                    }

                                    indexEntry++;
                                }

                                //WRITE MIDPOINTS
                                if (table.Version >= PTableVersions.IndexV4)
                                {
                                    var numIndexEntries = table.Count;
                                    // The iteration yielded a different number of entries than table.Count reported.
                                    if (dumpedEntryCount != numIndexEntries)
                                    {
                                        //if index entries have been removed, compute the midpoints again
                                        numIndexEntries       = dumpedEntryCount;
                                        requiredMidpointCount =
                                            GetRequiredMidpointCount(numIndexEntries, table.Version, cacheDepth);
                                        ComputeMidpoints(bs, fs, table.Version, indexEntrySize, numIndexEntries,
                                                         requiredMidpointCount, midpoints);
                                    }

                                    WriteMidpointsTo(bs, fs, table.Version, indexEntrySize, buffer, dumpedEntryCount,
                                                     numIndexEntries, requiredMidpointCount, midpoints);
                                }

                                // Drain every layer and finalise the hash before md5.Hash is read.
                                bloomFilter?.Flush();
                                bs.Flush();
                                cs.FlushFinalBlock();

                                // WRITE MD5
                                // Extend the file by MD5Size and append the hash via fs directly, bypassing the crypto stream.
                                var hash = md5.Hash;
                                fs.SetLength(fs.Position + MD5Size);
                                fs.Write(hash, 0, hash.Length);
                                fs.FlushToDisk();
                            }
            }

            Log.Debug("Dumped MemTable [{id}, {table} entries] in {elapsed}.", table.Id, table.Count, sw.Elapsed);
            return(new PTable(filename, table.Id, initialReaders, maxReaders, cacheDepth, skipIndexVerify, useBloomFilter, lruCacheSize));
        }
Example #4
0
        /// <summary>
        /// Merges exactly two PTables into a new PTable file at <paramref name="outputFile"/> using a
        /// two-way merge, and returns a PTable opened on the result.
        /// </summary>
        /// <remarks>
        /// Only the first two elements of <paramref name="tables"/> are read (indices 0 and 1).
        /// On each step the entry that compares greater (IndexEntry.CompareTo) is written first.
        /// Bytes are written in this order: header, merged index entries, then (for IndexV4 and later) the
        /// midpoints and footer emitted by WriteMidpointsTo, and finally the MD5 hash, which is written
        /// directly to the file stream.
        /// </remarks>
        /// <param name="tables">Source tables; wrapped in EnumerableTable instances that are disposed on exit.</param>
        /// <param name="numIndexEntries">Expected entry count; replaced by the actual count if they differ.</param>
        /// <param name="indexEntrySize">Size in bytes of the scratch buffer used per index entry.</param>
        /// <param name="outputFile">Destination path; opened with FileMode.CreateNew, so it must not already exist.</param>
        /// <param name="upgradeHash">Forwarded to EnumerableTable.</param>
        /// <param name="existsAt">Forwarded to EnumerableTable.</param>
        /// <param name="readRecord">Forwarded to EnumerableTable.</param>
        /// <param name="version">PTable version of the output file.</param>
        /// <param name="initialReaders">Forwarded to the PTable constructor.</param>
        /// <param name="maxReaders">Forwarded to the PTable constructor.</param>
        /// <param name="cacheDepth">Used to compute the required midpoint count; forwarded to the PTable constructor.</param>
        /// <param name="skipIndexVerify">Forwarded to the PTable constructor.</param>
        /// <param name="useBloomFilter">Passed to ConstructBloomFilter and forwarded to the PTable constructor.</param>
        /// <param name="lruCacheSize">Forwarded to the PTable constructor.</param>
        /// <returns>A PTable with a freshly generated Guid, opened on the merged file.</returns>
        private static PTable MergeTo2 <TStreamId>(IList <PTable> tables, long numIndexEntries, int indexEntrySize,
                                                   string outputFile,
                                                   Func <TStreamId, ulong, ulong> upgradeHash, Func <IndexEntry, bool> existsAt,
                                                   Func <IndexEntry, Tuple <TStreamId, bool> > readRecord,
                                                   byte version, int initialReaders, int maxReaders,
                                                   int cacheDepth, bool skipIndexVerify,
                                                   bool useBloomFilter, int lruCacheSize)
        {
            Log.Debug("PTables merge started (specialized for <= 2 tables).");
            var watch = Stopwatch.StartNew();

            var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
            var enumerators = tables
                              .Select(table => new EnumerableTable <TStreamId>(version, table, upgradeHash, existsAt, readRecord)).ToList();

            try {
                long dumpedEntryCount = 0;
                // FileMode.CreateNew: fails if the output file already exists.
                using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                              DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
                    // Pre-size the file for the expected entries, then rewind to the start.
                    f.SetLength(fileSizeUpToIndexEntries);
                    f.Seek(0, SeekOrigin.Begin);

                    // Stream stack: bs (buffering) -> cs (feeds md5 while writing) -> f (file).
                    using (var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count)))
                        using (var md5 = MD5.Create())
                            using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                                using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                                    // WRITE HEADER
                                    // Written directly to the crypto stream; bs has nothing buffered yet.
                                    var headerBytes = new PTableHeader(version).AsByteArray();
                                    cs.Write(headerBytes, 0, headerBytes.Length);

                                    // WRITE INDEX ENTRIES
                                    var  buffer                = new byte[indexEntrySize];
                                    long indexEntry            = 0L;
                                    var  requiredMidpointCount =
                                        GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                                    // Capacity is the expected midpoint count plus MidpointsOverflowSafetyNet spare slots.
                                    using var midpoints = new UnmanagedMemoryAppendOnlyList <Midpoint>((int)requiredMidpointCount + MidpointsOverflowSafetyNet);

                                    var        enum1      = enumerators[0];
                                    var        enum2      = enumerators[1];
                                    bool       available1 = enum1.MoveNext();
                                    bool       available2 = enum2.MoveNext();
                                    IndexEntry current;
                                    ulong?     previousHash = null;
                                    while (available1 || available2)
                                    {
                                        // NOTE(review): Current is read from both enumerators on every pass, including
                                        // one that has already reported exhaustion; the stale value is never selected
                                        // (see the availability checks below), but this relies on EnumerableTable.Current
                                        // not throwing after MoveNext() returned false - confirm.
                                        var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version,
                                                                    enum1.Current.Position);
                                        var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version,
                                                                    enum2.Current.Position);

                                        // Take from enum1 when it is the only one left or its entry compares greater;
                                        // otherwise (including ties) take from enum2.
                                        if (available1 && (!available2 || entry1.CompareTo(entry2) > 0))
                                        {
                                            current    = entry1;
                                            available1 = enum1.MoveNext();
                                        }
                                        else
                                        {
                                            current    = entry2;
                                            available2 = enum2.MoveNext();
                                        }

                                        AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                                        // Midpoints are only collected for IndexV4 and later.
                                        if (version >= PTableVersions.IndexV4 &&
                                            IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount))
                                        {
                                            midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version),
                                                                       indexEntry));
                                        }

                                        // WRITE BLOOM FILTER ENTRY
                                        // A run of consecutive entries with the same stream hash is added only once.
                                        if (bloomFilter != null && current.Stream != previousHash)
                                        {
                                            // upgradeHash has already ensured the hash is in the right format for the target
                                            var streamHash = current.Stream;
                                            bloomFilter.Add(GetSpan(ref streamHash));
                                            previousHash = current.Stream;
                                        }

                                        indexEntry++;
                                        dumpedEntryCount++;
                                    }

                                    //WRITE MIDPOINTS
                                    if (version >= PTableVersions.IndexV4)
                                    {
                                        // The enumerators yielded a different number of entries than the caller expected.
                                        if (dumpedEntryCount != numIndexEntries)
                                        {
                                            //if index entries have been removed, compute the midpoints again
                                            numIndexEntries       = dumpedEntryCount;
                                            requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                                            ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
                                                             requiredMidpointCount, midpoints);
                                        }

                                        WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries,
                                                         requiredMidpointCount, midpoints);
                                    }

                                    // Drain every layer and finalise the hash before md5.Hash is read.
                                    bloomFilter?.Flush();
                                    bs.Flush();
                                    cs.FlushFinalBlock();

                                    // Extend the file by MD5Size to make room for the hash.
                                    f.SetLength(f.Position + MD5Size);

                                    // WRITE MD5
                                    // Written via f directly, bypassing the crypto stream.
                                    var hash = md5.Hash;
                                    f.Write(hash, 0, hash.Length);
                                    f.FlushToDisk();
                                }
                }

                Log.Debug(
                    "PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
                    watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
                return(new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth, skipIndexVerify, useBloomFilter, lruCacheSize));
            } finally {
                // Runs on success and on failure: every EnumerableTable wrapper is disposed.
                foreach (var enumerator in enumerators)
                {
                    enumerator.Dispose();
                }
            }
        }
Example #5
0
        /// <summary>
        /// Merges the given PTables into a new PTable file at <paramref name="outputFile"/> and returns a
        /// PTable opened on the result. When exactly two tables are supplied the work is delegated to MergeTo2.
        /// </summary>
        /// <remarks>
        /// Bytes are written in this order: header, merged index entries, then (for IndexV4 and later) the
        /// midpoints and footer emitted by WriteMidpointsTo, and finally the MD5 hash, which is written
        /// directly to the file stream.
        /// </remarks>
        /// <param name="tables">Source tables; checked with Ensure.NotNull.</param>
        /// <param name="outputFile">Destination path; checked with Ensure.NotNullOrEmpty and opened with FileMode.CreateNew, so it must not already exist.</param>
        /// <param name="upgradeHash">Forwarded to EnumerableTable.</param>
        /// <param name="existsAt">Forwarded to EnumerableTable.</param>
        /// <param name="readRecord">Forwarded to EnumerableTable.</param>
        /// <param name="version">PTable version of the output file.</param>
        /// <param name="initialReaders">Forwarded to the PTable constructor.</param>
        /// <param name="maxReaders">Forwarded to the PTable constructor.</param>
        /// <param name="cacheDepth">Checked with Ensure.Nonnegative; used to compute the required midpoint count.</param>
        /// <param name="skipIndexVerify">Forwarded to the PTable constructor.</param>
        /// <param name="useBloomFilter">Passed to ConstructBloomFilter and forwarded to the PTable constructor.</param>
        /// <param name="lruCacheSize">Forwarded to the PTable constructor.</param>
        /// <returns>A PTable with a freshly generated Guid, opened on the merged file.</returns>
        public static PTable MergeTo <TStreamId>(IList <PTable> tables, string outputFile, Func <TStreamId, ulong, ulong> upgradeHash,
                                                 Func <IndexEntry, bool> existsAt, Func <IndexEntry, Tuple <TStreamId, bool> > readRecord, byte version,
                                                 int initialReaders, int maxReaders,
                                                 int cacheDepth       = 16,
                                                 bool skipIndexVerify = false,
                                                 bool useBloomFilter  = true,
                                                 int lruCacheSize     = 1_000_000)
        {
            Ensure.NotNull(tables, "tables");
            Ensure.NotNullOrEmpty(outputFile, "outputFile");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var indexEntrySize = GetIndexEntrySize(version);

            // Expected size of the output: the sum of the Count of every input table.
            long numIndexEntries = 0;

            for (var i = 0; i < tables.Count; i++)
            {
                numIndexEntries += tables[i].Count;
            }

            var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);

            if (tables.Count == 2)
            {
                return(MergeTo2(tables, numIndexEntries, indexEntrySize, outputFile, upgradeHash, existsAt, readRecord,
                                version, initialReaders, maxReaders, cacheDepth, skipIndexVerify, useBloomFilter, lruCacheSize));        // special case
            }
            Log.Debug("PTables merge started.");
            var watch = Stopwatch.StartNew();

            var enumerators = tables
                              .Select(table => new EnumerableTable <TStreamId>(version, table, upgradeHash, existsAt, readRecord)).ToList();

            try {
                // Prime every enumerator; ones that yield nothing are disposed and dropped right away.
                // i-- compensates for RemoveAt shifting the following elements down by one.
                for (int i = 0; i < enumerators.Count; i++)
                {
                    if (!enumerators[i].MoveNext())
                    {
                        enumerators[i].Dispose();
                        enumerators.RemoveAt(i);
                        i--;
                    }
                }

                long dumpedEntryCount = 0;
                // FileMode.CreateNew: fails if the output file already exists.
                using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                              DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
                    // Pre-size the file for the expected entries, then rewind to the start.
                    f.SetLength(fileSizeUpToIndexEntries);
                    f.Seek(0, SeekOrigin.Begin);

                    // Stream stack: bs (buffering) -> cs (feeds md5 while writing) -> f (file).
                    using (var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count)))
                        using (var md5 = MD5.Create())
                            using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                                using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                                    // WRITE HEADER
                                    // Written directly to the crypto stream; bs has nothing buffered yet.
                                    var headerBytes = new PTableHeader(version).AsByteArray();
                                    cs.Write(headerBytes, 0, headerBytes.Length);

                                    var  buffer                = new byte[indexEntrySize];
                                    long indexEntry            = 0L;
                                    var  requiredMidpointCount =
                                        GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                                    // Capacity is the expected midpoint count plus MidpointsOverflowSafetyNet spare slots.
                                    using var midpoints = new UnmanagedMemoryAppendOnlyList <Midpoint>((int)requiredMidpointCount + MidpointsOverflowSafetyNet);

                                    // WRITE INDEX ENTRIES
                                    ulong?previousHash = null;
                                    // Each pass writes one entry; the loop ends once every enumerator has been removed.
                                    while (enumerators.Count > 0)
                                    {
                                        // GetMaxOf returns the index of the enumerator to consume next
                                        // (presumably the one whose Current entry is greatest - TODO confirm).
                                        var idx     = GetMaxOf(enumerators);
                                        var current = enumerators[idx].Current;
                                        AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                                        // Midpoints are only collected for IndexV4 and later.
                                        if (version >= PTableVersions.IndexV4 &&
                                            IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount))
                                        {
                                            midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version),
                                                                       indexEntry));
                                        }

                                        // WRITE BLOOM FILTER ENTRY
                                        // A run of consecutive entries with the same stream hash is added only once.
                                        if (bloomFilter != null && current.Stream != previousHash)
                                        {
                                            // upgradeHash has already ensured the hash is in the right format for the target
                                            var streamHash = current.Stream;
                                            bloomFilter.Add(GetSpan(ref streamHash));
                                            previousHash = current.Stream;
                                        }

                                        indexEntry++;
                                        dumpedEntryCount++;

                                        // Advance the enumerator that was just consumed; drop it once it is exhausted.
                                        if (!enumerators[idx].MoveNext())
                                        {
                                            enumerators[idx].Dispose();
                                            enumerators.RemoveAt(idx);
                                        }
                                    }

                                    //WRITE MIDPOINTS
                                    if (version >= PTableVersions.IndexV4)
                                    {
                                        // The enumerators yielded a different number of entries than the tables' Count sum.
                                        if (dumpedEntryCount != numIndexEntries)
                                        {
                                            //if index entries have been removed, compute the midpoints again
                                            numIndexEntries       = dumpedEntryCount;
                                            requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                                            ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
                                                             requiredMidpointCount, midpoints);
                                        }

                                        WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries,
                                                         requiredMidpointCount, midpoints);
                                    }

                                    // Drain every layer and finalise the hash before md5.Hash is read.
                                    bloomFilter?.Flush();
                                    bs.Flush();
                                    cs.FlushFinalBlock();

                                    // NOTE(review): MergeTo2 does not flush to disk at this point; this extra
                                    // FlushToDisk appears to be redundant with the one after the hash is written.
                                    f.FlushToDisk();
                                    // Extend the file by MD5Size to make room for the hash.
                                    f.SetLength(f.Position + MD5Size);

                                    // WRITE MD5
                                    // Written via f directly, bypassing the crypto stream.
                                    var hash = md5.Hash;
                                    f.Write(hash, 0, hash.Length);
                                    f.FlushToDisk();
                                }
                }

                Log.Debug(
                    "PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
                    watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
                return(new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth, skipIndexVerify, useBloomFilter, lruCacheSize));
            } finally {
                // Only enumerators still in the list are disposed here; exhausted ones were
                // disposed and removed earlier.
                foreach (var enumerableTable in enumerators)
                {
                    enumerableTable.Dispose();
                }
            }
        }
Example #6
0
        /// <summary>
        /// Opens an existing PTable file: validates its header (and footer for IndexV4), derives the number
        /// of index entries from the file size, reads the minimum and maximum keys, and builds the midpoint
        /// cache via CacheMidpointsAndVerifyHash.
        /// </summary>
        /// <remarks>
        /// Side effect: the file's attributes are set to ReadOnly | NotContentIndexed.
        /// A CorruptIndexException is thrown when the file is missing, the header version is not one of
        /// IndexV1..IndexV4, the header and footer versions differ, the computed index size is negative or not
        /// a multiple of the entry size, or the cached midpoint count is inconsistent with the entry count.
        /// If any exception escapes the validation phase, Dispose() is called before it is rethrown.
        /// </remarks>
        /// <param name="filename">Path of the PTable file; checked with Ensure.NotNullOrEmpty.</param>
        /// <param name="id">Identifier stored in _id; checked with Ensure.NotEmptyGuid.</param>
        /// <param name="initialReaders">Passed to the ObjectPool of work items.</param>
        /// <param name="maxReaders">Passed to the ObjectPool of work items; checked with Ensure.Positive.</param>
        /// <param name="depth">Requested midpoint cache depth; checked with Ensure.Nonnegative and passed to GetDepth.</param>
        /// <param name="skipIndexVerify">Forwarded to CacheMidpointsAndVerifyHash; also alters the start-up log message.</param>
        private PTable(string filename,
                       Guid id,
                       int initialReaders,
                       int maxReaders,
                       int depth            = 16,
                       bool skipIndexVerify = false)
        {
            Ensure.NotNullOrEmpty(filename, "filename");
            Ensure.NotEmptyGuid(id, "id");
            Ensure.Positive(maxReaders, "maxReaders");
            Ensure.Nonnegative(depth, "depth");

            // A missing file is reported as index corruption, with the not-found exception as its cause.
            if (!File.Exists(filename))
            {
                throw new CorruptIndexException(new PTableNotFoundException(filename));
            }

            _id       = id;
            _filename = filename;

            Log.Debug("Loading " + (skipIndexVerify ? "" : "and Verification ") + "of PTable '{pTable}' started...",
                      Path.GetFileName(Filename));
            var sw = Stopwatch.StartNew();

            _size = new FileInfo(_filename).Length;

            File.SetAttributes(_filename, FileAttributes.ReadOnly | FileAttributes.NotContentIndexed);

            // Pool of WorkItem readers over this file; OnAllWorkItemsDisposed is the pool's final callback.
            _workItems = new ObjectPool <WorkItem>(string.Format("PTable {0} work items", _id),
                                                   initialReaders,
                                                   maxReaders,
                                                   () => new WorkItem(filename, DefaultBufferSize),
                                                   workItem => workItem.Dispose(),
                                                   pool => OnAllWorkItemsDisposed());

            // Borrowed for the whole validation phase and handed back in the finally block below.
            var readerWorkItem = GetWorkItem();

            try {
                readerWorkItem.Stream.Seek(0, SeekOrigin.Begin);
                var header = PTableHeader.FromStream(readerWorkItem.Stream);
                // Only IndexV1 through IndexV4 are accepted.
                if ((header.Version != PTableVersions.IndexV1) &&
                    (header.Version != PTableVersions.IndexV2) &&
                    (header.Version != PTableVersions.IndexV3) &&
                    (header.Version != PTableVersions.IndexV4))
                {
                    throw new CorruptIndexException(new WrongFileVersionException(_filename, header.Version, Version));
                }
                _version = header.Version;

                // Each supported version has its own entry size and key size.
                if (_version == PTableVersions.IndexV1)
                {
                    _indexEntrySize = IndexEntryV1Size;
                    _indexKeySize   = IndexKeyV1Size;
                }

                if (_version == PTableVersions.IndexV2)
                {
                    _indexEntrySize = IndexEntryV2Size;
                    _indexKeySize   = IndexKeyV2Size;
                }

                if (_version == PTableVersions.IndexV3)
                {
                    _indexEntrySize = IndexEntryV3Size;
                    _indexKeySize   = IndexKeyV3Size;
                }

                if (_version >= PTableVersions.IndexV4)
                {
                    //read the PTable footer
                    // The footer sits immediately before the trailing MD5 hash; the stream position is
                    // saved here and restored once the footer has been read.
                    var previousPosition = readerWorkItem.Stream.Position;
                    readerWorkItem.Stream.Seek(readerWorkItem.Stream.Length - MD5Size - PTableFooter.GetSize(_version),
                                               SeekOrigin.Begin);
                    var footer = PTableFooter.FromStream(readerWorkItem.Stream);
                    if (footer.Version != header.Version)
                    {
                        throw new CorruptIndexException(
                                  String.Format("PTable header/footer version mismatch: {0}/{1}", header.Version,
                                                footer.Version), new InvalidFileException("Invalid PTable file."));
                    }

                    if (_version == PTableVersions.IndexV4)
                    {
                        _indexEntrySize = IndexEntryV4Size;
                        _indexKeySize   = IndexKeyV4Size;
                    }
                    else
                    {
                        // Not reachable with the header check above as written (it admits nothing newer than
                        // IndexV4); guards against a version being allowed there without sizes defined here.
                        throw new InvalidOperationException("Unknown PTable version: " + _version);
                    }

                    // Cached midpoints occupy one index-entry-sized record each.
                    _midpointsCached    = footer.NumMidpointsCached;
                    _midpointsCacheSize = _midpointsCached * _indexEntrySize;
                    readerWorkItem.Stream.Seek(previousPosition, SeekOrigin.Begin);
                }

                // Whatever is left after removing header, cached midpoints, footer and MD5 is index entries.
                // For versions below IndexV4, _midpointsCacheSize is not assigned in this constructor
                // (presumably still its default of 0 - TODO confirm).
                long indexEntriesTotalSize = (_size - PTableHeader.Size - _midpointsCacheSize -
                                              PTableFooter.GetSize(_version) - MD5Size);

                if (indexEntriesTotalSize < 0)
                {
                    throw new CorruptIndexException(String.Format(
                                                        "Total size of index entries < 0: {0}. _size: {1}, header size: {2}, _midpointsCacheSize: {3}, footer size: {4}, md5 size: {5}",
                                                        indexEntriesTotalSize, _size, PTableHeader.Size, _midpointsCacheSize,
                                                        PTableFooter.GetSize(_version), MD5Size));
                }
                else if (indexEntriesTotalSize % _indexEntrySize != 0)
                {
                    throw new CorruptIndexException(String.Format(
                                                        "Total size of index entries: {0} is not divisible by index entry size: {1}",
                                                        indexEntriesTotalSize, _indexEntrySize));
                }

                _count = indexEntriesTotalSize / _indexEntrySize;

                // Sanity checks on the cached midpoint count: given the > 0 and < 2 bounds, the first
                // condition only fires when exactly one midpoint is cached.
                if (_version >= PTableVersions.IndexV4 && _count > 0 && _midpointsCached > 0 && _midpointsCached < 2)
                {
                    //if there is at least 1 index entry with version>=4 and there are cached midpoints, there should always be at least 2 midpoints cached
                    throw new CorruptIndexException(String.Format(
                                                        "Less than 2 midpoints cached in PTable. Index entries: {0}, Midpoints cached: {1}", _count,
                                                        _midpointsCached));
                }
                else if (_count >= 2 && _midpointsCached > _count)
                {
                    //if there are at least 2 index entries, midpoints count should be at most the number of index entries
                    throw new CorruptIndexException(String.Format(
                                                        "More midpoints cached in PTable than index entries. Midpoints: {0} , Index entries: {1}",
                                                        _midpointsCached, _count));
                }

                if (Count == 0)
                {
                    // Empty table: min is set to the largest possible key and max to the smallest.
                    _minEntry = new IndexEntryKey(ulong.MaxValue, long.MaxValue);
                    _maxEntry = new IndexEntryKey(ulong.MinValue, long.MinValue);
                }
                else
                {
                    // The last entry supplies the minimum key and the first entry the maximum key.
                    var minEntry = ReadEntry(_indexEntrySize, Count - 1, readerWorkItem, _version);
                    _minEntry = new IndexEntryKey(minEntry.Stream, minEntry.Version);
                    var maxEntry = ReadEntry(_indexEntrySize, 0, readerWorkItem, _version);
                    _maxEntry = new IndexEntryKey(maxEntry.Stream, maxEntry.Version);
                }
            } catch (Exception) {
                // Dispose this instance before letting the original exception propagate.
                Dispose();
                throw;
            } finally {
                // Runs on both paths, i.e. also after Dispose() in the catch above.
                ReturnWorkItem(readerWorkItem);
            }

            int calcdepth = 0;

            try {
                calcdepth  = GetDepth(_count * _indexEntrySize, depth);
                _midpoints = CacheMidpointsAndVerifyHash(calcdepth, skipIndexVerify);
            } catch (PossibleToHandleOutOfMemoryException) {
                // Not fatal: the error is logged and construction continues without assigning _midpoints.
                Log.Error(
                    "Unable to create midpoints for PTable '{pTable}' ({count} entries, depth {depth} requested). "
                    + "Performance hit will occur. OOM Exception.", Path.GetFileName(Filename), Count, depth);
            }

            Log.Debug(
                "Loading PTable (Version: {version}) '{pTable}' ({count} entries, cache depth {depth}) done in {elapsed}.",
                _version, Path.GetFileName(Filename), Count, calcdepth, sw.Elapsed);
        }
Example #7
0
        /// <summary>
        /// Builds the in-memory midpoint cache for this PTable and, unless <paramref name="skipIndexVerify"/> is set,
        /// hashes the file with MD5 along the way and hands the computed and stored hashes to <c>ValidateHash</c>.
        /// </summary>
        /// <param name="depth">Requested cache depth, 0..30. The number of midpoints is
        /// <c>max(2, min(2^depth, Count))</c>.</param>
        /// <param name="skipIndexVerify">When true no hashing is performed; for tables of version IndexV4 or later the
        /// midpoints cached inside the file are loaded directly if their count equals the required count.</param>
        /// <returns>The populated midpoint list, or <c>null</c> when the table is empty or
        /// <paramref name="depth"/> is 0.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="depth"/> is outside 0..30.</exception>
        /// <exception cref="PossibleToHandleOutOfMemoryException">The midpoint list could not be allocated.</exception>
        /// <exception cref="CorruptIndexException">Midpoints are out of order, or the file ended before the
        /// requested bytes could be read.</exception>
        internal UnmanagedMemoryAppendOnlyList <Midpoint> CacheMidpointsAndVerifyHash(int depth, bool skipIndexVerify)
        {
            if (depth < 0 || depth > 30)
            {
                throw new ArgumentOutOfRangeException(nameof(depth));
            }

            var count = Count;

            if (count == 0 || depth == 0)
            {
                return(null);
            }

            if (skipIndexVerify)
            {
                Log.Debug("Disabling Verification of PTable");
            }

            var buffer = new byte[4096];

            Stream   stream   = null;
            WorkItem workItem = null;

            // On Unix/Mac a pooled work item supplies the stream; elsewhere a dedicated unbuffered stream is opened.
            if (Runtime.IsUnixOrMac)
            {
                workItem = GetWorkItem();
                stream   = workItem.Stream;
            }
            else
            {
                stream = UnbufferedFileStream.Create(_filename, FileMode.Open, FileAccess.Read, FileShare.Read, false,
                                                     4096, 4096, false, 4096);
            }

            UnmanagedMemoryAppendOnlyList <Midpoint> midpoints = null;

            try {
                using (MD5 md5 = MD5.Create()) {
                    int midpointsCount;
                    try {
                        midpointsCount = (int)Math.Max(2L, Math.Min((long)1 << depth, count));
                        midpoints      = new UnmanagedMemoryAppendOnlyList <Midpoint>(midpointsCount);
                    } catch (OutOfMemoryException exc) {
                        // Re-wrapped so the caller can tell this apart from other out-of-memory conditions.
                        throw new PossibleToHandleOutOfMemoryException("Failed to allocate memory for Midpoint cache.",
                                                                       exc);
                    }

                    if (skipIndexVerify && (_version >= PTableVersions.IndexV4))
                    {
                        if (_midpointsCached == midpointsCount)
                        {
                            //index verification is disabled and cached midpoints with the same depth requested are available
                            //so, we can load them directly from the PTable file
                            Log.Debug("Loading {midpointsCached} cached midpoints from PTable", _midpointsCached);
                            long startOffset = stream.Length - MD5Size - PTableFooter.GetSize(_version) -
                                               _midpointsCacheSize;
                            stream.Seek(startOffset, SeekOrigin.Begin);
                            for (int k = 0; k < (int)_midpointsCached; k++)
                            {
                                ReadFully(stream, buffer, _indexEntrySize);
                                IndexEntryKey key;
                                long          index;
                                if (_version == PTableVersions.IndexV4)
                                {
                                    // Record layout as read here: version at offset 0, stream at offset 8,
                                    // item index at offset 16.
                                    key = new IndexEntryKey(BitConverter.ToUInt64(buffer, 8),
                                                            BitConverter.ToInt64(buffer, 0));
                                    index = BitConverter.ToInt64(buffer, 8 + 8);
                                }
                                else
                                {
                                    throw new InvalidOperationException("Unknown PTable version: " + _version);
                                }

                                midpoints.Add(new Midpoint(key, index));
                                EnsureMidpointOrder(midpoints, k);
                            }

                            return(midpoints);
                        }
                        else
                        {
                            Log.Debug(
                                "Skipping loading of cached midpoints from PTable due to count mismatch, cached midpoints: {midpointsCached} / required midpoints: {midpointsCount}",
                                _midpointsCached, midpointsCount);
                        }
                    }

                    if (!skipIndexVerify)
                    {
                        // The header is the first thing fed into the hash.
                        stream.Seek(0, SeekOrigin.Begin);
                        ReadFully(stream, buffer, PTableHeader.Size);
                        md5.TransformBlock(buffer, 0, PTableHeader.Size, null, 0);
                    }

                    long previousNextIndex = long.MinValue;
                    var  previousKey       = new IndexEntryKey(long.MaxValue, long.MaxValue);
                    for (int k = 0; k < midpointsCount; ++k)
                    {
                        long nextIndex = GetMidpointIndex(k, count, midpointsCount);
                        if (previousNextIndex != nextIndex)
                        {
                            if (!skipIndexVerify)
                            {
                                // Hash everything up to the entry, then hash the key bytes we consume ourselves.
                                ReadUntilWithMd5(PTableHeader.Size + _indexEntrySize * nextIndex, stream, md5);
                                ReadFully(stream, buffer, _indexKeySize);
                                md5.TransformBlock(buffer, 0, _indexKeySize, null, 0);
                            }
                            else
                            {
                                stream.Seek(PTableHeader.Size + _indexEntrySize * nextIndex, SeekOrigin.Begin);
                                ReadFully(stream, buffer, _indexKeySize);
                            }

                            IndexEntryKey key;
                            if (_version == PTableVersions.IndexV1)
                            {
                                key = new IndexEntryKey(BitConverter.ToUInt32(buffer, 4),
                                                        BitConverter.ToInt32(buffer, 0));
                            }
                            else if (_version == PTableVersions.IndexV2)
                            {
                                key = new IndexEntryKey(BitConverter.ToUInt64(buffer, 4),
                                                        BitConverter.ToInt32(buffer, 0));
                            }
                            else
                            {
                                key = new IndexEntryKey(BitConverter.ToUInt64(buffer, 8),
                                                        BitConverter.ToInt64(buffer, 0));
                            }

                            midpoints.Add(new Midpoint(key, nextIndex));
                            previousNextIndex = nextIndex;
                            previousKey       = key;
                        }
                        else
                        {
                            // Same entry as the previous midpoint: reuse it instead of reading the file again.
                            midpoints.Add(new Midpoint(previousKey, previousNextIndex));
                        }

                        EnsureMidpointOrder(midpoints, k);
                    }

                    if (!skipIndexVerify)
                    {
                        ReadUntilWithMd5(stream.Length - MD5Size, stream, md5);
                        //verify hash (should be at stream.length - MD5Size)
                        md5.TransformFinalBlock(Empty.ByteArray, 0, 0);
                        var fileHash = new byte[MD5Size];
                        ReadFully(stream, fileHash, MD5Size);
                        ValidateHash(md5.Hash, fileHash);
                    }

                    return(midpoints);
                }
            } catch (PossibleToHandleOutOfMemoryException) {
                // Only the partially built list is released; the table itself is left alone.
                midpoints?.Dispose();
                throw;
            } catch {
                // Any other failure also disposes this table before the exception propagates.
                midpoints?.Dispose();
                Dispose();
                throw;
            } finally {
                if (Runtime.IsUnixOrMac)
                {
                    if (workItem != null)
                    {
                        ReturnWorkItem(workItem);
                    }
                }
                else
                {
                    if (stream != null)
                    {
                        stream.Dispose();
                    }
                }
            }

            // Fills destination[0..byteCount) from source. Stream.Read is allowed to return fewer bytes than
            // requested, so keep reading until the request is satisfied and fail loudly if the file ends early
            // rather than interpreting stale buffer contents.
            static void ReadFully(Stream source, byte[] destination, int byteCount)
            {
                int done = 0;
                while (done < byteCount)
                {
                    int got = source.Read(destination, done, byteCount - done);
                    if (got <= 0)
                    {
                        throw new CorruptIndexException(String.Format(
                                                            "Unexpected end of PTable file: expected {0} bytes but only {1} could be read.",
                                                            byteCount, done));
                    }

                    done += got;
                }
            }

            // Checks the midpoint at 'position' against its predecessor: keys must not increase and
            // item indexes must not decrease. The first midpoint has nothing to be compared with.
            static void EnsureMidpointOrder(UnmanagedMemoryAppendOnlyList <Midpoint> list, int position)
            {
                if (position <= 0)
                {
                    return;
                }

                var previous = list[position - 1];
                var current  = list[position];

                if (current.Key.GreaterThan(previous.Key))
                {
                    throw new CorruptIndexException(String.Format(
                                                        "Index entry key for midpoint {0} (stream: {1}, version: {2}) < index entry key for midpoint {3} (stream: {4}, version: {5})",
                                                        position - 1, previous.Key.Stream, previous.Key.Version, position,
                                                        current.Key.Stream, current.Key.Version));
                }
                else if (previous.ItemIndex > current.ItemIndex)
                {
                    throw new CorruptIndexException(String.Format(
                                                        "Item index for midpoint {0} ({1}) > Item index for midpoint {2} ({3})",
                                                        position - 1, previous.ItemIndex, position, current.ItemIndex));
                }
            }
        }
Example #8
0
 /// <summary>
 /// Creates a new list sized by <c>_maxCapacity</c> and stores it in <c>_list</c>.
 /// </summary>
 public void SetUp() =>
     _list = new UnmanagedMemoryAppendOnlyList <int>(_maxCapacity);
        /// <summary>
        /// Rebuilds <paramref name="midpoints"/> by reading the keys of the selected index entries back from
        /// <paramref name="fs"/>. The list is cleared first and the position of <paramref name="fs"/> is restored
        /// once all midpoints have been read.
        /// </summary>
        /// <param name="bs">Buffered writer; flushed before any key is read back through <paramref name="fs"/>.</param>
        /// <param name="fs">File stream the index entries are read from.</param>
        /// <param name="version">PTable version; only IndexV4 is accepted.</param>
        /// <param name="indexEntrySize">Size in bytes of one index entry, used as the stride when seeking.</param>
        /// <param name="numIndexEntries">Number of index entries, passed to <c>GetMidpointIndex</c>.</param>
        /// <param name="requiredMidpointCount">Number of midpoints to produce.</param>
        /// <param name="midpoints">Destination list.</param>
        /// <param name="ct">Checked once per midpoint.</param>
        /// <exception cref="InvalidOperationException"><paramref name="version"/> is not IndexV4.</exception>
        /// <exception cref="EndOfStreamException">The file ended before a complete key could be read.</exception>
        private static void ComputeMidpoints(BufferedStream bs, FileStream fs, byte version,
                                             int indexEntrySize, long numIndexEntries, long requiredMidpointCount, UnmanagedMemoryAppendOnlyList <Midpoint> midpoints,
                                             CancellationToken ct = default(CancellationToken))
        {
            if (version != PTableVersions.IndexV4)
            {
                throw new InvalidOperationException("Unknown PTable version: " + version);
            }

            int indexKeySize = IndexKeyV4Size;

            midpoints.Clear();
            bs.Flush();
            byte[] buffer = new byte[indexKeySize];

            // Remembered so the stream can be put back where the caller left it.
            var previousFileStreamPosition = fs.Position;

            long          previousIndex = -1;
            IndexEntryKey previousKey   = new IndexEntryKey(0, 0);

            for (int k = 0; k < requiredMidpointCount; k++)
            {
                ct.ThrowIfCancellationRequested();

                long index = GetMidpointIndex(k, numIndexEntries, requiredMidpointCount);
                if (index == previousIndex)
                {
                    // Same entry as the previous midpoint: reuse it instead of reading the file again.
                    midpoints.Add(new Midpoint(previousKey, previousIndex));
                }
                else
                {
                    fs.Seek(PTableHeader.Size + index * indexEntrySize, SeekOrigin.Begin);
                    ReadFully(fs, buffer, indexKeySize);
                    // Key layout as read here: version at offset 0, stream at offset 8.
                    IndexEntryKey key = new IndexEntryKey(BitConverter.ToUInt64(buffer, 8),
                                                          BitConverter.ToInt64(buffer, 0));
                    midpoints.Add(new Midpoint(key, index));
                    previousIndex = index;
                    previousKey   = key;
                }
            }

            fs.Seek(previousFileStreamPosition, SeekOrigin.Begin);

            // Fills destination[0..byteCount) from source. Stream.Read is allowed to return fewer bytes than
            // requested, so keep reading until the request is satisfied; a premature end of file is reported
            // instead of building a key from a partially filled buffer.
            static void ReadFully(Stream source, byte[] destination, int byteCount)
            {
                int done = 0;
                while (done < byteCount)
                {
                    int got = source.Read(destination, done, byteCount - done);
                    if (got <= 0)
                    {
                        throw new EndOfStreamException(String.Format(
                                                           "Unexpected end of PTable file: expected {0} bytes but only {1} could be read.",
                                                           byteCount, done));
                    }

                    done += got;
                }
            }
        }
Example #10
0
        /// <summary>
        /// Writes a scavenged copy of <paramref name="table"/> to <paramref name="outputFile"/>, keeping only the
        /// entries for which <paramref name="existsAt"/> returns true, and opens the result as a new PTable.
        /// </summary>
        /// <param name="table">Source table to scavenge.</param>
        /// <param name="outputFile">Path of the file to create; it is opened with <c>FileMode.CreateNew</c>.</param>
        /// <param name="upgradeHash">Passed through to the <c>EnumerableTable</c> that enumerates the source entries.</param>
        /// <param name="existsAt">Predicate deciding whether an entry is kept; also passed to the <c>EnumerableTable</c>.</param>
        /// <param name="readRecord">Passed through to the <c>EnumerableTable</c>.</param>
        /// <param name="version">PTable version to write.</param>
        /// <param name="spaceSaved">Set to <c>table._size</c> minus the new table's <c>_size</c>, or to 0 when no table is kept.</param>
        /// <param name="initialReaders">Passed to the PTable constructor for the new table.</param>
        /// <param name="maxReaders">Passed to the PTable constructor for the new table.</param>
        /// <param name="cacheDepth">Must be non-negative; used for the required midpoint count and passed to the PTable constructor.</param>
        /// <param name="skipIndexVerify">Passed to the PTable constructor for the new table.</param>
        /// <param name="ct">Checked once per enumerated entry and passed to <c>ComputeMidpoints</c>.</param>
        /// <returns>The new table, or <c>null</c> when no entry was dropped and <paramref name="version"/> is not
        /// greater than the source table's version.</returns>
        public static PTable Scavenged(PTable table, string outputFile, Func <string, ulong, ulong> upgradeHash,
                                       Func <IndexEntry, bool> existsAt, Func <IndexEntry, Tuple <string, bool> > readRecord, byte version,
                                       out long spaceSaved,
                                       int initialReaders, int maxReaders,
                                       int cacheDepth       = 16, bool skipIndexVerify = false,
                                       CancellationToken ct = default(CancellationToken))
        {
            Ensure.NotNull(table, "table");
            Ensure.NotNullOrEmpty(outputFile, "outputFile");
            Ensure.Nonnegative(cacheDepth, "cacheDepth");

            var indexEntrySize  = GetIndexEntrySize(version);
            var numIndexEntries = table.Count;

            // Sized for the source entry count, i.e. before any entry has been dropped.
            var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);

            Log.Debug("PTables scavenge started with {numIndexEntries} entries.", numIndexEntries);
            var  watch     = Stopwatch.StartNew();
            long keptCount = 0L;
            long droppedCount;

            try {
                using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                              DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
                    f.SetLength(fileSizeUpToIndexEntries);
                    f.Seek(0, SeekOrigin.Begin);

                    // Writes go bs -> cs -> f; cs is constructed with md5, whose Hash is written to the file at the end.
                    using (var md5 = MD5.Create())
                        using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                            using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                                // WRITE HEADER
                                var headerBytes = new PTableHeader(version).AsByteArray();
                                cs.Write(headerBytes, 0, headerBytes.Length);

                                // WRITE SCAVENGED INDEX ENTRIES
                                var buffer = new byte[indexEntrySize];
                                using (var enumerator =
                                           new EnumerableTable(version, table, upgradeHash, existsAt, readRecord)) {
                                    while (enumerator.MoveNext())
                                    {
                                        ct.ThrowIfCancellationRequested();
                                        if (existsAt(enumerator.Current))
                                        {
                                            AppendRecordTo(bs, buffer, version, enumerator.Current, indexEntrySize);
                                            keptCount++;
                                        }
                                    }
                                }

                                // We calculate this as the EnumerableTable can silently drop entries too.
                                droppedCount = numIndexEntries - keptCount;

                                // A version upgrade is worth keeping even when nothing was removed.
                                var forceKeep = version > table.Version;

                                if (droppedCount == 0 && !forceKeep)
                                {
                                    Log.Debug(
                                        "PTable scavenge finished in {elapsed}. No entries removed so not keeping scavenged table.",
                                        watch.Elapsed);

                                    try {
                                        // Presumably closing bs also closes cs and f so that the delete can succeed - TODO confirm.
                                        bs.Close();
                                        File.Delete(outputFile);
                                    } catch (Exception ex) {
                                        // Best effort: a failed delete is logged and the method still returns null.
                                        Log.Error(ex, "Unable to delete unwanted scavenged PTable: {outputFile}",
                                                  outputFile);
                                    }

                                    spaceSaved = 0;
                                    return(null);
                                }

                                if (droppedCount == 0 && forceKeep)
                                {
                                    Log.Debug("Keeping scavenged index even though it isn't smaller; version upgraded.");
                                }

                                //CALCULATE AND WRITE MIDPOINTS
                                if (version >= PTableVersions.IndexV4)
                                {
                                    var requiredMidpointCount = GetRequiredMidpointCount(keptCount, version, cacheDepth);
                                    using var midpoints =
                                              new UnmanagedMemoryAppendOnlyList <Midpoint>(
                                                  (int)requiredMidpointCount + MidpointsOverflowSafetyNet);
                                    ComputeMidpoints(bs, f, version, indexEntrySize, keptCount,
                                                     requiredMidpointCount, midpoints, ct);
                                    // keptCount is passed as both the dumped entry count and the number of index entries.
                                    WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, keptCount, keptCount,
                                                     requiredMidpointCount, midpoints);
                                }

                                // Push everything through the hash before md5.Hash is read below.
                                bs.Flush();
                                cs.FlushFinalBlock();

                                f.FlushToDisk();
                                // Make the file end exactly MD5Size bytes after the current position.
                                f.SetLength(f.Position + MD5Size);

                                // WRITE MD5
                                // Written to f directly rather than through cs.
                                var hash = md5.Hash;
                                f.Write(hash, 0, hash.Length);
                                f.FlushToDisk();
                            }
                }

                Log.Debug(
                    "PTable scavenge finished in {elapsed} ({droppedCount} entries removed, {keptCount} remaining).",
                    watch.Elapsed,
                    droppedCount, keptCount);
                var scavengedTable = new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth, skipIndexVerify);
                spaceSaved = table._size - scavengedTable._size;
                return(scavengedTable);
            } catch (Exception) {
                // Any failure (cancellation included) removes the output file, best effort, and is then rethrown.
                // NOTE(review): the FileStream constructor is inside this try, so if FileMode.CreateNew fails because
                // outputFile already exists, that pre-existing file is deleted here - confirm this is intended.
                try {
                    File.Delete(outputFile);
                } catch (Exception ex) {
                    Log.Error(ex, "Unable to delete unwanted scavenged PTable: {outputFile}", outputFile);
                }

                throw;
            }
        }