public void throw_argument_exception() {
	Assert.Throws<ArgumentException>(() => { var unused = new UnmanagedMemoryAppendOnlyList<int>(0); });
	Assert.Throws<ArgumentException>(() => { var unused = new UnmanagedMemoryAppendOnlyList<int>(-1); });
	Assert.Throws<ArgumentException>(() => { var unused = new UnmanagedMemoryAppendOnlyList<int>(-2); });
}
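// A minimal sketch of the validation the tests above exercise: the list rejects any
// non-positive capacity up front. This guard class is hypothetical and for illustration
// only; the real UnmanagedMemoryAppendOnlyList<T> additionally allocates unmanaged
// memory for up to maxCapacity items.
sealed class AppendOnlyListGuardSketch {
	private readonly int _maxCapacity;

	public AppendOnlyListGuardSketch(int maxCapacity) {
		// same contract the tests assert: 0, -1, -2, ... all throw ArgumentException
		if (maxCapacity <= 0)
			throw new ArgumentException("maxCapacity must be positive.", nameof(maxCapacity));
		_maxCapacity = maxCapacity;
	}
}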
private static void WriteMidpointsTo(BufferedStream bs, FileStream fs, byte version, int indexEntrySize,
	byte[] buffer, long dumpedEntryCount, long numIndexEntries, long requiredMidpointCount,
	UnmanagedMemoryAppendOnlyList<Midpoint> midpoints) {
	// WRITE MIDPOINT ENTRIES
	// special case: when there is a single index entry, we need two midpoints
	if (numIndexEntries == 1 && midpoints.Count == 1) {
		midpoints.Add(new Midpoint(midpoints[0].Key, midpoints[0].ItemIndex));
	}

	var midpointsWritten = 0;
	if (dumpedEntryCount == numIndexEntries && requiredMidpointCount == midpoints.Count) {
		// if these values don't match, something is wrong
		bs.Flush();
		long fileSizeUpToMidpointEntries = GetFileSizeUpToMidpointEntries(fs.Position, midpoints.Count, version);
		fs.SetLength(fileSizeUpToMidpointEntries);
		for (var i = 0; i < midpoints.Count; i++) {
			AppendMidpointRecordTo(bs, buffer, version, midpoints[i], indexEntrySize);
		}

		midpointsWritten = midpoints.Count;
		Log.Debug("Cached {midpointsWritten} index midpoints to PTable", midpointsWritten);
	} else {
		Log.Debug(
			"Not caching index midpoints to PTable due to count mismatch. Table entries: {numIndexEntries} / Dumped entries: {dumpedEntryCount}, Required midpoint count: {requiredMidpointCount} / Actual midpoint count: {midpoints}",
			numIndexEntries, dumpedEntryCount, requiredMidpointCount, midpoints.Count);
	}

	bs.Flush();
	fs.SetLength(fs.Position + PTableFooter.GetSize(version));
	var footerBytes = new PTableFooter(version, (uint)midpointsWritten).AsByteArray();
	bs.Write(footerBytes, 0, footerBytes.Length);
	bs.Flush();
}
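// For orientation: after WriteMidpointsTo has run (and the MD5 hash has been written by
// its callers), an IndexV4 PTable file is laid out as
//
//   [ PTableHeader | index entries | cached midpoints | PTableFooter | MD5 (16 bytes) ]
//
// which is why GetFileSizeUpToMidpointEntries is computed from fs.Position before the
// footer is appended, and why readers later locate the footer at
// fileLength - MD5Size - PTableFooter.GetSize(version).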
public static PTable FromMemtable(IMemTable table, string filename, int initialReaders, int maxReaders,
	int cacheDepth = 16, bool skipIndexVerify = false, bool useBloomFilter = true,
	int lruCacheSize = 1_000_000) {
	Ensure.NotNull(table, "table");
	Ensure.NotNullOrEmpty(filename, "filename");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	int indexEntrySize = GetIndexEntrySize(table.Version);
	long dumpedEntryCount = 0;

	var sw = Stopwatch.StartNew();
	using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
		DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
		var fileSize = GetFileSizeUpToIndexEntries(table.Count, table.Version);
		fs.SetLength(fileSize);
		fs.Seek(0, SeekOrigin.Begin);

		using (var bloomFilter = ConstructBloomFilter(useBloomFilter, filename, table.Count))
		using (var md5 = MD5.Create())
		using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
		using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
			// WRITE HEADER
			var headerBytes = new PTableHeader(table.Version).AsByteArray();
			cs.Write(headerBytes, 0, headerBytes.Length);

			// WRITE INDEX ENTRIES
			var buffer = new byte[indexEntrySize];
			var records = table.IterateAllInOrder();
			var requiredMidpointCount = GetRequiredMidpointCountCached(table.Count, table.Version, cacheDepth);
			using var midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>(
				(int)requiredMidpointCount + MidpointsOverflowSafetyNet);

			long indexEntry = 0L;
			ulong? previousHash = null;
			foreach (var rec in records) {
				AppendRecordTo(bs, buffer, table.Version, rec, indexEntrySize);
				dumpedEntryCount += 1;
				if (table.Version >= PTableVersions.IndexV4 &&
					IsMidpointIndex(indexEntry, table.Count, requiredMidpointCount)) {
					midpoints.Add(new Midpoint(new IndexEntryKey(rec.Stream, rec.Version), indexEntry));
				}

				// WRITE BLOOM FILTER ENTRY
				if (bloomFilter != null && rec.Stream != previousHash) {
					// we are creating a PTable of the same version as the Memtable,
					// therefore the hash is the right format
					var streamHash = rec.Stream;
					bloomFilter.Add(GetSpan(ref streamHash));
					previousHash = rec.Stream;
				}

				indexEntry++;
			}

			// WRITE MIDPOINTS
			if (table.Version >= PTableVersions.IndexV4) {
				var numIndexEntries = table.Count;
				if (dumpedEntryCount != numIndexEntries) {
					// if index entries have been removed, compute the midpoints again
					numIndexEntries = dumpedEntryCount;
					requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, table.Version, cacheDepth);
					ComputeMidpoints(bs, fs, table.Version, indexEntrySize, numIndexEntries,
						requiredMidpointCount, midpoints);
				}

				WriteMidpointsTo(bs, fs, table.Version, indexEntrySize, buffer, dumpedEntryCount,
					numIndexEntries, requiredMidpointCount, midpoints);
			}

			bloomFilter?.Flush();
			bs.Flush();
			cs.FlushFinalBlock();

			// WRITE MD5
			var hash = md5.Hash;
			fs.SetLength(fs.Position + MD5Size);
			fs.Write(hash, 0, hash.Length);
			fs.FlushToDisk();
		}
	}

	Log.Debug("Dumped MemTable [{id}, {table} entries] in {elapsed}.", table.Id, table.Count, sw.Elapsed);
	return new PTable(filename, table.Id, initialReaders, maxReaders, cacheDepth, skipIndexVerify,
		useBloomFilter, lruCacheSize);
}
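// A usage sketch for FromMemtable. The HashListMemTable constructor and Add signature
// used here are assumptions based on how memtables appear elsewhere in this codebase;
// treat this as illustrative, not canonical.
public static void FromMemtableUsageSketch() {
	var memTable = new HashListMemTable(PTableVersions.IndexV4, 1_000_000);
	memTable.Add(0x0101UL, 0, 0xDEAD); // (stream hash, event number, log position)
	memTable.Add(0x0101UL, 1, 0xBEEF);

	var filename = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + ".ptable");
	var ptable = PTable.FromMemtable(memTable, filename, initialReaders: 1, maxReaders: 4);
	// ptable.Count == 2; entries are stored on disk in descending key order
	ptable.Dispose();
}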
private static PTable MergeTo2<TStreamId>(IList<PTable> tables, long numIndexEntries, int indexEntrySize,
	string outputFile, Func<TStreamId, ulong, ulong> upgradeHash, Func<IndexEntry, bool> existsAt,
	Func<IndexEntry, Tuple<TStreamId, bool>> readRecord, byte version, int initialReaders, int maxReaders,
	int cacheDepth, bool skipIndexVerify, bool useBloomFilter, int lruCacheSize) {
	Log.Debug("PTables merge started (specialized for <= 2 tables).");
	var watch = Stopwatch.StartNew();

	var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
	var enumerators = tables
		.Select(table => new EnumerableTable<TStreamId>(version, table, upgradeHash, existsAt, readRecord))
		.ToList();
	try {
		long dumpedEntryCount = 0;
		using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
			DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
			f.SetLength(fileSizeUpToIndexEntries);
			f.Seek(0, SeekOrigin.Begin);

			using (var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count)))
			using (var md5 = MD5.Create())
			using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
			using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
				// WRITE HEADER
				var headerBytes = new PTableHeader(version).AsByteArray();
				cs.Write(headerBytes, 0, headerBytes.Length);

				// WRITE INDEX ENTRIES
				var buffer = new byte[indexEntrySize];
				long indexEntry = 0L;

				var requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
				using var midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>(
					(int)requiredMidpointCount + MidpointsOverflowSafetyNet);

				var enum1 = enumerators[0];
				var enum2 = enumerators[1];
				bool available1 = enum1.MoveNext();
				bool available2 = enum2.MoveNext();
				IndexEntry current;
				ulong? previousHash = null;
				while (available1 || available2) {
					var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version,
						enum1.Current.Position);
					var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version,
						enum2.Current.Position);

					if (available1 && (!available2 || entry1.CompareTo(entry2) > 0)) {
						current = entry1;
						available1 = enum1.MoveNext();
					} else {
						current = entry2;
						available2 = enum2.MoveNext();
					}

					AppendRecordTo(bs, buffer, version, current, indexEntrySize);
					if (version >= PTableVersions.IndexV4 &&
						IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount)) {
						midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version),
							indexEntry));
					}

					// WRITE BLOOM FILTER ENTRY
					if (bloomFilter != null && current.Stream != previousHash) {
						// upgradeHash has already ensured the hash is in the right format for the target
						var streamHash = current.Stream;
						bloomFilter.Add(GetSpan(ref streamHash));
						previousHash = current.Stream;
					}

					indexEntry++;
					dumpedEntryCount++;
				}

				// WRITE MIDPOINTS
				if (version >= PTableVersions.IndexV4) {
					if (dumpedEntryCount != numIndexEntries) {
						// if index entries have been removed, compute the midpoints again
						numIndexEntries = dumpedEntryCount;
						requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
						ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
							requiredMidpointCount, midpoints);
					}

					WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount,
						numIndexEntries, requiredMidpointCount, midpoints);
				}

				bloomFilter?.Flush();
				bs.Flush();
				cs.FlushFinalBlock();

				f.SetLength(f.Position + MD5Size);

				// WRITE MD5
				var hash = md5.Hash;
				f.Write(hash, 0, hash.Length);
				f.FlushToDisk();
			}
		}

		Log.Debug(
			"PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
			watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
		return new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth,
			skipIndexVerify, useBloomFilter, lruCacheSize);
	} finally {
		foreach (var enumerator in enumerators) {
			enumerator.Dispose();
		}
	}
}
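// The loop above is a classic two-way merge over sequences sorted in descending order:
// whichever head compares greater is emitted and its enumerator advanced. A
// self-contained sketch of the same selection rule over plain arrays (hypothetical
// helper, for illustration only):
static long[] MergeDescendingSketch(long[] a, long[] b) {
	var result = new long[a.Length + b.Length];
	int i = 0, j = 0, k = 0;
	while (i < a.Length || j < b.Length) {
		// take from 'a' while it is available and its head is the larger one,
		// mirroring "available1 && (!available2 || entry1.CompareTo(entry2) > 0)"
		if (i < a.Length && (j >= b.Length || a[i] > b[j]))
			result[k++] = a[i++];
		else
			result[k++] = b[j++];
	}
	return result;
}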
public static PTable MergeTo<TStreamId>(IList<PTable> tables, string outputFile,
	Func<TStreamId, ulong, ulong> upgradeHash, Func<IndexEntry, bool> existsAt,
	Func<IndexEntry, Tuple<TStreamId, bool>> readRecord, byte version, int initialReaders, int maxReaders,
	int cacheDepth = 16, bool skipIndexVerify = false, bool useBloomFilter = true,
	int lruCacheSize = 1_000_000) {
	Ensure.NotNull(tables, "tables");
	Ensure.NotNullOrEmpty(outputFile, "outputFile");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	var indexEntrySize = GetIndexEntrySize(version);

	long numIndexEntries = 0;
	for (var i = 0; i < tables.Count; i++) {
		numIndexEntries += tables[i].Count;
	}

	var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
	if (tables.Count == 2) {
		return MergeTo2(tables, numIndexEntries, indexEntrySize, outputFile, upgradeHash, existsAt,
			readRecord, version, initialReaders, maxReaders, cacheDepth, skipIndexVerify,
			useBloomFilter, lruCacheSize); // special case
	}

	Log.Debug("PTables merge started.");
	var watch = Stopwatch.StartNew();

	var enumerators = tables
		.Select(table => new EnumerableTable<TStreamId>(version, table, upgradeHash, existsAt, readRecord))
		.ToList();
	try {
		for (int i = 0; i < enumerators.Count; i++) {
			if (!enumerators[i].MoveNext()) {
				enumerators[i].Dispose();
				enumerators.RemoveAt(i);
				i--;
			}
		}

		long dumpedEntryCount = 0;
		using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
			DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
			f.SetLength(fileSizeUpToIndexEntries);
			f.Seek(0, SeekOrigin.Begin);

			using (var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count)))
			using (var md5 = MD5.Create())
			using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
			using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
				// WRITE HEADER
				var headerBytes = new PTableHeader(version).AsByteArray();
				cs.Write(headerBytes, 0, headerBytes.Length);

				var buffer = new byte[indexEntrySize];
				long indexEntry = 0L;

				var requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
				using var midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>(
					(int)requiredMidpointCount + MidpointsOverflowSafetyNet);

				// WRITE INDEX ENTRIES
				ulong? previousHash = null;
				while (enumerators.Count > 0) {
					var idx = GetMaxOf(enumerators);
					var current = enumerators[idx].Current;
					AppendRecordTo(bs, buffer, version, current, indexEntrySize);
					if (version >= PTableVersions.IndexV4 &&
						IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount)) {
						midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version),
							indexEntry));
					}

					// WRITE BLOOM FILTER ENTRY
					if (bloomFilter != null && current.Stream != previousHash) {
						// upgradeHash has already ensured the hash is in the right format for the target
						var streamHash = current.Stream;
						bloomFilter.Add(GetSpan(ref streamHash));
						previousHash = current.Stream;
					}

					indexEntry++;
					dumpedEntryCount++;
					if (!enumerators[idx].MoveNext()) {
						enumerators[idx].Dispose();
						enumerators.RemoveAt(idx);
					}
				}

				// WRITE MIDPOINTS
				if (version >= PTableVersions.IndexV4) {
					if (dumpedEntryCount != numIndexEntries) {
						// if index entries have been removed, compute the midpoints again
						numIndexEntries = dumpedEntryCount;
						requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
						ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
							requiredMidpointCount, midpoints);
					}

					WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount,
						numIndexEntries, requiredMidpointCount, midpoints);
				}

				bloomFilter?.Flush();
				bs.Flush();
				cs.FlushFinalBlock();

				f.FlushToDisk();
				f.SetLength(f.Position + MD5Size);

				// WRITE MD5
				var hash = md5.Hash;
				f.Write(hash, 0, hash.Length);
				f.FlushToDisk();
			}
		}

		Log.Debug(
			"PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
			watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
		return new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth,
			skipIndexVerify, useBloomFilter, lruCacheSize);
	} finally {
		foreach (var enumerableTable in enumerators) {
			enumerableTable.Dispose();
		}
	}
}
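// The general (k-way) merge differs from MergeTo2 only in how the next entry is chosen:
// GetMaxOf scans all live enumerators and returns the index of the one whose Current is
// largest. A hypothetical sketch of that selection over IEnumerator<long> heads (a
// linear scan, which is fine for the small table counts merged here; a heap would be
// preferable for large k):
static int GetMaxOfSketch(List<IEnumerator<long>> enumerators) {
	var maxIdx = 0;
	for (int i = 1; i < enumerators.Count; i++) {
		if (enumerators[i].Current > enumerators[maxIdx].Current)
			maxIdx = i;
	}
	return maxIdx;
}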
private PTable(string filename, Guid id, int initialReaders, int maxReaders, int depth = 16,
	bool skipIndexVerify = false) {
	Ensure.NotNullOrEmpty(filename, "filename");
	Ensure.NotEmptyGuid(id, "id");
	Ensure.Positive(maxReaders, "maxReaders");
	Ensure.Nonnegative(depth, "depth");

	if (!File.Exists(filename)) {
		throw new CorruptIndexException(new PTableNotFoundException(filename));
	}

	_id = id;
	_filename = filename;

	Log.Debug("Loading " + (skipIndexVerify ? "" : "and Verification ") + "of PTable '{pTable}' started...",
		Path.GetFileName(Filename));
	var sw = Stopwatch.StartNew();
	_size = new FileInfo(_filename).Length;

	File.SetAttributes(_filename, FileAttributes.ReadOnly | FileAttributes.NotContentIndexed);

	_workItems = new ObjectPool<WorkItem>(string.Format("PTable {0} work items", _id),
		initialReaders, maxReaders,
		() => new WorkItem(filename, DefaultBufferSize),
		workItem => workItem.Dispose(),
		pool => OnAllWorkItemsDisposed());

	var readerWorkItem = GetWorkItem();
	try {
		readerWorkItem.Stream.Seek(0, SeekOrigin.Begin);
		var header = PTableHeader.FromStream(readerWorkItem.Stream);
		if ((header.Version != PTableVersions.IndexV1) &&
			(header.Version != PTableVersions.IndexV2) &&
			(header.Version != PTableVersions.IndexV3) &&
			(header.Version != PTableVersions.IndexV4)) {
			throw new CorruptIndexException(new WrongFileVersionException(_filename, header.Version, Version));
		}

		_version = header.Version;

		if (_version == PTableVersions.IndexV1) {
			_indexEntrySize = IndexEntryV1Size;
			_indexKeySize = IndexKeyV1Size;
		}

		if (_version == PTableVersions.IndexV2) {
			_indexEntrySize = IndexEntryV2Size;
			_indexKeySize = IndexKeyV2Size;
		}

		if (_version == PTableVersions.IndexV3) {
			_indexEntrySize = IndexEntryV3Size;
			_indexKeySize = IndexKeyV3Size;
		}

		if (_version >= PTableVersions.IndexV4) {
			// read the PTable footer
			var previousPosition = readerWorkItem.Stream.Position;
			readerWorkItem.Stream.Seek(readerWorkItem.Stream.Length - MD5Size - PTableFooter.GetSize(_version),
				SeekOrigin.Begin);
			var footer = PTableFooter.FromStream(readerWorkItem.Stream);
			if (footer.Version != header.Version) {
				throw new CorruptIndexException(
					String.Format("PTable header/footer version mismatch: {0}/{1}", header.Version, footer.Version),
					new InvalidFileException("Invalid PTable file."));
			}

			if (_version == PTableVersions.IndexV4) {
				_indexEntrySize = IndexEntryV4Size;
				_indexKeySize = IndexKeyV4Size;
			} else {
				throw new InvalidOperationException("Unknown PTable version: " + _version);
			}

			_midpointsCached = footer.NumMidpointsCached;
			_midpointsCacheSize = _midpointsCached * _indexEntrySize;
			readerWorkItem.Stream.Seek(previousPosition, SeekOrigin.Begin);
		}

		long indexEntriesTotalSize =
			_size - PTableHeader.Size - _midpointsCacheSize - PTableFooter.GetSize(_version) - MD5Size;

		if (indexEntriesTotalSize < 0) {
			throw new CorruptIndexException(String.Format(
				"Total size of index entries < 0: {0}. _size: {1}, header size: {2}, _midpointsCacheSize: {3}, footer size: {4}, md5 size: {5}",
				indexEntriesTotalSize, _size, PTableHeader.Size, _midpointsCacheSize,
				PTableFooter.GetSize(_version), MD5Size));
		} else if (indexEntriesTotalSize % _indexEntrySize != 0) {
			throw new CorruptIndexException(String.Format(
				"Total size of index entries: {0} is not divisible by index entry size: {1}",
				indexEntriesTotalSize, _indexEntrySize));
		}

		_count = indexEntriesTotalSize / _indexEntrySize;

		if (_version >= PTableVersions.IndexV4 && _count > 0 && _midpointsCached > 0 && _midpointsCached < 2) {
			// if there is at least 1 index entry with version >= 4 and there are cached midpoints,
			// there should always be at least 2 midpoints cached
			throw new CorruptIndexException(String.Format(
				"Less than 2 midpoints cached in PTable. Index entries: {0}, Midpoints cached: {1}",
				_count, _midpointsCached));
		} else if (_count >= 2 && _midpointsCached > _count) {
			// if there are at least 2 index entries, the midpoint count should be at most the number of index entries
			throw new CorruptIndexException(String.Format(
				"More midpoints cached in PTable than index entries. Midpoints: {0} , Index entries: {1}",
				_midpointsCached, _count));
		}

		if (Count == 0) {
			_minEntry = new IndexEntryKey(ulong.MaxValue, long.MaxValue);
			_maxEntry = new IndexEntryKey(ulong.MinValue, long.MinValue);
		} else {
			// entries are stored in descending key order, so the minimum key is the last entry
			var minEntry = ReadEntry(_indexEntrySize, Count - 1, readerWorkItem, _version);
			_minEntry = new IndexEntryKey(minEntry.Stream, minEntry.Version);
			var maxEntry = ReadEntry(_indexEntrySize, 0, readerWorkItem, _version);
			_maxEntry = new IndexEntryKey(maxEntry.Stream, maxEntry.Version);
		}
	} catch (Exception) {
		Dispose();
		throw;
	} finally {
		ReturnWorkItem(readerWorkItem);
	}

	int calcdepth = 0;
	try {
		calcdepth = GetDepth(_count * _indexEntrySize, depth);
		_midpoints = CacheMidpointsAndVerifyHash(calcdepth, skipIndexVerify);
	} catch (PossibleToHandleOutOfMemoryException) {
		Log.Error(
			"Unable to create midpoints for PTable '{pTable}' ({count} entries, depth {depth} requested). " +
			"Performance hit will occur. OOM Exception.",
			Path.GetFileName(Filename), Count, depth);
	}

	Log.Debug(
		"Loading PTable (Version: {version}) '{pTable}' ({count} entries, cache depth {depth}) done in {elapsed}.",
		_version, Path.GetFileName(Filename), Count, calcdepth, sw.Elapsed);
}
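// Worked example of the size check above, with illustrative numbers. In IndexV4 an
// index entry is 24 bytes (8-byte stream hash + 8-byte version + 8-byte position), so
// for a file holding 1,000 entries with 2 cached midpoints:
//
//   indexEntriesTotalSize = _size - PTableHeader.Size - _midpointsCacheSize
//                           - PTableFooter.GetSize(_version) - MD5Size
//
// where _midpointsCacheSize = 2 * 24 = 48 and MD5Size = 16. A well-formed file must
// satisfy indexEntriesTotalSize % 24 == 0, giving _count = indexEntriesTotalSize / 24
// = 1,000; any remainder (or a negative total) means the file is corrupt.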
internal UnmanagedMemoryAppendOnlyList<Midpoint> CacheMidpointsAndVerifyHash(int depth, bool skipIndexVerify) {
	var buffer = new byte[4096];
	if (depth < 0 || depth > 30)
		throw new ArgumentOutOfRangeException("depth");
	var count = Count;
	if (count == 0 || depth == 0)
		return null;

	if (skipIndexVerify) {
		Log.Debug("Disabling Verification of PTable");
	}

	Stream stream = null;
	WorkItem workItem = null;
	if (Runtime.IsUnixOrMac) {
		workItem = GetWorkItem();
		stream = workItem.Stream;
	} else {
		stream = UnbufferedFileStream.Create(_filename, FileMode.Open, FileAccess.Read, FileShare.Read,
			false, 4096, 4096, false, 4096);
	}

	UnmanagedMemoryAppendOnlyList<Midpoint> midpoints = null;
	try {
		using (MD5 md5 = MD5.Create()) {
			int midpointsCount;
			try {
				midpointsCount = (int)Math.Max(2L, Math.Min((long)1 << depth, count));
				midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>(midpointsCount);
			} catch (OutOfMemoryException exc) {
				throw new PossibleToHandleOutOfMemoryException("Failed to allocate memory for Midpoint cache.", exc);
			}

			if (skipIndexVerify && (_version >= PTableVersions.IndexV4)) {
				if (_midpointsCached == midpointsCount) {
					// index verification is disabled and cached midpoints with the same depth
					// requested are available, so we can load them directly from the PTable file
					Log.Debug("Loading {midpointsCached} cached midpoints from PTable", _midpointsCached);
					long startOffset = stream.Length - MD5Size - PTableFooter.GetSize(_version) - _midpointsCacheSize;
					stream.Seek(startOffset, SeekOrigin.Begin);
					for (int k = 0; k < (int)_midpointsCached; k++) {
						stream.Read(buffer, 0, _indexEntrySize);
						IndexEntryKey key;
						long index;
						if (_version == PTableVersions.IndexV4) {
							key = new IndexEntryKey(BitConverter.ToUInt64(buffer, 8),
								BitConverter.ToInt64(buffer, 0));
							index = BitConverter.ToInt64(buffer, 8 + 8);
						} else {
							throw new InvalidOperationException("Unknown PTable version: " + _version);
						}

						midpoints.Add(new Midpoint(key, index));

						if (k > 0) {
							if (midpoints[k].Key.GreaterThan(midpoints[k - 1].Key)) {
								throw new CorruptIndexException(String.Format(
									"Index entry key for midpoint {0} (stream: {1}, version: {2}) < index entry key for midpoint {3} (stream: {4}, version: {5})",
									k - 1, midpoints[k - 1].Key.Stream, midpoints[k - 1].Key.Version,
									k, midpoints[k].Key.Stream, midpoints[k].Key.Version));
							} else if (midpoints[k - 1].ItemIndex > midpoints[k].ItemIndex) {
								throw new CorruptIndexException(String.Format(
									"Item index for midpoint {0} ({1}) > Item index for midpoint {2} ({3})",
									k - 1, midpoints[k - 1].ItemIndex, k, midpoints[k].ItemIndex));
							}
						}
					}

					return midpoints;
				} else {
					Log.Debug(
						"Skipping loading of cached midpoints from PTable due to count mismatch, cached midpoints: {midpointsCached} / required midpoints: {midpointsCount}",
						_midpointsCached, midpointsCount);
				}
			}

			if (!skipIndexVerify) {
				stream.Seek(0, SeekOrigin.Begin);
				stream.Read(buffer, 0, PTableHeader.Size);
				md5.TransformBlock(buffer, 0, PTableHeader.Size, null, 0);
			}

			long previousNextIndex = long.MinValue;
			var previousKey = new IndexEntryKey(long.MaxValue, long.MaxValue);
			for (int k = 0; k < midpointsCount; ++k) {
				long nextIndex = GetMidpointIndex(k, count, midpointsCount);
				if (previousNextIndex != nextIndex) {
					if (!skipIndexVerify) {
						ReadUntilWithMd5(PTableHeader.Size + _indexEntrySize * nextIndex, stream, md5);
						stream.Read(buffer, 0, _indexKeySize);
						md5.TransformBlock(buffer, 0, _indexKeySize, null, 0);
					} else {
						stream.Seek(PTableHeader.Size + _indexEntrySize * nextIndex, SeekOrigin.Begin);
						stream.Read(buffer, 0, _indexKeySize);
					}

					IndexEntryKey key;
					if (_version == PTableVersions.IndexV1) {
						key = new IndexEntryKey(BitConverter.ToUInt32(buffer, 4), BitConverter.ToInt32(buffer, 0));
					} else if (_version == PTableVersions.IndexV2) {
						key = new IndexEntryKey(BitConverter.ToUInt64(buffer, 4), BitConverter.ToInt32(buffer, 0));
					} else {
						key = new IndexEntryKey(BitConverter.ToUInt64(buffer, 8), BitConverter.ToInt64(buffer, 0));
					}

					midpoints.Add(new Midpoint(key, nextIndex));
					previousNextIndex = nextIndex;
					previousKey = key;
				} else {
					midpoints.Add(new Midpoint(previousKey, previousNextIndex));
				}

				if (k > 0) {
					if (midpoints[k].Key.GreaterThan(midpoints[k - 1].Key)) {
						throw new CorruptIndexException(String.Format(
							"Index entry key for midpoint {0} (stream: {1}, version: {2}) < index entry key for midpoint {3} (stream: {4}, version: {5})",
							k - 1, midpoints[k - 1].Key.Stream, midpoints[k - 1].Key.Version,
							k, midpoints[k].Key.Stream, midpoints[k].Key.Version));
					} else if (midpoints[k - 1].ItemIndex > midpoints[k].ItemIndex) {
						throw new CorruptIndexException(String.Format(
							"Item index for midpoint {0} ({1}) > Item index for midpoint {2} ({3})",
							k - 1, midpoints[k - 1].ItemIndex, k, midpoints[k].ItemIndex));
					}
				}
			}

			if (!skipIndexVerify) {
				ReadUntilWithMd5(stream.Length - MD5Size, stream, md5);

				// verify hash (should be at stream.Length - MD5Size)
				md5.TransformFinalBlock(Empty.ByteArray, 0, 0);
				var fileHash = new byte[MD5Size];
				stream.Read(fileHash, 0, MD5Size);
				ValidateHash(md5.Hash, fileHash);
			}

			return midpoints;
		}
	} catch (PossibleToHandleOutOfMemoryException) {
		midpoints?.Dispose();
		throw;
	} catch {
		midpoints?.Dispose();
		Dispose();
		throw;
	} finally {
		if (Runtime.IsUnixOrMac) {
			if (workItem != null)
				ReturnWorkItem(workItem);
		} else {
			if (stream != null)
				stream.Dispose();
		}
	}
}
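// Why midpoints matter: a lookup first binary-searches the in-memory midpoints to
// narrow the on-disk range, then only reads entries between the two surrounding item
// indexes. A hypothetical sketch of that narrowing step, assuming at least two
// midpoints and the descending key order the checks above enforce (only GreaterThan,
// which the code above uses, is assumed on IndexEntryKey):
static (long lo, long hi) NarrowRangeSketch(Midpoint[] midpoints, IndexEntryKey key) {
	int l = 0, r = midpoints.Length - 1;
	while (l < r - 1) {
		int m = l + (r - l) / 2;
		if (key.GreaterThan(midpoints[m].Key))
			r = m; // keys descend with index, so a larger key lies before midpoint m
		else
			l = m;
	}
	return (midpoints[l].ItemIndex, midpoints[r].ItemIndex);
}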
public void SetUp() {
	_list = new UnmanagedMemoryAppendOnlyList<int>(_maxCapacity);
}
private static void ComputeMidpoints(BufferedStream bs, FileStream fs, byte version,
	int indexEntrySize, long numIndexEntries, long requiredMidpointCount,
	UnmanagedMemoryAppendOnlyList<Midpoint> midpoints, CancellationToken ct = default(CancellationToken)) {
	int indexKeySize;
	if (version == PTableVersions.IndexV4)
		indexKeySize = IndexKeyV4Size;
	else
		throw new InvalidOperationException("Unknown PTable version: " + version);

	midpoints.Clear();
	bs.Flush();
	byte[] buffer = new byte[indexKeySize];

	var previousFileStreamPosition = fs.Position;

	long previousIndex = -1;
	IndexEntryKey previousKey = new IndexEntryKey(0, 0);

	for (int k = 0; k < requiredMidpointCount; k++) {
		ct.ThrowIfCancellationRequested();

		long index = GetMidpointIndex(k, numIndexEntries, requiredMidpointCount);
		if (index == previousIndex) {
			midpoints.Add(new Midpoint(previousKey, previousIndex));
		} else {
			fs.Seek(PTableHeader.Size + index * indexEntrySize, SeekOrigin.Begin);
			fs.Read(buffer, 0, indexKeySize);
			IndexEntryKey key = new IndexEntryKey(BitConverter.ToUInt64(buffer, 8),
				BitConverter.ToInt64(buffer, 0));
			midpoints.Add(new Midpoint(key, index));
			previousIndex = index;
			previousKey = key;
		}
	}

	fs.Seek(previousFileStreamPosition, SeekOrigin.Begin);
}
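// A small worked example of the midpoint placement this method relies on. With
// numIndexEntries = 10 and requiredMidpointCount = 4, GetMidpointIndex(k, ...) picks
// roughly evenly spaced entry indexes such as 0, 3, 6, 9: the first and last entries
// are always covered and the interior is sampled uniformly. The exact spacing comes
// from GetMidpointIndex (not shown here); the values above are illustrative. When two
// midpoints round to the same entry index, the "index == previousIndex" branch reuses
// the previous key instead of seeking and re-reading the file.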
public static PTable Scavenged(PTable table, string outputFile, Func<string, ulong, ulong> upgradeHash,
	Func<IndexEntry, bool> existsAt, Func<IndexEntry, Tuple<string, bool>> readRecord, byte version,
	out long spaceSaved, int initialReaders, int maxReaders, int cacheDepth = 16,
	bool skipIndexVerify = false, CancellationToken ct = default(CancellationToken)) {
	Ensure.NotNull(table, "table");
	Ensure.NotNullOrEmpty(outputFile, "outputFile");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	var indexEntrySize = GetIndexEntrySize(version);
	var numIndexEntries = table.Count;

	var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);

	Log.Debug("PTables scavenge started with {numIndexEntries} entries.", numIndexEntries);
	var watch = Stopwatch.StartNew();
	long keptCount = 0L;
	long droppedCount;

	try {
		using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
			DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
			f.SetLength(fileSizeUpToIndexEntries);
			f.Seek(0, SeekOrigin.Begin);

			using (var md5 = MD5.Create())
			using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
			using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
				// WRITE HEADER
				var headerBytes = new PTableHeader(version).AsByteArray();
				cs.Write(headerBytes, 0, headerBytes.Length);

				// WRITE SCAVENGED INDEX ENTRIES
				var buffer = new byte[indexEntrySize];
				using (var enumerator = new EnumerableTable(version, table, upgradeHash, existsAt, readRecord)) {
					while (enumerator.MoveNext()) {
						ct.ThrowIfCancellationRequested();
						if (existsAt(enumerator.Current)) {
							AppendRecordTo(bs, buffer, version, enumerator.Current, indexEntrySize);
							keptCount++;
						}
					}
				}

				// We calculate this as the EnumerableTable can silently drop entries too.
				droppedCount = numIndexEntries - keptCount;

				var forceKeep = version > table.Version;

				if (droppedCount == 0 && !forceKeep) {
					Log.Debug(
						"PTable scavenge finished in {elapsed}. No entries removed so not keeping scavenged table.",
						watch.Elapsed);

					try {
						bs.Close();
						File.Delete(outputFile);
					} catch (Exception ex) {
						Log.Error(ex, "Unable to delete unwanted scavenged PTable: {outputFile}", outputFile);
					}

					spaceSaved = 0;
					return null;
				}

				if (droppedCount == 0 && forceKeep) {
					Log.Debug("Keeping scavenged index even though it isn't smaller; version upgraded.");
				}

				// CALCULATE AND WRITE MIDPOINTS
				if (version >= PTableVersions.IndexV4) {
					var requiredMidpointCount = GetRequiredMidpointCount(keptCount, version, cacheDepth);
					using var midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>(
						(int)requiredMidpointCount + MidpointsOverflowSafetyNet);
					ComputeMidpoints(bs, f, version, indexEntrySize, keptCount,
						requiredMidpointCount, midpoints, ct);
					WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, keptCount, keptCount,
						requiredMidpointCount, midpoints);
				}

				bs.Flush();
				cs.FlushFinalBlock();

				f.FlushToDisk();
				f.SetLength(f.Position + MD5Size);

				// WRITE MD5
				var hash = md5.Hash;
				f.Write(hash, 0, hash.Length);
				f.FlushToDisk();
			}
		}

		Log.Debug(
			"PTable scavenge finished in {elapsed} ({droppedCount} entries removed, {keptCount} remaining).",
			watch.Elapsed, droppedCount, keptCount);
		var scavengedTable = new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders,
			cacheDepth, skipIndexVerify);
		spaceSaved = table._size - scavengedTable._size;
		return scavengedTable;
	} catch (Exception) {
		try {
			File.Delete(outputFile);
		} catch (Exception ex) {
			Log.Error(ex, "Unable to delete unwanted scavenged PTable: {outputFile}", outputFile);
		}

		throw;
	}
}
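// A hypothetical usage sketch for Scavenged: existsAt decides which index entries
// survive (here a stand-in predicate that keeps everything), and spaceSaved reports
// the byte difference between the old and new tables. All delegates below are
// illustrative stand-ins for the real log-backed checks; note Scavenged returns null
// when nothing was removed and no version upgrade forced a rewrite.
public static PTable ScavengeSketch(PTable table, string outputFile) {
	return PTable.Scavenged(
		table,
		outputFile,
		upgradeHash: (streamId, hash) => hash,              // keep hashes unchanged
		existsAt: entry => true,                            // stand-in: keep every entry
		readRecord: entry => Tuple.Create("stream", true),  // stand-in reader
		version: table.Version,
		spaceSaved: out var spaceSaved,
		initialReaders: 1,
		maxReaders: 4);
}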