/// <summary>
/// Opens an existing PTable file for read-only access: validates its existence, reads and checks
/// the header, pins a small scratch buffer, pre-opens one read stream per reading thread, and
/// attempts to build the midpoint cache for faster binary search.
/// </summary>
/// <param name="filename">Path to an existing PTable file; missing file => CorruptIndexException.</param>
/// <param name="id">Identity assigned to this PTable instance.</param>
/// <param name="bufferSize">Stored read-buffer size hint (note: the per-thread streams below use a fixed 16-byte buffer).</param>
/// <param name="maxReadingThreads">Number of pooled FileStreams to pre-open for concurrent readers.</param>
/// <param name="depth">Midpoint cache depth (2^depth midpoints).</param>
private PTable(string filename, Guid id, int bufferSize = 8096, int maxReadingThreads = TFConsts.ReadIndexReaderCount, int depth = 16) {
	if (!File.Exists(filename)) {
		throw new CorruptIndexException(new PTableNotFoundException(filename));
	}

	_id = id;
	// Payload size = file length minus fixed header and trailing MD5 hash.
	_size = new FileInfo(filename).Length - PTableHeader.Size - MD5Size;
	_filename = filename;
	// PTables are immutable once written; mark read-only and keep them out of the content indexer.
	File.SetAttributes(_filename, FileAttributes.ReadOnly);
	File.SetAttributes(_filename, FileAttributes.Temporary);
	File.SetAttributes(_filename, FileAttributes.NotContentIndexed);
	_bufferSize = bufferSize;
	_maxReadingThreads = maxReadingThreads;

	// 16 bytes = one index entry; pinned so unmanaged/unsafe reads can use a stable pointer.
	// NOTE(review): the GCHandle must be freed on dispose or this leaks a pinned object — confirm Dispose does so.
	_buffer = new byte[16];
	_bufferPtr = GCHandle.Alloc(_buffer, GCHandleType.Pinned);

	// Validate the header up front (throws on wrong magic/version).
	using (var stream = File.OpenRead(filename))
	using (var reader = new BinaryReader(stream)) {
		PTableHeader.FromStream(reader);
	}

	// Pre-open one random-access stream per reader thread and pool them.
	for (int i = 0; i < _maxReadingThreads; i++) {
		var s = new FileStream(_filename, FileMode.Open, FileAccess.Read, FileShare.Read, 16, FileOptions.RandomAccess);
		_streams.Enqueue(s);
	}

	// Midpoint cache is an optimization only — an OOM while building it is survivable.
	try {
		_midpoints = PopulateCache(depth);
	} catch (PossibleToHandleOutOfMemoryException) {
		Log.Info("Was unable to create midpoints for ptable. Performance hit possible OOM Exception.");
	}
}
/// <summary>
/// Dumps a MemTable to disk as a new PTable file: header, 16-byte index entries in order,
/// then an MD5 hash of everything written, and returns a PTable opened over the new file.
/// </summary>
/// <param name="table">Source MemTable; must not be null.</param>
/// <param name="filename">Destination file path; must not be null. Existing file is overwritten (FileMode.Create).</param>
/// <param name="cacheDepth">Midpoint cache depth passed to the PTable constructor.</param>
/// <returns>A PTable instance over the freshly written file.</returns>
public static PTable FromMemtable(IMemTable table, string filename, int cacheDepth = 16) {
	if (table == null) {
		throw new ArgumentNullException("table");
	}
	if (filename == null) {
		throw new ArgumentNullException("filename");
	}

	Log.Trace("Started dumping MemTable [{0}] into PTable...", table.Id);
	using (var f = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
		8096, FileOptions.SequentialScan)) {
		// Fix: cast Count to long before shifting — (table.Count << 4) was a 32-bit shift and
		// could overflow for very large tables. Each entry is 16 bytes (<< 4).
		f.SetLength(PTableHeader.Size + ((long)table.Count << 4) + MD5Size); // EXACT SIZE
		f.Seek(0, SeekOrigin.Begin);

		// Fix: MD5 is IDisposable and was previously never disposed; wrap it in a using
		// (consistent with the other FromMemtable overloads in this file).
		using (var md5 = MD5.Create()) {
			var buffer = new byte[16];
			// CryptoStream hashes every byte as it passes through to the file, so the trailing
			// MD5 covers header + entries exactly as written.
			using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
			using (var b = new BufferedStream(cs, 65536)) {
				// WRITE HEADER
				var headerBytes = new PTableHeader(Version).AsByteArray();
				cs.Write(headerBytes, 0, headerBytes.Length);

				// WRITE INDEX ENTRIES
				foreach (var record in table.IterateAllInOrder()) {
					var x = record;
					AppendRecordTo(b, x.Bytes, buffer);
				}
				b.Flush();
				cs.FlushFinalBlock(); // finalize the hash; no more hashed bytes may follow

				// WRITE MD5 (raw, after the hashed region)
				var hash = md5.Hash;
				f.Write(hash, 0, hash.Length);
			}
		}
		// (removed redundant f.Close(): the enclosing using disposes the stream)
		Log.Trace("Done dumping MemTable [{0}].", table.Id);
	}
	return (new PTable(filename, table.Id, depth: cacheDepth));
}
/// <summary>
/// Dumps a MemTable to disk as a new PTable file. Entry size is chosen from the table's
/// version (32-bit vs 64-bit hash entries). Layout: header, index entries in order, MD5 hash.
/// </summary>
/// <param name="table">Source MemTable; must not be null.</param>
/// <param name="filename">Destination path; must be non-empty. Existing file is overwritten.</param>
/// <param name="cacheDepth">Midpoint cache depth for the returned PTable; must be non-negative.</param>
/// <returns>A PTable opened over the freshly written file.</returns>
public static PTable FromMemtable(IMemTable table, string filename, int cacheDepth = 16) {
	Ensure.NotNull(table, "table");
	Ensure.NotNullOrEmpty(filename, "filename");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	// Entry size depends on whether stream hashes are 32-bit or 64-bit in this version.
	var indexEntrySize = table.Version == PTableVersions.Index32Bit
		? PTable.IndexEntry32Size
		: PTable.IndexEntry64Size;

	var sw = Stopwatch.StartNew();
	using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
		DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
		// (long) cast prevents 32-bit overflow of the size computation for large tables.
		fs.SetLength(PTableHeader.Size + indexEntrySize * (long)table.Count + MD5Size); // EXACT SIZE
		fs.Seek(0, SeekOrigin.Begin);

		// CryptoStream hashes everything written through it, so the trailing MD5 covers
		// header + entries exactly as laid out on disk.
		using (var md5 = MD5.Create())
		using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
		using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
			// WRITE HEADER
			var headerBytes = new PTableHeader(table.Version).AsByteArray();
			cs.Write(headerBytes, 0, headerBytes.Length);

			// WRITE INDEX ENTRIES
			var buffer = new byte[indexEntrySize]; // reusable scratch for serializing one entry
			foreach (var record in table.IterateAllInOrder()) {
				var rec = record;
				AppendRecordTo(bs, buffer, table.Version, rec, indexEntrySize);
			}
			bs.Flush();
			cs.FlushFinalBlock(); // finalize hash; subsequent writes go to fs directly

			// WRITE MD5
			var hash = md5.Hash;
			fs.Write(hash, 0, hash.Length);
		}
	}
	Log.Trace("Dumped MemTable [{0}, {1} entries] in {2}.", table.Id, table.Count, sw.Elapsed);
	return (new PTable(filename, table.Id, depth: cacheDepth));
}
/// <summary>
/// Dumps a MemTable to disk as a new PTable file using the fixed IndexEntrySize.
/// Layout: header, index entries in iteration order, trailing MD5 of the hashed region.
/// </summary>
/// <param name="table">Source MemTable; must not be null.</param>
/// <param name="filename">Destination path; must be non-empty. Existing file is overwritten.</param>
/// <param name="cacheDepth">Midpoint cache depth for the returned PTable; must be non-negative.</param>
/// <returns>A PTable opened over the freshly written file.</returns>
public static PTable FromMemtable(IMemTable table, string filename, int cacheDepth = 16) {
	Ensure.NotNull(table, "table");
	Ensure.NotNullOrEmpty(filename, "filename");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	var sw = Stopwatch.StartNew();
	using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
		DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
		// (long) cast prevents 32-bit overflow of the size computation for large tables.
		fs.SetLength(PTableHeader.Size + IndexEntrySize * (long)table.Count + MD5Size); // EXACT SIZE
		fs.Seek(0, SeekOrigin.Begin);

		// CryptoStream hashes all bytes written through it, so the trailing MD5 covers
		// header + entries exactly as written.
		using (var md5 = MD5.Create())
		using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
		using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
			// WRITE HEADER
			var headerBytes = new PTableHeader(Version).AsByteArray();
			cs.Write(headerBytes, 0, headerBytes.Length);

			// WRITE INDEX ENTRIES
			var buffer = new byte[IndexEntrySize]; // reusable scratch for one serialized entry
			foreach (var record in table.IterateAllInOrder()) {
				var rec = record;
				AppendRecordTo(bs, rec.Bytes, buffer);
			}
			bs.Flush();
			cs.FlushFinalBlock(); // finalize hash; later writes bypass the CryptoStream

			// WRITE MD5
			var hash = md5.Hash;
			fs.Write(hash, 0, hash.Length);
		}
	}
	Log.Trace("Dumped MemTable [{0}, {1} entries] in {2}.", table.Id, table.Count, sw.Elapsed);
	return new PTable(filename, table.Id, depth: cacheDepth);
}
/// <summary>
/// Creates a well-formed but empty PTable file of the requested total size: a valid header,
/// zero-filled index entries to pad out the file, and a trailing MD5 of the hashed region.
/// Useful as a tooling/diagnostic helper for fabricating PTable files.
/// </summary>
/// <param name="filename">Destination path; must be non-empty. Existing file is overwritten.</param>
/// <param name="ptableSize">Total target size of the file in bytes (header + entries + MD5).</param>
/// <param name="indexEntrySize">Size in bytes of a single index entry.</param>
/// <param name="cacheDepth">Validated for non-negativity only; not otherwise used here.</param>
public static void CreatePTableFile(string filename, long ptableSize, int indexEntrySize, int cacheDepth = 16) {
	Ensure.NotNullOrEmpty(filename, "filename");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	var stopwatch = Stopwatch.StartNew();
	var tableId = Guid.NewGuid();

	using (var output = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
		DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
		output.SetLength(ptableSize);
		output.Seek(0, SeekOrigin.Begin);

		// How many whole entries fit between the header and the trailing MD5.
		var entryCount = (ptableSize - PTableHeader.Size - PTable.MD5Size) / (long)indexEntrySize;

		// Route all writes through the hashing stream so the trailing MD5 matches the content.
		using (var md5 = MD5.Create())
		using (var hashingStream = new CryptoStream(output, md5, CryptoStreamMode.Write))
		using (var buffered = new BufferedStream(hashingStream, DefaultSequentialBufferSize)) {
			// Header first.
			var headerBytes = new PTableHeader(Version).AsByteArray();
			hashingStream.Write(headerBytes, 0, headerBytes.Length);

			// Then zero-filled entries — a fresh byte[] is already all zeroes.
			var zeroEntry = new byte[indexEntrySize];
			for (long n = 0; n < entryCount; n++) {
				buffered.Write(zeroEntry, 0, indexEntrySize);
			}

			buffered.Flush();
			hashingStream.FlushFinalBlock();

			// Finally the MD5 of everything hashed above, written raw.
			var hash = md5.Hash;
			output.Write(hash, 0, hash.Length);
		}
	}
	Console.WriteLine("Created PTable File[{0}, size of {1}] in {2}.", tableId, ptableSize, stopwatch.Elapsed);
}
/// <summary>
/// Opens an existing PTable file for reading: validates arguments and file existence, sets file
/// attributes, builds a pooled set of reader work items, verifies the header version, and builds
/// the midpoint cache (best effort).
/// </summary>
/// <param name="filename">Path to an existing PTable file; missing file => CorruptIndexException.</param>
/// <param name="id">Non-empty identity for this PTable.</param>
/// <param name="initialReaders">Work items to pre-create in the reader pool.</param>
/// <param name="maxReaders">Upper bound on pooled readers; must be positive.</param>
/// <param name="depth">Midpoint cache depth; must be non-negative.</param>
private PTable(string filename, Guid id, int initialReaders = ESConsts.PTableInitialReaderCount, int maxReaders = ESConsts.PTableMaxReaderCount, int depth = 16) {
	Ensure.NotNullOrEmpty(filename, "filename");
	Ensure.NotEmptyGuid(id, "id");
	Ensure.Positive(maxReaders, "maxReaders");
	Ensure.Nonnegative(depth, "depth");
	if (!File.Exists(filename)) {
		throw new CorruptIndexException(new PTableNotFoundException(filename));
	}

	_id = id;
	_filename = filename;

	var sw = Stopwatch.StartNew();
	Log.Trace("Loading PTable '{0}' started...", Filename);
	// Payload size = file length minus fixed header and trailing MD5 hash.
	_size = new FileInfo(_filename).Length - PTableHeader.Size - MD5Size;
	// PTables are immutable once written.
	File.SetAttributes(_filename, FileAttributes.ReadOnly | FileAttributes.NotContentIndexed);

	// Pool of file-stream work items shared by concurrent readers; the pool callback fires
	// once every work item has been disposed (used for safe teardown).
	_workItems = new ObjectPool<WorkItem>(string.Format("PTable {0} work items", _id),
		initialReaders,
		maxReaders,
		() => new WorkItem(filename, DefaultBufferSize),
		workItem => workItem.Dispose(),
		pool => OnAllWorkItemsDisposed());

	// Verify the on-disk header version before exposing the table; any failure disposes
	// the partially constructed instance so pooled streams are not leaked.
	var readerWorkItem = GetWorkItem();
	try {
		readerWorkItem.Stream.Seek(0, SeekOrigin.Begin);
		var header = PTableHeader.FromStream(readerWorkItem.Stream);
		if (header.Version != Version) {
			throw new CorruptIndexException(new WrongFileVersionException(_filename, header.Version, Version));
		}
	} catch (Exception) {
		Dispose();
		throw;
	} finally {
		ReturnWorkItem(readerWorkItem);
	}

	// Midpoint cache is an optimization only — an OOM while building it is survivable.
	try {
		_midpoints = CacheMidpoints(depth);
	} catch (PossibleToHandleOutOfMemoryException) {
		Log.Error("Was unable to create midpoints for PTable '{0}' ({1} entries, depth {2} requested). "
				  + "Performance hit possible. OOM Exception.", Filename, Count, depth);
	}

	Log.Trace("Loading PTable '{0}' ({1} entries, cache depth {2}) done in {3}.", Filename, Count, depth, sw.Elapsed);
}
/// <summary>
/// Specialized 2-way merge of exactly two PTables into a new PTable file. Streams both tables in
/// sorted (descending) order, writing index entries, optional V4 midpoints, an optional bloom
/// filter of stream hashes, and a trailing MD5. Entries may be dropped by the enumerators
/// (existsAt filtering), in which case midpoints are recomputed before being written.
/// </summary>
/// <param name="tables">Exactly two source PTables (caller guarantees the count).</param>
/// <param name="numIndexEntries">Combined entry count of the inputs (upper bound on output entries).</param>
/// <param name="indexEntrySize">On-disk size of one entry for the target version.</param>
/// <param name="outputFile">Destination path; must not already exist (FileMode.CreateNew).</param>
/// <param name="upgradeHash">Upgrades a 32-bit stream hash to the target format when needed.</param>
/// <param name="existsAt">Predicate used by the enumerators to skip dead entries.</param>
/// <param name="readRecord">Reads the record behind an entry (used during hash upgrade).</param>
/// <param name="version">Target PTable version byte.</param>
/// <returns>A PTable opened over the merged output file.</returns>
private static PTable MergeTo2<TStreamId>(IList<PTable> tables, long numIndexEntries, int indexEntrySize,
	string outputFile, Func<TStreamId, ulong, ulong> upgradeHash, Func<IndexEntry, bool> existsAt,
	Func<IndexEntry, Tuple<TStreamId, bool>> readRecord, byte version, int initialReaders, int maxReaders,
	int cacheDepth, bool skipIndexVerify, bool useBloomFilter, int lruCacheSize) {
	Log.Debug("PTables merge started (specialized for <= 2 tables).");
	var watch = Stopwatch.StartNew();

	var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
	var enumerators = tables
		.Select(table => new EnumerableTable<TStreamId>(version, table, upgradeHash, existsAt, readRecord)).ToList();
	try {
		long dumpedEntryCount = 0;
		using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
			DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
			f.SetLength(fileSizeUpToIndexEntries);
			f.Seek(0, SeekOrigin.Begin);

			using (var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count)))
			using (var md5 = MD5.Create())
			using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
			using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
				// WRITE HEADER
				var headerBytes = new PTableHeader(version).AsByteArray();
				cs.Write(headerBytes, 0, headerBytes.Length);

				// WRITE INDEX ENTRIES
				var buffer = new byte[indexEntrySize];
				long indexEntry = 0L;
				var requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
				// Off-heap midpoint list, slightly over-allocated as a safety net.
				using var midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>(
					(int)requiredMidpointCount + MidpointsOverflowSafetyNet);

				// Classic 2-way merge: always take the larger head (entries are sorted descending).
				var enum1 = enumerators[0];
				var enum2 = enumerators[1];
				bool available1 = enum1.MoveNext();
				bool available2 = enum2.MoveNext();
				IndexEntry current;
				ulong? previousHash = null;
				while (available1 || available2) {
					// NOTE(review): Current is read for both enumerators even when one is exhausted —
					// presumably EnumerableTable keeps returning its last value safely; verify.
					var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version, enum1.Current.Position);
					var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version, enum2.Current.Position);
					if (available1 && (!available2 || entry1.CompareTo(entry2) > 0)) {
						current = entry1;
						available1 = enum1.MoveNext();
					} else {
						current = entry2;
						available2 = enum2.MoveNext();
					}

					AppendRecordTo(bs, buffer, version, current, indexEntrySize);
					// Capture a midpoint when this output position is one of the sampled indexes.
					if (version >= PTableVersions.IndexV4 &&
						IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount)) {
						midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version), indexEntry));
					}

					// WRITE BLOOM FILTER ENTRY (dedup consecutive identical hashes — input is sorted)
					if (bloomFilter != null && current.Stream != previousHash) {
						// upgradeHash has already ensured the hash is in the right format for the target
						var streamHash = current.Stream;
						bloomFilter.Add(GetSpan(ref streamHash));
						previousHash = current.Stream;
					}

					indexEntry++;
					dumpedEntryCount++;
				}

				//WRITE MIDPOINTS
				if (version >= PTableVersions.IndexV4) {
					if (dumpedEntryCount != numIndexEntries) {
						//if index entries have been removed, compute the midpoints again
						numIndexEntries = dumpedEntryCount;
						requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
						ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
							requiredMidpointCount, midpoints);
					}
					WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries,
						requiredMidpointCount, midpoints);
				}

				bloomFilter?.Flush();
				bs.Flush();
				cs.FlushFinalBlock();

				// Trim the pre-allocated file to actual content, leaving room for the MD5.
				f.SetLength(f.Position + MD5Size);

				// WRITE MD5 (raw, after the hashed region)
				var hash = md5.Hash;
				f.Write(hash, 0, hash.Length);
				f.FlushToDisk();
			}
		}
		Log.Debug(
			"PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
			watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
		return (new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth,
			skipIndexVerify, useBloomFilter, lruCacheSize));
	} finally {
		// Always release the source-table enumerators (they hold reader resources).
		foreach (var enumerator in enumerators) {
			enumerator.Dispose();
		}
	}
}
/// <summary>
/// Dumps a MemTable to disk as a new PTable file, additionally building V4 midpoints and an
/// optional bloom filter of stream hashes alongside the index entries. Layout: header, entries
/// in iteration order, midpoints (V4+), trailing MD5.
/// </summary>
/// <param name="table">Source MemTable; must not be null.</param>
/// <param name="filename">Destination path; must be non-empty. Existing file is overwritten.</param>
/// <param name="initialReaders">Initial reader-pool size for the returned PTable.</param>
/// <param name="maxReaders">Maximum reader-pool size for the returned PTable.</param>
/// <param name="cacheDepth">Midpoint cache depth; must be non-negative.</param>
/// <param name="skipIndexVerify">Passed through to the PTable constructor.</param>
/// <param name="useBloomFilter">Whether to build a companion bloom filter file.</param>
/// <param name="lruCacheSize">Passed through to the PTable constructor.</param>
/// <returns>A PTable opened over the freshly written file.</returns>
public static PTable FromMemtable(IMemTable table, string filename, int initialReaders, int maxReaders,
	int cacheDepth = 16, bool skipIndexVerify = false, bool useBloomFilter = true, int lruCacheSize = 1_000_000) {
	Ensure.NotNull(table, "table");
	Ensure.NotNullOrEmpty(filename, "filename");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	int indexEntrySize = GetIndexEntrySize(table.Version);
	long dumpedEntryCount = 0;

	var sw = Stopwatch.StartNew();
	using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None,
		DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
		// Pre-size up to the entries region; midpoints + MD5 adjust the length later.
		var fileSize = GetFileSizeUpToIndexEntries(table.Count, table.Version);
		fs.SetLength(fileSize);
		fs.Seek(0, SeekOrigin.Begin);

		using (var bloomFilter = ConstructBloomFilter(useBloomFilter, filename, table.Count))
		using (var md5 = MD5.Create())
		using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
		using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
			// WRITE HEADER
			var headerBytes = new PTableHeader(table.Version).AsByteArray();
			cs.Write(headerBytes, 0, headerBytes.Length);

			// WRITE INDEX ENTRIES
			var buffer = new byte[indexEntrySize];
			var records = table.IterateAllInOrder();
			var requiredMidpointCount = GetRequiredMidpointCountCached(table.Count, table.Version, cacheDepth);
			// Off-heap midpoint list, slightly over-allocated as a safety net.
			using var midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>(
				(int)requiredMidpointCount + MidpointsOverflowSafetyNet);

			long indexEntry = 0L;
			ulong? previousHash = null;
			foreach (var rec in records) {
				AppendRecordTo(bs, buffer, table.Version, rec, indexEntrySize);
				dumpedEntryCount += 1;
				// Capture a midpoint when this output position is one of the sampled indexes.
				if (table.Version >= PTableVersions.IndexV4 &&
					IsMidpointIndex(indexEntry, table.Count, requiredMidpointCount)) {
					midpoints.Add(new Midpoint(new IndexEntryKey(rec.Stream, rec.Version), indexEntry));
				}

				// WRITE BLOOM FILTER ENTRY (dedup consecutive identical hashes — input is sorted)
				if (bloomFilter != null && rec.Stream != previousHash) {
					// we are creating a PTable of the same version as the Memtable,
					// therefore the hash is the right format
					var streamHash = rec.Stream;
					bloomFilter.Add(GetSpan(ref streamHash));
					previousHash = rec.Stream;
				}

				indexEntry++;
			}

			//WRITE MIDPOINTS
			if (table.Version >= PTableVersions.IndexV4) {
				var numIndexEntries = table.Count;
				if (dumpedEntryCount != numIndexEntries) {
					//if index entries have been removed, compute the midpoints again
					numIndexEntries = dumpedEntryCount;
					requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, table.Version, cacheDepth);
					ComputeMidpoints(bs, fs, table.Version, indexEntrySize, numIndexEntries,
						requiredMidpointCount, midpoints);
				}
				WriteMidpointsTo(bs, fs, table.Version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries,
					requiredMidpointCount, midpoints);
			}

			bloomFilter?.Flush();
			bs.Flush();
			cs.FlushFinalBlock();

			// WRITE MD5 (raw, after the hashed region; file trimmed to actual content first)
			var hash = md5.Hash;
			fs.SetLength(fs.Position + MD5Size);
			fs.Write(hash, 0, hash.Length);
			fs.FlushToDisk();
		}
	}
	Log.Debug("Dumped MemTable [{id}, {table} entries] in {elapsed}.", table.Id, table.Count, sw.Elapsed);
	return (new PTable(filename, table.Id, initialReaders, maxReaders, cacheDepth,
		skipIndexVerify, useBloomFilter, lruCacheSize));
}
/// <summary>
/// Opens an existing PTable file (V1–V4) for reading. Reads the header to determine the on-disk
/// version and entry/key sizes; for V4+ also reads the footer to learn how many midpoints are
/// cached on disk. Validates internal size arithmetic, captures min/max entry keys, then builds
/// (and optionally hash-verifies) the midpoint cache.
/// </summary>
/// <param name="filename">Path to an existing PTable file; missing file => CorruptIndexException.</param>
/// <param name="id">Non-empty identity for this PTable.</param>
/// <param name="initialReaders">Work items to pre-create in the reader pool.</param>
/// <param name="maxReaders">Upper bound on pooled readers; must be positive.</param>
/// <param name="depth">Requested midpoint cache depth; must be non-negative.</param>
/// <param name="skipIndexVerify">When true, skips the full-file hash verification pass.</param>
private PTable(string filename, Guid id, int initialReaders, int maxReaders, int depth = 16,
	bool skipIndexVerify = false) {
	Ensure.NotNullOrEmpty(filename, "filename");
	Ensure.NotEmptyGuid(id, "id");
	Ensure.Positive(maxReaders, "maxReaders");
	Ensure.Nonnegative(depth, "depth");
	if (!File.Exists(filename)) {
		throw new CorruptIndexException(new PTableNotFoundException(filename));
	}

	_id = id;
	_filename = filename;

	Log.Verbose("Loading " + (skipIndexVerify ? "" : "and Verification ") + "of PTable '{pTable}' started...",
		Path.GetFileName(Filename));
	var sw = Stopwatch.StartNew();
	// Full file length here; header/footer/MD5 are subtracted below when counting entries.
	_size = new FileInfo(_filename).Length;
	File.SetAttributes(_filename, FileAttributes.ReadOnly | FileAttributes.NotContentIndexed);

	// Pool of file-stream work items shared by concurrent readers.
	_workItems = new ObjectPool<WorkItem>(string.Format("PTable {0} work items", _id),
		initialReaders,
		maxReaders,
		() => new WorkItem(filename, DefaultBufferSize),
		workItem => workItem.Dispose(),
		pool => OnAllWorkItemsDisposed());

	var readerWorkItem = GetWorkItem();
	try {
		readerWorkItem.Stream.Seek(0, SeekOrigin.Begin);
		var header = PTableHeader.FromStream(readerWorkItem.Stream);
		// Only V1..V4 on-disk formats are readable by this build.
		if ((header.Version != PTableVersions.IndexV1) &&
			(header.Version != PTableVersions.IndexV2) &&
			(header.Version != PTableVersions.IndexV3) &&
			(header.Version != PTableVersions.IndexV4)) {
			throw new CorruptIndexException(new WrongFileVersionException(_filename, header.Version, Version));
		}
		_version = header.Version;

		// Per-version entry and key sizes (V4's are set below after footer validation).
		if (_version == PTableVersions.IndexV1) {
			_indexEntrySize = IndexEntryV1Size;
			_indexKeySize = IndexKeyV1Size;
		}
		if (_version == PTableVersions.IndexV2) {
			_indexEntrySize = IndexEntryV2Size;
			_indexKeySize = IndexKeyV2Size;
		}
		if (_version == PTableVersions.IndexV3) {
			_indexEntrySize = IndexEntryV3Size;
			_indexKeySize = IndexKeyV3Size;
		}

		if (_version >= PTableVersions.IndexV4) {
			//read the PTable footer
			var previousPosition = readerWorkItem.Stream.Position;
			readerWorkItem.Stream.Seek(readerWorkItem.Stream.Length - MD5Size - PTableFooter.GetSize(_version),
				SeekOrigin.Begin);
			var footer = PTableFooter.FromStream(readerWorkItem.Stream);
			if (footer.Version != header.Version) {
				throw new CorruptIndexException(
					String.Format("PTable header/footer version mismatch: {0}/{1}", header.Version, footer.Version),
					new InvalidFileException("Invalid PTable file."));
			}

			if (_version == PTableVersions.IndexV4) {
				_indexEntrySize = IndexEntryV4Size;
				_indexKeySize = IndexKeyV4Size;
			} else {
				// Defensive: the header check above should make this unreachable.
				throw new InvalidOperationException("Unknown PTable version: " + _version);
			}

			_midpointsCached = footer.NumMidpointsCached;
			_midpointsCacheSize = _midpointsCached * _indexEntrySize;
			readerWorkItem.Stream.Seek(previousPosition, SeekOrigin.Begin);
		}

		// Size sanity checks: the entries region must be non-negative and an exact multiple
		// of the entry size, otherwise the file is corrupt.
		long indexEntriesTotalSize =
			(_size - PTableHeader.Size - _midpointsCacheSize - PTableFooter.GetSize(_version) - MD5Size);

		if (indexEntriesTotalSize < 0) {
			throw new CorruptIndexException(String.Format(
				"Total size of index entries < 0: {0}. _size: {1}, header size: {2}, _midpointsCacheSize: {3}, footer size: {4}, md5 size: {5}",
				indexEntriesTotalSize, _size, PTableHeader.Size, _midpointsCacheSize,
				PTableFooter.GetSize(_version), MD5Size));
		} else if (indexEntriesTotalSize % _indexEntrySize != 0) {
			throw new CorruptIndexException(String.Format(
				"Total size of index entries: {0} is not divisible by index entry size: {1}",
				indexEntriesTotalSize, _indexEntrySize));
		}

		_count = indexEntriesTotalSize / _indexEntrySize;

		if (_version >= PTableVersions.IndexV4 && _count > 0 && _midpointsCached > 0 && _midpointsCached < 2) {
			//if there is at least 1 index entry with version>=4 and there are cached midpoints, there should always be at least 2 midpoints cached
			throw new CorruptIndexException(String.Format(
				"Less than 2 midpoints cached in PTable. Index entries: {0}, Midpoints cached: {1}",
				_count, _midpointsCached));
		} else if (_count >= 2 && _midpointsCached > _count) {
			//if there are at least 2 index entries, midpoints count should be at most the number of index entries
			throw new CorruptIndexException(String.Format(
				"More midpoints cached in PTable than index entries. Midpoints: {0} , Index entries: {1}",
				_midpointsCached, _count));
		}

		if (Count == 0) {
			// Empty table: min/max sentinels chosen so every real key falls between them.
			_minEntry = new IndexEntryKey(ulong.MaxValue, long.MaxValue);
			_maxEntry = new IndexEntryKey(ulong.MinValue, long.MinValue);
		} else {
			// Entries are stored largest-key-first: index 0 holds the max, Count-1 the min.
			var minEntry = ReadEntry(_indexEntrySize, Count - 1, readerWorkItem, _version);
			_minEntry = new IndexEntryKey(minEntry.Stream, minEntry.Version);
			var maxEntry = ReadEntry(_indexEntrySize, 0, readerWorkItem, _version);
			_maxEntry = new IndexEntryKey(maxEntry.Stream, maxEntry.Version);
		}
	} catch (Exception) {
		// Any validation failure disposes the partially-built instance (releases the pool).
		Dispose();
		throw;
	} finally {
		ReturnWorkItem(readerWorkItem);
	}

	// Midpoint cache (plus optional full hash verification) is best effort — OOM is survivable.
	int calcdepth = 0;
	try {
		calcdepth = GetDepth(_count * _indexEntrySize, depth);
		_midpoints = CacheMidpointsAndVerifyHash(calcdepth, skipIndexVerify);
	} catch (PossibleToHandleOutOfMemoryException) {
		Log.Error(
			"Unable to create midpoints for PTable '{pTable}' ({count} entries, depth {depth} requested). " +
			"Performance hit will occur. OOM Exception.", Path.GetFileName(Filename), Count, depth);
	}

	Log.Verbose(
		"Loading PTable (Version: {version}) '{pTable}' ({count} entries, cache depth {depth}) done in {elapsed}.",
		_version, Path.GetFileName(Filename), Count, calcdepth, sw.Elapsed);
}
/// <summary>
/// K-way merge of any number of PTables into a new PTable file. Delegates to the specialized
/// MergeTo2 when exactly two tables are given. Streams entries in sorted (descending) order via
/// repeated max-selection across enumerators, writing index entries, V4 midpoints, an optional
/// bloom filter, and a trailing MD5.
/// </summary>
/// <param name="tables">Source PTables; must not be null.</param>
/// <param name="outputFile">Destination path; must not already exist (FileMode.CreateNew).</param>
/// <param name="upgradeHash">Upgrades a 32-bit stream hash to the target format when needed.</param>
/// <param name="existsAt">Predicate used by the enumerators to skip dead entries.</param>
/// <param name="readRecord">Reads the record behind an entry (used during hash upgrade).</param>
/// <param name="version">Target PTable version byte.</param>
/// <returns>A PTable opened over the merged output file.</returns>
public static PTable MergeTo<TStreamId>(IList<PTable> tables, string outputFile,
	Func<TStreamId, ulong, ulong> upgradeHash, Func<IndexEntry, bool> existsAt,
	Func<IndexEntry, Tuple<TStreamId, bool>> readRecord, byte version, int initialReaders, int maxReaders,
	int cacheDepth = 16, bool skipIndexVerify = false, bool useBloomFilter = true, int lruCacheSize = 1_000_000) {
	Ensure.NotNull(tables, "tables");
	Ensure.NotNullOrEmpty(outputFile, "outputFile");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	var indexEntrySize = GetIndexEntrySize(version);

	long numIndexEntries = 0;
	for (var i = 0; i < tables.Count; i++) {
		numIndexEntries += tables[i].Count;
	}

	var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
	if (tables.Count == 2) {
		return (MergeTo2(tables, numIndexEntries, indexEntrySize, outputFile, upgradeHash, existsAt,
			readRecord, version, initialReaders, maxReaders, cacheDepth, skipIndexVerify,
			useBloomFilter, lruCacheSize)); // special case
	}

	Log.Debug("PTables merge started.");
	var watch = Stopwatch.StartNew();

	var enumerators = tables
		.Select(table => new EnumerableTable<TStreamId>(version, table, upgradeHash, existsAt, readRecord)).ToList();
	try {
		// Prime every enumerator; drop (and dispose) any that is already empty.
		for (int i = 0; i < enumerators.Count; i++) {
			if (!enumerators[i].MoveNext()) {
				enumerators[i].Dispose();
				enumerators.RemoveAt(i);
				i--;
			}
		}

		long dumpedEntryCount = 0;
		using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
			DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
			f.SetLength(fileSizeUpToIndexEntries);
			f.Seek(0, SeekOrigin.Begin);

			using (var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count)))
			using (var md5 = MD5.Create())
			using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
			using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
				// WRITE HEADER
				var headerBytes = new PTableHeader(version).AsByteArray();
				cs.Write(headerBytes, 0, headerBytes.Length);

				var buffer = new byte[indexEntrySize];
				long indexEntry = 0L;
				var requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
				// Off-heap midpoint list, slightly over-allocated as a safety net.
				using var midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>(
					(int)requiredMidpointCount + MidpointsOverflowSafetyNet);

				// WRITE INDEX ENTRIES — repeatedly take the max head among live enumerators.
				ulong? previousHash = null;
				while (enumerators.Count > 0) {
					var idx = GetMaxOf(enumerators);
					var current = enumerators[idx].Current;
					AppendRecordTo(bs, buffer, version, current, indexEntrySize);
					// Capture a midpoint when this output position is one of the sampled indexes.
					if (version >= PTableVersions.IndexV4 &&
						IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount)) {
						midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version), indexEntry));
					}

					// WRITE BLOOM FILTER ENTRY (dedup consecutive identical hashes — output is sorted)
					if (bloomFilter != null && current.Stream != previousHash) {
						// upgradeHash has already ensured the hash is in the right format for the target
						var streamHash = current.Stream;
						bloomFilter.Add(GetSpan(ref streamHash));
						previousHash = current.Stream;
					}

					indexEntry++;
					dumpedEntryCount++;
					// Advance the chosen enumerator; retire it when exhausted.
					if (!enumerators[idx].MoveNext()) {
						enumerators[idx].Dispose();
						enumerators.RemoveAt(idx);
					}
				}

				//WRITE MIDPOINTS
				if (version >= PTableVersions.IndexV4) {
					if (dumpedEntryCount != numIndexEntries) {
						//if index entries have been removed, compute the midpoints again
						numIndexEntries = dumpedEntryCount;
						requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
						ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
							requiredMidpointCount, midpoints);
					}
					WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries,
						requiredMidpointCount, midpoints);
				}

				bloomFilter?.Flush();
				bs.Flush();
				cs.FlushFinalBlock();
				f.FlushToDisk();

				// Trim the pre-allocated file to actual content, leaving room for the MD5.
				f.SetLength(f.Position + MD5Size);

				// WRITE MD5 (raw, after the hashed region)
				var hash = md5.Hash;
				f.Write(hash, 0, hash.Length);
				f.FlushToDisk();
			}
		}
		Log.Debug(
			"PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
			watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
		return (new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth,
			skipIndexVerify, useBloomFilter, lruCacheSize));
	} finally {
		// Always release any remaining enumerators (they hold reader resources).
		foreach (var enumerableTable in enumerators) {
			enumerableTable.Dispose();
		}
	}
}
/// <summary>
/// K-way merge of PTables into a new PTable file, dropping entries of deleted streams: once a
/// stream's tombstone (EventNumber.DeletedStream) is seen, subsequent entries for that stream
/// hash are skipped, except event 0 which is kept for hash-collision detection. Streams with
/// detected hash collisions are never filtered. Delegates to MergeTo2 for exactly two tables.
/// </summary>
/// <param name="tables">Source PTables; must not be null.</param>
/// <param name="outputFile">Destination path; must not already exist (FileMode.CreateNew).</param>
/// <param name="isHashCollision">Returns true when the entry's stream hash collides with another stream.</param>
/// <param name="cacheDepth">Midpoint cache depth for the returned PTable; must be non-negative.</param>
/// <returns>A PTable opened over the merged output file.</returns>
public static PTable MergeTo(IList<PTable> tables, string outputFile, Func<IndexEntry, bool> isHashCollision,
	int cacheDepth = 16) {
	Ensure.NotNull(tables, "tables");
	Ensure.NotNullOrEmpty(outputFile, "outputFile");
	Ensure.NotNull(isHashCollision, "isHashCollision");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	var enumerators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
	var fileSize = GetFileSize(tables); // approximate file size
	if (enumerators.Count == 2)
		return MergeTo2(enumerators, fileSize, outputFile, isHashCollision, cacheDepth); // special case

	Log.Trace("PTables merge started.");
	var watch = Stopwatch.StartNew();

	// Prime every enumerator; drop (and dispose) any that is already empty.
	for (int i = 0; i < enumerators.Count; i++) {
		if (!enumerators[i].MoveNext()) {
			enumerators[i].Dispose();
			enumerators.RemoveAt(i);
			i--;
		}
	}

	using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
		1024 * 1024, FileOptions.SequentialScan)) {
		f.SetLength(fileSize);
		f.Seek(0, SeekOrigin.Begin);

		using (var md5 = MD5.Create())
		using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
		using (var bs = new BufferedStream(cs, 65536)) {
			// WRITE HEADER
			var headerBytes = new PTableHeader(Version).AsByteArray();
			cs.Write(headerBytes, 0, headerBytes.Length);

			// Tracks the stream hash of the most recent tombstone seen (sorted input means
			// a stream's later entries arrive while lastDeleted still holds its hash).
			uint lastDeleted = uint.MaxValue;
			var buffer = new byte[IndexEntrySize];
			// WRITE INDEX ENTRIES — repeatedly take the max head among live enumerators.
			while (enumerators.Count > 0) {
				var idx = GetMaxOf(enumerators);
				var current = enumerators[idx].Current;
				if (current.Version == EventNumber.DeletedStream && !isHashCollision(current)) {
					// Tombstone on a non-colliding hash: keep it and start filtering the stream.
					lastDeleted = current.Stream;
					AppendRecordTo(bs, current.Bytes, buffer);
				} else {
					if (lastDeleted != current.Stream || current.Version == 0) // we keep 0th event for hash collision detection
						AppendRecordTo(bs, current.Bytes, buffer);
				}

				// Advance the chosen enumerator; retire it when exhausted.
				if (!enumerators[idx].MoveNext()) {
					enumerators[idx].Dispose();
					enumerators.RemoveAt(idx);
				}
			}

			bs.Flush();
			cs.FlushFinalBlock();

			// Trim the pre-allocated file to actual content, leaving room for the MD5.
			f.SetLength(f.Position + MD5Size);

			// WRITE MD5 (raw, after the hashed region)
			var hash = md5.Hash;
			f.Write(hash, 0, hash.Length);
		}
	}
	Log.Trace("PTables merge finished in " + watch.Elapsed);
	return new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth);
}
/// <summary>
/// K-way merge of PTables into a new PTable file, upgrading 32-bit stream hashes to 64-bit when
/// the target version requires it. Entries whose backing record no longer exists
/// (readRecord Item2 == false) are dropped. Delegates to MergeTo2 for exactly two tables.
/// </summary>
/// <param name="tables">Source PTables; must not be null.</param>
/// <param name="outputFile">Destination path; must not already exist (FileMode.CreateNew).</param>
/// <param name="upgradeHash">Maps (streamId, oldHash) to the upgraded 64-bit hash.</param>
/// <param name="readRecord">Resolves an entry to (streamId, exists).</param>
/// <param name="version">Target PTable version byte (decides entry size and hash upgrades).</param>
/// <param name="cacheDepth">Midpoint cache depth for the returned PTable; must be non-negative.</param>
/// <returns>A PTable opened over the merged output file.</returns>
public static PTable MergeTo(IList<PTable> tables, string outputFile,
	Func<string, ulong, ulong> upgradeHash, Func<IndexEntry, Tuple<string, bool>> readRecord,
	byte version, int cacheDepth = 16) {
	Ensure.NotNull(tables, "tables");
	Ensure.NotNullOrEmpty(outputFile, "outputFile");
	Ensure.Nonnegative(cacheDepth, "cacheDepth");

	// Entry size depends on whether the target stores 32-bit or 64-bit stream hashes.
	var indexEntrySize = version == PTableVersions.Index32Bit ? PTable.IndexEntry32Size : IndexEntry64Size;
	var fileSize = GetFileSize(tables, indexEntrySize); // approximate file size
	if (tables.Count == 2) {
		return (MergeTo2(tables, fileSize, indexEntrySize, outputFile, upgradeHash, readRecord,
			version, cacheDepth)); // special case
	}

	Log.Trace("PTables merge started.");
	var watch = Stopwatch.StartNew();

	var enumerators = tables
		.Select(table => new EnumerablePTable(table, table.IterateAllInOrder().GetEnumerator())).ToList();
	// Prime every enumerator; drop (and dispose) any that is already empty.
	for (int i = 0; i < enumerators.Count; i++) {
		if (!enumerators[i].MoveNext()) {
			enumerators[i].Dispose();
			enumerators.RemoveAt(i);
			i--;
		}
	}

	long dumpedEntryCount = 0;
	using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
		DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
		f.SetLength(fileSize);
		f.Seek(0, SeekOrigin.Begin);

		using (var md5 = MD5.Create())
		using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
		using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
			// WRITE HEADER
			var headerBytes = new PTableHeader(version).AsByteArray();
			cs.Write(headerBytes, 0, headerBytes.Length);

			var buffer = new byte[indexEntrySize];
			// WRITE INDEX ENTRIES — repeatedly take the max head among live enumerators.
			while (enumerators.Count > 0) {
				var idx = GetMaxOf(enumerators, version, upgradeHash, readRecord);
				var current = enumerators[idx].Current;
				var item = readRecord(current); //Possibly doing another read if the entry was read in GetMaxOf
				if (item.Item2) {
					// Record still exists: upgrade the hash if merging a 32-bit source into a 64-bit target.
					if (version == PTableVersions.Index64Bit &&
						enumerators[idx].Table.Version == PTableVersions.Index32Bit) {
						current.Stream = upgradeHash(item.Item1, current.Stream);
					}
					AppendRecordTo(bs, buffer, version, current, indexEntrySize);
					dumpedEntryCount += 1;
				}

				// Advance the chosen enumerator; retire it when exhausted.
				if (!enumerators[idx].MoveNext()) {
					enumerators[idx].Dispose();
					enumerators.RemoveAt(idx);
				}
			}

			bs.Flush();
			cs.FlushFinalBlock();
			f.FlushToDisk();

			// Trim the pre-allocated file to actual content, leaving room for the MD5.
			f.SetLength(f.Position + MD5Size);

			// WRITE MD5 (raw, after the hashed region)
			var hash = md5.Hash;
			f.Write(hash, 0, hash.Length);
			f.FlushToDisk();
		}
	}
	Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).",
		watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
	return (new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
}
/// <summary>
/// Specialized 2-way merge of exactly two sorted entry enumerators into a new PTable file,
/// applying the same deleted-stream filtering as the general MergeTo: after a tombstone
/// (EventNumber.DeletedStream) on a non-colliding hash, later entries for that stream hash are
/// skipped, except event 0 which is kept for hash-collision detection.
/// </summary>
/// <param name="enumerators">Exactly two primed-on-first-MoveNext enumerators (caller guarantees count).</param>
/// <param name="fileSize">Approximate output size used to pre-allocate the file.</param>
/// <param name="outputFile">Destination path; must not already exist (FileMode.CreateNew).</param>
/// <param name="isHashCollision">Returns true when the entry's stream hash collides with another stream.</param>
/// <param name="cacheDepth">Midpoint cache depth for the returned PTable.</param>
/// <returns>A PTable opened over the merged output file.</returns>
private static PTable MergeTo2(List<IEnumerator<IndexEntry>> enumerators, long fileSize, string outputFile,
	Func<IndexEntry, bool> isHashCollision, int cacheDepth) {
	Log.Trace("PTables merge started (specialized for <= 2 tables).");
	var watch = Stopwatch.StartNew();

	using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
		1024 * 1024, FileOptions.SequentialScan)) {
		f.SetLength(fileSize);
		f.Seek(0, SeekOrigin.Begin);

		using (var md5 = MD5.Create())
		using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
		using (var bs = new BufferedStream(cs, 65536)) {
			// WRITE HEADER
			var headerBytes = new PTableHeader(Version).AsByteArray();
			cs.Write(headerBytes, 0, headerBytes.Length);

			// WRITE INDEX ENTRIES — classic 2-way merge; entries are sorted descending,
			// so the larger head is emitted first.
			uint lastDeleted = uint.MaxValue; // hash of the most recent tombstoned stream
			var buffer = new byte[IndexEntrySize];
			var enum1 = enumerators[0];
			var enum2 = enumerators[1];
			bool available1 = enum1.MoveNext();
			bool available2 = enum2.MoveNext();
			IndexEntry current;
			while (available1 || available2) {
				if (available1 && (!available2 || enum1.Current.CompareTo(enum2.Current) > 0)) {
					current = enum1.Current;
					available1 = enum1.MoveNext();
				} else {
					current = enum2.Current;
					available2 = enum2.MoveNext();
				}

				if (current.Version == EventNumber.DeletedStream && !isHashCollision(current)) {
					// Tombstone on a non-colliding hash: keep it and start filtering the stream.
					lastDeleted = current.Stream;
					AppendRecordTo(bs, current.Bytes, buffer);
				} else {
					if (lastDeleted != current.Stream || current.Version == 0) // we keep 0th event for hash collision detection
						AppendRecordTo(bs, current.Bytes, buffer);
				}
			}

			bs.Flush();
			cs.FlushFinalBlock();

			// Trim the pre-allocated file to actual content, leaving room for the MD5.
			f.SetLength(f.Position + MD5Size);

			// WRITE MD5 (raw, after the hashed region)
			var hash = md5.Hash;
			f.Write(hash, 0, hash.Length);
		}
	}
	Log.Trace("PTables merge finished in {0}.", watch.Elapsed);
	return new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth);
}
/// <summary>
/// Specialized merge of exactly two PTables into a new PTable file, upgrading
/// 32-bit stream hashes to 64-bit on the fly when the target <paramref name="version"/>
/// is 64-bit and a source table is 32-bit. If a record backing an entry can no
/// longer be read during the upgrade (readRecord returns Item2 == false), that
/// entry is skipped and the merge loop is restarted via the do/while.
/// </summary>
/// <returns>The newly written, opened PTable.</returns>
private static PTable MergeTo2(IList <PTable> tables, long fileSize, int indexEntrySize, string outputFile, Func <string, ulong, ulong> upgradeHash, Func <IndexEntry, Tuple <string, bool> > readRecord, byte version, int cacheDepth) {
    Log.Trace("PTables merge started (specialized for <= 2 tables).");
    var watch = Stopwatch.StartNew();
    var enumerators = tables.Select(table => new EnumerablePTable(table, table.IterateAllInOrder().GetEnumerator())).ToList();
    long dumpedEntryCount = 0;
    using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
        f.SetLength(fileSize);
        f.Seek(0, SeekOrigin.Begin);
        using (var md5 = MD5.Create())
        using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
            // WRITE HEADER (through the crypto stream so it is included in the MD5)
            var headerBytes = new PTableHeader(version).AsByteArray();
            cs.Write(headerBytes, 0, headerBytes.Length);
            // WRITE INDEX ENTRIES
            var buffer = new byte[indexEntrySize];
            var enum1 = enumerators[0];
            var enum2 = enumerators[1];
            bool available1 = enum1.MoveNext();
            bool available2 = enum2.MoveNext();
            IndexEntry current;
            bool restart;
            do {
                restart = false;
                while (available1 || available2) {
                    // NOTE(review): enumX.Current is read here even when availableX is
                    // false (enumerator exhausted) — relies on Current keeping its last
                    // value after MoveNext() returns false; confirm this is intended.
                    var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version, enum1.Current.Position);
                    var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version, enum2.Current.Position);
                    if (version == PTableVersions.Index64Bit && enumerators[0].Table.Version == PTableVersions.Index32Bit) {
                        // Upgrade entry1's 32-bit hash; skip it if its record is gone.
                        var res = readRecord(entry1);
                        if (!res.Item2) {
                            available1 = enum1.MoveNext();
                            restart = true;
                            break;
                        }
                        entry1.Stream = upgradeHash(res.Item1, entry1.Stream);
                    }
                    if (version == PTableVersions.Index64Bit && enumerators[1].Table.Version == PTableVersions.Index32Bit) {
                        // Upgrade entry2's 32-bit hash; skip it if its record is gone.
                        var res = readRecord(entry2);
                        if (!res.Item2) {
                            available2 = enum2.MoveNext();
                            restart = true;
                            break;
                        }
                        entry2.Stream = upgradeHash(res.Item1, entry2.Stream);
                    }
                    // Take the larger entry (tables are iterated in descending order).
                    if (available1 && (!available2 || entry1.CompareTo(entry2) > 0)) {
                        current = entry1;
                        available1 = enum1.MoveNext();
                    } else {
                        current = entry2;
                        available2 = enum2.MoveNext();
                    }
                    //Possibly doing another read if the record was read during the upgrade process
                    var item = readRecord(current);
                    if (item.Item2) {
                        AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                        dumpedEntryCount += 1;
                    }
                }
            } while (restart);
            bs.Flush();
            cs.FlushFinalBlock();
            // Truncate the (approximately pre-sized) file to the bytes actually written
            // plus room for the MD5 footer.
            f.SetLength(f.Position + MD5Size);
            // WRITE MD5
            var hash = md5.Hash;
            f.Write(hash, 0, hash.Length);
            f.FlushToDisk();
        }
    }
    Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).", watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
    return(new PTable(outputFile, Guid.NewGuid(), version, depth: cacheDepth));
}
/// <summary>
/// Dumps a MemTable to disk as a new PTable file: header, index entries in order,
/// midpoints (for IndexV4+), and a trailing MD5 of everything before it. Returns
/// the newly opened PTable.
/// </summary>
/// <param name="table">The in-memory table to dump. Must not be null.</param>
/// <param name="filename">Destination file; overwritten if it exists (FileMode.Create).</param>
/// <param name="cacheDepth">Midpoint cache depth; must be non-negative.</param>
/// <param name="skipIndexVerify">Passed through to the PTable constructor.</param>
public static PTable FromMemtable(IMemTable table, string filename, int cacheDepth = 16, bool skipIndexVerify = false) {
    Ensure.NotNull(table, "table");
    Ensure.NotNullOrEmpty(filename, "filename");
    Ensure.Nonnegative(cacheDepth, "cacheDepth");
    int indexEntrySize = GetIndexEntrySize(table.Version);
    long dumpedEntryCount = 0;
    var sw = Stopwatch.StartNew();
    using (var fs = new FileStream(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.None, DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
        // Pre-size up to the index entries; midpoints and MD5 extend it later.
        var fileSize = GetFileSizeUpToIndexEntries(table.Count, table.Version);
        fs.SetLength(fileSize);
        fs.Seek(0, SeekOrigin.Begin);
        using (var md5 = MD5.Create())
        using (var cs = new CryptoStream(fs, md5, CryptoStreamMode.Write))
        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
            // WRITE HEADER (through the crypto stream so it is part of the MD5)
            var headerBytes = new PTableHeader(table.Version).AsByteArray();
            cs.Write(headerBytes, 0, headerBytes.Length);
            // WRITE INDEX ENTRIES, collecting midpoints as we go (IndexV4+ only)
            var buffer = new byte[indexEntrySize];
            var records = table.IterateAllInOrder();
            List <Midpoint> midpoints = new List <Midpoint>();
            var requiredMidpointCount = GetRequiredMidpointCountCached(table.Count, table.Version, cacheDepth);
            long indexEntry = 0L;
            foreach (var rec in records) {
                AppendRecordTo(bs, buffer, table.Version, rec, indexEntrySize);
                dumpedEntryCount += 1;
                if (table.Version >= PTableVersions.IndexV4 && IsMidpointIndex(indexEntry, table.Count, requiredMidpointCount)) {
                    midpoints.Add(new Midpoint(new IndexEntryKey(rec.Stream, rec.Version), indexEntry));
                }
                indexEntry++;
            }
            //WRITE MIDPOINTS
            if (table.Version >= PTableVersions.IndexV4) {
                var numIndexEntries = table.Count;
                if (dumpedEntryCount != numIndexEntries) {
                    //if index entries have been removed, compute the midpoints again
                    numIndexEntries = dumpedEntryCount;
                    requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, table.Version, cacheDepth);
                    midpoints = ComputeMidpoints(bs, fs, table.Version, indexEntrySize, numIndexEntries, requiredMidpointCount, midpoints);
                }
                WriteMidpointsTo(bs, fs, table.Version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries, requiredMidpointCount, midpoints);
            }
            bs.Flush();
            cs.FlushFinalBlock();
            // WRITE MD5 — position is now final, so size the file to fit the footer.
            var hash = md5.Hash;
            fs.SetLength(fs.Position + MD5Size);
            fs.Write(hash, 0, hash.Length);
            fs.FlushToDisk();
        }
    }
    Log.Trace("Dumped MemTable [{id}, {table} entries] in {elapsed}.", table.Id, table.Count, sw.Elapsed);
    return(new PTable(filename, table.Id, depth: cacheDepth, skipIndexVerify: skipIndexVerify));
}
/// <summary>
/// Writes a scavenged copy of <paramref name="table"/> to <paramref name="outputFile"/>,
/// keeping only entries for which <paramref name="existsAt"/> returns true.
/// Returns null (and deletes the output file) when nothing was removed and no
/// version upgrade forces keeping the new table. On any exception the partially
/// written output file is deleted and the exception rethrown.
/// </summary>
/// <param name="spaceSaved">Out: bytes saved (old size minus new size); 0 when null is returned.</param>
/// <exception cref="OperationCanceledException">When <paramref name="ct"/> is cancelled.</exception>
public static PTable Scavenged(PTable table, string outputFile, Func <string, ulong, ulong> upgradeHash, Func <IndexEntry, bool> existsAt, Func <IndexEntry, Tuple <string, bool> > readRecord, byte version, out long spaceSaved, int cacheDepth = 16, bool skipIndexVerify = false, CancellationToken ct = default(CancellationToken)) {
    Ensure.NotNull(table, "table");
    Ensure.NotNullOrEmpty(outputFile, "outputFile");
    Ensure.Nonnegative(cacheDepth, "cacheDepth");
    var indexEntrySize = GetIndexEntrySize(version);
    var numIndexEntries = table.Count;
    var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
    Log.Trace("PTables scavenge started with {numIndexEntries} entries.", numIndexEntries);
    var watch = Stopwatch.StartNew();
    long keptCount = 0L;
    long droppedCount;
    try {
        using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
            f.SetLength(fileSizeUpToIndexEntries);
            f.Seek(0, SeekOrigin.Begin);
            using (var md5 = MD5.Create())
            using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
            using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                // WRITE HEADER (through the crypto stream so it is part of the MD5)
                var headerBytes = new PTableHeader(version).AsByteArray();
                cs.Write(headerBytes, 0, headerBytes.Length);
                // WRITE SCAVENGED INDEX ENTRIES
                var buffer = new byte[indexEntrySize];
                using (var enumerator = new EnumerableTable(version, table, upgradeHash, existsAt, readRecord)) {
                    while (enumerator.MoveNext()) {
                        ct.ThrowIfCancellationRequested();
                        if (existsAt(enumerator.Current)) {
                            AppendRecordTo(bs, buffer, version, enumerator.Current, indexEntrySize);
                            keptCount++;
                        }
                    }
                }
                // We calculate this as the EnumerableTable can silently drop entries too.
                droppedCount = numIndexEntries - keptCount;
                // A higher target version means we keep the table even if nothing was dropped.
                var forceKeep = version > table.Version;
                if (droppedCount == 0 && !forceKeep) {
                    Log.Trace("PTable scavenge finished in {elapsed}. No entries removed so not keeping scavenged table.", watch.Elapsed);
                    try {
                        bs.Close();
                        File.Delete(outputFile);
                    } catch (Exception ex) {
                        Log.ErrorException(ex, "Unable to delete unwanted scavenged PTable: {outputFile}", outputFile);
                    }
                    spaceSaved = 0;
                    return(null);
                }
                if (droppedCount == 0 && forceKeep) {
                    Log.Trace("Keeping scavenged index even though it isn't smaller; version upgraded.");
                }
                //CALCULATE AND WRITE MIDPOINTS
                if (version >= PTableVersions.IndexV4) {
                    var requiredMidpointCount = GetRequiredMidpointCount(keptCount, version, cacheDepth);
                    var midpoints = ComputeMidpoints(bs, f, version, indexEntrySize, keptCount, requiredMidpointCount, new List <Midpoint>(), ct);
                    WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, keptCount, keptCount, requiredMidpointCount, midpoints);
                }
                bs.Flush();
                cs.FlushFinalBlock();
                f.FlushToDisk();
                // Shrink the pre-sized file to what was actually written plus the MD5 footer.
                f.SetLength(f.Position + MD5Size);
                // WRITE MD5
                var hash = md5.Hash;
                f.Write(hash, 0, hash.Length);
                f.FlushToDisk();
            }
        }
        Log.Trace("PTable scavenge finished in {elapsed} ({droppedCount} entries removed, {keptCount} remaining).", watch.Elapsed, droppedCount, keptCount);
        var scavengedTable = new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth, skipIndexVerify: skipIndexVerify);
        spaceSaved = table._size - scavengedTable._size;
        return(scavengedTable);
    } catch (Exception) {
        // Best-effort cleanup of the partial output; original exception propagates.
        try {
            File.Delete(outputFile);
        } catch (Exception ex) {
            Log.ErrorException(ex, "Unable to delete unwanted scavenged PTable: {outputFile}", outputFile);
        }
        throw;
    }
}
/// <summary>
/// Specialized two-way merge: combines the entries of exactly two PTables
/// (iterated in descending order) into a new PTable file, keeping only entries
/// for which <paramref name="recordExistsAt"/> returns true.
/// </summary>
private static PTable MergeTo2(IList<PTable> tables, long fileSize, string outputFile, Func<IndexEntry, bool> recordExistsAt, int cacheDepth) {
    Log.Trace("PTables merge started (specialized for <= 2 tables).");
    var sw = Stopwatch.StartNew();
    var iterators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
    long written = 0;
    using (var file = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
        file.SetLength(fileSize);
        file.Seek(0, SeekOrigin.Begin);
        using (var hasher = MD5.Create())
        using (var hashedStream = new CryptoStream(file, hasher, CryptoStreamMode.Write))
        using (var buffered = new BufferedStream(hashedStream, DefaultSequentialBufferSize)) {
            // Header goes through the crypto stream so it is included in the MD5.
            var header = new PTableHeader(Version).AsByteArray();
            hashedStream.Write(header, 0, header.Length);

            var scratch = new byte[IndexEntrySize];
            var first = iterators[0];
            var second = iterators[1];
            var firstHasMore = first.MoveNext();
            var secondHasMore = second.MoveNext();
            while (firstHasMore || secondHasMore) {
                // Pick the larger of the two current entries (descending merge order).
                IndexEntry next;
                if (firstHasMore && (!secondHasMore || first.Current.CompareTo(second.Current) > 0)) {
                    next = first.Current;
                    firstHasMore = first.MoveNext();
                } else {
                    next = second.Current;
                    secondHasMore = second.MoveNext();
                }
                if (recordExistsAt(next)) {
                    AppendRecordTo(buffered, next.Bytes, scratch);
                    written += 1;
                }
            }
            buffered.Flush();
            hashedStream.FlushFinalBlock();
            file.SetLength(file.Position + MD5Size);

            // WRITE MD5
            var hash = hasher.Hash;
            file.Write(hash, 0, hash.Length);
            file.FlushToDisk();
        }
    }
    Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).", sw.Elapsed, string.Join(", ", tables.Select(x => x.Count)), written);
    return new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth);
}
/// <summary>
/// Merges any number of PTables into a single new PTable file, dropping entries
/// of deleted streams (except the 0th event, kept for hash collision detection).
/// Delegates to the specialized <see cref="MergeTo2"/> when exactly two tables
/// are given.
/// </summary>
/// <returns>The newly written, opened PTable.</returns>
public static PTable MergeTo(IList <PTable> tables, string outputFile, Func <IndexEntry, bool> isHashCollision, int cacheDepth = 16) {
    Ensure.NotNull(tables, "tables");
    Ensure.NotNullOrEmpty(outputFile, "outputFile");
    Ensure.NotNull(isHashCollision, "isHashCollision");
    Ensure.Nonnegative(cacheDepth, "cacheDepth");
    var fileSize = GetFileSize(tables); // approximate file size
    if (tables.Count == 2) {
        return(MergeTo2(tables, fileSize, outputFile, isHashCollision, cacheDepth)); // special case
    }
    Log.Trace("PTables merge started.");
    var watch = Stopwatch.StartNew();
    var enumerators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
    // Drop enumerators that are empty from the start (index is decremented after
    // RemoveAt so no element is skipped).
    for (int i = 0; i < enumerators.Count; i++) {
        if (!enumerators[i].MoveNext()) {
            enumerators[i].Dispose();
            enumerators.RemoveAt(i);
            i--;
        }
    }
    long dumpedEntryCount = 0;
    using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
        f.SetLength(fileSize);
        f.Seek(0, SeekOrigin.Begin);
        using (var md5 = MD5.Create())
        using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
            // WRITE HEADER (through the crypto stream so it is part of the MD5)
            var headerBytes = new PTableHeader(Version).AsByteArray();
            cs.Write(headerBytes, 0, headerBytes.Length);
            uint lastDeleted = uint.MaxValue;
            var buffer = new byte[IndexEntrySize];
            // WRITE INDEX ENTRIES — repeatedly take the largest current entry
            // across all live enumerators (descending merge).
            while (enumerators.Count > 0) {
                var idx = GetMaxOf(enumerators);
                var current = enumerators[idx].Current;
                if (current.Version == EventNumber.DeletedStream && !isHashCollision(current)) {
                    // Stream tombstone: remember the hash so the stream's older
                    // entries below are skipped.
                    lastDeleted = current.Stream;
                    AppendRecordTo(bs, current.Bytes, buffer);
                    dumpedEntryCount += 1;
                } else {
                    if (lastDeleted != current.Stream || current.Version == 0) // we keep 0th event for hash collision detection
                    {
                        AppendRecordTo(bs, current.Bytes, buffer);
                        dumpedEntryCount += 1;
                    }
                }
                if (!enumerators[idx].MoveNext()) {
                    enumerators[idx].Dispose();
                    enumerators.RemoveAt(idx);
                }
            }
            bs.Flush();
            cs.FlushFinalBlock();
            f.FlushToDisk();
            // Trim the approximately pre-sized file to the actual length plus MD5 footer.
            f.SetLength(f.Position + MD5Size);
            // WRITE MD5
            var hash = md5.Hash;
            f.Write(hash, 0, hash.Length);
            f.FlushToDisk();
        }
    }
    Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).", watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
    return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
}
/// <summary>
/// Opens and verifies an existing PTable file: reads and validates the header
/// version, derives entry/key sizes, reads the min/max entry keys, then caches
/// midpoints and verifies the file hash. Throws CorruptIndexException when the
/// file is missing or the header version is unsupported.
/// </summary>
private PTable(string filename, Guid id, int initialReaders = ESConsts.PTableInitialReaderCount, int maxReaders = ESConsts.PTableMaxReaderCount, int depth = 16) {
    Ensure.NotNullOrEmpty(filename, "filename");
    Ensure.NotEmptyGuid(id, "id");
    Ensure.Positive(maxReaders, "maxReaders");
    Ensure.Nonnegative(depth, "depth");
    if (!File.Exists(filename)) {
        throw new CorruptIndexException(new PTableNotFoundException(filename));
    }
    _id = id;
    _filename = filename;
    Log.Trace("Loading and Verification of PTable '{0}' started...", Path.GetFileName(Filename));
    var sw = Stopwatch.StartNew();
    _size = new FileInfo(_filename).Length;
    File.SetAttributes(_filename, FileAttributes.ReadOnly | FileAttributes.NotContentIndexed);
    // Pooled readers; each WorkItem owns its own stream over the file.
    _workItems = new ObjectPool <WorkItem>(string.Format("PTable {0} work items", _id), initialReaders, maxReaders, () => new WorkItem(filename, DefaultBufferSize), workItem => workItem.Dispose(), pool => OnAllWorkItemsDisposed());
    var readerWorkItem = GetWorkItem();
    try {
        readerWorkItem.Stream.Seek(0, SeekOrigin.Begin);
        var header = PTableHeader.FromStream(readerWorkItem.Stream);
        if ((header.Version != PTableVersions.Index32Bit) && (header.Version != PTableVersions.Index64Bit)) {
            throw new CorruptIndexException(new WrongFileVersionException(_filename, header.Version, Version));
        }
        _version = header.Version;
        // Entry/key sizes depend on whether stream hashes are 32- or 64-bit.
        if (_version == PTableVersions.Index32Bit) {
            _indexEntrySize = IndexEntry32Size;
            _indexKeySize = IndexKey32Size;
        }
        if (_version == PTableVersions.Index64Bit) {
            _indexEntrySize = IndexEntry64Size;
            _indexKeySize = IndexKey64Size;
        }
        // Entry count = payload bytes (file minus header and MD5 footer) / entry size.
        _count = ((_size - PTableHeader.Size - MD5Size) / _indexEntrySize);
        if (Count == 0) {
            // Empty table: min > max so every lookup misses.
            _minEntry = new IndexEntryKey(ulong.MaxValue, int.MaxValue);
            _maxEntry = new IndexEntryKey(ulong.MinValue, int.MinValue);
        } else {
            // Entries are stored in descending order, so the smallest key is the
            // last entry and the largest is the first.
            var minEntry = ReadEntry(_indexEntrySize, Count - 1, readerWorkItem, _version);
            _minEntry = new IndexEntryKey(minEntry.Stream, minEntry.Version);
            var maxEntry = ReadEntry(_indexEntrySize, 0, readerWorkItem, _version);
            _maxEntry = new IndexEntryKey(maxEntry.Stream, maxEntry.Version);
        }
    } catch (Exception) {
        // Tear down the work-item pool before propagating; the work item itself
        // is still returned in finally below.
        Dispose();
        throw;
    } finally {
        ReturnWorkItem(readerWorkItem);
    }
    int calcdepth = 0;
    try {
        calcdepth = GetDepth(_size, depth);
        _midpoints = CacheMidpointsAndVerifyHash(calcdepth);
    } catch (PossibleToHandleOutOfMemoryException) {
        // Midpoint cache is an optimization; the table still works without it.
        Log.Error("Unable to create midpoints for PTable '{0}' ({1} entries, depth {2} requested). " + "Performance hit will occur. OOM Exception.", Path.GetFileName(Filename), Count, depth);
    }
    Log.Trace("Loading PTable (Version: {0}) '{1}' ({2} entries, cache depth {3}) done in {4}.", _version, Path.GetFileName(Filename), Count, calcdepth, sw.Elapsed);
}
/// <summary>
/// Specialized merge of exactly two PTable entry enumerators (iterated in
/// descending entry order) into a new PTable file. An entry of a deleted stream
/// (Version == int.MaxValue) that is not a hash collision marks the stream hash
/// so its remaining (older) entries are dropped.
/// </summary>
/// <returns>The newly written, opened PTable.</returns>
private static PTable MergeTo2(List <IEnumerator <IndexEntry> > enumerators, long fileSize, string outputFile, Func <IndexEntry, bool> isHashCollision, int cacheDepth) {
    Log.Trace("PTables merge started (specialized for <= 2 tables).");
    var watch = Stopwatch.StartNew();
    var bytes = new byte[16];
    uint lastdeleted = uint.MaxValue;
    // FIX: MD5 is IDisposable and was previously never disposed (resource leak).
    using (var md5 = MD5.Create())
    using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, 1000000, FileOptions.SequentialScan)) {
        f.SetLength(fileSize);
        f.Seek(0, SeekOrigin.Begin);
        using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
        using (var b = new BufferedStream(cs, 65536)) {
            // WRITE HEADER (through the crypto stream so it is part of the MD5)
            var headerBytes = new PTableHeader(Version).AsByteArray();
            cs.Write(headerBytes, 0, headerBytes.Length);
            // WRITE INDEX ENTRIES
            var enum1 = enumerators[0];
            var enum2 = enumerators[1];
            bool available1 = enum1.MoveNext();
            bool available2 = enum2.MoveNext();
            IndexEntry current;
            while (available1 || available2) {
                // Take the larger of the two current entries (descending merge order).
                if (available1 && (!available2 || enum1.Current.CompareTo(enum2.Current) > 0)) {
                    current = enum1.Current;
                    available1 = enum1.MoveNext();
                } else {
                    current = enum2.Current;
                    available2 = enum2.MoveNext();
                }
                if (current.Version == int.MaxValue && !isHashCollision(current)) {
                    // Stream tombstone: remember the hash so older entries are dropped.
                    lastdeleted = current.Stream;
                    AppendRecordTo(b, current.Bytes, bytes);
                } else {
                    if (lastdeleted != current.Stream) {
                        AppendRecordTo(b, current.Bytes, bytes);
                    }
                }
            }
            // FIX: flush the buffered and crypto streams BEFORE reading f.Position;
            // previously SetLength was called first, on a stale position while data
            // was still sitting in the 64KB buffer (other merge variants flush first).
            b.Flush();
            cs.FlushFinalBlock();
            f.SetLength(f.Position + MD5Size);
            // WRITE MD5
            var hash = md5.Hash;
            f.Write(hash, 0, hash.Length);
        }
    }
    Log.Trace("PTables merge finished in " + watch.Elapsed);
    return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
}
/// <summary>
/// Merges any number of PTables into a single new PTable file, dropping entries
/// of deleted streams (Version == int.MaxValue) that are not hash collisions.
/// Delegates to the specialized two-way merge when exactly two tables are given.
/// </summary>
/// <returns>The newly written, opened PTable.</returns>
public static PTable MergeTo(ICollection <PTable> tables, string outputFile, Func <IndexEntry, bool> isHashCollision, int cacheDepth = 16) {
    var enumerators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
    var fileSize = GetFileSize(tables); // approximate file size
    if (enumerators.Count == 2) {
        return(MergeTo2(enumerators, fileSize, outputFile, isHashCollision, cacheDepth));
    }
    Log.Trace("PTables merge started.");
    var watch = Stopwatch.StartNew(); // idiom: StartNew instead of new+Start
    // Drop enumerators that are empty from the start (index is decremented after
    // RemoveAt so no element is skipped).
    for (int i = 0; i < enumerators.Count; i++) {
        if (!enumerators[i].MoveNext()) {
            enumerators[i].Dispose();
            enumerators.RemoveAt(i);
            i--;
        }
    }
    var bytes = new byte[16];
    uint lastdeleted = uint.MaxValue;
    // FIX: MD5 is IDisposable and was previously never disposed (resource leak).
    using (var md5 = MD5.Create())
    using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, 1024 * 1024, FileOptions.SequentialScan)) {
        f.SetLength(fileSize);
        f.Seek(0, SeekOrigin.Begin);
        using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
        using (var b = new BufferedStream(cs, 65536)) {
            // WRITE HEADER (through the crypto stream so it is part of the MD5)
            var headerBytes = new PTableHeader(Version).AsByteArray();
            cs.Write(headerBytes, 0, headerBytes.Length);
            // WRITE INDEX ENTRIES — repeatedly take the largest current entry
            // across all live enumerators (descending merge).
            while (enumerators.Count > 0) {
                var idx = GetMaxOf(enumerators);
                var current = enumerators[idx].Current;
                if (current.Version == int.MaxValue && !isHashCollision(current)) {
                    // Stream tombstone: remember the hash so older entries are dropped.
                    lastdeleted = current.Stream;
                    AppendRecordTo(b, current.Bytes, bytes);
                } else {
                    if (lastdeleted != current.Stream) {
                        AppendRecordTo(b, current.Bytes, bytes);
                    }
                }
                if (!enumerators[idx].MoveNext()) {
                    enumerators[idx].Dispose();
                    enumerators.RemoveAt(idx);
                }
            }
            // FIX: flush the buffered and crypto streams BEFORE reading f.Position;
            // previously SetLength was called first, on a stale position while data
            // was still sitting in the 64KB buffer (other merge variants flush first).
            b.Flush();
            cs.FlushFinalBlock();
            f.SetLength(f.Position + MD5Size);
            // WRITE MD5
            var hash = md5.Hash;
            f.Write(hash, 0, hash.Length);
        }
    }
    Log.Trace("PTables merge finished in " + watch.Elapsed);
    return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
}
/// <summary>
/// Specialized merge of exactly two PTables (via EnumerableTable, which handles
/// hash upgrading and may silently drop entries) into a new PTable file,
/// collecting midpoints on the fly for IndexV4+ and recomputing them if the
/// dumped count differs from the expected count.
/// </summary>
/// <returns>The newly written, opened PTable.</returns>
private static PTable MergeTo2(IList <PTable> tables, long numIndexEntries, int indexEntrySize, string outputFile, Func <string, ulong, ulong> upgradeHash, Func <IndexEntry, bool> existsAt, Func <IndexEntry, Tuple <string, bool> > readRecord, byte version, int cacheDepth, bool skipIndexVerify) {
    Log.Trace("PTables merge started (specialized for <= 2 tables).");
    var watch = Stopwatch.StartNew();
    var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
    var enumerators = tables.Select(table => new EnumerableTable(version, table, upgradeHash, existsAt, readRecord)).ToList();
    try {
        long dumpedEntryCount = 0;
        using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
            f.SetLength(fileSizeUpToIndexEntries);
            f.Seek(0, SeekOrigin.Begin);
            using (var md5 = MD5.Create())
            using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
            using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                // WRITE HEADER (through the crypto stream so it is part of the MD5)
                var headerBytes = new PTableHeader(version).AsByteArray();
                cs.Write(headerBytes, 0, headerBytes.Length);
                // WRITE INDEX ENTRIES
                var buffer = new byte[indexEntrySize];
                long indexEntry = 0L;
                List <Midpoint> midpoints = new List <Midpoint>();
                var requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                var enum1 = enumerators[0];
                var enum2 = enumerators[1];
                bool available1 = enum1.MoveNext();
                bool available2 = enum2.MoveNext();
                IndexEntry current;
                while (available1 || available2) {
                    // NOTE(review): enumX.Current is read here even when availableX is
                    // false (enumerator exhausted) — relies on Current keeping its last
                    // value after MoveNext() returns false; confirm this is intended.
                    var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version, enum1.Current.Position);
                    var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version, enum2.Current.Position);
                    // Take the larger entry (tables are iterated in descending order).
                    if (available1 && (!available2 || entry1.CompareTo(entry2) > 0)) {
                        current = entry1;
                        available1 = enum1.MoveNext();
                    } else {
                        current = entry2;
                        available2 = enum2.MoveNext();
                    }
                    AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                    if (version >= PTableVersions.IndexV4 && IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount)) {
                        midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version), indexEntry));
                    }
                    indexEntry++;
                    dumpedEntryCount++;
                }
                //WRITE MIDPOINTS
                if (version >= PTableVersions.IndexV4) {
                    if (dumpedEntryCount != numIndexEntries) {
                        //if index entries have been removed, compute the midpoints again
                        numIndexEntries = dumpedEntryCount;
                        requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                        midpoints = ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries, requiredMidpointCount, midpoints);
                    }
                    WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries, requiredMidpointCount, midpoints);
                }
                bs.Flush();
                cs.FlushFinalBlock();
                // Trim the pre-sized file to the actual length plus the MD5 footer.
                f.SetLength(f.Position + MD5Size);
                // WRITE MD5
                var hash = md5.Hash;
                f.Write(hash, 0, hash.Length);
                f.FlushToDisk();
            }
        }
        Log.Trace("PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).", watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
        return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth, skipIndexVerify: skipIndexVerify));
    } finally {
        foreach (var enumerator in enumerators) {
            enumerator.Dispose();
        }
    }
}
/// <summary>
/// Merges any number of PTables into a single new PTable file, keeping only
/// entries for which <paramref name="recordExistsAt"/> returns true. Delegates
/// to the specialized <see cref="MergeTo2"/> when exactly two tables are given.
/// </summary>
/// <returns>The newly written, opened PTable.</returns>
public static PTable MergeTo(IList<PTable> tables, string outputFile, Func<IndexEntry, bool> recordExistsAt, int cacheDepth = 16) {
    Ensure.NotNull(tables, "tables");
    Ensure.NotNullOrEmpty(outputFile, "outputFile");
    Ensure.Nonnegative(cacheDepth, "cacheDepth");
    var fileSize = GetFileSize(tables); // approximate file size
    if (tables.Count == 2)
        return MergeTo2(tables, fileSize, outputFile, recordExistsAt, cacheDepth); // special case
    Log.Trace("PTables merge started.");
    var watch = Stopwatch.StartNew();
    var enumerators = tables.Select(table => table.IterateAllInOrder().GetEnumerator()).ToList();
    // Drop enumerators that are empty from the start (index is decremented after
    // RemoveAt so no element is skipped).
    for (int i = 0; i < enumerators.Count; i++) {
        if (!enumerators[i].MoveNext()) {
            enumerators[i].Dispose();
            enumerators.RemoveAt(i);
            i--;
        }
    }
    long dumpedEntryCount = 0;
    using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
        f.SetLength(fileSize);
        f.Seek(0, SeekOrigin.Begin);
        using (var md5 = MD5.Create())
        using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
        using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
            // WRITE HEADER (through the crypto stream so it is part of the MD5)
            var headerBytes = new PTableHeader(Version).AsByteArray();
            cs.Write(headerBytes, 0, headerBytes.Length);
            var buffer = new byte[IndexEntrySize];
            // WRITE INDEX ENTRIES — repeatedly take the largest current entry
            // across all live enumerators (descending merge).
            while (enumerators.Count > 0) {
                var idx = GetMaxOf(enumerators);
                var current = enumerators[idx].Current;
                if (recordExistsAt(current)) {
                    AppendRecordTo(bs, current.Bytes, buffer);
                    dumpedEntryCount += 1;
                }
                if (!enumerators[idx].MoveNext()) {
                    enumerators[idx].Dispose();
                    enumerators.RemoveAt(idx);
                }
            }
            bs.Flush();
            cs.FlushFinalBlock();
            f.FlushToDisk();
            // Trim the approximately pre-sized file to the actual length plus MD5 footer.
            f.SetLength(f.Position + MD5Size);
            // WRITE MD5
            var hash = md5.Hash;
            f.Write(hash, 0, hash.Length);
            f.FlushToDisk();
        }
    }
    Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).", watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
    return new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth);
}
/// <summary>
/// Merges any number of PTables into a single new PTable file of the given
/// <paramref name="version"/>, upgrading stream hashes where needed (via
/// EnumerableTable) and collecting midpoints for IndexV4+. Delegates to the
/// specialized <see cref="MergeTo2"/> when exactly two tables are given.
/// All source enumerators are disposed even on failure.
/// </summary>
/// <returns>The newly written, opened PTable.</returns>
public static PTable MergeTo(IList <PTable> tables, string outputFile, Func <string, ulong, ulong> upgradeHash, Func <IndexEntry, bool> existsAt, Func <IndexEntry, Tuple <string, bool> > readRecord, byte version, int cacheDepth = 16, bool skipIndexVerify = false) {
    Ensure.NotNull(tables, "tables");
    Ensure.NotNullOrEmpty(outputFile, "outputFile");
    Ensure.Nonnegative(cacheDepth, "cacheDepth");
    var indexEntrySize = GetIndexEntrySize(version);
    long numIndexEntries = 0;
    for (var i = 0; i < tables.Count; i++) {
        numIndexEntries += tables[i].Count;
    }
    var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
    if (tables.Count == 2) {
        return(MergeTo2(tables, numIndexEntries, indexEntrySize, outputFile, upgradeHash, existsAt, readRecord, version, cacheDepth, skipIndexVerify)); // special case
    }
    Log.Trace("PTables merge started.");
    var watch = Stopwatch.StartNew();
    var enumerators = tables.Select(table => new EnumerableTable(version, table, upgradeHash, existsAt, readRecord)).ToList();
    try {
        // Drop enumerators that are empty from the start (index is decremented
        // after RemoveAt so no element is skipped).
        for (int i = 0; i < enumerators.Count; i++) {
            if (!enumerators[i].MoveNext()) {
                enumerators[i].Dispose();
                enumerators.RemoveAt(i);
                i--;
            }
        }
        long dumpedEntryCount = 0;
        using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
            f.SetLength(fileSizeUpToIndexEntries);
            f.Seek(0, SeekOrigin.Begin);
            using (var md5 = MD5.Create())
            using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
            using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
                // WRITE HEADER (through the crypto stream so it is part of the MD5)
                var headerBytes = new PTableHeader(version).AsByteArray();
                cs.Write(headerBytes, 0, headerBytes.Length);
                var buffer = new byte[indexEntrySize];
                long indexEntry = 0L;
                List <Midpoint> midpoints = new List <Midpoint>();
                var requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                // WRITE INDEX ENTRIES — repeatedly take the largest current entry
                // across all live enumerators (descending merge).
                while (enumerators.Count > 0) {
                    var idx = GetMaxOf(enumerators);
                    var current = enumerators[idx].Current;
                    AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                    if (version >= PTableVersions.IndexV4 && IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount)) {
                        midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version), indexEntry));
                    }
                    indexEntry++;
                    dumpedEntryCount++;
                    if (!enumerators[idx].MoveNext()) {
                        enumerators[idx].Dispose();
                        enumerators.RemoveAt(idx);
                    }
                }
                //WRITE MIDPOINTS
                if (version >= PTableVersions.IndexV4) {
                    if (dumpedEntryCount != numIndexEntries) {
                        //if index entries have been removed, compute the midpoints again
                        numIndexEntries = dumpedEntryCount;
                        requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                        midpoints = ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries, requiredMidpointCount, midpoints);
                    }
                    WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount, numIndexEntries, requiredMidpointCount, midpoints);
                }
                bs.Flush();
                cs.FlushFinalBlock();
                f.FlushToDisk();
                // Trim the pre-sized file to the actual length plus the MD5 footer.
                f.SetLength(f.Position + MD5Size);
                // WRITE MD5
                var hash = md5.Hash;
                f.Write(hash, 0, hash.Length);
                f.FlushToDisk();
            }
        }
        Log.Trace("PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).", watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
        return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth, skipIndexVerify: skipIndexVerify));
    } finally {
        foreach (var enumerableTable in enumerators) {
            enumerableTable.Dispose();
        }
    }
}
/// <summary>
/// Two-way merge of exactly two PTables into a new PTable file; entries whose
/// underlying record no longer exists (per <paramref name="recordExistsAt"/>)
/// are dropped. Entries are consumed in descending order.
/// </summary>
private static PTable MergeTo2(IList <PTable> tables, long fileSize, string outputFile, Func <IndexEntry, bool> recordExistsAt, int cacheDepth) {
    Log.Trace("PTables merge started (specialized for <= 2 tables).");
    var timer = Stopwatch.StartNew();
    var sources = tables.Select(t => t.IterateAllInOrder().GetEnumerator()).ToList();
    long appended = 0;
    using (var target = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None, DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
        target.SetLength(fileSize);
        target.Seek(0, SeekOrigin.Begin);
        using (var digest = MD5.Create())
        using (var digestStream = new CryptoStream(target, digest, CryptoStreamMode.Write))
        using (var output = new BufferedStream(digestStream, DefaultSequentialBufferSize)) {
            // WRITE HEADER (through the crypto stream so it is covered by the MD5)
            var headerBytes = new PTableHeader(Version).AsByteArray();
            digestStream.Write(headerBytes, 0, headerBytes.Length);

            // WRITE INDEX ENTRIES: classic two-way merge in descending entry order.
            var scratch = new byte[IndexEntrySize];
            var left = sources[0];
            var right = sources[1];
            var leftAlive = left.MoveNext();
            var rightAlive = right.MoveNext();
            while (leftAlive || rightAlive) {
                var useLeft = leftAlive && (!rightAlive || left.Current.CompareTo(right.Current) > 0);
                var candidate = useLeft ? left.Current : right.Current;
                if (useLeft)
                    leftAlive = left.MoveNext();
                else
                    rightAlive = right.MoveNext();
                // Skip entries whose record has been scavenged away.
                if (!recordExistsAt(candidate))
                    continue;
                AppendRecordTo(output, candidate.Bytes, scratch);
                appended += 1;
            }
            output.Flush();
            digestStream.FlushFinalBlock();
            target.SetLength(target.Position + MD5Size);

            // WRITE MD5
            var hash = digest.Hash;
            target.Write(hash, 0, hash.Length);
            target.FlushToDisk();
        }
    }
    Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).", timer.Elapsed, string.Join(", ", tables.Select(x => x.Count)), appended);
    return(new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth));
}