/// <summary>
/// Merges exactly two PTables (specialized fast path for &lt;= 2 tables) into a new PTable file,
/// streaming index entries in sorted order while computing an MD5 checksum, optional V4+ midpoints,
/// and an optional bloom filter of stream hashes.
/// </summary>
/// <param name="tables">The source tables; this specialization assumes two enumerable tables.</param>
/// <param name="numIndexEntries">Expected total entry count, used to presize the file and midpoints.</param>
/// <param name="indexEntrySize">On-disk size of one index entry for the target <paramref name="version"/>.</param>
/// <param name="outputFile">Path of the new PTable file; must not already exist (FileMode.CreateNew).</param>
/// <param name="upgradeHash">Hash-upgrade function passed through to the enumerable wrappers.</param>
/// <param name="existsAt">Predicate passed to the enumerable wrappers (filters deleted entries there).</param>
/// <param name="readRecord">Record reader passed to the enumerable wrappers.</param>
/// <param name="version">Target PTable format version being written.</param>
/// <returns>A new <see cref="PTable"/> opened over the merged output file.</returns>
private static PTable MergeTo2<TStreamId>(IList<PTable> tables, long numIndexEntries, int indexEntrySize,
	string outputFile, Func<TStreamId, ulong, ulong> upgradeHash, Func<IndexEntry, bool> existsAt,
	Func<IndexEntry, Tuple<TStreamId, bool>> readRecord, byte version,
	int initialReaders, int maxReaders, int cacheDepth, bool skipIndexVerify,
	bool useBloomFilter, int lruCacheSize) {
	Log.Debug("PTables merge started (specialized for <= 2 tables).");
	var watch = Stopwatch.StartNew();
	var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
	// Wrap both tables so hash upgrading / existence filtering happens during enumeration.
	var enumerators = tables
		.Select(table => new EnumerableTable<TStreamId>(version, table, upgradeHash, existsAt, readRecord)).ToList();
	try {
		long dumpedEntryCount = 0;
		using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
			DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
			// Preallocate up to the end of the index-entry section; midpoints/MD5 extend it later.
			f.SetLength(fileSizeUpToIndexEntries);
			f.Seek(0, SeekOrigin.Begin);
			using (var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count)))
			using (var md5 = MD5.Create())
			// Everything written through cs/bs is folded into the MD5 hash as it streams to disk.
			using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
			using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
				// WRITE HEADER
				var headerBytes = new PTableHeader(version).AsByteArray();
				cs.Write(headerBytes, 0, headerBytes.Length);

				// WRITE INDEX ENTRIES
				var buffer = new byte[indexEntrySize];
				long indexEntry = 0L;
				var requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
				// Off-heap midpoint buffer; safety net accounts for possible over-collection
				// before the final count is known.
				using var midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>(
					(int)requiredMidpointCount + MidpointsOverflowSafetyNet);

				// Classic two-way merge: keep consuming from whichever enumerator's head compares
				// greater (CompareTo > 0), i.e. the PTable on-disk sort order.
				var enum1 = enumerators[0];
				var enum2 = enumerators[1];
				bool available1 = enum1.MoveNext();
				bool available2 = enum2.MoveNext();
				IndexEntry current;
				// Tracks the last stream hash added to the bloom filter so consecutive entries
				// of the same stream are only added once.
				ulong? previousHash = null;
				while (available1 || available2) {
					// NOTE(review): Current is read even when the corresponding enumerator is
					// exhausted; presumably EnumerableTable keeps Current at its last value — the
					// availability check below ensures a stale entry is never selected.
					var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version, enum1.Current.Position);
					var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version, enum2.Current.Position);

					if (available1 && (!available2 || entry1.CompareTo(entry2) > 0)) {
						current = entry1;
						available1 = enum1.MoveNext();
					} else {
						current = entry2;
						available2 = enum2.MoveNext();
					}

					AppendRecordTo(bs, buffer, version, current, indexEntrySize);
					if (version >= PTableVersions.IndexV4 &&
						IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount)) {
						midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version), indexEntry));
					}

					// WRITE BLOOM FILTER ENTRY
					if (bloomFilter != null && current.Stream != previousHash) {
						// upgradeHash has already ensured the hash is in the right format for the target
						var streamHash = current.Stream;
						bloomFilter.Add(GetSpan(ref streamHash));
						previousHash = current.Stream;
					}

					indexEntry++;
					dumpedEntryCount++;
				}

				//WRITE MIDPOINTS
				if (version >= PTableVersions.IndexV4) {
					if (dumpedEntryCount != numIndexEntries) {
						//if index entries have been removed, compute the midpoints again
						numIndexEntries = dumpedEntryCount;
						requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
						ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
							requiredMidpointCount, midpoints);
					}
					WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount,
						numIndexEntries, requiredMidpointCount, midpoints);
				}

				bloomFilter?.Flush();
				bs.Flush();
				// Finalize the hash transform before writing the digest itself (which must not
				// be part of its own hash) directly to the underlying file stream.
				cs.FlushFinalBlock();
				f.SetLength(f.Position + MD5Size);

				// WRITE MD5
				var hash = md5.Hash;
				f.Write(hash, 0, hash.Length);
				f.FlushToDisk();
			}
		}
		Log.Debug(
			"PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
			watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
		return (new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth,
			skipIndexVerify, useBloomFilter, lruCacheSize));
	} finally {
		// Always release the enumerable wrappers, even if writing failed part-way.
		foreach (var enumerator in enumerators) {
			enumerator.Dispose();
		}
	}
}
/// <summary>
/// Merges exactly two PTables (specialized fast path for &lt;= 2 tables) into a new PTable file,
/// streaming index entries in sorted order while computing an MD5 checksum and, for V4+ formats,
/// the midpoint cache. Older variant: string stream-ids, managed midpoint list, no bloom filter.
/// </summary>
/// <param name="tables">The source tables; this specialization assumes two enumerable tables.</param>
/// <param name="numIndexEntries">Expected total entry count; corrected after the merge if entries were dropped.</param>
/// <param name="indexEntrySize">On-disk size of one index entry for the target <paramref name="version"/>.</param>
/// <param name="outputFile">Path of the new PTable file; must not already exist (FileMode.CreateNew).</param>
/// <param name="upgradeHash">Hash-upgrade function passed through to the enumerable wrappers.</param>
/// <param name="existsAt">Predicate passed to the enumerable wrappers (filters deleted entries there).</param>
/// <param name="readRecord">Record reader passed to the enumerable wrappers.</param>
/// <param name="version">Target PTable format version being written.</param>
/// <returns>A new <see cref="PTable"/> opened over the merged output file.</returns>
private static PTable MergeTo2(IList<PTable> tables, long numIndexEntries, int indexEntrySize,
	string outputFile, Func<string, ulong, ulong> upgradeHash, Func<IndexEntry, bool> existsAt,
	Func<IndexEntry, Tuple<string, bool>> readRecord, byte version, int cacheDepth, bool skipIndexVerify) {
	Log.Trace("PTables merge started (specialized for <= 2 tables).");
	var watch = Stopwatch.StartNew();
	var fileSizeUpToIndexEntries = GetFileSizeUpToIndexEntries(numIndexEntries, version);
	// Wrap both tables so hash upgrading / existence filtering happens during enumeration.
	var enumerators = tables.Select(table =>
		new EnumerableTable(version, table, upgradeHash, existsAt, readRecord)).ToList();
	try {
		long dumpedEntryCount = 0;
		using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
			DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
			// Preallocate up to the end of the index-entry section; midpoints/MD5 extend it later.
			f.SetLength(fileSizeUpToIndexEntries);
			f.Seek(0, SeekOrigin.Begin);
			using (var md5 = MD5.Create())
			// Everything written through cs/bs is folded into the MD5 hash as it streams to disk.
			using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
			using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
				// WRITE HEADER
				var headerBytes = new PTableHeader(version).AsByteArray();
				cs.Write(headerBytes, 0, headerBytes.Length);

				// WRITE INDEX ENTRIES
				var buffer = new byte[indexEntrySize];
				long indexEntry = 0L;
				List<Midpoint> midpoints = new List<Midpoint>();
				var requiredMidpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);

				// Classic two-way merge: keep consuming from whichever enumerator's head compares
				// greater (CompareTo > 0), i.e. the PTable on-disk sort order.
				var enum1 = enumerators[0];
				var enum2 = enumerators[1];
				bool available1 = enum1.MoveNext();
				bool available2 = enum2.MoveNext();
				IndexEntry current;
				while (available1 || available2) {
					// NOTE(review): Current is read even when the corresponding enumerator is
					// exhausted; presumably EnumerableTable keeps Current at its last value — the
					// availability check below ensures a stale entry is never selected.
					var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version, enum1.Current.Position);
					var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version, enum2.Current.Position);

					if (available1 && (!available2 || entry1.CompareTo(entry2) > 0)) {
						current = entry1;
						available1 = enum1.MoveNext();
					} else {
						current = entry2;
						available2 = enum2.MoveNext();
					}

					AppendRecordTo(bs, buffer, version, current, indexEntrySize);
					if (version >= PTableVersions.IndexV4 &&
						IsMidpointIndex(indexEntry, numIndexEntries, requiredMidpointCount)) {
						midpoints.Add(new Midpoint(new IndexEntryKey(current.Stream, current.Version), indexEntry));
					}

					indexEntry++;
					dumpedEntryCount++;
				}

				//WRITE MIDPOINTS
				if (version >= PTableVersions.IndexV4) {
					if (dumpedEntryCount != numIndexEntries) {
						//if index entries have been removed, compute the midpoints again
						numIndexEntries = dumpedEntryCount;
						requiredMidpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
						midpoints = ComputeMidpoints(bs, f, version, indexEntrySize, numIndexEntries,
							requiredMidpointCount, midpoints);
					}
					WriteMidpointsTo(bs, f, version, indexEntrySize, buffer, dumpedEntryCount,
						numIndexEntries, requiredMidpointCount, midpoints);
				}

				bs.Flush();
				// Finalize the hash transform before writing the digest itself (which must not
				// be part of its own hash) directly to the underlying file stream.
				cs.FlushFinalBlock();
				f.SetLength(f.Position + MD5Size);

				// WRITE MD5
				var hash = md5.Hash;
				f.Write(hash, 0, hash.Length);
				f.FlushToDisk();
			}
		}
		Log.Trace("PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
			watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
		return (new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth, skipIndexVerify: skipIndexVerify));
	} finally {
		// Always release the enumerable wrappers, even if writing failed part-way.
		foreach (var enumerator in enumerators) {
			enumerator.Dispose();
		}
	}
}
/// <summary>
/// Merges exactly two PTables (specialized fast path for &lt;= 2 tables) into a new PTable file,
/// streaming index entries in sorted order while computing an MD5 checksum. Oldest variant:
/// raw PTable enumerators plus an in-loop 32-&gt;64-bit hash upgrade with a restart mechanism
/// when a source record no longer exists.
/// </summary>
/// <param name="tables">The source tables; this specialization assumes two tables.</param>
/// <param name="fileSize">Total size to preallocate for the output file.</param>
/// <param name="indexEntrySize">On-disk size of one index entry for the target <paramref name="version"/>.</param>
/// <param name="outputFile">Path of the new PTable file; must not already exist (FileMode.CreateNew).</param>
/// <param name="upgradeHash">Maps (stream id, old hash) to the upgraded hash for the target format.</param>
/// <param name="readRecord">Resolves an index entry to (stream id, exists) against the log.</param>
/// <param name="version">Target PTable format version being written.</param>
/// <returns>A new <see cref="PTable"/> opened over the merged output file.</returns>
private static PTable MergeTo2(IList<PTable> tables, long fileSize, int indexEntrySize, string outputFile,
	Func<string, ulong, ulong> upgradeHash, Func<IndexEntry, Tuple<string, bool>> readRecord,
	byte version, int cacheDepth) {
	Log.Trace("PTables merge started (specialized for <= 2 tables).");
	var watch = Stopwatch.StartNew();
	// Raw in-order enumerators over each table; no filtering/upgrading happens in the wrapper here.
	var enumerators = tables.Select(table =>
		new EnumerablePTable(table, table.IterateAllInOrder().GetEnumerator())).ToList();
	long dumpedEntryCount = 0;
	using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
		DefaultSequentialBufferSize, FileOptions.SequentialScan)) {
		f.SetLength(fileSize);
		f.Seek(0, SeekOrigin.Begin);
		using (var md5 = MD5.Create())
		// Everything written through cs/bs is folded into the MD5 hash as it streams to disk.
		using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
		using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize)) {
			// WRITE HEADER
			var headerBytes = new PTableHeader(version).AsByteArray();
			cs.Write(headerBytes, 0, headerBytes.Length);

			// WRITE INDEX ENTRIES
			var buffer = new byte[indexEntrySize];
			var enum1 = enumerators[0];
			var enum2 = enumerators[1];
			bool available1 = enum1.MoveNext();
			bool available2 = enum2.MoveNext();
			IndexEntry current;
			// The inner merge loop is restarted whenever a head entry's record turns out not to
			// exist during the hash upgrade: that head is skipped and both heads are re-read.
			bool restart;
			do {
				restart = false;
				while (available1 || available2) {
					// NOTE(review): Current is read even when the corresponding enumerator is
					// exhausted; presumably the enumerator keeps Current at its last value — the
					// availability check below ensures a stale entry is never selected.
					var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version, enum1.Current.Position);
					var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version, enum2.Current.Position);

					// Upgrade head 1's hash in-place when writing 64-bit from a 32-bit source table;
					// if its record is gone, advance past it and restart the merge loop.
					if (version == PTableVersions.Index64Bit &&
						enumerators[0].Table.Version == PTableVersions.Index32Bit) {
						var res = readRecord(entry1);
						if (!res.Item2) {
							available1 = enum1.MoveNext();
							restart = true;
							break;
						}
						entry1.Stream = upgradeHash(res.Item1, entry1.Stream);
					}
					// Same upgrade/skip handling for head 2.
					if (version == PTableVersions.Index64Bit &&
						enumerators[1].Table.Version == PTableVersions.Index32Bit) {
						var res = readRecord(entry2);
						if (!res.Item2) {
							available2 = enum2.MoveNext();
							restart = true;
							break;
						}
						entry2.Stream = upgradeHash(res.Item1, entry2.Stream);
					}

					// Two-way merge step: take whichever head compares greater (CompareTo > 0),
					// i.e. the PTable on-disk sort order.
					if (available1 && (!available2 || entry1.CompareTo(entry2) > 0)) {
						current = entry1;
						available1 = enum1.MoveNext();
					} else {
						current = entry2;
						available2 = enum2.MoveNext();
					}

					//Possibly doing another read if the record was read during the upgrade process
					var item = readRecord(current);
					if (item.Item2) {
						AppendRecordTo(bs, buffer, version, current, indexEntrySize);
						dumpedEntryCount += 1;
					}
				}
			} while (restart);

			bs.Flush();
			// Finalize the hash transform before writing the digest itself (which must not
			// be part of its own hash) directly to the underlying file stream.
			cs.FlushFinalBlock();
			f.SetLength(f.Position + MD5Size);

			// WRITE MD5
			var hash = md5.Hash;
			f.Write(hash, 0, hash.Length);
			f.FlushToDisk();
		}
	}
	Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).",
		watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
	return (new PTable(outputFile, Guid.NewGuid(), version, depth: cacheDepth));
}