Esempio n. 1
0
        /// <summary>
        /// Merges the first two tables of <paramref name="tables"/> into a new index file at
        /// <paramref name="outputFile"/> and returns a <see cref="PTable"/> opened over that file.
        /// </summary>
        /// <remarks>
        /// Only <c>tables[0]</c> and <c>tables[1]</c> are enumerated (callers presumably pass exactly two
        /// tables - TODO confirm). On every step the candidate that compares greater is written first; on a
        /// tie the entry from the second table wins. The file receives, in order: the header, the index
        /// entries, the midpoints (IndexV4 and later only) and finally the MD5 hash of the data that went
        /// through the hashing stream. The per-table enumerators are always disposed, even on failure.
        /// </remarks>
        /// <param name="tables">Source tables; their entry counts also size the bloom filter.</param>
        /// <param name="numIndexEntries">Expected number of entries; used to pre-size the file and midpoints.</param>
        /// <param name="indexEntrySize">Size in bytes of the scratch buffer used for one index entry.</param>
        /// <param name="outputFile">Path of the file to create; it must not exist yet (<c>FileMode.CreateNew</c>).</param>
        /// <param name="upgradeHash">Forwarded to each per-table enumerator.</param>
        /// <param name="existsAt">Forwarded to each per-table enumerator.</param>
        /// <param name="readRecord">Forwarded to each per-table enumerator.</param>
        /// <param name="version">Index version of the file being written.</param>
        /// <param name="initialReaders">Forwarded to the resulting <see cref="PTable"/>.</param>
        /// <param name="maxReaders">Forwarded to the resulting <see cref="PTable"/>.</param>
        /// <param name="cacheDepth">Used for the midpoint count and forwarded to the resulting table.</param>
        /// <param name="skipIndexVerify">Forwarded to the resulting <see cref="PTable"/>.</param>
        /// <param name="useBloomFilter">Passed to <c>ConstructBloomFilter</c> and to the resulting table.</param>
        /// <param name="lruCacheSize">Forwarded to the resulting <see cref="PTable"/>.</param>
        /// <returns>A new <see cref="PTable"/> over <paramref name="outputFile"/>.</returns>
        private static PTable MergeTo2<TStreamId>(
            IList<PTable> tables, long numIndexEntries, int indexEntrySize, string outputFile,
            Func<TStreamId, ulong, ulong> upgradeHash, Func<IndexEntry, bool> existsAt,
            Func<IndexEntry, Tuple<TStreamId, bool>> readRecord,
            byte version, int initialReaders, int maxReaders,
            int cacheDepth, bool skipIndexVerify,
            bool useBloomFilter, int lruCacheSize)
        {
            Log.Debug("PTables merge started (specialized for <= 2 tables).");
            var stopwatch = Stopwatch.StartNew();

            var entriesRegionSize = GetFileSizeUpToIndexEntries(numIndexEntries, version);

            // One upgrading/filtering enumerator per source table.
            var sources = new List<EnumerableTable<TStreamId>>();
            foreach (var table in tables)
            {
                sources.Add(new EnumerableTable<TStreamId>(version, table, upgradeHash, existsAt, readRecord));
            }

            try
            {
                long writtenEntries = 0;
                using (var output = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                                   DefaultSequentialBufferSize, FileOptions.SequentialScan))
                {
                    output.SetLength(entriesRegionSize);
                    output.Seek(0, SeekOrigin.Begin);

                    // Using declarations are disposed in reverse order when this block ends, i.e. before the
                    // file stream itself and before the merged file is re-opened as a PTable below.
                    using var bloomFilter = ConstructBloomFilter(useBloomFilter, outputFile, tables.Sum(table => table.Count));
                    using var md5 = MD5.Create();
                    using var hashingStream = new CryptoStream(output, md5, CryptoStreamMode.Write);
                    using var bufferedStream = new BufferedStream(hashingStream, DefaultSequentialBufferSize);

                    // Header: written straight to the hashing stream; the buffered stream is still empty here.
                    var header = new PTableHeader(version).AsByteArray();
                    hashingStream.Write(header, 0, header.Length);

                    // Index entries.
                    var entryBuffer = new byte[indexEntrySize];
                    long entryIndex = 0L;
                    var midpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                    using var midpoints = new UnmanagedMemoryAppendOnlyList<Midpoint>((int)midpointCount + MidpointsOverflowSafetyNet);

                    bool writesMidpoints = version >= PTableVersions.IndexV4;

                    var first = sources[0];
                    var second = sources[1];
                    bool firstHasEntry = first.MoveNext();
                    bool secondHasEntry = second.MoveNext();
                    ulong? lastFilteredHash = null;

                    while (firstHasEntry || secondHasEntry)
                    {
                        var candidate1 = new IndexEntry(first.Current.Stream, first.Current.Version, first.Current.Position);
                        var candidate2 = new IndexEntry(second.Current.Stream, second.Current.Version, second.Current.Position);

                        // Take from the first table only when it still has an entry and that entry is strictly
                        // greater than the second table's (or the second table is exhausted).
                        bool takeFirst = firstHasEntry && (!secondHasEntry || candidate1.CompareTo(candidate2) > 0);

                        IndexEntry next;
                        if (takeFirst)
                        {
                            next = candidate1;
                            firstHasEntry = first.MoveNext();
                        }
                        else
                        {
                            next = candidate2;
                            secondHasEntry = second.MoveNext();
                        }

                        AppendRecordTo(bufferedStream, entryBuffer, version, next, indexEntrySize);

                        if (writesMidpoints && IsMidpointIndex(entryIndex, numIndexEntries, midpointCount))
                        {
                            var key = new IndexEntryKey(next.Stream, next.Version);
                            midpoints.Add(new Midpoint(key, entryIndex));
                        }

                        // Bloom filter: add each stream hash once per run of consecutive equal hashes.
                        if (bloomFilter != null && next.Stream != lastFilteredHash)
                        {
                            // upgradeHash has already ensured the hash is in the right format for the target
                            var streamHash = next.Stream;
                            bloomFilter.Add(GetSpan(ref streamHash));
                            lastFilteredHash = next.Stream;
                        }

                        entryIndex++;
                        writtenEntries++;
                    }

                    // Midpoints.
                    if (writesMidpoints)
                    {
                        if (writtenEntries != numIndexEntries)
                        {
                            // Fewer (or more) entries were written than announced: recompute the midpoints.
                            numIndexEntries = writtenEntries;
                            midpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                            ComputeMidpoints(bufferedStream, output, version, indexEntrySize, numIndexEntries,
                                             midpointCount, midpoints);
                        }

                        WriteMidpointsTo(bufferedStream, output, version, indexEntrySize, entryBuffer, writtenEntries,
                                         numIndexEntries, midpointCount, midpoints);
                    }

                    bloomFilter?.Flush();
                    bufferedStream.Flush();
                    hashingStream.FlushFinalBlock();

                    // Make room for the hash, then append it directly to the file (it is not part of the hashed data).
                    output.SetLength(output.Position + MD5Size);

                    var hash = md5.Hash;
                    output.Write(hash, 0, hash.Length);
                    output.FlushToDisk();
                }

                Log.Debug(
                    "PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
                    stopwatch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), writtenEntries);

                return new PTable(outputFile, Guid.NewGuid(), initialReaders, maxReaders, cacheDepth,
                                  skipIndexVerify, useBloomFilter, lruCacheSize);
            }
            finally
            {
                sources.ForEach(source => source.Dispose());
            }
        }
        /// <summary>
        /// Merges the first two tables of <paramref name="tables"/> into a new index file at
        /// <paramref name="outputFile"/> and returns a <see cref="PTable"/> opened over that file.
        /// </summary>
        /// <remarks>
        /// Only <c>tables[0]</c> and <c>tables[1]</c> are enumerated (callers presumably pass exactly two
        /// tables - TODO confirm). On every step the candidate that compares greater is written first; on a
        /// tie the entry from the second table wins. The file receives, in order: the header, the index
        /// entries, the midpoints (IndexV4 and later only) and finally the MD5 hash of the data that went
        /// through the hashing stream. The per-table enumerators are always disposed, even on failure.
        /// </remarks>
        /// <param name="tables">Source tables.</param>
        /// <param name="numIndexEntries">Expected number of entries; used to pre-size the file and midpoints.</param>
        /// <param name="indexEntrySize">Size in bytes of the scratch buffer used for one index entry.</param>
        /// <param name="outputFile">Path of the file to create; it must not exist yet (<c>FileMode.CreateNew</c>).</param>
        /// <param name="upgradeHash">Forwarded to each per-table enumerator.</param>
        /// <param name="existsAt">Forwarded to each per-table enumerator.</param>
        /// <param name="readRecord">Forwarded to each per-table enumerator.</param>
        /// <param name="version">Index version of the file being written.</param>
        /// <param name="cacheDepth">Used for the midpoint count and forwarded to the resulting table.</param>
        /// <param name="skipIndexVerify">Forwarded to the resulting <see cref="PTable"/>.</param>
        /// <returns>A new <see cref="PTable"/> over <paramref name="outputFile"/>.</returns>
        private static PTable MergeTo2(IList<PTable> tables, long numIndexEntries, int indexEntrySize, string outputFile,
                                       Func<string, ulong, ulong> upgradeHash, Func<IndexEntry, bool> existsAt,
                                       Func<IndexEntry, Tuple<string, bool>> readRecord,
                                       byte version, int cacheDepth, bool skipIndexVerify)
        {
            Log.Trace("PTables merge started (specialized for <= 2 tables).");
            var stopwatch = Stopwatch.StartNew();

            var entriesRegionSize = GetFileSizeUpToIndexEntries(numIndexEntries, version);

            // One upgrading/filtering enumerator per source table.
            var sources = new List<EnumerableTable>();
            foreach (var table in tables)
            {
                sources.Add(new EnumerableTable(version, table, upgradeHash, existsAt, readRecord));
            }

            try
            {
                long writtenEntries = 0;
                using (var output = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                                   DefaultSequentialBufferSize, FileOptions.SequentialScan))
                {
                    output.SetLength(entriesRegionSize);
                    output.Seek(0, SeekOrigin.Begin);

                    using (var md5 = MD5.Create())
                    using (var hashingStream = new CryptoStream(output, md5, CryptoStreamMode.Write))
                    using (var bufferedStream = new BufferedStream(hashingStream, DefaultSequentialBufferSize))
                    {
                        // Header: written straight to the hashing stream; the buffered stream is still empty here.
                        var header = new PTableHeader(version).AsByteArray();
                        hashingStream.Write(header, 0, header.Length);

                        // Index entries.
                        var entryBuffer = new byte[indexEntrySize];
                        long entryIndex = 0L;
                        List<Midpoint> midpoints = new List<Midpoint>();
                        var midpointCount = GetRequiredMidpointCountCached(numIndexEntries, version, cacheDepth);
                        bool writesMidpoints = version >= PTableVersions.IndexV4;

                        var first = sources[0];
                        var second = sources[1];
                        bool firstHasEntry = first.MoveNext();
                        bool secondHasEntry = second.MoveNext();

                        while (firstHasEntry || secondHasEntry)
                        {
                            var candidate1 = new IndexEntry(first.Current.Stream, first.Current.Version, first.Current.Position);
                            var candidate2 = new IndexEntry(second.Current.Stream, second.Current.Version, second.Current.Position);

                            // Take from the first table only when it still has an entry and that entry is strictly
                            // greater than the second table's (or the second table is exhausted).
                            bool takeFirst = firstHasEntry && (!secondHasEntry || candidate1.CompareTo(candidate2) > 0);

                            IndexEntry next;
                            if (takeFirst)
                            {
                                next = candidate1;
                                firstHasEntry = first.MoveNext();
                            }
                            else
                            {
                                next = candidate2;
                                secondHasEntry = second.MoveNext();
                            }

                            AppendRecordTo(bufferedStream, entryBuffer, version, next, indexEntrySize);

                            if (writesMidpoints && IsMidpointIndex(entryIndex, numIndexEntries, midpointCount))
                            {
                                var key = new IndexEntryKey(next.Stream, next.Version);
                                midpoints.Add(new Midpoint(key, entryIndex));
                            }

                            entryIndex++;
                            writtenEntries++;
                        }

                        // Midpoints.
                        if (writesMidpoints)
                        {
                            if (writtenEntries != numIndexEntries)
                            {
                                // Fewer (or more) entries were written than announced: recompute the midpoints.
                                numIndexEntries = writtenEntries;
                                midpointCount = GetRequiredMidpointCount(numIndexEntries, version, cacheDepth);
                                midpoints = ComputeMidpoints(bufferedStream, output, version, indexEntrySize,
                                                             numIndexEntries, midpointCount, midpoints);
                            }

                            WriteMidpointsTo(bufferedStream, output, version, indexEntrySize, entryBuffer, writtenEntries,
                                             numIndexEntries, midpointCount, midpoints);
                        }

                        bufferedStream.Flush();
                        hashingStream.FlushFinalBlock();

                        // Make room for the hash, then append it directly to the file (it is not part of the hashed data).
                        output.SetLength(output.Position + MD5Size);

                        var hash = md5.Hash;
                        output.Write(hash, 0, hash.Length);
                        output.FlushToDisk();
                    }
                }

                Log.Trace("PTables merge finished in {elapsed} ([{entryCount}] entries merged into {dumpedEntryCount}).",
                          stopwatch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), writtenEntries);

                return new PTable(outputFile, Guid.NewGuid(), depth: cacheDepth, skipIndexVerify: skipIndexVerify);
            }
            finally
            {
                sources.ForEach(source => source.Dispose());
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Merges the first two tables of <paramref name="tables"/> into a new index file at
        /// <paramref name="outputFile"/> and returns a <see cref="PTable"/> opened over that file.
        /// </summary>
        /// <remarks>
        /// Only <c>tables[0]</c> and <c>tables[1]</c> are enumerated (callers presumably pass exactly two
        /// tables - TODO confirm). When the target is <c>Index64Bit</c> and a source table is
        /// <c>Index32Bit</c>, each of its entries is looked up through <paramref name="readRecord"/>: entries
        /// whose record is not found are skipped, the others get their stream hash replaced by the result of
        /// <paramref name="upgradeHash"/>. On every step the candidate that compares greater is selected; on a
        /// tie the entry from the second table wins. The selected entry is written only if
        /// <paramref name="readRecord"/> reports that its record exists. The file receives the header, the
        /// surviving entries and finally the MD5 hash of the data that went through the hashing stream.
        /// The per-table enumerators are always disposed, even when the merge fails.
        /// </remarks>
        /// <param name="tables">Source tables.</param>
        /// <param name="fileSize">Initial length the output file is set to before writing.</param>
        /// <param name="indexEntrySize">Size in bytes of the scratch buffer used for one index entry.</param>
        /// <param name="outputFile">Path of the file to create; it must not exist yet (<c>FileMode.CreateNew</c>).</param>
        /// <param name="upgradeHash">Computes the upgraded stream hash from the record's stream id and the old hash.</param>
        /// <param name="readRecord">Looks up the record of an entry; <c>Item2</c> tells whether it was found.</param>
        /// <param name="version">Index version of the file being written.</param>
        /// <param name="cacheDepth">Forwarded to the resulting <see cref="PTable"/>.</param>
        /// <returns>A new <see cref="PTable"/> over <paramref name="outputFile"/>.</returns>
        private static PTable MergeTo2(IList <PTable> tables, long fileSize, int indexEntrySize, string outputFile,
                                       Func <string, ulong, ulong> upgradeHash, Func <IndexEntry, Tuple <string, bool> > readRecord,
                                       byte version, int cacheDepth)
        {
            Log.Trace("PTables merge started (specialized for <= 2 tables).");
            var watch = Stopwatch.StartNew();

            var  enumerators      = tables.Select(table => new EnumerablePTable(table, table.IterateAllInOrder().GetEnumerator())).ToList();
            long dumpedEntryCount = 0;

            try
            {
                using (var f = new FileStream(outputFile, FileMode.CreateNew, FileAccess.ReadWrite, FileShare.None,
                                              DefaultSequentialBufferSize, FileOptions.SequentialScan))
                {
                    f.SetLength(fileSize);
                    f.Seek(0, SeekOrigin.Begin);

                    using (var md5 = MD5.Create())
                    using (var cs = new CryptoStream(f, md5, CryptoStreamMode.Write))
                    using (var bs = new BufferedStream(cs, DefaultSequentialBufferSize))
                    {
                        // WRITE HEADER (straight to the hashing stream; the buffered stream is still empty here)
                        var headerBytes = new PTableHeader(version).AsByteArray();
                        cs.Write(headerBytes, 0, headerBytes.Length);

                        // WRITE INDEX ENTRIES
                        var  buffer     = new byte[indexEntrySize];
                        var  enum1      = enumerators[0];
                        var  enum2      = enumerators[1];
                        bool available1 = enum1.MoveNext();
                        bool available2 = enum2.MoveNext();

                        // Whether entries of each source need their 32-bit hash upgraded; constant for the merge.
                        bool upgrade1 = version == PTableVersions.Index64Bit && enum1.Table.Version == PTableVersions.Index32Bit;
                        bool upgrade2 = version == PTableVersions.Index64Bit && enum2.Table.Version == PTableVersions.Index32Bit;

                        while (available1 || available2)
                        {
                            var entry1 = new IndexEntry(enum1.Current.Stream, enum1.Current.Version, enum1.Current.Position);
                            var entry2 = new IndexEntry(enum2.Current.Stream, enum2.Current.Version, enum2.Current.Position);

                            // Only upgrade a side that still has an entry. An exhausted enumerator exposes a stale
                            // Current: reading it is wasted work and, if that stale record no longer exists, the
                            // skip-and-retry below would repeat forever while the other table still has entries.
                            if (available1 && upgrade1)
                            {
                                var res = readRecord(entry1);
                                if (!res.Item2)
                                {
                                    // Record is gone: drop this entry and re-evaluate from the top.
                                    available1 = enum1.MoveNext();
                                    continue;
                                }
                                entry1.Stream = upgradeHash(res.Item1, entry1.Stream);
                            }
                            if (available2 && upgrade2)
                            {
                                var res = readRecord(entry2);
                                if (!res.Item2)
                                {
                                    // Record is gone: drop this entry and re-evaluate from the top.
                                    available2 = enum2.MoveNext();
                                    continue;
                                }
                                entry2.Stream = upgradeHash(res.Item1, entry2.Stream);
                            }

                            IndexEntry current;
                            if (available1 && (!available2 || entry1.CompareTo(entry2) > 0))
                            {
                                current    = entry1;
                                available1 = enum1.MoveNext();
                            }
                            else
                            {
                                current    = entry2;
                                available2 = enum2.MoveNext();
                            }

                            //Possibly doing another read if the record was read during the upgrade process
                            var item = readRecord(current);
                            if (item.Item2)
                            {
                                AppendRecordTo(bs, buffer, version, current, indexEntrySize);
                                dumpedEntryCount += 1;
                            }
                        }

                        bs.Flush();
                        cs.FlushFinalBlock();

                        // Make room for the hash, then append it directly to the file (it is not part of the hashed data).
                        f.SetLength(f.Position + MD5Size);

                        // WRITE MD5
                        var hash = md5.Hash;
                        f.Write(hash, 0, hash.Length);
                        f.FlushToDisk();
                    }
                }

                Log.Trace("PTables merge finished in {0} ([{1}] entries merged into {2}).",
                          watch.Elapsed, string.Join(", ", tables.Select(x => x.Count)), dumpedEntryCount);
                return new PTable(outputFile, Guid.NewGuid(), version, depth: cacheDepth);
            }
            finally
            {
                // Release the underlying table iterators even when the merge fails part-way.
                foreach (var enumerator in enumerators)
                {
                    enumerator.Dispose();
                }
            }
        }