private bool ShouldRetrain(FixedSizeTree etagTree)
{
    using (var it = etagTree.Iterate())
    {
        if (it.SeekToLast() == false)
            return false; // empty table, nothing to retrain on

        long lastEtag = it.CurrentKey;
        // retrain once every 1,024 entries: mask with (1024 - 1) so only every
        // 1,024th etag matches (masking with 1024 itself would test a single bit
        // and match entire runs of 1,024 consecutive etags)
        return (lastEtag & 1023) == 0;
    }
}
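// A standalone sketch (not part of the original source) showing the cadence the
// mask arithmetic above produces: with a mask of 1024 - 1, only every 1,024th
// etag triggers retraining.
static class RetrainCadenceDemo
{
    private const long RetrainMask = 1024 - 1; // 0x3FF

    public static void Main()
    {
        foreach (long etag in new long[] { 1023, 1024, 2048, 3000, 4096 })
            System.Console.WriteLine($"{etag}: retrain = {(etag & RetrainMask) == 0}");
        // prints True only for 1024, 2048 and 4096
    }
}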
internal List<long> GetStreamPages(FixedSizeTree chunksTree, StreamInfo* info)
{
    var pages = new List<long>();
    var chunkIndex = 0;

    using (var it = chunksTree.Iterate())
    {
        if (it.Seek(0) == false)
            return pages;

        var totalSize = 0L;

        do
        {
            var chunk = (ChunkDetails*)it.CreateReaderForCurrent().Base;
            totalSize += chunk->ChunkSize;

            long size = chunk->ChunkSize;
            if (chunkIndex == chunksTree.NumberOfEntries - 1)
            {
                // stream info is put after the last chunk
                size += StreamInfo.SizeOf + info->TagSize;
            }

            var numberOfPages = VirtualPagerLegacyExtensions.GetNumberOfOverflowPages(size);
            for (int i = 0; i < numberOfPages; i++)
            {
                pages.Add(chunk->PageNumber + i);
            }

            chunkIndex++;
        } while (it.MoveNext());

        if (totalSize != info->TotalSize)
            ThrowStreamSizeMismatch(chunksTree.Name, totalSize, info);

        return pages;
    }
}
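// The chunk layout the cast above relies on, inferred from the two fields the
// method reads; the exact declaration in the real codebase (layout attributes,
// field types, extra members) may differ.
public struct ChunkDetails
{
    public long PageNumber; // first overflow page holding the chunk's bytes
    public int ChunkSize;   // chunk length in bytes
}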
private static void CopyFixedSizeTree(FixedSizeTree fst, Func<Transaction, FixedSizeTree> createDestinationTree,
    StorageEnvironment compactedEnv, TransactionPersistentContext context,
    Action<long> onEntriesCopiedProgress, Action onAllEntriesCopied, CancellationToken token)
{
    using (var it = fst.Iterate())
    {
        var copiedEntries = 0L;
        if (it.Seek(long.MinValue) == false)
            return;

        do
        {
            token.ThrowIfCancellationRequested();

            using (var txw = compactedEnv.WriteTransaction(context))
            {
                var snd = createDestinationTree(txw);
                var transactionSize = 0L;

                do
                {
                    token.ThrowIfCancellationRequested();

                    using (it.Value(out var val))
                        snd.Add(it.CurrentKey, val);

                    transactionSize += fst.ValueSize + sizeof(long);
                    copiedEntries++;

                    // report progress roughly 33 times over the whole copy
                    var reportRate = fst.NumberOfEntries / 33 + 1;
                    if (copiedEntries % reportRate == 0)
                        onEntriesCopiedProgress(copiedEntries);

                    // commit the batch once it grows past half the scratch buffer
                } while (transactionSize < compactedEnv.Options.MaxScratchBufferSize / 2 && it.MoveNext());

                txw.Commit();
            }

            compactedEnv.FlushLogToDataFile();

            if (fst.NumberOfEntries == copiedEntries)
                onAllEntriesCopied();
        } while (it.MoveNext());
    }
}
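// A hypothetical call site (the variable names and the FixedTreeFor helper are
// illustrative assumptions, not taken from the original source): copying one
// fixed-size tree during compaction, with simple progress reporting.
CopyFixedSizeTree(sourceFst,
    txw => txw.FixedTreeFor(sourceFst.Name, sourceFst.ValueSize),
    compactedEnv,
    context,
    copied => Console.WriteLine($"copied {copied:N0} entries"),
    () => Console.WriteLine("all entries copied"),
    token);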
public void DeleteFixedTree(FixedSizeTree tree, bool isInRoot = true)
{
    // remove entries one at a time, opening a fresh iterator after every delete,
    // since the tree is mutated underneath the iterator
    while (true)
    {
        using (var it = tree.Iterate())
        {
            if (it.Seek(long.MinValue) == false)
                break; // tree is empty, we are done

            var currentKey = it.CurrentKey;
            tree.Delete(currentKey);
        }
    }

    if (isInRoot)
        _lowLevelTransaction.RootObjects.Delete(tree.Name);
}
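// Hypothetical usage (illustrative only; env, treeName and the FixedTreeFor
// signature are assumptions): dropping a fixed-size tree and its root objects
// entry inside a single write transaction.
using (var txw = env.WriteTransaction())
{
    var fst = txw.FixedTreeFor(treeName, valSize: 8);
    txw.DeleteFixedTree(fst); // deletes every entry, then the root objects entry
    txw.Commit();
}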
internal static unsafe Queue<MapEntry> GetMapEntries(FixedSizeTree documentMapEntries)
{
    var entries = new Queue<MapEntry>((int)documentMapEntries.NumberOfEntries);

    using (var it = documentMapEntries.Iterate())
    {
        // position on the first entry; bail out if the tree is empty
        if (it.Seek(long.MinValue) == false)
            return entries;

        do
        {
            var currentKey = it.CurrentKey;
            ulong reduceKeyHash;
            it.CreateReaderForCurrent().Read((byte*)&reduceKeyHash, sizeof(ulong));

            entries.Enqueue(new MapEntry
            {
                Id = currentKey,
                ReduceKeyHash = reduceKeyHash
            });
        } while (it.MoveNext());
    }

    return entries;
}
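// The MapEntry shape implied by the reads above; the actual declaration in the
// codebase may carry more members.
public struct MapEntry
{
    public long Id;             // the fixed-size tree key
    public ulong ReduceKeyHash; // the 8-byte value stored with each entry
}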
private void MaybeTrainCompressionDictionary(Table table, FixedSizeTree etagsTree)
{
    // the idea is that we'll get better results by including the most recently modified documents,
    // by iterating over the etag index, which is guaranteed to be always increasing
    var dataIds = ArrayPool<long>.Shared.Rent(256);
    var sizes = ArrayPool<UIntPtr>.Shared.Rent(256);
    try
    {
        int used = 0;
        var totalSize = 0;
        int totalSkipped = 0;

        using (var it = etagsTree.Iterate())
        {
            if (it.SeekToLast() == false)
                return; // empty table, nothing to train on

            do
            {
                long id = it.CreateReaderForCurrent().ReadLittleEndianInt64();
                table.DirectRead(id, out var size);
                if (size > 32 * 1024)
                {
                    if (totalSkipped++ > 16 * 1024)
                        return; // we are scanning too much, no need to try this hard

                    // we want to skip documents that are too big, they will compress
                    // well on their own, and likely be *too* unique to add meaningfully to the
                    // dictionary
                    continue;
                }

                sizes[used] = (UIntPtr)size;
                dataIds[used++] = id;
                totalSize += size;
            } while (used < 256 && it.MovePrev() && totalSize < 1024 * 1024);
        }

        if (used < 16)
            return; // too few samples to measure

        var tx = table._tx;
        using (tx.Allocator.Allocate(totalSize, out var buffer))
        {
            // gather the sampled entries into a single contiguous buffer
            var cur = buffer.Ptr;
            for (int i = 0; i < used; i++)
            {
                var ptr = table.DirectRead(dataIds[i], out var size);
                Memory.Copy(cur, ptr, size);
                cur += size;
            }

            using (tx.Allocator.Allocate(
                       // the dictionary, together with its info header, must fit in a single page
                       Constants.Storage.PageSize - PageHeader.SizeOf - sizeof(CompressionDictionaryInfo),
                       out var dictionaryBuffer))
            {
                Span<byte> dictionaryBufferSpan = dictionaryBuffer.ToSpan();
                ZstdLib.Train(new ReadOnlySpan<byte>(buffer.Ptr, totalSize),
                    new ReadOnlySpan<UIntPtr>(sizes, 0, used),
                    ref dictionaryBufferSpan);

                var dictionariesTree = tx.CreateTree(TableSchema.CompressionDictionariesSlice);

                var newId = (int)(dictionariesTree.State.NumberOfEntries + 1);

                using var compressionDictionary = new ZstdLib.CompressionDictionary(newId,
                    dictionaryBuffer.Ptr, dictionaryBufferSpan.Length, 3);

                if (ShouldReplaceDictionary(tx, compressionDictionary) == false)
                    return;

                table.CurrentCompressionDictionaryId = newId;
                compressionDictionary.ExpectedCompressionRatio = GetCompressionRatio(CompressedBuffer.Length, RawBuffer.Length);

                var rev = Bits.SwapBytes(newId);
                using (Slice.External(tx.Allocator, (byte*)&rev, sizeof(int), out var slice))
                using (dictionariesTree.DirectAdd(slice, sizeof(CompressionDictionaryInfo) + dictionaryBufferSpan.Length, out var dest))
                {
                    *((CompressionDictionaryInfo*)dest) = new CompressionDictionaryInfo
                    {
                        ExpectedCompressionRatio = compressionDictionary.ExpectedCompressionRatio
                    };
                    Memory.Copy(dest + sizeof(CompressionDictionaryInfo), dictionaryBuffer.Ptr, dictionaryBufferSpan.Length);
                }

                tx.LowLevelTransaction.OnDispose += RecreateRecoveryDictionaries;
            }
        }
    }
    finally
    {
        ArrayPool<long>.Shared.Return(dataIds);
        ArrayPool<UIntPtr>.Shared.Return(sizes);
    }
}
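// The on-disk layout the DirectAdd above produces, and the header struct it
// implies; the field type below is an assumption based on how the value is used
// here, and the real struct may hold more fields:
//
//   [ CompressionDictionaryInfo header | raw zstd dictionary bytes ]
//
public struct CompressionDictionaryInfo
{
    public float ExpectedCompressionRatio; // written just before the dictionary payload
}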