        private bool ShouldRetrain(FixedSizeTree etagTree)
        {
            using (var it = etagTree.Iterate())
            {
                if (it.SeekToLast() == false)
                {
                    return false; // empty tree, nothing to train on
                }

                long lastEtag = it.CurrentKey;

                // bit 10 of the etag alternates every 1,024 writes, so this
                // cheaply limits retraining to every other block of 1,024 etags
                return (lastEtag & 1024) == 0;
            }
        }
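The mask check above gives a cheap retraining cadence: bit 10 of the last etag alternates every 1,024 writes, so only every other block of 1,024 etags is eligible. A minimal sketch of that behavior (the etag values and method are made up for illustration, not part of the original code base):

        // Hypothetical demonstration of the (etag & 1024) == 0 cadence.
        public static void DemonstrateRetrainCadence()
        {
            foreach (long lastEtag in new long[] { 0, 512, 1023, 1024, 2047, 2048 })
            {
                bool eligible = (lastEtag & 1024) == 0;
                Console.WriteLine($"etag {lastEtag}: retrain eligible = {eligible}");
            }
            // prints eligible = True for 0, 512, 1023 and 2048,
            // and False for 1024 and 2047
        }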
Example #2
        internal List<long> GetStreamPages(FixedSizeTree chunksTree, StreamInfo* info)
        {
            var pages = new List<long>();

            var chunkIndex = 0;

            using (var it = chunksTree.Iterate())
            {
                if (it.Seek(0) == false)
                {
                    return pages;
                }

                var totalSize = 0L;

                do
                {
                    var chunk = (ChunkDetails*)it.CreateReaderForCurrent().Base;

                    totalSize += chunk->ChunkSize;

                    long size = chunk->ChunkSize;

                    if (chunkIndex == chunksTree.NumberOfEntries - 1)
                    {
                        // the stream info is stored right after the last chunk
                        size += StreamInfo.SizeOf + info->TagSize;
                    }

                    var numberOfPages = VirtualPagerLegacyExtensions.GetNumberOfOverflowPages(size);

                    for (int i = 0; i < numberOfPages; i++)
                    {
                        pages.Add(chunk->PageNumber + i);
                    }

                    chunkIndex++;
                } while (it.MoveNext());

                if (totalSize != info->TotalSize)
                {
                    ThrowStreamSizeMismatch(chunksTree.Name, totalSize, info);
                }

                return pages;
            }
        }
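GetNumberOfOverflowPages is, at its core, a ceiling division of the value size by the page size. A rough stand-in for it, assuming an 8 KB page and a 64-byte page header carried on the first overflow page (both sizes are placeholders, not taken from the original code):

        // Rough stand-in for VirtualPagerLegacyExtensions.GetNumberOfOverflowPages;
        // the page and header sizes are assumptions for illustration only.
        private const int PageSize = 8 * 1024;
        private const int PageHeaderSize = 64;

        private static int GetNumberOfOverflowPagesSketch(long size)
        {
            // the first overflow page carries the page header, so account
            // for it before doing the ceiling division
            var total = size + PageHeaderSize;
            return (int)((total + PageSize - 1) / PageSize);
        }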
Example #3
        private static void CopyFixedSizeTree(FixedSizeTree fst, Func<Transaction, FixedSizeTree> createDestinationTree, StorageEnvironment compactedEnv, TransactionPersistentContext context, Action<long> onEntriesCopiedProgress, Action onAllEntriesCopied, CancellationToken token)
        {
            using (var it = fst.Iterate())
            {
                var copiedEntries = 0L;
                if (it.Seek(long.MinValue) == false)
                {
                    return;
                }

                do
                {
                    token.ThrowIfCancellationRequested();
                    using (var txw = compactedEnv.WriteTransaction(context))
                    {
                        var snd = createDestinationTree(txw);
                        var transactionSize = 0L;

                        do
                        {
                            token.ThrowIfCancellationRequested();

                            using (it.Value(out var val))
                                snd.Add(it.CurrentKey, val);
                            transactionSize += fst.ValueSize + sizeof(long);
                            copiedEntries++;

                            // report progress roughly 33 times over the whole copy
                            var reportRate = fst.NumberOfEntries / 33 + 1;
                            if (copiedEntries % reportRate == 0)
                            {
                                onEntriesCopiedProgress(copiedEntries);
                            }
                        } while (transactionSize < compactedEnv.Options.MaxScratchBufferSize / 2 && it.MoveNext());

                        txw.Commit();
                    }

                    compactedEnv.FlushLogToDataFile();

                    if (fst.NumberOfEntries == copiedEntries)
                    {
                        onAllEntriesCopied();
                    }
                } while (it.MoveNext());
            }
        }
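Note the batching: the inner loop keeps adding entries until the write transaction has consumed about half of the scratch buffer budget, then commits and flushes before resuming from where the iterator left off. A hypothetical call site, where the tree name, the FixedTreeFor helper, and the progress lambdas are illustrative rather than taken from the original code:

        // Hypothetical usage; the names below are illustrative only.
        CopyFixedSizeTree(
            sourceFst,
            txw => txw.FixedTreeFor(EtagsSlice),
            compactedEnv,
            context,
            copied => Console.WriteLine($"copied {copied} entries"),
            () => Console.WriteLine("all entries copied"),
            CancellationToken.None);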
Example #4
        public void DeleteFixedTree(FixedSizeTree tree, bool isInRoot = true)
        {
            while (true)
            {
                // open a fresh iterator for every deletion, since mutating
                // the tree invalidates an open iterator
                using (var it = tree.Iterate())
                {
                    if (it.Seek(long.MinValue) == false)
                    {
                        break;
                    }

                    var currentKey = it.CurrentKey;
                    tree.Delete(currentKey);
                }
            }

            if (isInRoot)
            {
                _lowLevelTransaction.RootObjects.Delete(tree.Name);
            }
        }
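Deleting through a loop of single-entry iterators looks wasteful, but it avoids pulling the tree out from under a live iterator. A hypothetical call site (the tree name and the FixedTreeFor helper are illustrative):

        // Hypothetical usage; "statsSlice" is an illustrative tree name.
        var fst = txw.FixedTreeFor(statsSlice);
        DeleteFixedTree(fst); // removes every entry, then the root reference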
Example #5
        internal static unsafe Queue<MapEntry> GetMapEntries(FixedSizeTree documentMapEntries)
        {
            var entries = new Queue<MapEntry>((int)documentMapEntries.NumberOfEntries);

            using (var it = documentMapEntries.Iterate())
            {
                // guard against an empty tree before touching CurrentKey
                if (it.Seek(long.MinValue) == false)
                {
                    return entries;
                }

                do
                {
                    var currentKey = it.CurrentKey;
                    ulong reduceKeyHash;

                    it.CreateReaderForCurrent().Read((byte*)&reduceKeyHash, sizeof(ulong));

                    entries.Enqueue(new MapEntry
                    {
                        Id = currentKey,
                        ReduceKeyHash = reduceKeyHash
                    });
                } while (it.MoveNext());
            }

            return entries;
        }
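Since entries are enqueued in key order, a consumer can replay the map entries in that same order. A small hedged usage sketch (the consumer loop is invented; MapEntry is assumed to expose Id and ReduceKeyHash as in the snippet above):

        // Hypothetical consumer of the queue returned above.
        var entries = GetMapEntries(documentMapEntries);
        while (entries.Count > 0)
        {
            var entry = entries.Dequeue();
            Console.WriteLine($"doc {entry.Id} -> reduce key hash {entry.ReduceKeyHash:X16}");
        }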
Example #6
        private void MaybeTrainCompressionDictionary(Table table, FixedSizeTree etagsTree)
        {
            // the idea is that we'll get better results by including the most recently
            // modified documents, so we iterate backwards over the etag index,
            // which is guaranteed to be always increasing
            var dataIds = ArrayPool<long>.Shared.Rent(256);
            var sizes = ArrayPool<UIntPtr>.Shared.Rent(256);

            try
            {
                int used = 0;
                var totalSize = 0;
                int totalSkipped = 0;

                using (var it = etagsTree.Iterate())
                {
                    if (it.SeekToLast() == false)
                    {
                        return; // empty table, nothing to train on
                    }
                    do
                    {
                        long id = it.CreateReaderForCurrent().ReadLittleEndianInt64();
                        table.DirectRead(id, out var size);
                        if (size > 32 * 1024)
                        {
                            if (totalSkipped++ > 16 * 1024)
                            {
                                return; // we are scanning too much, no need to try this hard
                            }
                            // documents this big will compress well on their own, and are
                            // likely *too* unique to add meaningfully to the dictionary,
                            // so skip them rather than sample them
                            continue;
                        }

                        sizes[used] = (UIntPtr)size;
                        dataIds[used++] = id;
                        totalSize += size;
                    } while (used < 256 && it.MovePrev() && totalSize < 1024 * 1024);
                }

                if (used < 16)
                {
                    return; // too few samples to measure
                }
                var tx = table._tx;
                using (tx.Allocator.Allocate(totalSize, out var buffer))
                {
                    var cur = buffer.Ptr;
                    for (int i = 0; i < used; i++)
                    {
                        var ptr = table.DirectRead(dataIds[i], out var size);
                        Memory.Copy(cur, ptr, size);
                        cur += size;
                    }

                    using (tx.Allocator.Allocate(
                               // the largest dictionary that can fit in a single page
                               // alongside the page header and the info struct
                               Constants.Storage.PageSize - PageHeader.SizeOf - sizeof(CompressionDictionaryInfo),
                               out var dictionaryBuffer))
                    {
                        Span<byte> dictionaryBufferSpan = dictionaryBuffer.ToSpan();
                        ZstdLib.Train(new ReadOnlySpan<byte>(buffer.Ptr, totalSize),
                                      new ReadOnlySpan<UIntPtr>(sizes, 0, used),
                                      ref dictionaryBufferSpan);

                        var dictionariesTree = tx.CreateTree(TableSchema.CompressionDictionariesSlice);

                        var newId = (int)(dictionariesTree.State.NumberOfEntries + 1);

                        using var compressionDictionary = new ZstdLib.CompressionDictionary(newId, dictionaryBuffer.Ptr, dictionaryBufferSpan.Length, 3);

                        if (ShouldReplaceDictionary(tx, compressionDictionary) == false)
                        {
                            return;
                        }

                        table.CurrentCompressionDictionaryId = newId;
                        compressionDictionary.ExpectedCompressionRatio = GetCompressionRatio(CompressedBuffer.Length, RawBuffer.Length);

                        var rev = Bits.SwapBytes(newId);
                        using (Slice.External(tx.Allocator, (byte*)&rev, sizeof(int), out var slice))
                        using (dictionariesTree.DirectAdd(slice, sizeof(CompressionDictionaryInfo) + dictionaryBufferSpan.Length, out var dest))
                        {
                            *(CompressionDictionaryInfo*)dest = new CompressionDictionaryInfo
                            {
                                ExpectedCompressionRatio = compressionDictionary.ExpectedCompressionRatio
                            };
                            Memory.Copy(dest + sizeof(CompressionDictionaryInfo), dictionaryBuffer.Ptr, dictionaryBufferSpan.Length);
                        }

                        tx.LowLevelTransaction.OnDispose += RecreateRecoveryDictionaries;
                    }
                }
            }
            finally
            {
                ArrayPool<long>.Shared.Return(dataIds);
                ArrayPool<UIntPtr>.Shared.Return(sizes);
            }
        }
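The sampling policy above is easy to miss in the control flow: at most 256 documents and roughly 1 MB of sample data are gathered, documents over 32 KB are skipped, the scan gives up entirely after 16 K skips, and training is abandoned below 16 usable samples. Restated as constants (the names are invented for illustration; the values come from the snippet):

        // Illustrative restatement of the sampling bounds used above.
        private const int MaxSamples = 256;          // size of the rented dataIds/sizes arrays
        private const int MaxSampleBytes = 1024 * 1024; // stop once ~1 MB has been gathered
        private const int MaxDocumentSize = 32 * 1024;  // larger documents are skipped
        private const int MaxSkippedDocs = 16 * 1024;   // abort if the scan skips too much
        private const int MinUsableSamples = 16;        // below this, training isn't worthwhile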