Example #1
        public bool TryCompression(Table table, TableSchema schema)
        {
            try
            {
                var tx       = table._tx;
                int maxSpace = ZstdLib.GetMaxCompression(RawBuffer.Length);
                _compressedScope = tx.Allocator.Allocate(maxSpace + OverheadSize, out CompressedBuffer);
                Compressed       = false;

                var compressionDictionary = tx.LowLevelTransaction.Environment.CompressionDictionariesHolder
                                            .GetCompressionDictionaryFor(tx, table.CurrentCompressionDictionaryId);

                CompressionTried = true;
                var size = ZstdLib.Compress(RawBuffer.ToReadOnlySpan(), CompressedBuffer.ToSpan(), compressionDictionary);
                size += WriteVariableSizeIntInReverse(CompressedBuffer.Ptr + size, compressionDictionary.Id);
                CompressedBuffer.Truncate(size);
                var compressionRatio = GetCompressionRatio(size, RawBuffer.Length);
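                // higher ratio means worse compression, so this only triggers retraining
                // when the result is at least 10 points worse than this dictionary's norm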
                if (compressionRatio > compressionDictionary.ExpectedCompressionRatio + 10)
                {
                    // training dictionaries is expensive, only do that if we see that the current compressed
                    // value is significantly worse than the previous one
                    var etagTree = table.GetFixedSizeTree(schema.CompressedEtagSourceIndex);
                    if (ShouldRetrain(etagTree))
                    {
                        MaybeTrainCompressionDictionary(table, etagTree);
                    }
                }

                if (CompressedBuffer.Length >= RawBuffer.Length)
                {
                    // the compressed value is no smaller than the raw one, so we skip compression here
                    _compressedScope.Dispose();
                    // Explicitly not disposing RawScope: we still need the raw buffer
                    // when an update turns into an insert because the value is too large
                    // RawScope.Dispose();
                    Compressed = false;
                    return false;
                }

                Compressed = true;
                return true;
            }
            catch
            {
                _compressedScope.Dispose();
                RawScope.Dispose();
                throw;
            }
        }
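
A note on the WriteVariableSizeIntInReverse call above: the dictionary id is appended after the compressed payload, so a reader that only knows where the value ends can still recover which dictionary to decompress with. The helper itself isn't shown here; the following is a minimal sketch of one way a "reversed varint" can work, and the names and exact byte layout are assumptions rather than the actual Voron implementation:

        using System;

        static class ReversedVarintSketch
        {
            // Write a standard 7-bit varint, then reverse its bytes so the
            // terminator (the byte without the high bit) comes first and the
            // value can be decoded by walking backward from the buffer's end.
            public static int WriteReversed(Span<byte> dest, int value)
            {
                uint v = (uint)value;
                int count = 0;
                while (v >= 0x80)
                {
                    dest[count++] = (byte)(v | 0x80);
                    v >>= 7;
                }
                dest[count++] = (byte)v;
                dest.Slice(0, count).Reverse();
                return count;
            }

            // Decode from the end: the low 7-bit groups sit last, so accumulate
            // with an increasing shift until a byte without the high bit stops us.
            public static int ReadReversed(ReadOnlySpan<byte> buffer)
            {
                int value = 0, shift = 0;
                for (int i = buffer.Length - 1; i >= 0; i--)
                {
                    byte b = buffer[i];
                    value |= (b & 0x7F) << shift;
                    shift += 7;
                    if ((b & 0x80) == 0)
                        break;
                }
                return value;
            }
        }

The benefit of the reversed encoding is that no length prefix is needed: the reader scans backward from the end of the truncated buffer until it hits the terminating byte.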
Example #2
        private void MaybeTrainCompressionDictionary(Table table, FixedSizeTree etagsTree)
        {
            // the idea is that we'll get better results by including the most recently modified documents
            // by iterating over the etag index, which is guaranteed to be always increasing
            var dataIds = ArrayPool<long>.Shared.Rent(256);
            var sizes = ArrayPool<UIntPtr>.Shared.Rent(256);

            try
            {
                int used         = 0;
                var totalSize    = 0;
                int totalSkipped = 0;

                using (var it = etagsTree.Iterate())
                {
                    if (it.SeekToLast() == false)
                    {
                        return; // empty table, nothing to train on
                    }
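                    // sampling budget: walk backward from the newest entry, taking at most
                    // 256 values and at most 1 MB in total, skipping values over 32 KB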
                    do
                    {
                        long id = it.CreateReaderForCurrent().ReadLittleEndianInt64();
                        table.DirectRead(id, out var size);
                        if (size > 32 * 1024)
                        {
                            if (totalSkipped++ > 16 * 1024)
                            {
                                return;  // we are scanning too much, no need to try this hard
                            }
                            // we don't want to include documents that are too big, they will compress
                            // well on their own, and likely be *too* unique to add meaningfully to the
                            // dictionary
                            continue;
                        }

                        sizes[used]     = (UIntPtr)size;
                        dataIds[used++] = id;
                        totalSize      += size;
                    } while (used < 256 && it.MovePrev() && totalSize < 1024 * 1024);
                }

                if (used < 16)
                {
                    return; // too few samples to measure
                }
                var tx = table._tx;
                using (tx.Allocator.Allocate(totalSize, out var buffer))
                {
                    var cur = buffer.Ptr;
                    for (int i = 0; i < used; i++)
                    {
                        var ptr = table.DirectRead(dataIds[i], out var size);
                        Memory.Copy(cur, ptr, size);
                        cur += size;
                    }

                    using (tx.Allocator.Allocate(
                               // the dictionary
                               Constants.Storage.PageSize - PageHeader.SizeOf - sizeof(CompressionDictionaryInfo)
                               , out var dictionaryBuffer))
                    {
                        Span<byte> dictionaryBufferSpan = dictionaryBuffer.ToSpan();
                        ZstdLib.Train(new ReadOnlySpan<byte>(buffer.Ptr, totalSize),
                                      new ReadOnlySpan<UIntPtr>(sizes, 0, used),
                                      ref dictionaryBufferSpan);

                        var dictionariesTree = tx.CreateTree(TableSchema.CompressionDictionariesSlice);

                        var newId = (int)(dictionariesTree.State.NumberOfEntries + 1);

                        using var compressionDictionary = new ZstdLib.CompressionDictionary(newId, dictionaryBuffer.Ptr, dictionaryBufferSpan.Length, 3);

                        if (ShouldReplaceDictionary(tx, compressionDictionary) == false)
                        {
                            return;
                        }

                        table.CurrentCompressionDictionaryId           = newId;
                        compressionDictionary.ExpectedCompressionRatio = GetCompressionRatio(CompressedBuffer.Length, RawBuffer.Length);

                        var rev = Bits.SwapBytes(newId);
                        using (Slice.External(tx.Allocator, (byte*)&rev, sizeof(int), out var slice))
                        using (dictionariesTree.DirectAdd(slice, sizeof(CompressionDictionaryInfo) + dictionaryBufferSpan.Length, out var dest))
                        {
                            *(CompressionDictionaryInfo*)dest = new CompressionDictionaryInfo
                            {
                                ExpectedCompressionRatio = compressionDictionary.ExpectedCompressionRatio
                            };
                            Memory.Copy(dest + sizeof(CompressionDictionaryInfo), dictionaryBuffer.Ptr, dictionaryBufferSpan.Length);
                        }

                        tx.LowLevelTransaction.OnDispose += RecreateRecoveryDictionaries;
                    }
                }
            }
            finally
            {
                ArrayPool<long>.Shared.Return(dataIds);
                ArrayPool<UIntPtr>.Shared.Return(sizes);
            }
        }
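
ShouldReplaceDictionary is not part of either example above. A plausible sketch, assuming it simply re-compresses the current raw value with the candidate dictionary and accepts it only when the output beats the buffer produced with the current dictionary; the actual implementation may differ:

        // Hypothetical sketch only, reusing the ZstdLib calls shown above and
        // assuming RawBuffer/CompressedBuffer hold the current value's raw and
        // compressed forms.
        private bool ShouldReplaceDictionary(Transaction tx, ZstdLib.CompressionDictionary newDictionary)
        {
            int maxSpace = ZstdLib.GetMaxCompression(RawBuffer.Length);
            using (tx.Allocator.Allocate(maxSpace, out var candidateBuffer))
            {
                var size = ZstdLib.Compress(RawBuffer.ToReadOnlySpan(), candidateBuffer.ToSpan(), newDictionary);
                // keep the current dictionary unless the new one is a strict improvement
                return size < CompressedBuffer.Length;
            }
        }

This keeps dictionary churn low: a freshly trained dictionary only displaces the current one when it demonstrably improves the very value whose poor ratio triggered the retraining.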