示例#1
0
        /* ZSTD_ldm_skipRawSeqStoreBytes():
         * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
         * Not to be used in conjunction with ZSTD_ldm_skipSequences().
         * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
         */
        public static void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t *rawSeqStore, nuint nbBytes)
        {
            // Bytes left to consume, measured from the start of the current sequence
            // (the already-consumed part of that sequence is folded in).
            uint bytesLeft = (uint)(rawSeqStore->posInSequence + nbBytes);

            while (bytesLeft != 0 && rawSeqStore->pos < rawSeqStore->size)
            {
                rawSeq current = rawSeqStore->seq[rawSeqStore->pos];
                uint seqLength = current.litLength + current.matchLength;

                if (bytesLeft < seqLength)
                {
                    // We stop inside this sequence: record how far into it we are.
                    rawSeqStore->posInSequence = bytesLeft;
                    break;
                }

                // The whole sequence is consumed; advance to the next one.
                bytesLeft -= seqLength;
                rawSeqStore->pos++;
            }

            // Landing exactly on a sequence boundary, or running out of sequences,
            // leaves no partial position to remember.
            if (bytesLeft == 0 || rawSeqStore->pos == rawSeqStore->size)
            {
                rawSeqStore->posInSequence = 0;
            }
        }
示例#2
0
        /**
         * If the sequence length is longer than remaining then the sequence is split
         * between this block and the next.
         *
         * Returns the current sequence to handle, or if the rest of the block should
         * be literals, it returns a sequence with offset == 0.
         */
        private static rawSeq maybeSplitSequence(rawSeqStore_t *rawSeqStore, uint remaining, uint minMatch)
        {
            rawSeq current = rawSeqStore->seq[rawSeqStore->pos];

            assert(current.offset > 0);

            uint fullLength = current.litLength + current.matchLength;

            // Common case: the whole sequence fits in what is left of the block.
            if (remaining >= fullLength)
            {
                rawSeqStore->pos++;
                return current;
            }

            // The sequence has to be cut short. offset == 0 tells the caller that
            // the rest of the block is literals only.
            if (remaining <= current.litLength)
            {
                current.offset = 0;
            }
            else
            {
                // Here litLength < remaining < fullLength: only part of the match fits.
                current.matchLength = remaining - current.litLength;
                if (current.matchLength < minMatch)
                {
                    current.offset = 0;
                }
            }

            // Consume `remaining` bytes from the store so later blocks see the rest.
            ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
            return current;
        }
示例#3
0
        /**
         * ZSTD_ldm_skipSequences():
         *
         * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
         * Avoids emitting matches less than `minMatch` bytes.
         * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
         */
        public static void ZSTD_ldm_skipSequences(rawSeqStore_t *rawSeqStore, nuint srcSize, uint minMatch)
        {
            nuint bytesToSkip = srcSize;

            while (bytesToSkip > 0 && rawSeqStore->pos < rawSeqStore->size)
            {
                rawSeq *head = &rawSeqStore->seq[rawSeqStore->pos];

                // Everything left to skip lies within this sequence's literals.
                if (bytesToSkip <= head->litLength)
                {
                    head->litLength -= (uint)bytesToSkip;
                    return;
                }

                // Literals are fully skipped; continue into the match.
                bytesToSkip    -= head->litLength;
                head->litLength = 0;

                // The match is fully skipped as well: drop the sequence and go on.
                if (bytesToSkip >= head->matchLength)
                {
                    bytesToSkip      -= head->matchLength;
                    head->matchLength = 0;
                    rawSeqStore->pos++;
                    continue;
                }

                // Only the front of the match is skipped.
                head->matchLength -= (uint)bytesToSkip;
                if (head->matchLength < minMatch)
                {
                    // What remains is too short to emit as a match: hand its bytes
                    // to the next sequence (if any) as literals and drop this one.
                    if (rawSeqStore->pos + 1 < rawSeqStore->size)
                    {
                        head[1].litLength += head->matchLength;
                    }

                    rawSeqStore->pos++;
                }

                return;
            }
        }
示例#4
0
        /**
         * ZSTD_ldm_blockCompress():
         *
         * Compresses a block using the predefined sequences, along with a secondary
         * block compressor. The literals section of every sequence is passed to the
         * secondary block compressor, and those sequences are interspersed with the
         * predefined sequences. Returns the length of the last literals.
         * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
         * `rawSeqStore.seq` may also be updated to split the last sequence between two
         * blocks.
         * @return The length of the last literals.
         *
         * NOTE: The source must be at most the maximum block size, but the predefined
         * sequences can be any size, and may be longer than the block. In the case that
         * they are longer than the block, the last sequences may need to be split into
         * two. We handle that case correctly, and update `rawSeqStore` appropriately.
         * NOTE: This function does not return any errors.
         */
        public static nuint ZSTD_ldm_blockCompress(rawSeqStore_t *rawSeqStore, ZSTD_matchState_t *ms, seqStore_t *seqStore, uint *rep, void *src, nuint srcSize)
        {
            ZSTD_compressionParameters *cParams = &ms->cParams;
            uint minMatch = cParams->minMatch;
            ZSTD_blockCompressor compressLiterals = ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
            byte *blockEnd = (byte *)(src) + srcSize;
            byte *cursor   = (byte *)(src);

            // For strategies >= btopt the raw sequences are only attached to the
            // match state; the block compressor runs over the whole block and the
            // store is then advanced past the block's bytes.
            if (cParams->strategy >= ZSTD_strategy.ZSTD_btopt)
            {
                ms->ldmSeqStore = rawSeqStore;
                nuint optLastLiterals = compressLiterals(ms, seqStore, rep, src, srcSize);
                ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
                return optLastLiterals;
            }

            assert(rawSeqStore->pos <= rawSeqStore->size);
            assert(rawSeqStore->size <= rawSeqStore->capacity);

            // Walk the predefined sequences: compress each one's literals with the
            // secondary compressor, then store the predefined match itself.
            while (rawSeqStore->pos < rawSeqStore->size && cursor < blockEnd)
            {
                // maybeSplitSequence advances rawSeqStore->pos as a side effect.
                rawSeq sequence = maybeSplitSequence(rawSeqStore, (uint)(blockEnd - cursor), minMatch);

                // offset == 0 means the rest of the block is literals.
                if (sequence.offset == 0)
                {
                    break;
                }

                assert(cursor + sequence.litLength + sequence.matchLength <= blockEnd);
                ZSTD_ldm_limitTableUpdate(ms, cursor);
                ZSTD_ldm_fillFastTables(ms, (void *)cursor);

                nuint newLitLength = compressLiterals(ms, seqStore, rep, (void *)cursor, sequence.litLength);
                cursor += sequence.litLength;

                // Shift the repcode history down by one and record the new offset.
                rep[2] = rep[1];
                rep[1] = rep[0];
                rep[0] = sequence.offset;

                ZSTD_storeSeq(seqStore, newLitLength, cursor - newLitLength, blockEnd, sequence.offset + (uint)((3 - 1)), sequence.matchLength - 3);
                cursor += sequence.matchLength;
            }

            // Whatever is left after the last predefined sequence goes to the
            // secondary compressor as well.
            ZSTD_ldm_limitTableUpdate(ms, cursor);
            ZSTD_ldm_fillFastTables(ms, (void *)cursor);
            return compressLiterals(ms, seqStore, rep, (void *)cursor, (nuint)(blockEnd - cursor));
        }
示例#5
0
        /**
         * ZSTD_ldm_generateSequences():
         *
         * Generates the sequences using the long distance match finder.
         * Generates long range matching sequences in `sequences`, which parse a prefix
         * of the source. `sequences` must be large enough to store every sequence,
         * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
         * @returns 0 or an error code.
         *
         * NOTE: The user must have called ZSTD_window_update() for all of the input
         * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
         * NOTE: This function returns an error if it runs out of space to store
         *       sequences.
         */
        public static nuint ZSTD_ldm_generateSequences(ldmState_t *ldmState, rawSeqStore_t *sequences, ldmParams_t * @params, void *src, nuint srcSize)
        {
            uint  maxDist       = 1U << (int)@params->windowLog;
            byte *inputStart    = (byte *)(src);
            byte *inputEnd      = inputStart + srcSize;
            nuint kMaxChunkSize = (nuint)(1 << 20);

            // Number of chunks, rounding up when the input is not a whole multiple.
            nuint chunkCount = srcSize / kMaxChunkSize;
            if ((srcSize % kMaxChunkSize) != 0)
            {
                chunkCount++;
            }

            assert(((unchecked ((uint)(-1))) - ((3U << 29) + (1U << ((int)((nuint)(sizeof(nuint)) == 4 ? 30 : 31))))) >= kMaxChunkSize);
            assert(ldmState->window.nextSrc >= (byte *)(src) + srcSize);
            assert(sequences->pos <= sequences->size);
            assert(sequences->size <= sequences->capacity);

            // Literals not yet attached to any sequence, carried across chunks.
            nuint pendingLiterals = 0;
            nuint chunkIndex      = 0;

            while (chunkIndex < chunkCount && sequences->size < sequences->capacity)
            {
                byte *chunkStart = inputStart + chunkIndex * kMaxChunkSize;
                nuint bytesLeft  = (nuint)(inputEnd - chunkStart);
                nuint chunkSize  = (bytesLeft < kMaxChunkSize) ? bytesLeft : kMaxChunkSize;
                byte *chunkEnd   = chunkStart + chunkSize;
                nuint sizeBefore = sequences->size;

                assert(chunkStart < inputEnd);

                // Step 1: correct window index overflow when required.
                if ((ZSTD_window_needOverflowCorrection(ldmState->window, (void *)chunkEnd)) != 0)
                {
                    uint tableSize  = 1U << (int)@params->hashLog;
                    uint correction = ZSTD_window_correctOverflow(&ldmState->window, 0, maxDist, (void *)chunkStart);

                    ZSTD_ldm_reduceTable(ldmState->hashTable, tableSize, correction);
                    ldmState->loadedDictEnd = 0;
                }

                // Step 2: enforce the maximum match distance for this chunk.
                ZSTD_window_enforceMaxDist(&ldmState->window, (void *)chunkEnd, maxDist, &ldmState->loadedDictEnd, (ZSTD_matchState_t **)null);

                // Step 3: generate the chunk's sequences; the result is either an
                // error code or the count of trailing bytes not covered by a sequence.
                nuint chunkLeftover = ZSTD_ldm_generateSequences_internal(ldmState, sequences, @params, (void *)chunkStart, chunkSize);
                if ((ERR_isError(chunkLeftover)) != 0)
                {
                    return chunkLeftover;
                }

                // Step 4: prepend the carried-over literals to the first sequence
                // produced by this chunk, or keep accumulating if none was produced.
                if (sizeBefore < sequences->size)
                {
                    sequences->seq[sizeBefore].litLength += (uint)(pendingLiterals);
                    pendingLiterals = chunkLeftover;
                }
                else
                {
                    assert(chunkLeftover == chunkSize);
                    pendingLiterals += chunkSize;
                }

                ++chunkIndex;
            }

            return 0;
        }
示例#6
0
        /**
         * ZSTD_ldm_generateSequences_internal():
         *
         * Scans `srcSize` bytes at `src` for long distance matches and appends one
         * raw sequence per accepted match to `rawSeqStore` (at index `size`).
         *
         * Split points are obtained from ZSTD_ldm_gear_feed(); each one is hashed
         * with XXH64, looked up in its hash bucket, and the longest
         * forward + backward match among the bucket entries is kept.
         *
         * @return The number of bytes between the end of the last emitted match
         * (or the start of the input if none was emitted) and the end of the
         * input, or the dstSize_tooSmall error code when `rawSeqStore` is already
         * at capacity when a match is found.
         */
        private static nuint ZSTD_ldm_generateSequences_internal(ldmState_t *ldmState, rawSeqStore_t *rawSeqStore, ldmParams_t * @params, void *src, nuint srcSize)
        {
            /* LDM parameters */
            int    extDict        = (int)(ZSTD_window_hasExtDict(ldmState->window));
            uint   minMatchLength = @params->minMatchLength;
            uint   entsPerBucket  = 1U << (int)@params->bucketSizeLog;
            uint   hBits          = @params->hashLog - @params->bucketSizeLog;
            /* Window limits; the dictionary pointers are only set when extDict != 0 */
            uint   dictLimit      = ldmState->window.dictLimit;
            uint   lowestIndex    = extDict != 0 ? ldmState->window.lowLimit : dictLimit;
            byte * @base          = ldmState->window.@base;
            byte * dictBase       = extDict != 0 ? ldmState->window.dictBase : null;
            byte * dictStart      = extDict != 0 ? dictBase + lowestIndex : null;
            byte * dictEnd        = extDict != 0 ? dictBase + dictLimit : null;
            byte * lowPrefixPtr   = @base + dictLimit;
            /* Input bounds; scanning stops 8 bytes before the end of the input
             * (NOTE(review): presumably the hash read size -- confirm) */
            byte * istart         = (byte *)(src);
            byte * iend           = istart + srcSize;
            byte * ilimit         = iend - 8;
            /* anchor: first input byte not yet covered by an emitted sequence;
             * ip: current scan position */
            byte * anchor         = istart;
            byte * ip             = istart;
            /* Rolling hash state */
            ldmRollingHashState_t hashState;
            /* Scratch arrays owned by ldmState: split positions and per-split candidates */
            nuint *splits = (nuint *)ldmState->splitIndices;
            ldmMatchCandidate_t *candidates = (ldmMatchCandidate_t *)ldmState->matchCandidates;
            uint numSplits;

            /* Input shorter than one minimum match: everything is leftover (== srcSize) */
            if (srcSize < minMatchLength)
            {
                return((nuint)(iend - anchor));
            }

            ZSTD_ldm_gear_init(&hashState, @params);

            /* Feed the first minMatchLength bytes into the rolling hash. numSplits is
             * reset before every call and never read here, so splits reported during
             * this warm-up are discarded. */
            {
                nuint n = 0;

                while (n < minMatchLength)
                {
                    numSplits = 0;
                    n        += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, splits, &numSplits);
                }

                ip += minMatchLength;
            }

            while (ip < ilimit)
            {
                nuint hashed;
                uint  n;

                /* Collect the next batch of split points; `hashed` is how far ip advances */
                numSplits = 0;
                hashed    = ZSTD_ldm_gear_feed(&hashState, ip, (nuint)(ilimit - ip), splits, &numSplits);
                /* Stage 1: for every split, hash the minMatchLength bytes ending at the
                 * split position, record the candidate and prefetch its bucket.
                 * (NOTE(review): splits[n] is treated as an offset relative to ip -- confirm) */
                for (n = 0; n < numSplits; n++)
                {
                    byte *split  = ip + splits[n] - minMatchLength;
                    ulong xxhash = XXH64((void *)split, minMatchLength, 0);
                    /* Low hBits bits of the hash are passed to the bucket lookup;
                     * the high 32 bits are kept as the checksum */
                    uint  hash   = (uint)(xxhash & (((uint)(1) << (int)hBits) - 1));

                    candidates[n].split    = split;
                    candidates[n].hash     = hash;
                    candidates[n].checksum = (uint)(xxhash >> 32);
                    candidates[n].bucket   = ZSTD_ldm_getBucket(ldmState, hash, *@params);
                    Prefetch0((void *)(candidates[n].bucket));
                }

                /* Stage 2: search each candidate's bucket for the best match */
                for (n = 0; n < numSplits; n++)
                {
                    nuint       forwardMatchLength = 0, backwardMatchLength = 0, bestMatchLength = 0, mLength;
                    byte *      split  = candidates[n].split;
                    uint        checksum = candidates[n].checksum;
                    uint        hash   = candidates[n].hash;
                    ldmEntry_t *bucket = candidates[n].bucket;
                    ldmEntry_t *cur;
                    ldmEntry_t *bestEntry = (ldmEntry_t *)null;
                    ldmEntry_t  newEntry;

                    newEntry.offset   = (uint)(split - @base);
                    newEntry.checksum = checksum;
                    /* The split lies before the anchor, i.e. inside the previously
                     * emitted sequence: only register it in the hash table and move on */
                    if (split < anchor)
                    {
                        ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                        continue;
                    }

                    for (cur = bucket; cur < bucket + entsPerBucket; cur++)
                    {
                        nuint curForwardMatchLength, curBackwardMatchLength, curTotalMatchLength;

                        /* Skip entries whose checksum differs or whose offset is not
                         * above the lowest valid index */
                        if (cur->checksum != checksum || cur->offset <= lowestIndex)
                        {
                            continue;
                        }

                        if (extDict != 0)
                        {
                            /* The match may start in the dictionary segment (offset < dictLimit)
                             * or in the prefix; choose base and bounds accordingly */
                            byte *curMatchBase = cur->offset < dictLimit ? dictBase : @base;
                            byte *pMatch       = curMatchBase + cur->offset;
                            byte *matchEnd     = cur->offset < dictLimit ? dictEnd : iend;
                            byte *lowMatchPtr  = cur->offset < dictLimit ? dictStart : lowPrefixPtr;

                            curForwardMatchLength = ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
                            if (curForwardMatchLength < minMatchLength)
                            {
                                continue;
                            }

                            curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
                        }
                        else
                        {
                            /* No external dictionary: the match is addressed from @base */
                            byte *pMatch = @base + cur->offset;

                            curForwardMatchLength = ZSTD_count(split, pMatch, iend);
                            if (curForwardMatchLength < minMatchLength)
                            {
                                continue;
                            }

                            curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
                        }

                        /* Keep the entry with the longest total (forward + backward) match;
                         * on ties the earlier bucket entry wins (strict comparison) */
                        curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
                        if (curTotalMatchLength > bestMatchLength)
                        {
                            bestMatchLength     = curTotalMatchLength;
                            forwardMatchLength  = curForwardMatchLength;
                            backwardMatchLength = curBackwardMatchLength;
                            bestEntry           = cur;
                        }
                    }

                    /* No match found: register the split in the hash table and
                     * process the next candidate */
                    if (bestEntry == null)
                    {
                        ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                        continue;
                    }

                    /* Match found */
                    mLength = forwardMatchLength + backwardMatchLength;

                    {
                        uint    offset = (uint)(split - @base) - bestEntry->offset;
                        rawSeq *seq    = rawSeqStore->seq + rawSeqStore->size;

                        /* Out of sequence storage */
                        if (rawSeqStore->size == rawSeqStore->capacity)
                        {
                            return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_dstSize_tooSmall)));
                        }

                        /* Literals run from the anchor to the backward-extended match start */
                        seq->litLength   = (uint)(split - backwardMatchLength - anchor);
                        seq->matchLength = (uint)(mLength);
                        seq->offset      = offset;
                        rawSeqStore->size++;
                    }

                    /* Inserted only after bestEntry has been read above
                     * (NOTE(review): presumably because insertion may overwrite the
                     * bucket slot bestEntry points to -- confirm) */
                    ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                    /* The next sequence's literals start right after this match */
                    anchor = split + forwardMatchLength;
                }

                ip += hashed;
            }

            /* Bytes after the last emitted match are returned as leftover literals */
            return((nuint)(iend - anchor));
        }