Ejemplo n.º 1
0
        /** ZSTD_ldm_insertEntry() :
         *  Stores `entry` in the bucket selected by `hash`, at the slot currently
         *  recorded in ldmState->bucketOffsets[hash], then advances that recorded
         *  slot by one, wrapping modulo (1 << ldmParams.bucketSizeLog). */
        private static void ZSTD_ldm_insertEntry(ldmState_t *ldmState, nuint hash, ldmEntry_t entry, ldmParams_t ldmParams)
        {
            byte *slotCursor = ldmState->bucketOffsets + hash;
            uint  slot       = *slotCursor;
            uint  wrapMask   = (1U << (int)ldmParams.bucketSizeLog) - 1;
            ldmEntry_t *bucket = ZSTD_ldm_getBucket(ldmState, hash, ldmParams);

            /* Write into the current slot of this bucket. */
            bucket[slot] = entry;

            /* Move the cursor to the next slot, wrapping at the bucket size. */
            *slotCursor = (byte)((slot + 1) & wrapMask);
        }
Ejemplo n.º 2
0
        /**
         * ZSTD_ldm_generateSequences():
         *
         * Runs the long distance match finder over `src` and appends the
         * sequences it finds to `sequences`; together they parse a prefix of the
         * source. `sequences` must be large enough to store every sequence,
         * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
         *
         * The input is walked in chunks of at most 1 << 20 bytes. Literals left
         * over at the end of a chunk are added to the litLength of the first
         * sequence produced by a later chunk.
         *
         * @returns 0, or the error code reported while processing a chunk.
         *
         * NOTE: The user must have called ZSTD_window_update() for all of the input
         * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
         * NOTE: This function returns an error if it runs out of space to store
         *       sequences.
         */
        public static nuint ZSTD_ldm_generateSequences(ldmState_t *ldmState, rawSeqStore_t *sequences, ldmParams_t * @params, void *src, nuint srcSize)
        {
            nuint chunkCapacity   = (nuint)(1 << 20);
            uint  windowSpan      = 1U << (int)@params->windowLog;
            byte *inputBegin      = (byte *)(src);
            byte *inputEnd        = inputBegin + srcSize;
            nuint chunkCount      = srcSize / chunkCapacity;
            nuint carriedLiterals = 0;
            nuint index;

            /* A trailing partial chunk counts as one more chunk. */
            if ((srcSize % chunkCapacity) != 0)
            {
                chunkCount++;
            }

            assert(((unchecked ((uint)(-1))) - ((3U << 29) + (1U << ((int)((nuint)(sizeof(nuint)) == 4 ? 30 : 31))))) >= chunkCapacity);
            /* The window must already extend to the end of this input. */
            assert(ldmState->window.nextSrc >= inputEnd);
            assert(sequences->pos <= sequences->size);
            assert(sequences->size <= sequences->capacity);

            for (index = 0; index < chunkCount; ++index)
            {
                /* Stop as soon as the sequence store has no free slot left. */
                if (sequences->size >= sequences->capacity)
                {
                    break;
                }

                byte *chunkBegin     = inputBegin + index * chunkCapacity;
                nuint bytesLeft      = (nuint)(inputEnd - chunkBegin);
                nuint chunkLength    = bytesLeft < chunkCapacity ? bytesLeft : chunkCapacity;
                byte *chunkStop      = chunkBegin + chunkLength;
                nuint seqCountBefore = sequences->size;
                nuint chunkLeftover;

                assert(chunkBegin < inputEnd);

                /* 1. When the window asks for overflow correction, apply it, hand the
                 *    returned correction to ZSTD_ldm_reduceTable, and clear loadedDictEnd. */
                if ((ZSTD_window_needOverflowCorrection(ldmState->window, (void *)chunkStop)) != 0)
                {
                    uint tableEntries = 1U << (int)@params->hashLog;
                    uint correction   = ZSTD_window_correctOverflow(&ldmState->window, 0, windowSpan, (void *)chunkBegin);

                    ZSTD_ldm_reduceTable(ldmState->hashTable, tableEntries, correction);
                    ldmState->loadedDictEnd = 0;
                }

                /* 2. Enforce the maximum distance (1 << windowLog) as of the chunk end. */
                ZSTD_window_enforceMaxDist(&ldmState->window, (void *)chunkStop, windowSpan, &ldmState->loadedDictEnd, (ZSTD_matchState_t **)null);

                /* 3. Generate the sequences for this chunk. */
                chunkLeftover = ZSTD_ldm_generateSequences_internal(ldmState, sequences, @params, (void *)chunkBegin, chunkLength);
                if ((ERR_isError(chunkLeftover)) != 0)
                {
                    return chunkLeftover;
                }

                /* 4. Account for literals carried over from earlier chunks. */
                if (sequences->size <= seqCountBefore)
                {
                    /* No new sequence: the whole chunk joins the carried literals. */
                    assert(chunkLeftover == chunkLength);
                    carriedLiterals += chunkLength;
                }
                else
                {
                    /* Prepend the carried literals to the first new sequence. */
                    sequences->seq[seqCountBefore].litLength += (uint)(carriedLiterals);
                    carriedLiterals = chunkLeftover;
                }
            }

            return 0;
        }
Ejemplo n.º 3
0
        /** ZSTD_ldm_fillHashTable() :
         *  Walks [ip, iend) with the gear rolling hash and, for every split point
         *  it reports, inserts an entry for the minMatchLength bytes ending at that
         *  split into the hash table. Splits that fall within the first
         *  minMatchLength bytes of the range are skipped. No sequences are produced. */
        public static void ZSTD_ldm_fillHashTable(ldmState_t *ldmState, byte *ip, byte *iend, ldmParams_t * @params)
        {
            uint   windowLength = @params->minMatchLength;
            uint   bucketBits   = @params->hashLog - @params->bucketSizeLog;
            uint   bucketMask   = ((uint)(1) << (int)bucketBits) - 1;
            byte * windowBase   = ldmState->window.@base;
            byte * rangeStart   = ip;
            nuint *splitOffsets = (nuint *)ldmState->splitIndices;
            ldmRollingHashState_t rolling;

            ZSTD_ldm_gear_init(&rolling, @params);
            while (ip < iend)
            {
                uint  splitCount = 0;
                nuint consumed   = ZSTD_ldm_gear_feed(&rolling, ip, (nuint)(iend - ip), splitOffsets, &splitCount);

                for (uint i = 0; i < splitCount; i++)
                {
                    byte *splitEnd = ip + splitOffsets[i];

                    /* Not enough bytes before this split to hash a full window. */
                    if (splitEnd < rangeStart + windowLength)
                    {
                        continue;
                    }

                    byte *windowStart = splitEnd - windowLength;
                    ulong digest      = XXH64((void *)windowStart, windowLength, 0);
                    ldmEntry_t entry;

                    /* Low bits of the digest pick the bucket, high 32 bits are the checksum. */
                    entry.offset   = (uint)(windowStart - windowBase);
                    entry.checksum = (uint)(digest >> 32);
                    ZSTD_ldm_insertEntry(ldmState, (uint)(digest & bucketMask), entry, *@params);
                }

                ip += consumed;
            }
        }
Ejemplo n.º 4
0
        /** ZSTD_ldm_generateSequences_internal() :
         *  Searches the `srcSize` bytes at `src` for long distance matches against the
         *  entries in ldmState's hash table, appending one rawSeq to `rawSeqStore`
         *  for every match found and inserting every examined split point into the
         *  hash table.
         *  @return the number of bytes between the end of the last emitted match
         *          (or the start of `src` when no match was emitted) and the end of
         *          `src`; or the dstSize_tooSmall error code when a match is found
         *          while `rawSeqStore` is already at capacity. */
        private static nuint ZSTD_ldm_generateSequences_internal(ldmState_t *ldmState, rawSeqStore_t *rawSeqStore, ldmParams_t * @params, void *src, nuint srcSize)
        {
            /* LDM parameters */
            int    extDict        = (int)(ZSTD_window_hasExtDict(ldmState->window));
            uint   minMatchLength = @params->minMatchLength;
            uint   entsPerBucket  = 1U << (int)@params->bucketSizeLog;
            uint   hBits          = @params->hashLog - @params->bucketSizeLog;
            /* Prefix and extDict parameters (the dict pointers stay null without an extDict) */
            uint   dictLimit      = ldmState->window.dictLimit;
            uint   lowestIndex    = extDict != 0 ? ldmState->window.lowLimit : dictLimit;
            byte * @base          = ldmState->window.@base;
            byte * dictBase       = extDict != 0 ? ldmState->window.dictBase : null;
            byte * dictStart      = extDict != 0 ? dictBase + lowestIndex : null;
            byte * dictEnd        = extDict != 0 ? dictBase + dictLimit : null;
            byte * lowPrefixPtr   = @base + dictLimit;
            /* Input bounds; the hashing loop below stops 8 bytes before the end of the input */
            byte * istart         = (byte *)(src);
            byte * iend           = istart + srcSize;
            byte * ilimit         = iend - 8;
            /* Input positions: `anchor` is the first byte not yet covered by an emitted sequence */
            byte * anchor         = istart;
            byte * ip             = istart;
            /* Rolling hash state */
            ldmRollingHashState_t hashState;
            /* Scratch arrays used to process the split points in two stages */
            nuint *splits = (nuint *)ldmState->splitIndices;
            ldmMatchCandidate_t *candidates = (ldmMatchCandidate_t *)ldmState->matchCandidates;
            uint numSplits;

            /* Input shorter than one match: everything is left over (anchor == istart here) */
            if (srcSize < minMatchLength)
            {
                return((nuint)(iend - anchor));
            }

            /* Feed the first minMatchLength bytes to the rolling hash. numSplits is reset
             * on every pass and never read here, so splits reported during this phase
             * are discarded. */
            ZSTD_ldm_gear_init(&hashState, @params);

            {
                nuint n = 0;

                while (n < minMatchLength)
                {
                    numSplits = 0;
                    n        += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, splits, &numSplits);
                }

                ip += minMatchLength;
            }

            while (ip < ilimit)
            {
                nuint hashed;
                uint  n;

                numSplits = 0;
                hashed    = ZSTD_ldm_gear_feed(&hashState, ip, (nuint)(ilimit - ip), splits, &numSplits);
                /* Stage 1: for each split, hash the minMatchLength bytes ending at it, record
                 * the candidate (low hBits bits select the bucket, high 32 bits are the
                 * checksum) and prefetch its bucket. */
                for (n = 0; n < numSplits; n++)
                {
                    byte *split  = ip + splits[n] - minMatchLength;
                    ulong xxhash = XXH64((void *)split, minMatchLength, 0);
                    uint  hash   = (uint)(xxhash & (((uint)(1) << (int)hBits) - 1));

                    candidates[n].split    = split;
                    candidates[n].hash     = hash;
                    candidates[n].checksum = (uint)(xxhash >> 32);
                    candidates[n].bucket   = ZSTD_ldm_getBucket(ldmState, hash, *@params);
                    Prefetch0((void *)(candidates[n].bucket));
                }

                /* Stage 2: look each candidate up in its bucket and emit a sequence for the best match */
                for (n = 0; n < numSplits; n++)
                {
                    nuint       forwardMatchLength = 0, backwardMatchLength = 0, bestMatchLength = 0, mLength;
                    byte *      split  = candidates[n].split;
                    uint        checksum = candidates[n].checksum;
                    uint        hash   = candidates[n].hash;
                    ldmEntry_t *bucket = candidates[n].bucket;
                    ldmEntry_t *cur;
                    ldmEntry_t *bestEntry = (ldmEntry_t *)null;
                    ldmEntry_t  newEntry;

                    newEntry.offset   = (uint)(split - @base);
                    newEntry.checksum = checksum;
                    /* The split starts before the anchor, i.e. inside bytes already covered by
                     * the previous match: only register it in the hash table and move on */
                    if (split < anchor)
                    {
                        ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                        continue;
                    }

                    /* Scan every entry of the bucket, keeping the longest total (forward + backward) match */
                    for (cur = bucket; cur < bucket + entsPerBucket; cur++)
                    {
                        nuint curForwardMatchLength, curBackwardMatchLength, curTotalMatchLength;

                        /* Skip entries with a different checksum or an offset not above lowestIndex */
                        if (cur->checksum != checksum || cur->offset <= lowestIndex)
                        {
                            continue;
                        }

                        if (extDict != 0)
                        {
                            /* Offsets below dictLimit are resolved against dictBase, the rest against base */
                            byte *curMatchBase = cur->offset < dictLimit ? dictBase : @base;
                            byte *pMatch       = curMatchBase + cur->offset;
                            byte *matchEnd     = cur->offset < dictLimit ? dictEnd : iend;
                            byte *lowMatchPtr  = cur->offset < dictLimit ? dictStart : lowPrefixPtr;

                            curForwardMatchLength = ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
                            if (curForwardMatchLength < minMatchLength)
                            {
                                continue;
                            }

                            curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
                        }
                        else
                        {
                            /* No extDict: every offset is relative to base */
                            byte *pMatch = @base + cur->offset;

                            curForwardMatchLength = ZSTD_count(split, pMatch, iend);
                            if (curForwardMatchLength < minMatchLength)
                            {
                                continue;
                            }

                            curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
                        }

                        curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
                        if (curTotalMatchLength > bestMatchLength)
                        {
                            bestMatchLength     = curTotalMatchLength;
                            forwardMatchLength  = curForwardMatchLength;
                            backwardMatchLength = curBackwardMatchLength;
                            bestEntry           = cur;
                        }
                    }

                    /* No match found: insert the entry and process the next candidate */
                    if (bestEntry == null)
                    {
                        ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                        continue;
                    }

                    /* Match found */
                    mLength = forwardMatchLength + backwardMatchLength;

                    {
                        uint    offset = (uint)(split - @base) - bestEntry->offset;
                        rawSeq *seq    = rawSeqStore->seq + rawSeqStore->size;

                        /* Out of sequence storage */
                        if (rawSeqStore->size == rawSeqStore->capacity)
                        {
                            return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_dstSize_tooSmall)));
                        }

                        /* Literals run from the anchor up to where the backward-extended match begins */
                        seq->litLength   = (uint)(split - backwardMatchLength - anchor);
                        seq->matchLength = (uint)(mLength);
                        seq->offset      = offset;
                        rawSeqStore->size++;
                    }

                    /* Insert only now: bestEntry points into this same bucket, so inserting
                     * earlier could overwrite the entry it refers to before its offset is read */
                    ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                    anchor = split + forwardMatchLength;
                }

                ip += hashed;
            }

            /* Bytes after the last emitted match are reported as leftover */
            return((nuint)(iend - anchor));
        }
Ejemplo n.º 5
0
 /** ZSTD_ldm_getBucket() :
  *  Locates the bucket that `hash` maps to. Buckets are laid out back to back in
  *  ldmState->hashTable, (1 << ldmParams.bucketSizeLog) entries apart.
  *  @return pointer to the first entry of that bucket. */
 private static ldmEntry_t *ZSTD_ldm_getBucket(ldmState_t *ldmState, nuint hash, ldmParams_t ldmParams)
 {
     nuint firstEntryIndex = hash << (int)ldmParams.bucketSizeLog;

     return ldmState->hashTable + firstEntryIndex;
 }