/**
 * ZSTD_ldm_insertEntry() :
 * Stores `entry` in the bucket selected by `hash`, at the slot index currently
 * recorded in bucketOffsets[hash], then advances that slot index by one,
 * wrapping around at the bucket size (1 << bucketSizeLog).
 */
private static void ZSTD_ldm_insertEntry(ldmState_t *ldmState, nuint hash, ldmEntry_t entry, ldmParams_t ldmParams)
{
    byte *slotCursor = ldmState->bucketOffsets + hash;
    uint slot = *slotCursor;
    uint slotMask = (1U << (int)ldmParams.bucketSizeLog) - 1;
    ldmEntry_t *bucket = ZSTD_ldm_getBucket(ldmState, hash, ldmParams);

    bucket[slot] = entry;

    /* The mask keeps the stored slot index inside the bucket, so it cycles. */
    *slotCursor = (byte)((slot + 1) & slotMask);
}
/**
 * ZSTD_ldm_generateSequences():
 *
 * Runs the long distance match finder over `src` and appends the long range
 * matching sequences it finds to `sequences`; these parse a prefix of the source.
 * `sequences` must be large enough to hold every sequence, which can be checked
 * with `ZSTD_ldm_getMaxNbSeq()`.
 *
 * The input is processed in chunks of at most 1 << 20 bytes. Before each chunk
 * the window is overflow-corrected if needed and the maximum distance is enforced.
 *
 * @returns 0, or the error code reported by the per-chunk generator.
 *
 * NOTE: The user must have called ZSTD_window_update() for all of the input
 * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
 * NOTE: This function returns an error if it runs out of space to store
 * sequences.
 */
public static nuint ZSTD_ldm_generateSequences(ldmState_t *ldmState, rawSeqStore_t *sequences, ldmParams_t * @params, void *src, nuint srcSize)
{
    nuint kMaxChunkSize = (nuint)(1 << 20);
    uint maxDist = 1U << (int)@params->windowLog;
    byte *inputBegin = (byte *)(src);
    byte *inputEnd = inputBegin + srcSize;

    /* Number of chunks, rounding up for a trailing partial chunk. */
    nuint chunkCount = srcSize / kMaxChunkSize;
    if ((srcSize % kMaxChunkSize) != 0)
    {
        chunkCount++;
    }

    /* Literal bytes not yet attributed to any emitted sequence. */
    nuint pendingLiterals = 0;
    nuint chunkIndex = 0;

    assert(((unchecked ((uint)(-1))) - ((3U << 29) + (1U << ((int)((nuint)(sizeof(nuint)) == 4 ? 30 : 31))))) >= kMaxChunkSize);
    assert(ldmState->window.nextSrc >= (byte *)(src) + srcSize);
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);

    while (chunkIndex < chunkCount && sequences->size < sequences->capacity)
    {
        byte *chunkBegin = inputBegin + chunkIndex * kMaxChunkSize;
        nuint bytesLeft = (nuint)(inputEnd - chunkBegin);
        byte *chunkLimit;
        if (bytesLeft < kMaxChunkSize)
        {
            chunkLimit = inputEnd;
        }
        else
        {
            chunkLimit = chunkBegin + kMaxChunkSize;
        }

        nuint chunkLength = (nuint)(chunkLimit - chunkBegin);
        nuint seqCountBefore = sequences->size;
        nuint chunkLeftover;

        assert(chunkBegin < inputEnd);

        /* 1. Correct window overflow if needed; this also resets loadedDictEnd. */
        if ((ZSTD_window_needOverflowCorrection(ldmState->window, (void *)chunkLimit)) != 0)
        {
            uint tableSize = 1U << (int)@params->hashLog;
            uint correction = ZSTD_window_correctOverflow(&ldmState->window, 0, maxDist, (void *)chunkBegin);
            ZSTD_ldm_reduceTable(ldmState->hashTable, tableSize, correction);
            ldmState->loadedDictEnd = 0;
        }

        /* 2. Enforce the maximum distance as of the end of this chunk. */
        ZSTD_window_enforceMaxDist(&ldmState->window, (void *)chunkLimit, maxDist, &ldmState->loadedDictEnd, (ZSTD_matchState_t **)null);

        /* 3. Generate the sequences for this chunk. */
        chunkLeftover = ZSTD_ldm_generateSequences_internal(ldmState, sequences, @params, (void *)chunkBegin, chunkLength);
        if ((ERR_isError(chunkLeftover)) != 0)
        {
            return(chunkLeftover);
        }

        /* 4. Fold the pending literals into the first sequence produced by this
         *    chunk, or keep accumulating when the chunk produced none. */
        if (seqCountBefore < sequences->size)
        {
            sequences->seq[seqCountBefore].litLength += (uint)(pendingLiterals);
            pendingLiterals = chunkLeftover;
        }
        else
        {
            assert(chunkLeftover == chunkLength);
            pendingLiterals += chunkLength;
        }

        ++chunkIndex;
    }

    return(0);
}
/**
 * ZSTD_ldm_fillHashTable() :
 * Feeds [ip, iend) through the gear rolling hash. For every split point it
 * reports that lies at least minMatchLength bytes past the initial `ip`, the
 * minMatchLength bytes ending at that split point are hashed with XXH64 and an
 * entry (offset relative to the window base + upper 32 checksum bits) is
 * inserted into the hash table.
 */
public static void ZSTD_ldm_fillHashTable(ldmState_t *ldmState, byte *ip, byte *iend, ldmParams_t * @params)
{
    uint minMatchLength = @params->minMatchLength;
    uint hashMask = ((uint)(1) << (int)(@params->hashLog - @params->bucketSizeLog)) - 1;
    byte *windowBase = ldmState->window.@base;
    /* Earliest split end for which a full minMatchLength-byte block precedes it. */
    byte *firstUsableEnd = ip + minMatchLength;
    nuint *splits = (nuint *)ldmState->splitIndices;
    ldmRollingHashState_t hashState;

    ZSTD_ldm_gear_init(&hashState, @params);

    while (ip < iend)
    {
        uint numSplits = 0;
        nuint consumed = ZSTD_ldm_gear_feed(&hashState, ip, (nuint)(iend - ip), splits, &numSplits);

        for (uint n = 0; n < numSplits; n++)
        {
            byte *splitEnd = ip + splits[n];
            if (splitEnd < firstUsableEnd)
            {
                continue;
            }

            byte *split = splitEnd - minMatchLength;
            ulong xxhash = XXH64((void *)split, minMatchLength, 0);
            uint hash = (uint)(xxhash & hashMask);
            ldmEntry_t entry = new ldmEntry_t
            {
                offset = (uint)(split - windowBase),
                checksum = (uint)(xxhash >> 32)
            };

            ZSTD_ldm_insertEntry(ldmState, hash, entry, *@params);
        }

        ip += consumed;
    }
}
/**
 * ZSTD_ldm_generateSequences_internal() :
 * Scans [src, src + srcSize) with the gear rolling hash. At each split point it
 * looks up the matching hash-table bucket for an earlier position with the same
 * checksum, measures the match forwards and backwards, and appends the best
 * match as a sequence to `rawSeqStore`. Every processed split point is also
 * inserted into the hash table.
 *
 * @returns the number of bytes between the end of the last emitted match
 *          (or the start of the input if none) and the end of the input, or the
 *          dstSize_tooSmall error code when `rawSeqStore` is already full at the
 *          moment a match has to be stored.
 */
private static nuint ZSTD_ldm_generateSequences_internal(ldmState_t *ldmState, rawSeqStore_t *rawSeqStore, ldmParams_t * @params, void *src, nuint srcSize) {
    /* LDM parameters */
    int extDict = (int)(ZSTD_window_hasExtDict(ldmState->window));
    uint minMatchLength = @params->minMatchLength;
    uint entsPerBucket = 1U << (int)@params->bucketSizeLog;
    uint hBits = @params->hashLog - @params->bucketSizeLog;
    /* Prefix and extDict bounds; the dict pointers stay null when there is no extDict */
    uint dictLimit = ldmState->window.dictLimit;
    uint lowestIndex = extDict != 0 ? ldmState->window.lowLimit : dictLimit;
    byte * @base = ldmState->window.@base;
    byte * dictBase = extDict != 0 ? ldmState->window.dictBase : null;
    byte * dictStart = extDict != 0 ? dictBase + lowestIndex : null;
    byte * dictEnd = extDict != 0 ? dictBase + dictLimit : null;
    byte * lowPrefixPtr = @base + dictLimit;
    /* Input bounds; hashing stops 8 bytes before the end of the input */
    byte * istart = (byte *)(src);
    byte * iend = istart + srcSize;
    byte * ilimit = iend - 8;
    /* Input positions: `anchor` is the first byte not yet covered by an emitted sequence */
    byte * anchor = istart;
    byte * ip = istart;
    /* Rolling hash state */
    ldmRollingHashState_t hashState;
    /* Scratch arrays used to process split points in two stages */
    nuint *splits = (nuint *)ldmState->splitIndices;
    ldmMatchCandidate_t *candidates = (ldmMatchCandidate_t *)ldmState->matchCandidates;
    uint numSplits;
    /* Input shorter than one minimum match: everything is leftover literals */
    if (srcSize < minMatchLength) {
        return((nuint)(iend - anchor));
    }
    /* Prime the rolling hash with the first minMatchLength bytes; split points
     * reported during priming are discarded (numSplits is reset on every feed). */
    ZSTD_ldm_gear_init(&hashState, @params);
    {
        nuint n = 0;
        while (n < minMatchLength) {
            numSplits = 0;
            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, splits, &numSplits);
        }
        ip += minMatchLength;
    }
    while (ip < ilimit) {
        nuint hashed;
        uint n;
        numSplits = 0;
        hashed = ZSTD_ldm_gear_feed(&hashState, ip, (nuint)(ilimit - ip), splits, &numSplits);
        /* Stage 1: for every split point, hash the minMatchLength bytes ending
         * there, record the candidate and prefetch its bucket. */
        for (n = 0; n < numSplits; n++) {
            byte *split = ip + splits[n] - minMatchLength;
            ulong xxhash = XXH64((void *)split, minMatchLength, 0);
            /* Low hBits bits select the bucket; the high 32 bits are the checksum */
            uint hash = (uint)(xxhash & (((uint)(1) << (int)hBits) - 1));
            candidates[n].split = split;
            candidates[n].hash = hash;
            candidates[n].checksum = (uint)(xxhash >> 32);
            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *@params);
            Prefetch0((void *)(candidates[n].bucket));
        }
        /* Stage 2: search each candidate's bucket for the best match */
        for (n = 0; n < numSplits; n++) {
            nuint forwardMatchLength = 0, 
backwardMatchLength = 0, bestMatchLength = 0, mLength;
            byte * split = candidates[n].split;
            uint checksum = candidates[n].checksum;
            uint hash = candidates[n].hash;
            ldmEntry_t *bucket = candidates[n].bucket;
            ldmEntry_t *cur;
            ldmEntry_t *bestEntry = (ldmEntry_t *)null;
            ldmEntry_t newEntry;
            newEntry.offset = (uint)(split - @base);
            newEntry.checksum = checksum;
            /* A split point before the anchor would overlap the previously
             * emitted sequence: only register it in the hash table. */
            if (split < anchor) {
                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                continue;
            }
            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
                nuint curForwardMatchLength, curBackwardMatchLength, curTotalMatchLength;
                /* Skip entries with a different checksum or at/below the lowest valid index */
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }
                if (extDict != 0) {
                    /* Entries below dictLimit are addressed through dictBase, others through base */
                    byte *curMatchBase = cur->offset < dictLimit ? dictBase : @base;
                    byte *pMatch = curMatchBase + cur->offset;
                    byte *matchEnd = cur->offset < dictLimit ? dictEnd : iend;
                    byte *lowMatchPtr = cur->offset < dictLimit ? dictStart : lowPrefixPtr;
                    curForwardMatchLength = ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
                } else {
                    /* No extDict: every entry is addressed through base */
                    byte *pMatch = @base + cur->offset;
                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
                }
                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
                /* Keep the entry with the longest total (forward + backward) match;
                 * ties keep the earlier bucket slot because the comparison is strict. */
                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
            /* No match found: register the split point and move to the next candidate */
            if (bestEntry == null) {
                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                continue;
            }
            /* Match found: append it as a sequence */
            mLength = forwardMatchLength + backwardMatchLength;
            {
                uint offset = (uint)(split - @base) - bestEntry->offset;
                rawSeq *seq = rawSeqStore->seq + rawSeqStore->size;
                /* Out of sequence storage */
                if (rawSeqStore->size == 
rawSeqStore->capacity) {
                    return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_dstSize_tooSmall)));
                }
                /* Literals run from the anchor up to the start of the backward-extended match */
                seq->litLength = (uint)(split - backwardMatchLength - anchor);
                seq->matchLength = (uint)(mLength);
                seq->offset = offset;
                rawSeqStore->size++;
            }
            /* Insert only now: bestEntry points into this same bucket, so an
             * earlier insertion could overwrite it before its offset is read. */
            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
            /* The next sequence's literals start right after the forward match */
            anchor = split + forwardMatchLength;
        }
        ip += hashed;
    }
    /* Bytes after the last match are returned as leftover literals */
    return((nuint)(iend - anchor));
}
/**
 * ZSTD_ldm_getBucket() :
 * Returns a pointer to the first entry of the bucket associated with `hash`,
 * i.e. the hash table entry at index (hash << bucketSizeLog).
 */
private static ldmEntry_t *ZSTD_ldm_getBucket(ldmState_t *ldmState, nuint hash, ldmParams_t ldmParams)
{
    nuint firstSlot = hash << (int)ldmParams.bucketSizeLog;
    return ldmState->hashTable + firstSlot;
}