/**
 * ZSTD_ldm_skipRawSeqStoreBytes():
 *
 * Advances `rawSeqStore` by `nbBytes` bytes, updating the fields `pos` and
 * `posInSequence`. Unlike ZSTD_ldm_skipSequences(), the stored sequences
 * themselves are never modified; only the cursor moves.
 * Not to be used in conjunction with ZSTD_ldm_skipSequences().
 * Must be called for data which is not passed to ZSTD_ldm_blockCompress().
 */
public static void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t *rawSeqStore, nuint nbBytes)
{
    /* Bytes still to skip, counted from the start of the current sequence. */
    uint pending = (uint)(rawSeqStore->posInSequence + nbBytes);

    while (pending != 0 && rawSeqStore->pos < rawSeqStore->size)
    {
        rawSeq head = rawSeqStore->seq[rawSeqStore->pos];
        uint span = head.litLength + head.matchLength;

        if (pending < span)
        {
            /* The skip ends inside this sequence: remember how far in. */
            rawSeqStore->posInSequence = pending;
            break;
        }

        /* The whole sequence is skipped. */
        pending -= span;
        rawSeqStore->pos++;
    }

    /* Landed exactly on a sequence boundary, or ran out of sequences. */
    if (pending == 0 || rawSeqStore->pos == rawSeqStore->size)
    {
        rawSeqStore->posInSequence = 0;
    }
}
/**
 * maybeSplitSequence():
 *
 * Fetches the sequence at `rawSeqStore->pos`. When it fits entirely within
 * `remaining` bytes it is consumed (pos is incremented) and returned as is.
 * Otherwise the sequence is split between this block and the next: the part
 * that fits is returned and the store is advanced by `remaining` bytes via
 * ZSTD_ldm_skipSequences().
 *
 * A returned sequence with offset == 0 means the rest of the block should be
 * treated as literals (the cut fell inside the literals, or the truncated
 * match would be shorter than `minMatch`).
 */
private static rawSeq maybeSplitSequence(rawSeqStore_t *rawSeqStore, uint remaining, uint minMatch)
{
    rawSeq current = rawSeqStore->seq[rawSeqStore->pos];
    assert(current.offset > 0);

    uint fullLength = current.litLength + current.matchLength;

    /* Common case: the whole sequence fits in the block. */
    if (remaining >= fullLength)
    {
        rawSeqStore->pos++;
        return current;
    }

    /* From here on remaining < fullLength: cut the sequence short. */
    if (remaining <= current.litLength)
    {
        current.offset = 0;
    }
    else
    {
        current.matchLength = remaining - current.litLength;
        if (current.matchLength < minMatch)
        {
            current.offset = 0;
        }
    }

    /* Account for the `remaining` bytes in the stored sequences. */
    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
    return current;
}
/**
 * ZSTD_ldm_skipSequences():
 *
 * Skips past `srcSize` bytes worth of sequences in `rawSeqStore`, shrinking
 * the literal and match lengths of the stored sequences in place.
 * A match that is cut down to fewer than `minMatch` bytes is dropped: its
 * remaining length is added to the literals of the following sequence (when
 * there is one) and the store moves on to that sequence.
 * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
 */
public static void ZSTD_ldm_skipSequences(rawSeqStore_t *rawSeqStore, nuint srcSize, uint minMatch)
{
    nuint bytesLeft = srcSize;

    while (bytesLeft > 0 && rawSeqStore->pos < rawSeqStore->size)
    {
        rawSeq *head = rawSeqStore->seq + rawSeqStore->pos;

        /* The skip ends inside (or exactly at the end of) the literals. */
        if (bytesLeft <= head->litLength)
        {
            head->litLength -= (uint)(bytesLeft);
            return;
        }

        /* All literals are consumed. */
        bytesLeft -= head->litLength;
        head->litLength = 0;

        /* The whole match is consumed too: go on with the next sequence. */
        if (bytesLeft >= head->matchLength)
        {
            bytesLeft -= head->matchLength;
            head->matchLength = 0;
            rawSeqStore->pos++;
            continue;
        }

        /* The skip ends inside the match: keep only its tail. */
        head->matchLength -= (uint)(bytesLeft);
        if (head->matchLength < minMatch)
        {
            /* Tail too short to be a match: hand it over as literals. */
            if (rawSeqStore->pos + 1 < rawSeqStore->size)
            {
                head[1].litLength += head->matchLength;
            }
            rawSeqStore->pos++;
        }
        return;
    }
}
/**
 * ZSTD_ldm_blockCompress():
 *
 * Compresses one block using the predefined sequences in `rawSeqStore`
 * together with a secondary block compressor selected from the strategy and
 * dictionary mode of `ms`. The literals section of every predefined sequence
 * is handed to the secondary block compressor, and the sequences it produces
 * are interspersed with the predefined ones in `seqStore`.
 *
 * `rawSeqStore.pos` is updated to reflect how many sequences were consumed,
 * and `rawSeqStore.seq` may be updated to split the last sequence between
 * two blocks.
 *
 * For strategies >= ZSTD_btopt the predefined sequences are not stored
 * directly: the store is attached to `ms->ldmSeqStore`, the whole block is
 * passed to the block compressor, and the store is then advanced by
 * `srcSize` bytes.
 *
 * @return The length of the last literals.
 *
 * NOTE: The source must be at most the maximum block size, but the
 * predefined sequences can be any size and may be longer than the block, in
 * which case the last sequence is split in two.
 * NOTE: This function does not return any errors.
 */
public static nuint ZSTD_ldm_blockCompress(rawSeqStore_t *rawSeqStore, ZSTD_matchState_t *ms, seqStore_t *seqStore, uint *rep, void *src, nuint srcSize)
{
    /* Offset-code bias added to a raw offset, and base subtracted from a
     * match length, when storing a sequence. */
    const uint kRepMove = 2;
    const uint kMinMatchBase = 3;

    ZSTD_compressionParameters *cParams = &ms->cParams;
    uint minMatch = cParams->minMatch;
    ZSTD_blockCompressor blockCompressor = ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));

    byte *blockEnd = (byte *)(src) + srcSize;
    byte *cursor = (byte *)(src);

    if (cParams->strategy >= ZSTD_strategy.ZSTD_btopt)
    {
        ms->ldmSeqStore = rawSeqStore;
        nuint lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
        ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
        return lastLLSize;
    }

    assert(rawSeqStore->pos <= rawSeqStore->size);
    assert(rawSeqStore->size <= rawSeqStore->capacity);

    /* Walk the predefined sequences, compressing the literals of each one. */
    while (rawSeqStore->pos < rawSeqStore->size && cursor < blockEnd)
    {
        /* maybeSplitSequence() advances rawSeqStore->pos. */
        rawSeq ldmSeq = maybeSplitSequence(rawSeqStore, (uint)(blockEnd - cursor), minMatch);

        /* offset == 0 signals that the rest of the block is literals. */
        if (ldmSeq.offset == 0)
        {
            break;
        }

        assert(cursor + ldmSeq.litLength + ldmSeq.matchLength <= blockEnd);

        /* Prepare the tables, then run the block compressor on the literals. */
        ZSTD_ldm_limitTableUpdate(ms, cursor);
        ZSTD_ldm_fillFastTables(ms, (void *)cursor);
        nuint trailingLiterals = blockCompressor(ms, seqStore, rep, (void *)cursor, ldmSeq.litLength);
        cursor += ldmSeq.litLength;

        /* Push the new offset onto the repcode history. */
        rep[2] = rep[1];
        rep[1] = rep[0];
        rep[0] = ldmSeq.offset;

        /* Store the predefined match, preceded by the leftover literals. */
        ZSTD_storeSeq(seqStore, trailingLiterals, cursor - trailingLiterals, blockEnd, ldmSeq.offset + kRepMove, ldmSeq.matchLength - kMinMatchBase);
        cursor += ldmSeq.matchLength;
    }

    /* Compress whatever is left after the last predefined sequence. */
    ZSTD_ldm_limitTableUpdate(ms, cursor);
    ZSTD_ldm_fillFastTables(ms, (void *)cursor);
    return blockCompressor(ms, seqStore, rep, (void *)cursor, (nuint)(blockEnd - cursor));
}
/**
 * ZSTD_ldm_generateSequences():
 *
 * Runs the long distance match finder over `src` and appends the sequences
 * it finds to `sequences`; together they parse a prefix of the source.
 * `sequences` must be large enough to store every sequence, which can be
 * checked with `ZSTD_ldm_getMaxNbSeq()`.
 *
 * The input is processed in chunks of at most 1 MiB. Before each chunk the
 * window is overflow-corrected if needed and the maximum distance is
 * enforced. Literals left over at the end of a chunk are carried forward
 * and added to the first sequence generated by a later chunk.
 *
 * @returns 0 or an error code.
 *
 * NOTE: The user must have called ZSTD_window_update() for all of the input
 * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
 * NOTE: This function returns an error if it runs out of space to store
 * sequences.
 */
public static nuint ZSTD_ldm_generateSequences(ldmState_t *ldmState, rawSeqStore_t *sequences, ldmParams_t * @params, void *src, nuint srcSize)
{
    uint maxDist = 1U << (int)@params->windowLog;
    byte *inputBegin = (byte *)(src);
    byte *inputEnd = inputBegin + srcSize;
    nuint kMaxChunkSize = (nuint)(1 << 20);

    /* Number of chunks = ceil(srcSize / kMaxChunkSize). */
    nuint chunkCount = srcSize / kMaxChunkSize;
    if ((srcSize % kMaxChunkSize) != 0)
    {
        chunkCount++;
    }

    nuint carriedLiterals = 0;
    nuint chunkIndex = 0;

    assert(((unchecked ((uint)(-1))) - ((3U << 29) + (1U << ((int)((nuint)(sizeof(nuint)) == 4 ? 30 : 31))))) >= kMaxChunkSize);
    /* The window must already cover all of the input. */
    assert(ldmState->window.nextSrc >= (byte *)(src) + srcSize);
    assert(sequences->pos <= sequences->size);
    assert(sequences->size <= sequences->capacity);

    while (chunkIndex < chunkCount && sequences->size < sequences->capacity)
    {
        byte *chunkBegin = inputBegin + chunkIndex * kMaxChunkSize;
        nuint bytesToEnd = (nuint)(inputEnd - chunkBegin);
        byte *chunkLimit;
        if (bytesToEnd < kMaxChunkSize)
        {
            chunkLimit = inputEnd;
        }
        else
        {
            chunkLimit = chunkBegin + kMaxChunkSize;
        }
        nuint chunkLength = (nuint)(chunkLimit - chunkBegin);
        nuint sizeBefore = sequences->size;

        assert(chunkBegin < inputEnd);

        /* 1. Perform overflow correction if necessary. */
        if ((ZSTD_window_needOverflowCorrection(ldmState->window, (void *)chunkLimit)) != 0)
        {
            uint ldmHSize = 1U << (int)@params->hashLog;
            uint correction = ZSTD_window_correctOverflow(&ldmState->window, 0, maxDist, (void *)chunkBegin);
            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
            /* The loaded dictionary is invalidated by the correction. */
            ldmState->loadedDictEnd = 0;
        }

        /* 2. Enforce the maximum offset allowed. */
        ZSTD_window_enforceMaxDist(&ldmState->window, (void *)chunkLimit, maxDist, &ldmState->loadedDictEnd, (ZSTD_matchState_t **)null);

        /* 3. Generate the sequences for this chunk. */
        nuint chunkLeftover = ZSTD_ldm_generateSequences_internal(ldmState, sequences, @params, (void *)chunkBegin, chunkLength);
        if ((ERR_isError(chunkLeftover)) != 0)
        {
            return chunkLeftover;
        }

        /* 4. Prepend the carried literals to the first new sequence, or
         *    keep accumulating them when the chunk produced none. */
        if (sizeBefore < sequences->size)
        {
            sequences->seq[sizeBefore].litLength += (uint)(carriedLiterals);
            carriedLiterals = chunkLeftover;
        }
        else
        {
            assert(chunkLeftover == chunkLength);
            carriedLiterals += chunkLength;
        }

        chunkIndex++;
    }

    return 0;
}
/**
 * ZSTD_ldm_generateSequences_internal():
 *
 * Searches one chunk of input (`src`, `srcSize` bytes) for long distance
 * matches and appends each match found to `rawSeqStore` as a raw sequence
 * (litLength, matchLength, offset). Every candidate position examined is
 * also inserted into the LDM hash table of `ldmState`.
 *
 * @return the number of input bytes following the end of the last emitted
 *         match (`iend - anchor`; the whole chunk when nothing was emitted),
 *         or the error code ZSTD_error_dstSize_tooSmall when a match is
 *         found but `rawSeqStore` is already at capacity.
 */
private static nuint ZSTD_ldm_generateSequences_internal(ldmState_t *ldmState, rawSeqStore_t *rawSeqStore, ldmParams_t * @params, void *src, nuint srcSize)
{
    /* LDM parameters */
    int extDict = (int)(ZSTD_window_hasExtDict(ldmState->window));
    uint minMatchLength = @params->minMatchLength;
    uint entsPerBucket = 1U << (int)@params->bucketSizeLog;
    uint hBits = @params->hashLog - @params->bucketSizeLog;
    /* Prefix and extDict parameters (dict pointers are null without extDict) */
    uint dictLimit = ldmState->window.dictLimit;
    uint lowestIndex = extDict != 0 ? ldmState->window.lowLimit : dictLimit;
    byte * @base = ldmState->window.@base;
    byte * dictBase = extDict != 0 ? ldmState->window.dictBase : null;
    byte * dictStart = extDict != 0 ? dictBase + lowestIndex : null;
    byte * dictEnd = extDict != 0 ? dictBase + dictLimit : null;
    byte * lowPrefixPtr = @base + dictLimit;
    /* Input bounds; hashing stops 8 bytes before the end of the input
     * (NOTE(review): presumably HASH_READ_SIZE in upstream zstd -- confirm) */
    byte * istart = (byte *)(src);
    byte * iend = istart + srcSize;
    byte * ilimit = iend - 8;
    /* Input positions: `anchor` is the end of the last emitted match,
     * `ip` is the next byte to feed to the rolling hash */
    byte * anchor = istart;
    byte * ip = istart;
    /* Rolling hash state */
    ldmRollingHashState_t hashState;
    /* Scratch arrays owned by ldmState, used for the two-stage processing below */
    nuint *splits = (nuint *)ldmState->splitIndices;
    ldmMatchCandidate_t *candidates = (ldmMatchCandidate_t *)ldmState->matchCandidates;
    uint numSplits;
    /* Input shorter than one minimum match: everything is leftover */
    if (srcSize < minMatchLength) {
        return((nuint)(iend - anchor));
    }
    /* Feed the first minMatchLength bytes to the rolling hash; any split
     * points reported here are discarded (numSplits is reset every round) */
    ZSTD_ldm_gear_init(&hashState, @params);
    {
        nuint n = 0;
        while (n < minMatchLength) {
            numSplits = 0;
            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, splits, &numSplits);
        }
        ip += minMatchLength;
    }
    while (ip < ilimit) {
        nuint hashed;
        uint n;
        numSplits = 0;
        /* `hashed` is the number of bytes consumed by this call; split
         * positions (relative to ip) are written to `splits` */
        hashed = ZSTD_ldm_gear_feed(&hashState, ip, (nuint)(ilimit - ip), splits, &numSplits);
        /* Stage 1: for every split point, hash the minMatchLength bytes that
         * end there, record the candidate and prefetch its bucket */
        for (n = 0; n < numSplits; n++) {
            byte *split = ip + splits[n] - minMatchLength;
            ulong xxhash = XXH64((void *)split, minMatchLength, 0);
            /* Low hBits bits select the bucket, the high 32 bits are the checksum */
            uint hash = (uint)(xxhash & (((uint)(1) << (int)hBits) - 1));
            candidates[n].split = split;
            candidates[n].hash = hash;
            candidates[n].checksum = (uint)(xxhash >> 32);
            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *@params);
            Prefetch0((void *)(candidates[n].bucket));
        }
        /* Stage 2: look for the best match of every candidate */
        for (n = 0; n < numSplits; n++) {
            nuint forwardMatchLength = 0, 
backwardMatchLength = 0, bestMatchLength = 0, mLength;
            byte * split = candidates[n].split;
            uint checksum = candidates[n].checksum;
            uint hash = candidates[n].hash;
            ldmEntry_t *bucket = candidates[n].bucket;
            ldmEntry_t *cur;
            ldmEntry_t *bestEntry = (ldmEntry_t *)null;
            ldmEntry_t newEntry;
            newEntry.offset = (uint)(split - @base);
            newEntry.checksum = checksum;
            /* A split point before `anchor` would produce a sequence
             * overlapping the previous one: only register it in the table */
            if (split < anchor) {
                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                continue;
            }
            /* Scan all entries of the bucket, keeping the longest total match */
            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
                nuint curForwardMatchLength, curBackwardMatchLength, curTotalMatchLength;
                /* Skip entries with a different checksum or at/below lowestIndex */
                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
                    continue;
                }
                if (extDict != 0) {
                    /* Entries below dictLimit live in the dictionary segment */
                    byte *curMatchBase = cur->offset < dictLimit ? dictBase : @base;
                    byte *pMatch = curMatchBase + cur->offset;
                    byte *matchEnd = cur->offset < dictLimit ? dictEnd : iend;
                    byte *lowMatchPtr = cur->offset < dictLimit ? dictStart : lowPrefixPtr;
                    curForwardMatchLength = ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
                } else {
                    byte *pMatch = @base + cur->offset;
                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
                    if (curForwardMatchLength < minMatchLength) {
                        continue;
                    }
                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
                }
                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
                if (curTotalMatchLength > bestMatchLength) {
                    bestMatchLength = curTotalMatchLength;
                    forwardMatchLength = curForwardMatchLength;
                    backwardMatchLength = curBackwardMatchLength;
                    bestEntry = cur;
                }
            }
            /* No match found: insert the entry and go to the next candidate */
            if (bestEntry == null) {
                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
                continue;
            }
            /* Match found: append it to rawSeqStore */
            mLength = forwardMatchLength + backwardMatchLength;
            {
                uint offset = (uint)(split - @base) - bestEntry->offset;
                rawSeq *seq = rawSeqStore->seq + rawSeqStore->size;
                /* Out of sequence storage */
                if (rawSeqStore->size == 
rawSeqStore->capacity) {
                    return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_dstSize_tooSmall)));
                }
                /* Literals run from `anchor` to the backward-extended match start */
                seq->litLength = (uint)(split - backwardMatchLength - anchor);
                seq->matchLength = (uint)(mLength);
                seq->offset = offset;
                rawSeqStore->size++;
            }
            /* Inserted only after bestEntry has been read above
             * (NOTE(review): presumably because the insert may overwrite the
             * slot bestEntry points to -- confirm against ZSTD_ldm_insertEntry) */
            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *@params);
            anchor = split + forwardMatchLength;
        }
        ip += hashed;
    }
    return((nuint)(iend - anchor));
}