/**
 * Tries one set of parameters and reports the outcome to the shared COVER_best_t.
 * The upstream note says this function is thread safe if zstd is compiled with
 * multithreaded support.
 * It receives its arguments through an *OWNING* opaque pointer: the
 * FASTCOVER_tryParameters_data_s behind `opaque` is freed here, so the caller must
 * not touch it after the call.
 */
private static void FASTCOVER_tryParameters(void *opaque)
{
    FASTCOVER_tryParameters_data_s *job = (FASTCOVER_tryParameters_data_s *)(opaque);
    FASTCOVER_ctx_t *ctx = job->ctx;
    ZDICT_cover_params_t parameters = job->parameters;
    nuint dictCapacity = job->dictBufferCapacity;

    // Until a dictionary is actually selected, both the size and the selection
    // carry the generic error code.
    nuint genericError = unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_GENERIC));
    nuint totalCompressedSize = genericError;

    // Both frequency tables hold 2^f entries.
    ulong tableEntries = (ulong)(1) << (int)ctx->f;
    nuint freqsBytes = (nuint)(tableEntries * (nuint)(sizeof(uint)));

    // Zero-initialised table of 2-byte counters, handed to FASTCOVER_buildDictionary.
    ushort *segmentFreqs = (ushort *)(calloc((nuint)(tableEntries), (nuint)(2)));
    byte *dict = (byte *)(malloc(dictCapacity));
    COVER_dictSelection selection = COVER_dictSelectionError(genericError);
    uint *freqs = (uint *)(malloc(freqsBytes));

    bool buffersReady = segmentFreqs != null && dict != null && freqs != null;
    if (buffersReady)
    {
        // Work on a private copy of the context's frequencies; the copy is what
        // gets passed to the dictionary builder.
        memcpy((void *)freqs, (void *)ctx->freqs, freqsBytes);

        nuint tail = FASTCOVER_buildDictionary(ctx, freqs, (void *)dict, dictCapacity, parameters, segmentFreqs);
        uint nbFinalizeSamples = (uint)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);

        selection = COVER_selectDict(dict + tail, dictCapacity, dictCapacity - tail, ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, totalCompressedSize);

        // The original tests the selection for an error here and then falls straight
        // into cleanup either way; the call is kept so the sequence of calls is unchanged.
        COVER_dictSelectionIsError(selection);
    }

    // Cleanup runs on every path. `job->best` must be read before `job` is freed.
    free((void *)dict);
    COVER_best_finish(job->best, parameters, selection);
    free((void *)job);
    free((void *)segmentFreqs);
    COVER_dictSelectionFree(selection);
    free((void *)freqs);
}
/*! ZDICT_optimizeTrainFromBuffer_fastCover():
 * The same requirements as above hold for all the parameters except `parameters`.
 * This function tries many parameter combinations (specifically, k and d combinations)
 * and picks the best parameters. `*parameters` is filled with the best parameters found,
 * dictionary constructed with those parameters is stored in `dictBuffer`.
 * All of the parameters d, k, steps, f, and accel are optional.
 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
 * If steps is zero it defaults to 40.
 * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000].
 * If f is zero, default value of 20 is used.
 * If accel is zero, default value of 1 is used.
 * If splitPoint is <= 0, default value of 0.75 is used.
 *
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *          or an error code, which can be tested with ZDICT_isError().
 *          On success `*parameters` contains the parameters selected.
 *          See ZDICT_trainFromBuffer() for details on failure modes.
 * Error codes returned directly by this function:
 *   - parameter_outOfBound: splitPoint not in (0, 1], accel not in [1, 10], or k smaller than d
 *   - srcSize_wrong:        nbSamples == 0
 *   - dstSize_tooSmall:     dictBufferCapacity < 256
 *   - memory_allocation:    the per-trial argument block could not be allocated
 * Throws NotImplementedException when `parameters->nbThreads > 1`; this port only runs
 * trials sequentially on the calling thread.
 * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread.
 */
public static nuint ZDICT_optimizeTrainFromBuffer_fastCover(void *dictBuffer, nuint dictBufferCapacity, void *samplesBuffer, nuint *samplesSizes, uint nbSamples, ZDICT_fastCover_params_t *parameters)
{
    ZDICT_cover_params_t coverParams;
    FASTCOVER_accel_t accelParams;

    // Resolve optional parameters: a zero (or non-positive splitPoint) selects the default.
    uint nbThreads = parameters->nbThreads;
    double splitPoint = parameters->splitPoint <= 0.0 ? 0.75 : parameters->splitPoint;
    uint kMinD = (uint)(parameters->d == 0 ? 6 : parameters->d);
    uint kMaxD = (uint)(parameters->d == 0 ? 8 : parameters->d);
    uint kMinK = (uint)(parameters->k == 0 ? 50 : parameters->k);
    uint kMaxK = (uint)(parameters->k == 0 ? 2000 : parameters->k);
    uint kSteps = (uint)(parameters->steps == 0 ? 40 : parameters->steps);
    // Never step by zero, otherwise the k loop could not advance.
    uint kStepSize = (((kMaxK - kMinK) / kSteps) > (1) ? ((kMaxK - kMinK) / kSteps) : (1));
    uint f = (uint)(parameters->f == 0 ? 20 : parameters->f);
    uint accel = (uint)(parameters->accel == 0 ? 1 : parameters->accel);
    uint shrinkDict = 0;
    int displayLevel = (int)parameters->zParams.notificationLevel;
    COVER_best_s best;
    int warned = 0;

    // Argument validation. Nothing has been allocated yet, so plain returns are safe.
    if (splitPoint <= 0 || splitPoint > 1)
    {
        return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_parameter_outOfBound)));
    }

    if (accel == 0 || accel > 10)
    {
        return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_parameter_outOfBound)));
    }

    if (kMinK < kMaxD || kMaxK < kMinK)
    {
        return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_parameter_outOfBound)));
    }

    if (nbSamples == 0)
    {
        return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_srcSize_wrong)));
    }

    if (dictBufferCapacity < 256)
    {
        return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_dstSize_tooSmall)));
    }

    if (nbThreads > 1)
    {
        throw new NotImplementedException("Multiple threads are not supported");
    }

    COVER_best_init(&best);
    memset((void *)&coverParams, 0, (nuint)(sizeof(ZDICT_cover_params_t)));
    FASTCOVER_convertToCoverParams(*parameters, &coverParams);
    // `accel` was range-checked above, so this index is within [1, 10].
    accelParams = FASTCOVER_defaultAccelParameters[accel];
    // The global display level is set one below the requested level (never below zero).
    g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;

    // Outer loop over d: each value of d gets its own context.
    // The counter is 64-bit on purpose: with a 32-bit counter, `d += 2` wraps around
    // when kMaxD is close to uint.MaxValue and `d <= kMaxD` would stay true forever.
    for (ulong dNext = kMinD; dNext <= kMaxD; dNext += 2)
    {
        uint d = (uint)dNext;
        FASTCOVER_ctx_t ctx;

        {
            nuint initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
            if ((ERR_isError(initVal)) != 0)
            {
                COVER_best_destroy(&best);
                return(initVal);
            }
        }

        // The small-corpus warning is issued at most once per call.
        if (warned == 0)
        {
            COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
            warned = 1;
        }

        // Inner loop over k, reusing the context built for this d.
        // Same 64-bit counter reasoning as above: `k += kStepSize` must not wrap
        // when kMaxK is at or near uint.MaxValue.
        for (ulong kNext = kMinK; kNext <= kMaxK; kNext += kStepSize)
        {
            uint k = (uint)kNext;
            FASTCOVER_tryParameters_data_s *data = (FASTCOVER_tryParameters_data_s *)(malloc((nuint)(sizeof(FASTCOVER_tryParameters_data_s))));
            if (data == null)
            {
                COVER_best_destroy(&best);
                FASTCOVER_ctx_destroy(&ctx);
                return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_memory_allocation)));
            }

            data->ctx = &ctx;
            data->best = &best;
            data->dictBufferCapacity = dictBufferCapacity;
            data->parameters = coverParams;
            data->parameters.k = k;
            data->parameters.d = d;
            data->parameters.splitPoint = splitPoint;
            data->parameters.steps = kSteps;
            data->parameters.shrinkDict = shrinkDict;
            data->parameters.zParams.notificationLevel = (uint)g_displayLevel;

            // Combinations rejected by the parameter check are skipped; `data` is
            // still owned here, so it is released before moving on.
            if ((FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, data->ctx->f, accel)) == 0)
            {
                free((void *)data);
                continue;
            }

            // Ownership of `data` passes to FASTCOVER_tryParameters, which frees it.
            COVER_best_start(&best);
            FASTCOVER_tryParameters((void *)data);
        }

        COVER_best_wait(&best);
        FASTCOVER_ctx_destroy(&ctx);
    }

    // Publish the best result: either its error code, or the dictionary and parameters.
    {
        nuint dictSize = best.dictSize;
        if ((ERR_isError(best.compressedSize)) != 0)
        {
            // Read the code before `best` is destroyed.
            nuint compressedSize = best.compressedSize;
            COVER_best_destroy(&best);
            return(compressedSize);
        }

        FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel);
        memcpy(dictBuffer, best.dict, dictSize);
        COVER_best_destroy(&best);
        return(dictSize);
    }
}