/* FASTCOVER_convertToCoverParams():
 * Copies the settings that fastCover shares with COVER from `fastCoverParams`
 * into the structure pointed to by `coverParams`.
 * Only the seven fields assigned below are written; anything else already
 * stored in `*coverParams` is left as the caller provided it. */
private static void FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, ZDICT_cover_params_t* coverParams)
{
    /* nested zdict parameter block and the shrink flag */
    coverParams->zParams = fastCoverParams.zParams;
    coverParams->shrinkDict = fastCoverParams.shrinkDict;

    /* optimisation / threading settings */
    coverParams->splitPoint = fastCoverParams.splitPoint;
    coverParams->nbThreads = fastCoverParams.nbThreads;
    coverParams->steps = fastCoverParams.steps;

    /* d and k values */
    coverParams->d = fastCoverParams.d;
    coverParams->k = fastCoverParams.k;
}
/*! ZDICT_trainFromBuffer_fastCover():
 *  Train a dictionary from an array of samples using a modified version of the COVER algorithm.
 *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
 *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
 *  d and k are required.
 *  All other parameters are optional and fall back to default values when left at 0:
 *  `f` defaults to 20 and `accel` defaults to 1. `splitPoint` is always forced to 1.0 here,
 *  whatever value the caller supplied.
 *  The resulting dictionary will be saved into `dictBuffer`.
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
 *          or an error code, which can be tested with ZDICT_isError().
 *          Error codes produced directly by this function:
 *            - parameter_outOfBound : FASTCOVER_checkParameters() rejected the parameters
 *            - srcSize_wrong        : `nbSamples` is 0
 *            - dstSize_tooSmall     : `dictBufferCapacity` is below 256 bytes
 *            - memory_allocation    : the segment frequency table could not be allocated
 *          An error returned by FASTCOVER_ctx_init() or ZDICT_finalizeDictionary() is passed through unchanged.
 *          See ZDICT_trainFromBuffer() for details on failure modes.
 *  Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
 *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
 *        It's possible to select a smaller or larger size, just by specifying `dictBufferCapacity`.
 *        In general, it's recommended to provide a few thousand samples, though this can vary a lot.
 *        It's recommended that the total size of all samples be about ~x100 times the target size of the dictionary.
 */
public static nuint ZDICT_trainFromBuffer_fastCover(void* dictBuffer, nuint dictBufferCapacity, void* samplesBuffer, nuint* samplesSizes, uint nbSamples, ZDICT_fastCover_params_t parameters)
{
    byte* dict = (byte*)(dictBuffer);
    FASTCOVER_ctx_t ctx;
    ZDICT_cover_params_t coverParams;
    FASTCOVER_accel_t accelParams;

    /* Initialize global data */
    g_displayLevel = (int)parameters.zParams.notificationLevel;

    /* Force splitPoint, and assign f / accel when not provided */
    parameters.splitPoint = 1.0;
    parameters.f = (uint)(parameters.f == 0 ? 20 : parameters.f);
    parameters.accel = (uint)(parameters.accel == 0 ? 1 : parameters.accel);

    /* Convert to cover parameters (zeroed first, so fields the conversion does not copy stay 0) */
    memset((void*)&coverParams, 0, (nuint)(sizeof(ZDICT_cover_params_t)));
    FASTCOVER_convertToCoverParams(parameters, &coverParams);

    /* Checks */
    if ((FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, parameters.accel)) == 0)
    {
        return (unchecked((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_parameter_outOfBound)));
    }

    if (nbSamples == 0)
    {
        return (unchecked((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_srcSize_wrong)));
    }

    if (dictBufferCapacity < 256)
    {
        return (unchecked((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_dstSize_tooSmall)));
    }

    /* Assign the acceleration settings matching the requested accel level */
    accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];

    /* Initialize context */
    {
        nuint initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, coverParams.d, parameters.splitPoint, parameters.f, accelParams);

        if ((ERR_isError(initVal)) != 0)
        {
            return (initVal);
        }
    }

    COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);

    /* Build the dictionary */
    {
        /* Zero-initialized table of 2^f 16-bit counters handed to FASTCOVER_buildDictionary */
        ushort* segmentFreqs = (ushort*)(calloc((nuint)((ulong)(1) << (int)parameters.f), (nuint)(2)));

        if (segmentFreqs == null)
        {
            /* The context was successfully initialized above, so release it
             * before reporting the allocation failure. */
            FASTCOVER_ctx_destroy(&ctx);
            return (unchecked((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_memory_allocation)));
        }

        nuint tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, dictBufferCapacity, coverParams, segmentFreqs);
        uint nbFinalizeSamples = (uint)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100);
        /* The content passed to the finalizer is the region of `dictBuffer` starting at offset `tail` */
        nuint dictionarySize = ZDICT_finalizeDictionary((void*)dict, dictBufferCapacity, (void*)(dict + tail), dictBufferCapacity - tail, samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);

        FASTCOVER_ctx_destroy(&ctx);
        free((void*)segmentFreqs);
        return (dictionarySize);
    }
}