예제 #1
0
 /*
  * Copies the settings that the fastCover parameter set shares with the
  * COVER parameter set (k, d, steps, nbThreads, splitPoint, zParams,
  * shrinkDict) from `fastCoverParams` into `*coverParams`.
  * Any other member of `*coverParams` keeps the value it had on entry.
  */
 private static void FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, ZDICT_cover_params_t *coverParams)
 {
     /* Start from the caller's current values so untouched members survive. */
     ZDICT_cover_params_t converted = *coverParams;

     /* Segment selection settings. */
     converted.k = fastCoverParams.k;
     converted.d = fastCoverParams.d;

     /* Search / execution settings. */
     converted.steps      = fastCoverParams.steps;
     converted.nbThreads  = fastCoverParams.nbThreads;
     converted.splitPoint = fastCoverParams.splitPoint;

     /* Output settings. */
     converted.shrinkDict = fastCoverParams.shrinkDict;
     converted.zParams    = fastCoverParams.zParams;

     *coverParams = converted;
 }
예제 #2
0
        /*! ZDICT_trainFromBuffer_fastCover():
         *  Train a dictionary from an array of samples using a modified version of COVER algorithm.
         *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
         *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
         *  d and k are required.
         *  All other parameters are optional, will use default values if not provided
         *  (`f` defaults to 20 and `accel` defaults to 1 when passed as 0).
         *  `parameters.splitPoint` is ignored: it is always overridden with 1.0.
         *  The resulting dictionary will be saved into `dictBuffer`.
         * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
         *          or an error code, which can be tested with ZDICT_isError().
         *          See ZDICT_trainFromBuffer() for details on failure modes.
         *          Errors produced directly by this function:
         *            - parameter_outOfBound : FASTCOVER_checkParameters() rejected the parameters
         *            - srcSize_wrong        : `nbSamples` is 0
         *            - dstSize_tooSmall     : `dictBufferCapacity` is below 256
         *            - memory_allocation    : the segment frequency table could not be allocated
         *          An error returned by FASTCOVER_ctx_init() is propagated unchanged.
         *  Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory.
         *  Note: this function overwrites the global `g_displayLevel` with
         *        `parameters.zParams.notificationLevel`.
         *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
         *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
         *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
         *        It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
         */
        public static nuint ZDICT_trainFromBuffer_fastCover(void *dictBuffer, nuint dictBufferCapacity, void *samplesBuffer, nuint *samplesSizes, uint nbSamples, ZDICT_fastCover_params_t parameters)
        {
            byte *               dict = (byte *)(dictBuffer);
            FASTCOVER_ctx_t      ctx;
            ZDICT_cover_params_t coverParams;
            FASTCOVER_accel_t    accelParams;

            /* Initialize global data */
            g_displayLevel = (int)parameters.zParams.notificationLevel;

            /* Force splitPoint; fall back to defaults for f and accel when unset (0) */
            parameters.splitPoint = 1.0;
            parameters.f          = (uint)(parameters.f == 0 ? 20 : parameters.f);
            parameters.accel      = (uint)(parameters.accel == 0 ? 1 : parameters.accel);

            /* Convert to cover parameters (zeroed first so unshared members are 0) */
            memset((void *)&coverParams, 0, (nuint)(sizeof(ZDICT_cover_params_t)));
            FASTCOVER_convertToCoverParams(parameters, &coverParams);

            /* Argument checks. The accel check must stay ahead of the table lookup below. */
            if ((FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, parameters.accel)) == 0)
            {
                return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_parameter_outOfBound)));
            }

            if (nbSamples == 0)
            {
                return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_srcSize_wrong)));
            }

            if (dictBufferCapacity < 256)
            {
                return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_dstSize_tooSmall)));
            }

            /* Pick the acceleration settings matching the requested accel level */
            accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];

            /* Initialize context; nothing has to be released here if this fails */
            {
                nuint initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, coverParams.d, parameters.splitPoint, parameters.f, accelParams);

                if ((ERR_isError(initVal)) != 0)
                {
                    return(initVal);
                }
            }

            COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);

            /* Build the dictionary */
            {
                /* Zero-initialized table of 2^f 16-bit counters handed to FASTCOVER_buildDictionary() */
                ushort *segmentFreqs = (ushort *)(calloc((nuint)((ulong)(1) << (int)parameters.f), (nuint)(sizeof(ushort))));

                /* Do not hand a null table to the builder; release the context before bailing out */
                if (segmentFreqs == null)
                {
                    FASTCOVER_ctx_destroy(&ctx);
                    return(unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_memory_allocation)));
                }

                /* Selected segments are written at the end of dictBuffer, starting at offset `tail` */
                nuint tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, dictBufferCapacity, coverParams, segmentFreqs);

                /* Only a percentage (accelParams.finalize) of the training samples is used to finalize */
                uint  nbFinalizeSamples = (uint)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100);
                nuint dictionarySize    = ZDICT_finalizeDictionary((void *)dict, dictBufferCapacity, (void *)(dict + tail), dictBufferCapacity - tail, samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);

                /* Release resources whether finalization succeeded or not; the result may be an error code */
                FASTCOVER_ctx_destroy(&ctx);
                free((void *)segmentFreqs);
                return(dictionarySize);
            }
        }