Example #1
0
        /**
         * Measures how well a dictionary compresses a range of samples.
         *
         * The evaluated range is [nbTrainSamples, nbSamples) when
         * parameters.splitPoint is below 1.0 and [0, nbSamples) otherwise.
         *
         * Returns dictBufferCapacity plus the sum of the compressed sizes of the
         * evaluated samples. If the scratch buffer, the compression context or the
         * compression dictionary cannot be created, the generic error code is
         * returned; if compressing a sample fails, that sample's error code is
         * returned.
         */
        public static nuint COVER_checkTotalCompressedSize(ZDICT_cover_params_t parameters, nuint *samplesSizes, byte *samples, nuint *offsets, nuint nbTrainSamples, nuint nbSamples, byte *dict, nuint dictBufferCapacity)
        {
            nuint result      = unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_GENERIC));
            nuint firstSample = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;

            // Size the scratch output buffer for the largest sample that will be compressed.
            nuint largestSample = 0;

            for (nuint scan = firstSample; scan < nbSamples; ++scan)
            {
                if (samplesSizes[scan] > largestSample)
                {
                    largestSample = samplesSizes[scan];
                }
            }

            nuint scratchCapacity = ZSTD_compressBound(largestSample);
            void *scratch         = malloc(scratchCapacity);

            ZSTD_CCtx_s * cctx  = ZSTD_createCCtx();
            ZSTD_CDict_s *cdict = ZSTD_createCDict((void *)dict, dictBufferCapacity, parameters.zParams.compressionLevel);

            if (scratch != null && cctx != null && cdict != null)
            {
                // The running total starts at dictBufferCapacity; each sample's compressed size is added to it.
                result = dictBufferCapacity;
                for (nuint sample = firstSample; sample < nbSamples; ++sample)
                {
                    nuint compressed = ZSTD_compress_usingCDict(cctx, scratch, scratchCapacity, (void *)(samples + offsets[sample]), samplesSizes[sample], cdict);

                    if ((ERR_isError(compressed)) != 0)
                    {
                        // Report the failure instead of a partial total.
                        result = compressed;
                        break;
                    }

                    result += compressed;
                }
            }

            // The free functions are called unconditionally; only the scratch buffer is null-checked.
            ZSTD_freeCCtx(cctx);
            ZSTD_freeCDict(cdict);
            if (scratch != null)
            {
                free(scratch);
            }

            return(result);
        }
Example #2
0
 /**
  * Fills *fastCoverParams from a set of COVER parameters plus the two
  * fast-COVER specific values f and accel.
  *
  * NOTE(review): only the fields assigned below are written; any other field
  * of *fastCoverParams keeps whatever value it already held - confirm that
  * callers initialise the destination beforehand.
  */
 private static void FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, ZDICT_fastCover_params_t *fastCoverParams, uint f, uint accel)
 {
     // Values supplied directly by the caller.
     fastCoverParams->f     = f;
     fastCoverParams->accel = accel;

     // Values carried over unchanged from the COVER parameters.
     fastCoverParams->k          = coverParams.k;
     fastCoverParams->d          = coverParams.d;
     fastCoverParams->steps      = coverParams.steps;
     fastCoverParams->nbThreads  = coverParams.nbThreads;
     fastCoverParams->splitPoint = coverParams.splitPoint;
     fastCoverParams->shrinkDict = coverParams.shrinkDict;
     fastCoverParams->zParams    = coverParams.zParams;
 }
Example #3
0
        /**
         * Records the end of one parameter trial, whether it succeeded or failed.
         *
         * Decrements best->liveJobs. If the trial's total compressed size is
         * strictly smaller than the best recorded so far, the trial's dictionary,
         * its size, its parameters and its compressed size are copied into *best.
         * The stored buffer is reallocated when it is missing or smaller than the
         * new dictionary. If that allocation fails, best->compressedSize is set to
         * the generic error code and best->dictSize to 0.
         *
         * Does nothing when best is null. This version performs no locking or
         * signalling.
         */
        public static void COVER_best_finish(COVER_best_s *best, ZDICT_cover_params_t parameters, COVER_dictSelection selection)
        {
            if (best == null)
            {
                return;
            }

            void *candidate           = (void *)selection.dictContent;
            nuint candidateCompressed = selection.totalCompressedSize;
            nuint candidateSize       = selection.dictSize;

            --best->liveJobs;

            // Only a strictly better result replaces the stored one.
            if (candidateCompressed >= best->compressedSize)
            {
                return;
            }

            bool needsNewBuffer = best->dict == null || best->dictSize < candidateSize;

            if (needsNewBuffer)
            {
                if (best->dict != null)
                {
                    free(best->dict);
                }

                best->dict = malloc(candidateSize);
                if (best->dict == null)
                {
                    best->compressedSize = (unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_GENERIC)));
                    best->dictSize       = 0;
                    return;
                }
            }

            // A selection without content leaves the stored result untouched.
            if (candidate != null)
            {
                memcpy(best->dict, candidate, candidateSize);
                best->dictSize       = candidateSize;
                best->parameters     = parameters;
                best->compressedSize = candidateCompressed;
            }
        }
Example #4
0
        /**
         * Validates a fast-COVER parameter set.
         *
         * Returns 1 when all of the following hold, 0 otherwise:
         *   - d and k are non-zero, d is 6 or 8, and d <= k <= maxDictSize;
         *   - f is in [1, 31];
         *   - splitPoint is not <= 0 and not > 1;
         *   - accel is in [1, 10].
         */
        private static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters, nuint maxDictSize, uint f, uint accel)
        {
            bool missingSize     = parameters.d == 0 || parameters.k == 0;
            bool unsupportedD    = parameters.d != 6 && parameters.d != 8;
            bool kTooLarge       = parameters.k > maxDictSize;
            bool dExceedsK       = parameters.d > parameters.k;
            bool fOutOfRange     = f > 31 || f == 0;
            bool splitOutOfRange = parameters.splitPoint <= 0 || parameters.splitPoint > 1;
            bool accelOutOfRange = accel > 10 || accel == 0;

            bool rejected = missingSize
                            || unsupportedD
                            || kTooLarge
                            || dExceedsK
                            || fOutOfRange
                            || splitOutOfRange
                            || accelOutOfRange;

            return(rejected ? 0 : 1);
        }
Example #5
0
        /**
         * Runs one parameter trial: builds a dictionary with the given parameters,
         * lets COVER_selectDict finalize and score it, and hands the outcome to
         * COVER_best_finish.
         *
         * opaque points to a FASTCOVER_tryParameters_data_s that this function
         * owns: it is freed before returning, together with every buffer allocated
         * here. COVER_best_finish is always called, including when an allocation
         * fails, in which case it receives an error selection.
         */
        private static void FASTCOVER_tryParameters(void *opaque)
        {
            FASTCOVER_tryParameters_data_s *job = (FASTCOVER_tryParameters_data_s *)(opaque);
            FASTCOVER_ctx_t *    ctx            = job->ctx;
            ZDICT_cover_params_t parameters     = job->parameters;
            nuint capacity     = job->dictBufferCapacity;
            nuint genericError = (unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_GENERIC)));

            // Both frequency tables have 2^f entries.
            ulong tableEntries = (ulong)(1) << (int)ctx->f;
            nuint freqsBytes   = (nuint)(tableEntries * (nuint)(sizeof(uint)));

            // Zero-initialised ushort counters, one per table entry, passed to the segment selection.
            ushort *perSegmentCounts = (ushort *)(calloc((nuint)(tableEntries), (nuint)(2)));
            byte *  dictBuffer       = (byte *)(malloc(capacity));
            COVER_dictSelection outcome = COVER_dictSelectionError(genericError);
            uint *workingFreqs = (uint *)(malloc(freqsBytes));

            if (perSegmentCounts == null || dictBuffer == null || workingFreqs == null)
            {
                goto _cleanup;
            }

            // Work on a private copy of the context's frequency table.
            memcpy((void *)workingFreqs, (void *)ctx->freqs, freqsBytes);

            {
                // The built content occupies [tail, capacity) of dictBuffer.
                nuint tail = FASTCOVER_buildDictionary(ctx, workingFreqs, (void *)dictBuffer, capacity, parameters, perSegmentCounts);
                uint  finalizeCount = (uint)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);

                outcome = COVER_selectDict(dictBuffer + tail, capacity, capacity - tail, ctx->samples, ctx->samplesSizes, finalizeCount, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, genericError);
                if ((COVER_dictSelectionIsError(outcome)) != 0)
                {
                    goto _cleanup;
                }
            }

_cleanup:
            // job is still read by the COVER_best_finish call, so it is freed only afterwards.
            free((void *)dictBuffer);
            COVER_best_finish(job->best, parameters, outcome);
            free((void *)job);
            free((void *)perSegmentCounts);
            COVER_dictSelectionFree(outcome);
            free((void *)workingFreqs);
        }
Example #6
0
        /**
         * Finalizes the dictionary and selects which version to return, depending
         * on whether the shrink-dict flag is enabled. If enabled, the dictionary
         * used is the smallest one whose total compressed size stays within the
         * configured regression of the size achieved by the largest dictionary.
         *
         * customDictContent / dictContentSize describe the raw dictionary content.
         * dictBufferCapacity is the size of the two working buffers allocated
         * here. The incoming value of totalCompressedSize is never read; the
         * parameter is only used as a local.
         *
         * On success the returned selection holds one of the two buffers
         * allocated here and the other one has been freed. On any failure both
         * buffers are freed and the result of COVER_dictSelectionError is
         * returned, always carrying an error code.
         */
        public static COVER_dictSelection COVER_selectDict(byte *customDictContent, nuint dictBufferCapacity, nuint dictContentSize, byte *samplesBuffer, nuint *samplesSizes, uint nbFinalizeSamples, nuint nbCheckSamples, nuint nbSamples, ZDICT_cover_params_t @params, nuint *offsets, nuint totalCompressedSize)
        {
            nuint  largestDict          = 0;
            nuint  largestCompressed    = 0;
            byte * customDictContentEnd = customDictContent + dictContentSize;
            byte * largestDictbuffer    = (byte *)(malloc(dictBufferCapacity));
            byte * candidateDictBuffer  = (byte *)(malloc(dictBufferCapacity));
            // Allowed growth factor, e.g. a max regression of 5 gives 1.05.
            double regressionTolerance  = ((double)(@params.shrinkDictMaxRegression) / 100.0) + 1.00;

            if (largestDictbuffer == null || candidateDictBuffer == null)
            {
                free((void *)largestDictbuffer);
                free((void *)candidateDictBuffer);
                // dictContentSize is still a plain byte count at this point, not an
                // error code, so report the allocation failure with the generic
                // error code, as the other failure paths report real error codes.
                return(COVER_dictSelectionError((unchecked ((nuint)(-(int)ZSTD_ErrorCode.ZSTD_error_GENERIC)))));
            }

            // Build the full-size dictionary first; it is the baseline for shrinking.
            memcpy((void *)largestDictbuffer, (void *)customDictContent, dictContentSize);
            dictContentSize = ZDICT_finalizeDictionary((void *)largestDictbuffer, dictBufferCapacity, (void *)customDictContent, dictContentSize, (void *)samplesBuffer, samplesSizes, nbFinalizeSamples, @params.zParams);
            if ((ZDICT_isError(dictContentSize)) != 0)
            {
                free((void *)largestDictbuffer);
                free((void *)candidateDictBuffer);
                return(COVER_dictSelectionError(dictContentSize));
            }

            totalCompressedSize = COVER_checkTotalCompressedSize(@params, samplesSizes, samplesBuffer, offsets, nbCheckSamples, nbSamples, largestDictbuffer, dictContentSize);
            if ((ERR_isError(totalCompressedSize)) != 0)
            {
                free((void *)largestDictbuffer);
                free((void *)candidateDictBuffer);
                return(COVER_dictSelectionError(totalCompressedSize));
            }

            // Shrinking disabled: the full-size dictionary is the answer.
            if (@params.shrinkDict == 0)
            {
                COVER_dictSelection selection = new COVER_dictSelection
                {
                    dictContent         = largestDictbuffer,
                    dictSize            = dictContentSize,
                    totalCompressedSize = totalCompressedSize,
                };

                free((void *)candidateDictBuffer);
                return(selection);
            }

            largestDict       = dictContentSize;
            largestCompressed = totalCompressedSize;
            // Try candidates starting from 256 bytes of content; each following
            // candidate uses twice the size the finalizer reported for the previous one.
            dictContentSize   = 256;
            while (dictContentSize < largestDict)
            {
                memcpy((void *)candidateDictBuffer, (void *)largestDictbuffer, largestDict);
                // The candidate content is the last dictContentSize bytes of the raw content.
                dictContentSize = ZDICT_finalizeDictionary((void *)candidateDictBuffer, dictBufferCapacity, (void *)(customDictContentEnd - dictContentSize), dictContentSize, (void *)samplesBuffer, samplesSizes, nbFinalizeSamples, @params.zParams);
                if ((ZDICT_isError(dictContentSize)) != 0)
                {
                    free((void *)largestDictbuffer);
                    free((void *)candidateDictBuffer);
                    return(COVER_dictSelectionError(dictContentSize));
                }

                totalCompressedSize = COVER_checkTotalCompressedSize(@params, samplesSizes, samplesBuffer, offsets, nbCheckSamples, nbSamples, candidateDictBuffer, dictContentSize);
                if ((ERR_isError(totalCompressedSize)) != 0)
                {
                    free((void *)largestDictbuffer);
                    free((void *)candidateDictBuffer);
                    return(COVER_dictSelectionError(totalCompressedSize));
                }

                // First candidate within the tolerated regression wins.
                if (totalCompressedSize <= largestCompressed * regressionTolerance)
                {
                    COVER_dictSelection selection = new COVER_dictSelection
                    {
                        dictContent         = candidateDictBuffer,
                        dictSize            = dictContentSize,
                        totalCompressedSize = totalCompressedSize,
                    };

                    free((void *)largestDictbuffer);
                    return(selection);
                }

                dictContentSize *= 2;
            }

            // No smaller candidate was good enough: fall back to the full-size dictionary.
            dictContentSize     = largestDict;
            totalCompressedSize = largestCompressed;

            {
                COVER_dictSelection selection = new COVER_dictSelection
                {
                    dictContent         = largestDictbuffer,
                    dictSize            = dictContentSize,
                    totalCompressedSize = totalCompressedSize,
                };

                free((void *)candidateDictBuffer);
                return(selection);
            }
        }
Example #7
0
        /*-*************************************
        *  Helper functions
        ***************************************/
        /**
         * Selects the best segment in an epoch.
         * Segments are scored according to the function:
         *
         * Let F(d) be the frequency of all dmers with hash value d.
         * Let S_i be hash value of the dmer at position i of segment S which has length k.
         *
         *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
         *
         * Once the dmer with hash value d is in the dictionary we set F(d) = 0.
         *
         * The search slides a window over the positions [begin, end) of
         * ctx->samples. segmentFreqs counts how many times each hash value occurs
         * inside the current window, so a hash value contributes to the score only
         * once per window. Every increment made to segmentFreqs here is undone
         * before returning, and the freqs entries of the dmers in the returned
         * segment are set to 0.
         *
         * Returns the highest-scoring window seen; if no window has a score above
         * 0 the returned segment has begin, end and score all equal to 0.
         */
        private static COVER_segment_t FASTCOVER_selectSegment(FASTCOVER_ctx_t *ctx, uint *freqs, uint begin, uint end, ZDICT_cover_params_t parameters, ushort *segmentFreqs)
        {
            uint            k           = parameters.k;
            uint            d           = parameters.d;
            uint            f           = ctx->f;
            // Number of dmers in a segment of length k.
            uint            dmersInK    = k - d + 1;
            COVER_segment_t bestSegment = new COVER_segment_t
            {
                begin = 0,
                end   = 0,
                score = 0,
            };
            COVER_segment_t activeSegment;

            // Start with an empty window at the beginning of the epoch.
            activeSegment.begin = begin;
            activeSegment.end   = begin;
            activeSegment.score = 0;
            while (activeSegment.end < end)
            {
                // Hash index of the dmer entering the window on the right.
                nuint idx = FASTCOVER_hashPtrToIndex((void *)(ctx->samples + activeSegment.end), f, d);

                // Add its frequency only if this hash value is not already in the window.
                if (segmentFreqs[idx] == 0)
                {
                    activeSegment.score += freqs[idx];
                }

                activeSegment.end += 1;
                segmentFreqs[idx] += 1;
                // The window grew to dmersInK + 1 dmers: drop the leftmost one.
                if (activeSegment.end - activeSegment.begin == dmersInK + 1)
                {
                    nuint delIndex = FASTCOVER_hashPtrToIndex((void *)(ctx->samples + activeSegment.begin), f, d);

                    segmentFreqs[delIndex] -= 1;
                    // Remove its frequency only when no other occurrence remains in the window.
                    if (segmentFreqs[delIndex] == 0)
                    {
                        activeSegment.score -= freqs[delIndex];
                    }

                    activeSegment.begin += 1;
                }

                // Strict comparison: on ties the earliest window is kept.
                if (activeSegment.score > bestSegment.score)
                {
                    bestSegment = activeSegment;
                }
            }

            // Undo the counts of the dmers still inside the final window.
            while (activeSegment.begin < end)
            {
                nuint delIndex = FASTCOVER_hashPtrToIndex((void *)(ctx->samples + activeSegment.begin), f, d);

                segmentFreqs[delIndex] -= 1;
                activeSegment.begin    += 1;
            }


            {
                uint pos;

                // Zero the frequency of every dmer in the chosen segment.
                for (pos = bestSegment.begin; pos != bestSegment.end; ++pos)
                {
                    nuint i = FASTCOVER_hashPtrToIndex((void *)(ctx->samples + pos), f, d);

                    freqs[i] = 0;
                }
            }

            return(bestSegment);
        }
Example #8
0
        /**
         * Fills dictBuffer from the back with segments chosen by
         * FASTCOVER_selectSegment, cycling through the epochs computed by
         * COVER_computeEpochs.
         *
         * Returns the offset of the first written byte: the content occupies
         * [tail, dictBufferCapacity) of dictBuffer. Filling stops when the buffer
         * is full, when 10 selections in a row score 0, or when the room left is
         * smaller than parameters.d.
         */
        private static nuint FASTCOVER_buildDictionary(FASTCOVER_ctx_t *ctx, uint *freqs, void *dictBuffer, nuint dictBufferCapacity, ZDICT_cover_params_t parameters, ushort *segmentFreqs)
        {
            byte *output = (byte *)(dictBuffer);
            nuint tail   = dictBufferCapacity;
            COVER_epoch_info_t epochs = COVER_computeEpochs((uint)(dictBufferCapacity), (uint)(ctx->nbDmers), parameters.k, 1);
            nuint zeroScoreLimit = 10;
            nuint zeroScoreCount = 0;

            for (nuint epoch = 0; tail > 0; epoch = (nuint)((epoch + 1) % epochs.num))
            {
                uint firstPos = (uint)(epoch * epochs.size);
                uint lastPos  = firstPos + epochs.size;
                COVER_segment_t picked = FASTCOVER_selectSegment(ctx, freqs, firstPos, lastPos, parameters, segmentFreqs);

                if (picked.score == 0)
                {
                    // Nothing scored in this epoch; move on to the next one unless
                    // too many selections in a row came back with a zero score.
                    zeroScoreCount += 1;
                    if (zeroScoreCount >= zeroScoreLimit)
                    {
                        break;
                    }

                    continue;
                }

                zeroScoreCount = 0;

                // Bytes covered by the segment's dmers, clipped to the room left.
                nuint wanted  = picked.end - picked.begin + parameters.d - 1;
                nuint copyLen = wanted < tail ? wanted : tail;

                if (copyLen < parameters.d)
                {
                    break;
                }

                // Write just before the content placed so far.
                tail -= copyLen;
                memcpy((void *)(output + tail), (void *)(ctx->samples + picked.begin), copyLen);
            }

            return(tail);
        }