Exemplo n.º 1
0
        /**
         * Builds the dictionary content from an already prepared context.
         *
         * Epochs are visited round-robin. For each visit the segment chosen by
         * FASTCOVER_selectSegment is copied into dictBuffer, which is filled from
         * its end towards its front. The loop stops when the buffer is full, when
         * 10 selections in a row have a score of zero, or when the (possibly
         * trimmed) segment would be shorter than parameters.d bytes.
         *
         * Returns the offset inside dictBuffer at which the written content begins,
         * i.e. the number of leading bytes that were left unused.
         */
        private static nuint FASTCOVER_buildDictionary(FASTCOVER_ctx_t *ctx, uint *freqs, void *dictBuffer, nuint dictBufferCapacity, ZDICT_cover_params_t parameters, ushort *segmentFreqs)
        {
            byte *dict = (byte *)(dictBuffer);
            // Write cursor: everything in [tail, dictBufferCapacity) is already filled.
            nuint tail = dictBufferCapacity;
            COVER_epoch_info_t epochs = COVER_computeEpochs((uint)(dictBufferCapacity), (uint)(ctx->nbDmers), parameters.k, 1);
            // Give up after this many consecutive zero-score selections.
            nuint maxZeroScoreRun = 10;
            nuint zeroScoreRun    = 0;
            nuint epoch           = 0;

            while (tail > 0)
            {
                uint epochBegin = (uint)(epoch * epochs.size);
                uint epochEnd   = epochBegin + epochs.size;
                COVER_segment_t segment = FASTCOVER_selectSegment(ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);

                if (segment.score != 0)
                {
                    zeroScoreRun = 0;

                    // Bytes spanned by the segment's dmers, trimmed to the space that is left.
                    nuint wanted      = segment.end - segment.begin + parameters.d - 1;
                    nuint segmentSize = wanted < tail ? wanted : tail;

                    if (segmentSize < parameters.d)
                    {
                        // Not even one full dmer fits any more.
                        break;
                    }

                    // Move the cursor back first, then copy the segment in front of the filled area.
                    tail -= segmentSize;
                    memcpy((void *)(dict + tail), (void *)(ctx->samples + segment.begin), segmentSize);
                }
                else if (++zeroScoreRun >= maxZeroScoreRun)
                {
                    break;
                }

                // Advance to the next epoch, wrapping around after the last one.
                epoch = (nuint)((epoch + 1) % epochs.num);
            }

            return tail;
        }
Exemplo n.º 2
0
        /*-*************************************
        *  Helper functions
        ***************************************/
        /**
         * Selects the best segment in the epoch [begin, end).
         * Segments are scored according to the function:
         *
         * Let F(d) be the frequency of all dmers with hash value d.
         * Let S_i be the hash value of the dmer at position i of segment S, which has length k.
         *
         *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
         *
         * A hash value that occurs several times inside the window is only counted once;
         * segmentFreqs holds the per-hash occurrence counts of the current window and is
         * decremented back again before returning.
         *
         * Once a dmer is covered by the returned segment, F of its hash value is set to 0 in freqs.
         */
        private static COVER_segment_t FASTCOVER_selectSegment(FASTCOVER_ctx_t *ctx, uint *freqs, uint begin, uint end, ZDICT_cover_params_t parameters, ushort *segmentFreqs)
        {
            uint k = parameters.k;
            uint d = parameters.d;
            uint f = ctx->f;
            // Number of dmers in a window of k bytes.
            uint windowDmers = k - d + 1;

            // Best window seen so far; a zero score means "nothing found".
            COVER_segment_t bestSegment;
            bestSegment.begin = 0;
            bestSegment.end   = 0;
            bestSegment.score = 0;

            // The sliding window starts out empty at the beginning of the epoch.
            COVER_segment_t activeSegment = new COVER_segment_t
            {
                begin = begin,
                end   = begin,
                score = 0,
            };

            // Slide the window across the epoch, remembering the highest scoring position.
            while (activeSegment.end < end)
            {
                nuint added = FASTCOVER_hashPtrToIndex((void *)(ctx->samples + activeSegment.end), f, d);

                // Only the first occurrence of a hash inside the window contributes to the score.
                if (segmentFreqs[added] == 0)
                {
                    activeSegment.score += freqs[added];
                }

                activeSegment.end++;
                segmentFreqs[added]++;

                // Window grew one dmer too large: drop its first position.
                if (activeSegment.end - activeSegment.begin == windowDmers + 1)
                {
                    nuint dropped = FASTCOVER_hashPtrToIndex((void *)(ctx->samples + activeSegment.begin), f, d);

                    segmentFreqs[dropped]--;
                    // The last occurrence of this hash left the window, so its score goes with it.
                    if (segmentFreqs[dropped] == 0)
                    {
                        activeSegment.score -= freqs[dropped];
                    }

                    activeSegment.begin++;
                }

                if (activeSegment.score > bestSegment.score)
                {
                    bestSegment = activeSegment;
                }
            }

            // Undo the counts of the dmers still inside the window.
            for (; activeSegment.begin < end; activeSegment.begin++)
            {
                nuint dropped = FASTCOVER_hashPtrToIndex((void *)(ctx->samples + activeSegment.begin), f, d);

                segmentFreqs[dropped]--;
            }

            // Zero the frequency of every hash covered by the chosen segment.
            uint pos = bestSegment.begin;

            while (pos != bestSegment.end)
            {
                nuint covered = FASTCOVER_hashPtrToIndex((void *)(ctx->samples + pos), f, d);

                freqs[covered] = 0;
                ++pos;
            }

            return bestSegment;
        }