Exemplo n.º 1
0
            /* Reassigns every input histogram to its cheapest output cluster.
             * For each in_[i] the search starts from the symbol chosen for the previous
             * input (symbols[0] for the very first one) and then tries every id listed in
             * clusters[0..num_clusters); the id with the strictly lowest
             * BrotliHistogramBitCostDistance is written to symbols[i].
             * Afterwards every listed cluster is cleared and rebuilt by adding up the
             * inputs that were assigned to it.
             * On entry clusters[] holds the unique values of symbols[0..in_size); that
             * property is not guaranteed on return.
             * Note: out_[]->bit_cost_ is assumed to be up-to-date already. */
            public static void BrotliHistogramRemap(HistogramDistance *in_,
                                                    size_t in_size, uint *clusters, size_t num_clusters,
                                                    HistogramDistance *out_, uint *symbols)
            {
                size_t pos;
                size_t k;

                /* Pass 1: pick a cluster for each input histogram. */
                for (pos = 0; pos < in_size; ++pos)
                {
                    HistogramDistance *source = &in_[pos];

                    /* Initial guess: the cluster selected for the previous input. */
                    uint chosen;
                    if (pos == 0)
                    {
                        chosen = symbols[0];
                    }
                    else
                    {
                        chosen = symbols[pos - 1];
                    }
                    double lowest = BrotliHistogramBitCostDistance(source, &out_[chosen]);

                    for (k = 0; k < num_clusters; ++k)
                    {
                        uint   candidate = clusters[k];
                        double cost      = BrotliHistogramBitCostDistance(source, &out_[candidate]);
                        if (cost < lowest)
                        {
                            lowest = cost;
                            chosen = candidate;
                        }
                    }
                    symbols[pos] = chosen;
                }

                /* Pass 2: rebuild each listed output histogram from its assigned inputs. */
                for (k = 0; k < num_clusters; ++k)
                {
                    HistogramDistance.HistogramClear(&out_[clusters[k]]);
                }
                for (pos = 0; pos < in_size; ++pos)
                {
                    HistogramDistance.HistogramAddHistogram(&out_[symbols[pos]], &in_[pos]);
                }
            }
Exemplo n.º 2
0
 /* Bit cost of moving 'histogram' into 'candidate': the population cost of
  * their sum minus candidate->bit_cost_. A histogram whose total_count_ is
  * zero costs nothing to move. */
 public static double BrotliHistogramBitCostDistance(
     HistogramDistance *histogram, HistogramDistance *candidate)
 {
     bool nothing_to_move = histogram->total_count_ == 0;
     if (nothing_to_move)
     {
         return(0.0);
     }

     /* Accumulate into a copy so that 'histogram' itself is left untouched. */
     HistogramDistance merged = *histogram;
     HistogramDistance.HistogramAddHistogram(&merged, candidate);
     double merged_cost = BitCostDistance.BrotliPopulationCost(&merged);
     return(merged_cost - candidate->bit_cost_);
 }
Exemplo n.º 3
0
            /* Evaluates merging clusters idx1 and idx2 and, if the merge looks
             * worthwhile, records it in the pair queue.
             * pairs[0] is kept as the top of the queue; *num_pairs is the current
             * number of entries and never grows beyond max_num_pairs.
             * Nothing happens when idx1 == idx2. */
            static void BrotliCompareAndPushToQueue(
                HistogramDistance *out_, uint *cluster_size, uint idx1,
                uint idx2, size_t max_num_pairs, HistogramPair *pairs,
                size_t *num_pairs)
            {
                if (idx1 == idx2)
                {
                    return;
                }

                /* Order the two indices so that lo < hi. */
                uint lo = idx1;
                uint hi = idx2;
                if (hi < lo)
                {
                    lo = idx2;
                    hi = idx1;
                }

                HistogramPair entry = new HistogramPair();
                entry.idx1       = lo;
                entry.idx2       = hi;
                entry.cost_diff  = 0.5 * ClusterCostDiff(cluster_size[lo], cluster_size[hi]);
                entry.cost_diff -= out_[lo].bit_cost_;
                entry.cost_diff -= out_[hi].bit_cost_;

                if (out_[lo].total_count_ == 0)
                {
                    /* Merging with an empty histogram: the result costs what the other one costs. */
                    entry.cost_combo = out_[hi].bit_cost_;
                }
                else if (out_[hi].total_count_ == 0)
                {
                    entry.cost_combo = out_[lo].bit_cost_;
                }
                else
                {
                    double limit;
                    if (*num_pairs == 0)
                    {
                        limit = 1e99;
                    }
                    else
                    {
                        limit = Math.Max(0.0, pairs[0].cost_diff);
                    }

                    HistogramDistance merged = out_[lo];
                    HistogramDistance.HistogramAddHistogram(&merged, &out_[hi]);
                    double merged_cost = BitCostDistance.BrotliPopulationCost(&merged);
                    if (!(merged_cost < limit - entry.cost_diff))
                    {
                        /* Not good enough to be queued. */
                        return;
                    }
                    entry.cost_combo = merged_cost;
                }

                entry.cost_diff += entry.cost_combo;

                bool has_room = *num_pairs < max_num_pairs;
                if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &entry))
                {
                    /* The new pair becomes the top; the old top moves to the tail
                     * when there is still room for it, otherwise it is overwritten. */
                    if (has_room)
                    {
                        pairs[*num_pairs] = pairs[0];
                        ++(*num_pairs);
                    }
                    pairs[0] = entry;
                }
                else if (has_room)
                {
                    pairs[*num_pairs] = entry;
                    ++(*num_pairs);
                }
            }
Exemplo n.º 4
0
            /* Repeatedly merges the pair of clusters at the top of the pair queue.
             * Merging continues while the top pair's cost_diff is below the current
             * threshold (initially 0.0); once it is not, the threshold is lifted to
             * 1e99 and merging goes on only while more than max_clusters remain.
             * Each merge folds the second cluster into the first, relabels symbols[],
             * removes the second id from clusters[] and refreshes the queue.
             * Returns the number of clusters left in clusters[]. */
            public static size_t BrotliHistogramCombine(HistogramDistance *out_,
                                                        uint *cluster_size,
                                                        uint *symbols,
                                                        uint *clusters,
                                                        HistogramPair *pairs,
                                                        size_t num_clusters,
                                                        size_t symbols_size,
                                                        size_t max_clusters,
                                                        size_t max_num_pairs)
            {
                double accept_below = 0.0;
                size_t stop_at      = 1;
                size_t num_pairs    = 0;
                size_t first;
                size_t k;

                /* Seed the queue with every unordered pair of clusters; the pair with
                 * the maximum bit cost reduction is kept in pairs[0]. */
                for (first = 0; first < num_clusters; ++first)
                {
                    size_t second;
                    for (second = first + 1; second < num_clusters; ++second)
                    {
                        BrotliCompareAndPushToQueue(out_, cluster_size, clusters[first],
                                                    clusters[second], max_num_pairs, pairs, &num_pairs);
                    }
                }

                while (num_clusters > stop_at)
                {
                    if (pairs[0].cost_diff >= accept_below)
                    {
                        /* The top pair no longer passes the threshold: accept any
                         * merge from now on, but stop at max_clusters. */
                        accept_below = 1e99;
                        stop_at      = max_clusters;
                        continue;
                    }

                    /* Merge the pair at the top of the queue: 'drop' is folded into 'keep'. */
                    uint keep = pairs[0].idx1;
                    uint drop = pairs[0].idx2;
                    HistogramDistance.HistogramAddHistogram(&out_[keep], &out_[drop]);
                    out_[keep].bit_cost_ = pairs[0].cost_combo;
                    cluster_size[keep]  += cluster_size[drop];

                    /* Relabel every symbol that pointed at the dropped cluster. */
                    for (k = 0; k < symbols_size; ++k)
                    {
                        if (symbols[k] == drop)
                        {
                            symbols[k] = keep;
                        }
                    }

                    /* Close the gap left by the dropped id in clusters[]. */
                    for (k = 0; k < num_clusters; ++k)
                    {
                        if (clusters[k] != drop)
                        {
                            continue;
                        }
                        memmove(&clusters[k], &clusters[k + 1],
                                (num_clusters - k - 1) * sizeof(uint));
                        break;
                    }
                    --num_clusters;

                    /* Compact the queue, discarding pairs that touch either merged
                     * cluster while keeping the best surviving pair in pairs[0]. */
                    size_t kept_count = 0;
                    for (k = 0; k < num_pairs; ++k)
                    {
                        HistogramPair *cur = &pairs[k];
                        if (cur->idx1 != keep && cur->idx2 != keep &&
                            cur->idx1 != drop && cur->idx2 != drop)
                        {
                            /* Snapshot the pair before any slot is overwritten. */
                            HistogramPair moved = *cur;
                            if (HistogramPairIsLess(&pairs[0], cur))
                            {
                                /* This pair beats the current top: swap them. */
                                HistogramPair old_top = pairs[0];
                                pairs[0] = moved;
                                moved    = old_top;
                            }
                            pairs[kept_count] = moved;
                            ++kept_count;
                        }
                    }
                    num_pairs = kept_count;

                    /* Queue the pairs formed by the merged histogram and every remaining cluster. */
                    for (k = 0; k < num_clusters; ++k)
                    {
                        BrotliCompareAndPushToQueue(out_, cluster_size, keep, clusters[k],
                                                    max_num_pairs, pairs, &num_pairs);
                    }
                }
                return(num_clusters);
            }
Exemplo n.º 5
0
            /* Closes the block currently being accumulated. The very first block is
             * always emitted with type 0. For later blocks one of three things happens:
             * (1) the block is emitted with a brand new block type;
             * (2) the block is emitted with the type of the second last block;
             * (3) the block is merged into the last block.
             * When is_final is set, the number of types and blocks is written back
             * to *self->histograms_size_ and split->num_blocks. */
            public static unsafe void BlockSplitterFinishBlock(
                BlockSplitterDistance *self, bool is_final)
            {
                BlockSplit *       out_split    = self->split_;
                double *           prev_entropy = self->last_entropy_;
                HistogramDistance *histos       = self->histograms_;

                /* Treat the block as being at least min_block_size_ long. */
                self->block_size_ =
                    Math.Max(self->block_size_, self->min_block_size_);

                if (self->num_blocks_ == 0)
                {
                    /* First block: nothing to compare against yet. */
                    prev_entropy[0] =
                        BitsEntropy(histos[0].data_, self->alphabet_size_);
                    prev_entropy[1] = prev_entropy[0];
                    out_split->lengths[0] = (uint)self->block_size_;
                    out_split->types[0]   = 0;
                    self->block_size_     = 0;
                    ++self->num_blocks_;
                    ++out_split->num_types;
                    ++self->curr_histogram_ix_;
                    if (self->curr_histogram_ix_ < *self->histograms_size_)
                    {
                        HistogramDistance.HistogramClear(&histos[self->curr_histogram_ix_]);
                    }
                }
                else if (self->block_size_ > 0)
                {
                    HistogramDistance *merged         = stackalloc HistogramDistance[2];
                    double *           merged_entropy = stackalloc double[2];
                    double *           gain           = stackalloc double[2];
                    double cur_entropy = BitsEntropy(histos[self->curr_histogram_ix_].data_,
                                                     self->alphabet_size_);
                    size_t k;

                    /* Slot 0 pairs the current histogram with the last block's
                     * histogram, slot 1 with the second last block's histogram. */
                    for (k = 0; k < 2; ++k)
                    {
                        size_t other_ix;
                        if (k == 0)
                        {
                            other_ix = self->last_histogram_ix_0;
                        }
                        else
                        {
                            other_ix = self->last_histogram_ix_1;
                        }
                        merged[k] = histos[self->curr_histogram_ix_];
                        HistogramDistance.HistogramAddHistogram(&merged[k],
                                                                &histos[other_ix]);
                        merged_entropy[k] = BitsEntropy(
                            &merged[k].data_[0], self->alphabet_size_);
                        gain[k] = merged_entropy[k] - cur_entropy - prev_entropy[k];
                    }

                    bool open_new_type =
                        out_split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES &&
                        gain[0] > self->split_threshold_ &&
                        gain[1] > self->split_threshold_;

                    if (open_new_type)
                    {
                        /* (1) Both merges exceed the threshold: start a new block type. */
                        out_split->lengths[self->num_blocks_] = (uint)self->block_size_;
                        out_split->types[self->num_blocks_]   = (byte)out_split->num_types;
                        self->last_histogram_ix_1 = self->last_histogram_ix_0;
                        self->last_histogram_ix_0 = (byte)out_split->num_types;
                        prev_entropy[1] = prev_entropy[0];
                        prev_entropy[0] = cur_entropy;
                        self->block_size_        = 0;
                        self->merge_last_count_  = 0;
                        self->target_block_size_ = self->min_block_size_;
                        ++self->num_blocks_;
                        ++out_split->num_types;
                        ++self->curr_histogram_ix_;
                        if (self->curr_histogram_ix_ < *self->histograms_size_)
                        {
                            HistogramDistance.HistogramClear(&histos[self->curr_histogram_ix_]);
                        }
                    }
                    else if (gain[1] < gain[0] - 20.0)
                    {
                        /* (2) Reuse the type of the second last block. */
                        out_split->lengths[self->num_blocks_] = (uint)self->block_size_;
                        out_split->types[self->num_blocks_]   = out_split->types[self->num_blocks_ - 2];

                        /* The second last histogram now becomes the most recent one. */
                        size_t second_last = self->last_histogram_ix_1;
                        self->last_histogram_ix_1 = self->last_histogram_ix_0;
                        self->last_histogram_ix_0 = second_last;

                        histos[self->last_histogram_ix_0] = merged[1];
                        prev_entropy[1] = prev_entropy[0];
                        prev_entropy[0] = merged_entropy[1];
                        ++self->num_blocks_;
                        HistogramDistance.HistogramClear(&histos[self->curr_histogram_ix_]);
                        self->block_size_        = 0;
                        self->merge_last_count_  = 0;
                        self->target_block_size_ = self->min_block_size_;
                    }
                    else
                    {
                        /* (3) Extend the last block with the current one. */
                        out_split->lengths[self->num_blocks_ - 1] += (uint)self->block_size_;
                        histos[self->last_histogram_ix_0] = merged[0];
                        prev_entropy[0] = merged_entropy[0];
                        if (out_split->num_types == 1)
                        {
                            prev_entropy[1] = prev_entropy[0];
                        }
                        HistogramDistance.HistogramClear(&histos[self->curr_histogram_ix_]);
                        self->block_size_ = 0;

                        /* From the second consecutive merge on, raise the target block size. */
                        ++self->merge_last_count_;
                        if (self->merge_last_count_ > 1)
                        {
                            self->target_block_size_ += self->min_block_size_;
                        }
                    }
                }

                if (is_final)
                {
                    /* Publish the final number of block types and blocks. */
                    *self->histograms_size_ = out_split->num_types;
                    out_split->num_blocks   = self->num_blocks_;
                }
            }