/* Reassigns every input histogram to the cluster histogram it is cheapest to
 * merge into, then rebuilds the cluster histograms from that assignment.
 *
 * On entry clusters[0..num_clusters) is expected to hold the distinct values
 * found in symbols[0..in_size); symbols is rewritten here, so that
 * relationship is not guaranteed on return.
 * Note: out_[]->bit_cost_ is assumed to be current, because the cost
 * estimate reads it. */
public static void BrotliHistogramRemap(HistogramDistance *in_, size_t in_size, uint *clusters, size_t num_clusters, HistogramDistance *out_, uint *symbols) {
    size_t idx;

    /* Pass 1: choose the cheapest cluster for each input histogram. */
    for (idx = 0; idx < in_size; ++idx) {
        HistogramDistance *source = &in_[idx];

        /* Start from the previous input's choice (or this input's current
         * symbol for the very first one); only a strictly cheaper cluster
         * replaces it. */
        uint chosen;
        if (idx == 0) {
            chosen = symbols[0];
        } else {
            chosen = symbols[idx - 1];
        }
        double lowest_cost = BrotliHistogramBitCostDistance(source, &out_[chosen]);

        size_t c;
        for (c = 0; c < num_clusters; ++c) {
            uint candidate = clusters[c];
            double candidate_cost = BrotliHistogramBitCostDistance(source, &out_[candidate]);
            if (candidate_cost < lowest_cost) {
                lowest_cost = candidate_cost;
                chosen = candidate;
            }
        }
        symbols[idx] = chosen;
    }

    /* Pass 2: rebuild every cluster histogram from the raw inputs. */
    size_t n;
    for (n = 0; n < num_clusters; ++n) {
        HistogramDistance.HistogramClear(&out_[clusters[n]]);
    }
    for (n = 0; n < in_size; ++n) {
        HistogramDistance.HistogramAddHistogram(&out_[symbols[n]], &in_[n]);
    }
}
/* Prepares a BlockSplitterDistance for a run over num_symbols symbols.
 *
 * Resets the splitter's counters, makes sure split->types and split->lengths
 * can hold the worst-case number of blocks, and allocates the histogram
 * array, returning it through *histograms / *histograms_size as well as
 * storing it on the splitter. Only the first histogram is cleared here. */
public static unsafe void InitBlockSplitter(
    ref MemoryManager m, BlockSplitterDistance *self, size_t alphabet_size,
    size_t min_block_size, double split_threshold, size_t num_symbols,
    BlockSplit *split, HistogramDistance **histograms, size_t *histograms_size) {
    /* Worst case: every block has exactly the minimum size. */
    size_t max_num_blocks = num_symbols / min_block_size + 1;
    /* We have to allocate one more histogram than the maximum number of block
     * types for the current histogram when the meta-block is too big. */
    size_t max_num_types = Math.Min(max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1);

    /* Configuration supplied by the caller. */
    self->alphabet_size_ = alphabet_size;
    self->min_block_size_ = min_block_size;
    self->split_threshold_ = split_threshold;
    self->split_ = split;
    self->histograms_size_ = histograms_size;

    /* Running state starts from scratch. */
    self->num_blocks_ = 0;
    self->block_size_ = 0;
    self->curr_histogram_ix_ = 0;
    self->merge_last_count_ = 0;
    self->target_block_size_ = min_block_size;

    /* Output arrays of the split, sized for the worst case. */
    BrotliEnsureCapacity(ref m, sizeof(byte), (void **)&split->types,
                         &split->types_alloc_size, max_num_blocks);
    BrotliEnsureCapacity(ref m, sizeof(uint), (void **)&split->lengths,
                         &split->lengths_alloc_size, max_num_blocks);
    split->num_blocks = max_num_blocks;

    /* Histogram storage, shared between the caller and the splitter. */
    *histograms_size = max_num_types;
    HistogramDistance *storage = (HistogramDistance *)BrotliAllocate(
        ref m, *histograms_size * sizeof(HistogramDistance));
    *histograms = storage;
    self->histograms_ = storage;

    /* Clear only current histogram. */
    HistogramDistance.HistogramClear(&storage[0]);
    self->last_histogram_ix_1 = 0;
    self->last_histogram_ix_0 = 0;
}
/* Records one more symbol in the histogram currently being filled. Once the
 * running block size hits the target size, hands over to
 * BlockSplitterFinishBlock (non-final) to decide what to do with the block. */
public static unsafe void BlockSplitterAddSymbol(BlockSplitterDistance *self, size_t symbol) {
    HistogramDistance *current = &self->histograms_[self->curr_histogram_ix_];
    HistogramDistance.HistogramAdd(current, symbol);

    ++self->block_size_;
    bool target_reached = self->block_size_ == self->target_block_size_;
    if (target_reached) {
        BlockSplitterFinishBlock(self, /* is_final = */ false);
    }
}
/* Walks the command stream once and fills the three histogram families:
 *   - insert_and_copy_histograms: one cmd_prefix_ sample per command, indexed
 *     by the current insert-and-copy block type;
 *   - literal_histograms: one sample per inserted literal byte, indexed by
 *     the literal block type, combined with the literal context when
 *     context_modes is non-null;
 *   - copy_dist_histograms: one dist_prefix_ sample per command that has a
 *     non-zero copy length and cmd_prefix_ >= 128, indexed by distance block
 *     type and distance context.
 *
 * ringbuffer/mask/start_pos locate the input bytes; prev_byte and prev_byte2
 * are the two bytes preceding start_pos and are updated as the walk proceeds.
 * context_modes may be null, in which case literal histograms are indexed by
 * block type only. */
private static unsafe void BrotliBuildHistogramsWithContext(
    Command *cmds, size_t num_commands,
    BlockSplit *literal_split, BlockSplit *insert_and_copy_split, BlockSplit *dist_split,
    byte *ringbuffer, size_t start_pos, size_t mask,
    byte prev_byte, byte prev_byte2, ContextType *context_modes,
    HistogramLiteral *literal_histograms,
    HistogramCommand *insert_and_copy_histograms,
    HistogramDistance *copy_dist_histograms) {
    size_t pos = start_pos;
    BlockSplitIterator literal_it;
    BlockSplitIterator insert_and_copy_it;
    BlockSplitIterator dist_it;
    size_t i;

    InitBlockSplitIterator(&literal_it, literal_split);
    InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
    InitBlockSplitIterator(&dist_it, dist_split);

    for (i = 0; i < num_commands; ++i) {
        Command *cmd = &cmds[i];
        size_t j;

        /* Every command contributes exactly one insert-and-copy sample. */
        BlockSplitIteratorNext(&insert_and_copy_it);
        HistogramCommand.HistogramAdd(&insert_and_copy_histograms[insert_and_copy_it.type_],
                                      cmd->cmd_prefix_);

        /* One literal sample per inserted byte. */
        for (j = cmd->insert_len_; j != 0; --j) {
            size_t context;
            BlockSplitIteratorNext(&literal_it);
            context = context_modes != null ?
                      ((literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
                       Context(prev_byte, prev_byte2, context_modes[literal_it.type_])) :
                      literal_it.type_;
            HistogramLiteral.HistogramAdd(&literal_histograms[context],
                                          ringbuffer[pos & mask]);
            prev_byte2 = prev_byte;
            prev_byte = ringbuffer[pos & mask];
            ++pos;
        }

        /* Skip over the copied bytes; they produce no literal samples. */
        pos += CommandCopyLen(cmd);
        if (CommandCopyLen(cmd) != 0) {
            /* Refresh the two-byte history from the end of the copy. */
            prev_byte2 = ringbuffer[(pos - 2) & mask];
            prev_byte = ringbuffer[(pos - 1) & mask];
            if (cmd->cmd_prefix_ >= 128) {
                size_t context;
                BlockSplitIteratorNext(&dist_it);
                context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
                          CommandDistanceContext(cmd);
                /* Fixed: the argument had been corrupted to "©_dist_histograms"
                 * ("&copy" decoded as an HTML entity); it must be the address
                 * of the selected distance histogram. */
                HistogramDistance.HistogramAdd(&copy_dist_histograms[context],
                                               cmd->dist_prefix_);
            }
        }
    }
}
/* Estimates the extra bit cost of folding `histogram` into `candidate`:
 * the population cost of the merged histogram minus candidate->bit_cost_.
 * An empty histogram (total_count_ == 0) costs nothing to move. */
public static double BrotliHistogramBitCostDistance(
    HistogramDistance *histogram, HistogramDistance *candidate) {
    if (histogram->total_count_ == 0) {
        return(0.0);
    }

    /* Work on a copy so neither input is modified. */
    HistogramDistance merged = *histogram;
    HistogramDistance.HistogramAddHistogram(&merged, candidate);
    double merged_cost = BitCostDistance.BrotliPopulationCost(&merged);
    return(merged_cost - candidate->bit_cost_);
}
// Loads two images from Images/<imageSet>/ and checks that
// HistogramDistance.Calculate reports them as clearly different in both
// modes: above 100000 when the flag is false and above 0.1 when it is true.
// NOTE(review): the flag presumably selects a normalized result - confirm
// against HistogramDistance.Calculate.
public void DifferentImages(string imageSet, string image1Name, string image2Name)
{
    using (var firstBitmap = new Bitmap(Path.Combine("Images", imageSet, image1Name)))
    {
        using (var secondBitmap = new Bitmap(Path.Combine("Images", imageSet, image2Name)))
        {
            var firstPixels = Bitmap2ByteArray(firstBitmap);
            var secondPixels = Bitmap2ByteArray(secondBitmap);

            var rawDistance = HistogramDistance.Calculate(firstPixels, secondPixels, false);
            Assert.True(rawDistance > 100000);

            var flaggedDistance = HistogramDistance.Calculate(firstPixels, secondPixels, true);
            Assert.True(flaggedDistance > 0.1);
        }
    }
}
/* Greedily merges cluster histograms pairwise and returns the number of
 * clusters that remain.
 *
 * out_          cluster histograms, indexed by cluster id; merged in place.
 * cluster_size  per-cluster size counters, summed on merge.
 * symbols       symbols_size entries mapping inputs to cluster ids; entries
 *               pointing at a merged-away cluster are redirected.
 * clusters      list of the num_clusters live cluster ids; compacted in place.
 * pairs         scratch queue of candidate pairs (capacity max_num_pairs);
 *               pairs[0] is kept as the best candidate.
 * max_clusters  upper bound on the cluster count enforced in the second phase.
 *
 * The loop runs in two phases. First it merges only while the best pair has
 * cost_diff below 0.0, stopping at one cluster. When no such pair is left the
 * threshold is raised to 1e99 and merging continues, regardless of cost,
 * until at most max_clusters clusters remain. */
public static size_t BrotliHistogramCombine(HistogramDistance *out_, uint *cluster_size, uint *symbols, uint *clusters, HistogramPair *pairs, size_t num_clusters, size_t symbols_size, size_t max_clusters, size_t max_num_pairs) {
    double cost_diff_threshold = 0.0;
    size_t min_cluster_size = 1;
    size_t num_pairs = 0;
    {
        /* We maintain a vector of histogram pairs, with the property that the pair
         * with the maximum bit cost reduction is the first. */
        size_t idx1;
        /* Seed the queue with every unordered pair of live clusters. */
        for (idx1 = 0; idx1 < num_clusters; ++idx1) {
            size_t idx2;
            for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) {
                BrotliCompareAndPushToQueue(out_, cluster_size, clusters[idx1], clusters[idx2], max_num_pairs, &pairs[0], &num_pairs);
            }
        }
    }
    while (num_clusters > min_cluster_size) {
        uint best_idx1;
        uint best_idx2;
        size_t i;
        if (pairs[0].cost_diff >= cost_diff_threshold) {
            /* No pair is good enough for the current phase: switch to the
             * unconditional phase and re-test the loop condition against
             * max_clusters. */
            cost_diff_threshold = 1e99;
            min_cluster_size = max_clusters;
            continue;
        }
        /* Take the best pair from the top of heap. */
        best_idx1 = pairs[0].idx1;
        best_idx2 = pairs[0].idx2;
        /* Fold cluster best_idx2 into best_idx1; the merged cost was already
         * computed when the pair was queued. */
        HistogramDistance.HistogramAddHistogram(&out_[best_idx1], &out_[best_idx2]);
        out_[best_idx1].bit_cost_ = pairs[0].cost_combo;
        cluster_size[best_idx1] += cluster_size[best_idx2];
        /* Redirect every input that pointed at the absorbed cluster. */
        for (i = 0; i < symbols_size; ++i) {
            if (symbols[i] == best_idx2) {
                symbols[i] = best_idx1;
            }
        }
        /* Drop best_idx2 from the live-cluster list by shifting the tail down. */
        for (i = 0; i < num_clusters; ++i) {
            if (clusters[i] == best_idx2) {
                memmove(&clusters[i], &clusters[i + 1], (num_clusters - i - 1) * sizeof(uint));
                break;
            }
        }
        --num_clusters;
        {
            /* Remove pairs intersecting the just combined best pair. */
            size_t copy_to_idx = 0;
            /* In-place compaction: surviving pairs are copied towards the
             * front while the best survivor is kept in slot 0. */
            for (i = 0; i < num_pairs; ++i) {
                HistogramPair *p = &pairs[i];
                if (p->idx1 == best_idx1 || p->idx2 == best_idx1 || p->idx1 == best_idx2 || p->idx2 == best_idx2) {
                    /* Remove invalid pair from the queue. */
                    continue;
                }
                if (HistogramPairIsLess(&pairs[0], p)) {
                    /* Replace the top of the queue if needed. */
                    HistogramPair front = pairs[0];
                    pairs[0] = *p;
                    pairs[copy_to_idx] = front;
                } else {
                    pairs[copy_to_idx] = *p;
                }
                ++copy_to_idx;
            }
            num_pairs = copy_to_idx;
        }
        /* Push new pairs formed with the combined histogram to the heap. */
        for (i = 0; i < num_clusters; ++i) {
            BrotliCompareAndPushToQueue(out_, cluster_size, best_idx1, clusters[i], max_num_pairs, &pairs[0], &num_pairs);
        }
    }
    return(num_clusters);
}
/* Evaluates merging clusters idx1 and idx2 and, if the merge looks
 * worthwhile, records it in the pair queue.
 *
 * The queue keeps its best entry in pairs[0]; the remaining entries are
 * unordered. A new best entry displaces the old front, which is moved to the
 * end when there is room (and dropped otherwise). A non-best entry is
 * appended only when there is room. Nothing happens when idx1 == idx2. */
static void BrotliCompareAndPushToQueue(
    HistogramDistance *out_, uint *cluster_size, uint idx1, uint idx2,
    size_t max_num_pairs, HistogramPair *pairs, size_t *num_pairs) {
    if (idx1 == idx2) {
        return;
    }

    /* Normalise the pair so that the smaller id comes first. */
    uint lo = idx1;
    uint hi = idx2;
    if (hi < lo) {
        lo = idx2;
        hi = idx1;
    }

    HistogramPair candidate = new HistogramPair();
    candidate.idx1 = lo;
    candidate.idx2 = hi;
    /* Start from the cluster-size term and subtract both current costs; the
     * merged cost is added further down once it is known. */
    candidate.cost_diff = 0.5 * ClusterCostDiff(cluster_size[lo], cluster_size[hi]);
    candidate.cost_diff -= out_[lo].bit_cost_;
    candidate.cost_diff -= out_[hi].bit_cost_;

    bool worth_queueing;
    if (out_[lo].total_count_ == 0) {
        /* Merging with an empty histogram costs what the other one costs. */
        candidate.cost_combo = out_[hi].bit_cost_;
        worth_queueing = true;
    } else if (out_[hi].total_count_ == 0) {
        candidate.cost_combo = out_[lo].bit_cost_;
        worth_queueing = true;
    } else {
        /* With an empty queue anything is accepted; otherwise the pair has to
         * beat the current front (clamped at zero). */
        double threshold = *num_pairs == 0 ? 1e99 : Math.Max(0.0, pairs[0].cost_diff);
        HistogramDistance merged = out_[lo];
        HistogramDistance.HistogramAddHistogram(&merged, &out_[hi]);
        double merged_cost = BitCostDistance.BrotliPopulationCost(&merged);
        worth_queueing = merged_cost < threshold - candidate.cost_diff;
        if (worth_queueing) {
            candidate.cost_combo = merged_cost;
        }
    }

    if (!worth_queueing) {
        return;
    }

    candidate.cost_diff += candidate.cost_combo;
    if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &candidate)) {
        /* New best pair: the old front moves to the back if it still fits. */
        if (*num_pairs < max_num_pairs) {
            pairs[*num_pairs] = pairs[0];
            ++(*num_pairs);
        }
        pairs[0] = candidate;
    } else if (*num_pairs < max_num_pairs) {
        pairs[*num_pairs] = candidate;
        ++(*num_pairs);
    }
}
/* Builds the block splits, histograms and context maps of one meta-block
 * into *mb.
 *
 * Steps, in order:
 *   1. BrotliSplitBlock fills the literal, command and distance block splits.
 *   2. Temporary literal and distance histograms plus the final command
 *      histograms are allocated, cleared and populated by
 *      BrotliBuildHistogramsWithContext.
 *   3. The temporary literal histograms are clustered into
 *      mb->literal_histograms / mb->literal_context_map.
 *   4. The temporary distance histograms are clustered into
 *      mb->distance_histograms / mb->distance_context_map.
 * All temporaries are released through BrotliFree before returning.
 *
 * When params_->disable_literal_context_modeling is set, literal histograms
 * are built per block type only and the resulting map is replicated across
 * all literal contexts afterwards. */
private static unsafe void BrotliBuildMetaBlock(ref MemoryManager m, byte *ringbuffer, size_t pos, size_t mask, BrotliEncoderParams *params_, byte prev_byte, byte prev_byte2, Command *cmds, size_t num_commands, ContextType literal_context_mode, MetaBlockSplit *mb) {
    /* Histogram ids need to fit in one byte. */
    size_t kMaxNumberOfHistograms = 256;
    HistogramDistance *distance_histograms;
    HistogramLiteral * literal_histograms;
    /* Stays null when literal context modeling is disabled; the histogram
     * builder checks for null. */
    ContextType * literal_context_modes = null;
    size_t literal_histograms_size;
    size_t distance_histograms_size;
    size_t i;
    /* 1 histogram per literal block type unless context modeling is on. */
    size_t literal_context_multiplier = 1;
    BrotliSplitBlock(ref m, cmds, num_commands, ringbuffer, pos, mask, params_, &mb->literal_split, &mb->command_split, &mb->distance_split);
    if (!params_->disable_literal_context_modeling) {
        literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
        /* Every literal block type uses the same, caller-supplied context mode. */
        literal_context_modes = (ContextType *)BrotliAllocate(ref m, mb->literal_split.num_types * sizeof(ContextType));
        for (i = 0; i < mb->literal_split.num_types; ++i) {
            literal_context_modes[i] = literal_context_mode;
        }
    }
    /* Temporary, unclustered literal histograms. */
    literal_histograms_size = mb->literal_split.num_types * literal_context_multiplier;
    literal_histograms = (HistogramLiteral *)BrotliAllocate(ref m, literal_histograms_size * sizeof(HistogramLiteral));
    HistogramLiteral.ClearHistograms(literal_histograms, literal_histograms_size);
    /* Temporary, unclustered distance histograms: one per (type, context). */
    distance_histograms_size = mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
    distance_histograms = (HistogramDistance *)BrotliAllocate(ref m, distance_histograms_size * sizeof(HistogramDistance));
    HistogramDistance.ClearHistograms(distance_histograms, distance_histograms_size);
    /* Command histograms are written straight into the meta-block. */
    mb->command_histograms_size = mb->command_split.num_types;
    mb->command_histograms = (HistogramCommand *)BrotliAllocate(ref m, mb->command_histograms_size * sizeof(HistogramCommand));
    HistogramCommand.ClearHistograms(mb->command_histograms, mb->command_histograms_size);
    BrotliBuildHistogramsWithContext(cmds, num_commands, &mb->literal_split, &mb->command_split,
                                     &mb->distance_split, ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes, literal_histograms, mb->command_histograms, distance_histograms);
    BrotliFree(ref m, literal_context_modes);
    /* The literal context map is always sized for the full context count,
     * even when only num_types histograms were built above. */
    mb->literal_context_map_size = mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
    mb->literal_context_map = (uint *)BrotliAllocate(ref m, mb->literal_context_map_size * sizeof(uint));
    mb->literal_histograms_size = mb->literal_context_map_size;
    mb->literal_histograms = (HistogramLiteral *)BrotliAllocate(ref m, mb->literal_histograms_size * sizeof(HistogramLiteral));
    ClusterLiteral.BrotliClusterHistograms(ref m, literal_histograms, literal_histograms_size, kMaxNumberOfHistograms, mb->literal_histograms, &mb->literal_histograms_size, mb->literal_context_map);
    BrotliFree(ref m, literal_histograms);
    if (params_->disable_literal_context_modeling) {
        /* Distribute assignment to all contexts. */
        /* Only the first num_types map entries were filled by clustering.
         * Walk the block types from last to first so that entry i is read
         * before the expansion of a lower type can overwrite it. */
        for (i = mb->literal_split.num_types; i != 0;) {
            size_t j = 0;
            i--;
            for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
                mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] = mb->literal_context_map[i];
            }
        }
    }
    /* Same clustering procedure for distances. */
    mb->distance_context_map_size = mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
    mb->distance_context_map = (uint *)BrotliAllocate(ref m, mb->distance_context_map_size * sizeof(uint));
    mb->distance_histograms_size = mb->distance_context_map_size;
    mb->distance_histograms = (HistogramDistance *)BrotliAllocate(ref m, mb->distance_histograms_size * sizeof(HistogramDistance));
    ClusterDistance.BrotliClusterHistograms(ref m, distance_histograms, mb->distance_context_map_size, kMaxNumberOfHistograms, mb->distance_histograms, &mb->distance_histograms_size, mb->distance_context_map);
    BrotliFree(ref m, distance_histograms);
}
/// <summary>
/// Measures how far apart two histograms are using the selected model.
/// 0 means identical, 1 means completely different.
/// </summary>
/// <param name="histogramA">The first histogram.</param>
/// <param name="histogramB">The second histogram.</param>
/// <param name="model">The computation model.</param>
/// <returns>
/// A distance between 0 and 1; 1 is also returned when <paramref name="model"/>
/// is not one of the handled models.
/// </returns>
public static float HistogramDistance(Histogram histogramA, Histogram histogramB, HistogramDistance model)
{
    // The enum is fully qualified because this method shares its name.
    if (model == Unity.QuickSearch.HistogramDistance.CityBlock)
    {
        return CityBlockDistance(histogramA, histogramB);
    }

    if (model == Unity.QuickSearch.HistogramDistance.Euclidean)
    {
        return EuclideanDistance(histogramA, histogramB);
    }

    if (model == Unity.QuickSearch.HistogramDistance.Bhattacharyya)
    {
        return BhattacharyyaDistance(histogramA, histogramB);
    }

    if (model == Unity.QuickSearch.HistogramDistance.MDPA)
    {
        return MDPA(histogramA, histogramB);
    }

    // Unknown model: report the maximum distance.
    return 1.0f;
}
/* Estimates the number of bits needed to encode the population described by
 * `histogram`.
 *
 * Histograms with at most four distinct symbols use closed-form estimates
 * built from the fixed per-case constants below. Larger histograms use the
 * entropy of the symbol counts plus an estimate for transmitting the code
 * lengths. An empty histogram costs the same as a one-symbol histogram. */
public static double BrotliPopulationCost(HistogramDistance *histogram) {
    const double kOneSymbolHistogramCost = 12;
    const double kTwoSymbolHistogramCost = 20;
    const double kThreeSymbolHistogramCost = 28;
    const double kFourSymbolHistogramCost = 37;
    size_t data_size = HistogramDistance.HistogramDataSize();
    int count = 0;
    /* Indices of the first (up to five) symbols with a non-zero count. */
    size_t *s = stackalloc size_t[5];
    double bits = 0.0;
    size_t i;
    if (histogram->total_count_ == 0) {
        return(kOneSymbolHistogramCost);
    }
    /* Count distinct symbols, but stop as soon as a fifth one is seen: beyond
     * four the exact number does not matter. */
    for (i = 0; i < data_size; ++i) {
        if (histogram->data_[i] > 0) {
            s[count] = i;
            ++count;
            if (count > 4) {
                break;
            }
        }
    }
    if (count == 1) {
        return(kOneSymbolHistogramCost);
    }
    if (count == 2) {
        /* Fixed cost plus one bit per occurrence. */
        return(kTwoSymbolHistogramCost + (double)histogram->total_count_);
    }
    if (count == 3) {
        uint histo0 = histogram->data_[s[0]];
        uint histo1 = histogram->data_[s[1]];
        uint histo2 = histogram->data_[s[2]];
        uint histomax = Math.Max(histo0, Math.Max(histo1, histo2));
        /* Two bits per occurrence, except one bit for the most frequent symbol. */
        return(kThreeSymbolHistogramCost + 2 * (histo0 + histo1 + histo2) - histomax);
    }
    if (count == 4) {
        uint *histo = stackalloc uint[4];
        uint h23;
        uint histomax;
        for (i = 0; i < 4; ++i) {
            histo[i] = histogram->data_[s[i]];
        }
        /* Sort the four counts into descending order. */
        for (i = 0; i < 4; ++i) {
            size_t j;
            for (j = i + 1; j < 4; ++j) {
                if (histo[j] > histo[i]) {
                    uint tmp = histo[j];
                    histo[j] = histo[i];
                    histo[i] = tmp;
                }
            }
        }
        /* h23 is the combined count of the two least frequent symbols. */
        h23 = histo[2] + histo[3];
        histomax = Math.Max(h23, histo[0]);
        return(kFourSymbolHistogramCost + 3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
    }
    {
        /* In this loop we compute the entropy of the histogram and simultaneously
         * build a simplified histogram of the code length codes where we use the
         * zero repeat code 17, but we don't use the non-zero repeat code 16. */
        size_t max_depth = 1;
        uint * depth_histo = stackalloc uint[BROTLI_CODE_LENGTH_CODES];
        /* stackalloc memory is zeroed explicitly before it is used as counters. */
        memset(depth_histo, 0, BROTLI_CODE_LENGTH_CODES * sizeof(uint));
        double log2total = FastLog2(histogram->total_count_);
        /* i is advanced inside the body: by one for a used symbol, by the
         * whole run length for a run of zeros. */
        for (i = 0; i < data_size;) {
            if (histogram->data_[i] > 0) {
                /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
                 * = log2(total_count) - log2(count(symbol)) */
                double log2p = log2total - FastLog2(histogram->data_[i]);
                /* Approximate the bit depth by round(-log2(P(symbol))) */
                size_t depth = (size_t)(log2p + 0.5);
                bits += histogram->data_[i] * log2p;
                /* Depths are clamped to 15. */
                if (depth > 15) {
                    depth = 15;
                }
                if (depth > max_depth) {
                    max_depth = depth;
                }
                ++depth_histo[depth];
                ++i;
            } else {
                /* Compute the run length of zeros and add the appropriate number of 0
                 * and 17 code length codes to the code length code histogram. */
                uint reps = 1;
                size_t k;
                for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
                    ++reps;
                }
                i += reps;
                if (i == data_size) {
                    /* Don't add any cost for the last zero run, since these are encoded
                     * only implicitly. */
                    break;
                }
                if (reps < 3) {
                    /* Short runs are counted as individual zero code lengths. */
                    depth_histo[0] += reps;
                } else {
                    /* Longer runs: one repeat-zero code per factor of eight
                     * in (reps - 2). */
                    reps -= 2;
                    while (reps > 0) {
                        ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
                        /* Add the 3 extra bits for the 17 code length code. */
                        bits += 3;
                        reps >>= 3;
                    }
                }
            }
        }
        /* Add the estimated encoding cost of the code length code histogram. */
        bits += (double)(18 + 2 * max_depth);
        /* Add the entropy of the code length code histogram. */
        bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
    }
    return(bits);
}
/* Closes the block currently being accumulated in
 * histograms[curr_histogram_ix_].
 *
 * Does either of three things:
 *   (1) emits the current block with a new block type;
 *   (2) emits the current block with the type of the second last block;
 *   (3) merges the current block with the last block.
 *
 * The very first block is always emitted as type 0. For later blocks the
 * choice is driven by how much the entropy grows when the current histogram
 * is combined with each of the last two histograms (diff[0], diff[1]).
 *
 * When is_final is true, the final histogram count and block count are
 * written back to *self->histograms_size_ and split->num_blocks. */
public static unsafe void BlockSplitterFinishBlock(
    BlockSplitterDistance *self, bool is_final) {
    BlockSplit * split = self->split_;
    double * last_entropy = self->last_entropy_;
    HistogramDistance *histograms = self->histograms_;
    /* A block is never recorded as shorter than the minimum block size. */
    self->block_size_ = Math.Max(self->block_size_, self->min_block_size_);
    if (self->num_blocks_ == 0) {
        /* Create first block. */
        split->lengths[0] = (uint)self->block_size_;
        split->types[0] = 0;
        last_entropy[0] = BitsEntropy(histograms[0].data_, self->alphabet_size_);
        /* With a single block both "last" slots describe the same histogram. */
        last_entropy[1] = last_entropy[0];
        ++self->num_blocks_;
        ++split->num_types;
        ++self->curr_histogram_ix_;
        /* Prepare the next histogram slot, if one was allocated. */
        if (self->curr_histogram_ix_ < *self->histograms_size_) {
            HistogramDistance.HistogramClear(&histograms[self->curr_histogram_ix_]);
        }
        self->block_size_ = 0;
    } else if (self->block_size_ > 0) {
        double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_, self->alphabet_size_);
        HistogramDistance *combined_histo = stackalloc HistogramDistance[2];
        double * combined_entropy = stackalloc double[2];
        double * diff = stackalloc double[2];
        size_t j;
        /* j == 0: combine with the last histogram; j == 1: with the second
         * last. diff[j] is the entropy increase caused by combining. */
        for (j = 0; j < 2; ++j) {
            size_t last_histogram_ix = j == 0 ? self->last_histogram_ix_0 : self->last_histogram_ix_1;
            combined_histo[j] = histograms[self->curr_histogram_ix_];
            HistogramDistance.HistogramAddHistogram(&combined_histo[j], &histograms[last_histogram_ix]);
            combined_entropy[j] = BitsEntropy(
                &combined_histo[j].data_[0], self->alphabet_size_);
            diff[j] = combined_entropy[j] - entropy - last_entropy[j];
        }
        if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES && diff[0] > self->split_threshold_ && diff[1] > self->split_threshold_) {
            /* Create new block. */
            split->lengths[self->num_blocks_] = (uint)self->block_size_;
            split->types[self->num_blocks_] = (byte)split->num_types;
            /* The new type becomes "last"; the previous last becomes "second last". */
            self->last_histogram_ix_1 = self->last_histogram_ix_0;
            self->last_histogram_ix_0 = (byte)split->num_types;
            last_entropy[1] = last_entropy[0];
            last_entropy[0] = entropy;
            ++self->num_blocks_;
            ++split->num_types;
            ++self->curr_histogram_ix_;
            if (self->curr_histogram_ix_ < *self->histograms_size_) {
                HistogramDistance.HistogramClear(&histograms[self->curr_histogram_ix_]);
            }
            self->block_size_ = 0;
            self->merge_last_count_ = 0;
            self->target_block_size_ = self->min_block_size_;
        } else if (diff[1] < diff[0] - 20.0) {
            /* Combine this block with second last block. */
            split->lengths[self->num_blocks_] = (uint)self->block_size_;
            split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
            /* Swap the roles of "last" and "second last". */
            size_t tmp = self->last_histogram_ix_0;
            self->last_histogram_ix_0 = self->last_histogram_ix_1;
            self->last_histogram_ix_1 = tmp;
            histograms[self->last_histogram_ix_0] = combined_histo[1];
            last_entropy[1] = last_entropy[0];
            last_entropy[0] = combined_entropy[1];
            ++self->num_blocks_;
            self->block_size_ = 0;
            /* The current slot is reused for the next block. */
            HistogramDistance.HistogramClear(&histograms[self->curr_histogram_ix_]);
            self->merge_last_count_ = 0;
            self->target_block_size_ = self->min_block_size_;
        } else {
            /* Combine this block with last block. */
            split->lengths[self->num_blocks_ - 1] += (uint)self->block_size_;
            histograms[self->last_histogram_ix_0] = combined_histo[0];
            last_entropy[0] = combined_entropy[0];
            if (split->num_types == 1) {
                last_entropy[1] = last_entropy[0];
            }
            self->block_size_ = 0;
            HistogramDistance.HistogramClear(&histograms[self->curr_histogram_ix_]);
            /* From the second consecutive merge on, grow the target block
             * size by one minimum block size per merge. */
            if (++self->merge_last_count_ > 1) {
                self->target_block_size_ += self->min_block_size_;
            }
        }
    }
    if (is_final) {
        *self->histograms_size_ = split->num_types;
        split->num_blocks = self->num_blocks_;
    }
}