📄 xdelta3-djw.h
字号:
h1->parent = h2->parent = ents_size; heap_insert (heap, ents, ++heap_last, ents_size++); } IF_DEBUG (heap_check (heap, ents, heap_last)); /* Now compute prefix code lengths, counting parents. */ for (i = 1; i < asize+1; i += 1) { int b = 0; if (ents[i].freq != 0) { int p = i; while ((p = ents[p].parent) != 0) { b += 1; } if (b > maxlen) { overflow = 1; } total_bits += b * freq[i-1]; } /* clen is 0-origin, unlike ents. */ IF_DEBUG1 (DP(RINT "clen[%d] = %d\n", i-1, b)); clen[i-1] = b; } IF_DEBUG (if (first_bits == 0) first_bits = total_bits); if (! overflow) { IF_DEBUG1 (if (first_bits != total_bits) { DP(RINT "code length overflow changed %u bits\n", (usize_t)(total_bits - first_bits)); }); return total_bits; } /* OPT: There is a non-looping way to fix overflow shown in zlib, but this * is easier (for now), as done in bzip2. */ for (i = 1; i < asize+1; i += 1) { ents[i].freq = ents[i].freq / 2 + 1; } goto again;}static voiddjw_build_codes (usize_t *codes, const uint8_t *clen, int asize, int abs_max){ int i, l; int min_clen = DJW_MAX_CODELEN; int max_clen = 0; usize_t code = 0; /* Find the min and max code length */ for (i = 0; i < asize; i += 1) { if (clen[i] > 0 && clen[i] < min_clen) { min_clen = clen[i]; } max_clen = max (max_clen, (int) clen[i]); } XD3_ASSERT (max_clen <= abs_max); /* Generate a code for each symbol with the appropriate length. */ for (l = min_clen; l <= max_clen; l += 1) { for (i = 0; i < asize; i += 1) { if (clen[i] == l) { codes[i] = code++; } } code <<= 1; } IF_DEBUG1 ({ for (i = 0; i < asize; i += 1) { DP(RINT "code[%d] = %u\n", i, codes[i]); } });}/*********************************************************************//* MOVE-TO-FRONT *//*********************************************************************/static voiddjw_compute_mtf_1_2 (djw_prefix *prefix, uint8_t *mtf, djw_weight *freq_out, usize_t nsym){ int i, j, k; usize_t sym; usize_t size = prefix->scount; usize_t mtf_i = 0; int mtf_run = 0; /* This +2 is for the RUN_0, RUN_1 codes */ memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+2)); for (i = 0; i < size; ) { /* OPT: Bzip optimizes this algorithm a little by effectively checking * j==0 before the MTF update. */ sym = prefix->symbol[i++]; for (j = 0; mtf[j] != sym; j += 1) { } XD3_ASSERT (j <= nsym); for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; } mtf[0] = sym; if (j == 0) { mtf_run += 1; continue; } if (mtf_run > 0) { djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); } /* Non-zero symbols are offset by RUN_1 */ prefix->mtfsym[mtf_i++] = j+RUN_1; freq_out[j+RUN_1] += 1; } if (mtf_run > 0) { djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out); } prefix->mcount = mtf_i;}/* Counts character frequencies of the input buffer, returns the size. */static usize_tdjw_count_freqs (djw_weight *freq, xd3_output *input){ xd3_output *in; usize_t size = 0; memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE); for (in = input; in; in = in->next_page) { const uint8_t *p = in->base; const uint8_t *p_max = p + in->next; size += in->next; do { ++freq[*p]; } while (++p < p_max); } IF_DEBUG1 ({int i; DP(RINT "freqs: "); for (i = 0; i < ALPHABET_SIZE; i += 1) { DP(RINT "%u ", freq[i]); } DP(RINT "\n");}); return size;}static voiddjw_compute_multi_prefix (int groups, uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE], djw_prefix *prefix){ int gp, i; prefix->scount = ALPHABET_SIZE; memcpy (prefix->symbol, clen[0], ALPHABET_SIZE); for (gp = 1; gp < groups; gp += 1) { for (i = 0; i < ALPHABET_SIZE; i += 1) { if (clen[gp][i] == 0) { continue; } prefix->symbol[prefix->scount++] = clen[gp][i]; } }}static voiddjw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq){ /* This +1 is for the 0 code-length. */ uint8_t clmtf[DJW_MAX_CODELEN+1]; djw_init_clen_mtf_1_2 (clmtf); djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN);}static intdjw_encode_prefix (xd3_stream *stream, xd3_output **output, bit_state *bstate, djw_prefix *prefix){ int ret, i; usize_t num_to_encode; djw_weight clfreq[DJW_TOTAL_CODES]; uint8_t clclen[DJW_TOTAL_CODES]; usize_t clcode[DJW_TOTAL_CODES]; /* Move-to-front encode prefix symbols, count frequencies */ djw_compute_prefix_1_2 (prefix, clfreq); /* Compute codes */ djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); djw_build_codes (clcode, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN); /* Compute number of extra codes beyond basic ones for this template. */ num_to_encode = DJW_TOTAL_CODES; while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0) { num_to_encode -= 1; } XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS)); /* Encode: # of extra codes */ if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS, num_to_encode - DJW_EXTRA_12OFFSET))) { return ret; } /* Encode: MTF code lengths */ for (i = 0; i < num_to_encode; i += 1) { if ((ret = xd3_encode_bits (stream, output, bstate, DJW_CLCLEN_BITS, clclen[i]))) { return ret; } } /* Encode: CLEN code lengths */ for (i = 0; i < prefix->mcount; i += 1) { usize_t mtf_sym = prefix->mtfsym[i]; usize_t bits = clclen[mtf_sym]; usize_t code = clcode[mtf_sym]; if ((ret = xd3_encode_bits (stream, output, bstate, bits, code))) { return ret; } } return 0;}static voiddjw_compute_selector_1_2 (djw_prefix *prefix, usize_t groups, djw_weight *gbest_freq){ uint8_t grmtf[DJW_MAX_GROUPS]; usize_t i; for (i = 0; i < groups; i += 1) { grmtf[i] = i; } djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups);}static intxd3_encode_howmany_groups (xd3_stream *stream, xd3_sec_cfg *cfg, usize_t input_size, usize_t *ret_groups, usize_t *ret_sector_size){ usize_t cfg_groups = 0; usize_t cfg_sector_size = 0; usize_t sugg_groups = 0; usize_t sugg_sector_size = 0; if (cfg->ngroups != 0) { if (cfg->ngroups < 0 || cfg->ngroups > DJW_MAX_GROUPS) { stream->msg = "invalid secondary encoder group number"; return XD3_INTERNAL; } cfg_groups = cfg->ngroups; } if (cfg->sector_size != 0) { if (cfg->sector_size < DJW_SECTORSZ_MULT || cfg->sector_size > DJW_SECTORSZ_MAX || (cfg->sector_size % DJW_SECTORSZ_MULT) != 0) { stream->msg = "invalid secondary encoder sector size"; return XD3_INTERNAL; } cfg_sector_size = cfg->sector_size; } if (cfg_groups == 0 || cfg_sector_size == 0) { /* These values were found empirically using xdelta3-tune around version * xdfs-0.256. */ switch (cfg->data_type) { case DATA_SECTION: if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; } else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; } else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; } else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; } else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; } else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; } else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; } else { sugg_groups = 8; sugg_sector_size = 70; } break; case INST_SECTION: if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; } else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; } else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; } else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; } else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; } else { sugg_groups = 8; sugg_sector_size = 40; } break; case ADDR_SECTION: if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; } else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; } else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; } else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; } else { sugg_groups = 7; sugg_sector_size = 130; } break; } if (cfg_groups == 0) { cfg_groups = sugg_groups; } if (cfg_sector_size == 0) { cfg_sector_size = sugg_sector_size; } } if (cfg_groups != 1 && cfg_sector_size == 0) { switch (cfg->data_type) { case DATA_SECTION: cfg_sector_size = 20; break; case INST_SECTION: cfg_sector_size = 50; break; case ADDR_SECTION: cfg_sector_size = 130; break; } } (*ret_groups) = cfg_groups; (*ret_sector_size) = cfg_sector_size; XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS); XD3_ASSERT (cfg_groups == 1 || (cfg_sector_size >= DJW_SECTORSZ_MULT && cfg_sector_size <= DJW_SECTORSZ_MAX)); return 0;}static intxd3_encode_huff (xd3_stream *stream, djw_stream *h, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg){ int ret; usize_t groups, sector_size; bit_state bstate = BIT_STATE_ENCODE_INIT; xd3_output *in; int output_bits; usize_t input_bits; usize_t input_bytes; usize_t initial_offset = output->next; djw_weight real_freq[ALPHABET_SIZE]; uint8_t *gbest = NULL; uint8_t *gbest_mtf = NULL; input_bytes = djw_count_freqs (real_freq, input); input_bits = input_bytes * 8; XD3_ASSERT (input_bytes > 0); if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes, & groups, & sector_size))) { return ret; } if (0) { regroup: /* Sometimes we dynamically decide there are too many groups. Arrive * here. */ output->next = initial_offset; xd3_bit_state_encode_init (& bstate); } /* Encode: # of groups (3 bits) */ if ((ret = xd3_encode_bits (stream, & output, & bstate, DJW_GROUP_BITS, groups-1))) { goto failure; } if (groups == 1) { /* Single Huffman group. */ usize_t code[ALPHABET_SIZE]; /* Codes */ uint8_t clen[ALPHABET_SIZE]; uint8_t prefix_mtfsym[ALPHABET_SIZE]; djw_prefix prefix; output_bits = djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN); if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient) { goto nosecond; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -