⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xdelta3-djw.h

📁 Linux下一个可以比较二进制文件的工具xdelta3.0u的源码。
💻 H
📖 第 1 页 / 共 4 页
字号:
      /* Encode: prefix */      prefix.mtfsym = prefix_mtfsym;      prefix.symbol = clen;      prefix.scount = ALPHABET_SIZE;      if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))	{	  goto failure;	}      if (output_bits + (8 * output->next) + EFFICIENCY_BITS >=	  input_bits && ! cfg->inefficient)	{	  goto nosecond;	}      /* Encode: data */      for (in = input; in; in = in->next_page)	{	  const uint8_t *p     = in->base;	  const uint8_t *p_max = p + in->next;	  do	    {	      usize_t sym  = *p++;	      usize_t bits = clen[sym];	      IF_DEBUG (output_bits -= bits);	      if ((ret = xd3_encode_bits (stream, & output,					  & bstate, bits, code[sym])))		{		  goto failure;		}	    }	  while (p < p_max);	}      XD3_ASSERT (output_bits == 0);    }  else    {      /* DJW Huffman */      djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE];      uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];      djw_weight left = input_bytes;      int gp;      int niter = 0;      usize_t select_bits;      usize_t sym1 = 0, sym2 = 0, s;      usize_t   gcost[DJW_MAX_GROUPS];      usize_t  gbest_code[DJW_MAX_GROUPS+2];      uint8_t  gbest_clen[DJW_MAX_GROUPS+2];      usize_t   gbest_max = 1 + (input_bytes - 1) / sector_size;      int      best_bits = 0;      usize_t   gbest_no;      usize_t   gpcnt;      const uint8_t *p;      IF_DEBUG1 (usize_t gcount[DJW_MAX_GROUPS]);      /* Encode: sector size (5 bits) */      if ((ret = xd3_encode_bits (stream, & output, & bstate,				  DJW_SECTORSZ_BITS,				  (sector_size/DJW_SECTORSZ_MULT)-1)))	{	  goto failure;	}      /* Dynamic allocation. */      if (gbest == NULL)	{	  if ((gbest = xd3_alloc (stream, gbest_max, 1)) == NULL)	    {	      ret = ENOMEM;	      goto failure;	    }	}      if (gbest_mtf == NULL)	{	  if ((gbest_mtf = xd3_alloc (stream, gbest_max, 1)) == NULL)	    {	      ret = ENOMEM;	      goto failure;	    }	}      /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */      /* Generate initial code length tables. */      for (gp = 0; gp < groups; gp += 1)	{	  djw_weight sum  = 0;	  djw_weight goal = left / (groups - gp);	  IF_DEBUG1 (usize_t nz = 0);	  /* Due to the single-code granularity of this distribution, it may	   * be that we can't generate a distribution for each group.  In that	   * case subtract one group and try again.  If (inefficient), we're	   * testing group behavior, so don't mess things up. */	  if (goal == 0 && !cfg->inefficient)	    {	      IF_DEBUG1 (DP(RINT "too many groups (%u), dropping one\n",			    groups));	      groups -= 1;	      goto regroup;	    }	  /* Sum == goal is possible when (cfg->inefficient)... */	  while (sum < goal)	    {	      XD3_ASSERT (sym2 < ALPHABET_SIZE);	      IF_DEBUG1 (nz += real_freq[sym2] != 0);	      sum += real_freq[sym2++];	    }	  IF_DEBUG1(DP(RINT "group %u has symbols %u..%u (%u non-zero) "		       "(%u/%u = %.3f)\n",		       gp, sym1, sym2, nz, sum,		       input_bytes, sum / (double)input_bytes););	  for (s = 0; s < ALPHABET_SIZE; s += 1)	    {	      evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16;	    }	  left -= sum;	  sym1  = sym2+1;	}    repeat:      niter += 1;      gbest_no = 0;      memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups);      IF_DEBUG1 (memset (gcount, 0, sizeof (gcount[0]) * groups));      /* For each input page (loop is irregular to allow non-pow2-size group       * size. */      in = input;      p  = in->base;      /* For each group-size sector. */      do	{	  const uint8_t *p0  = p;	  xd3_output    *in0 = in;	  usize_t best   = 0;	  usize_t winner = 0;	  /* Select best group for each sector, update evolve_freq. */	  memset (gcost, 0, sizeof (gcost[0]) * groups);	  /* For each byte in sector. */	  for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)	    {	      /* For each group. */	      for (gp = 0; gp < groups; gp += 1)		{		  gcost[gp] += evolve_clen[gp][*p];		}	      /* Check end-of-input-page. */#             define GP_PAGE()                \	      if (++p - in->base == in->next) \		{                             \		  in = in->next_page;         \		  if (in == NULL) { break; }  \		  p  = in->base;              \		}	      GP_PAGE ();	    }	  /* Find min cost group for this sector */	  best = -1U;	  for (gp = 0; gp < groups; gp += 1)	    {	      if (gcost[gp] < best) { best = gcost[gp]; winner = gp; }	    }	  XD3_ASSERT(gbest_no < gbest_max);	  gbest[gbest_no++] = winner;	  IF_DEBUG1 (gcount[winner] += 1);	  p  = p0;	  in = in0;	  /* Update group frequencies. */	  for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)	    {	      evolve_freq[winner][*p] += 1;	      GP_PAGE ();	    }	}      while (in != NULL);      XD3_ASSERT (gbest_no == gbest_max);      /* Recompute code lengths. */      output_bits = 0;      for (gp = 0; gp < groups; gp += 1)	{	  int i;	  uint8_t evolve_zero[ALPHABET_SIZE];	  int any_zeros = 0;	  memset (evolve_zero, 0, sizeof (evolve_zero));	  /* Cannot allow a zero clen when the real frequency is non-zero.	   * Note: this means we are going to encode a fairly long code for	   * these unused entries.  An improvement would be to implement a	   * NOTUSED code for when these are actually zero, but this requires	   * another data structure (evolve_zero) since we don't know when	   * evolve_freq[i] == 0...  Briefly tested, looked worse. */	  for (i = 0; i < ALPHABET_SIZE; i += 1)	    {	      if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)		{		  evolve_freq[gp][i] = 1;		  evolve_zero[i] = 1;		  any_zeros = 1;		}	    }	  output_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp],					   ALPHABET_SIZE, DJW_MAX_CODELEN);	  /* The above faking of frequencies does not matter for the last	   * iteration, but we don't know when that is yet.  However, it also	   * breaks the output_bits computation.  Necessary for accuracy, and	   * for the (output_bits==0) assert after all bits are output. */	  if (any_zeros)	    {	      IF_DEBUG1 (usize_t save_total = output_bits);	      for (i = 0; i < ALPHABET_SIZE; i += 1)		{		  if (evolve_zero[i]) { output_bits -= evolve_clen[gp][i]; }		}	      IF_DEBUG1 (DP(RINT "evolve_zero reduced %u bits in group %u\n",			    save_total - output_bits, gp));	    }	}      IF_DEBUG1(	DP(RINT "pass %u total bits: %u group uses: ", niter, output_bits);	for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); }	DP(RINT "\n");	);      /* End iteration. */      IF_DEBUG1 (if (niter > 1 && best_bits < output_bits) {	DP(RINT "iteration lost %u bits\n", output_bits - best_bits); });      if (niter == 1 || (niter < DJW_MAX_ITER &&			 (best_bits - output_bits) >= DJW_MIN_IMPROVEMENT))	{	  best_bits = output_bits;	  goto repeat;	}      /* Efficiency check. */      if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)	{	  goto nosecond;	}      IF_DEBUG1 (DP(RINT "djw compression: %u -> %0.3f\n",		    input_bytes, output_bits / 8.0));      /* Encode: prefix */      {	uint8_t     prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE];	uint8_t     prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE];	uint8_t     prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE];	djw_prefix prefix;	prefix.symbol = prefix_symbol;	prefix.mtfsym = prefix_mtfsym;	prefix.repcnt = prefix_repcnt;	djw_compute_multi_prefix (groups, evolve_clen, & prefix);	if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))	  {	    goto failure;	  }      }      /* Encode: selector frequencies */      {	djw_weight gbest_freq[DJW_MAX_GROUPS+1];	djw_prefix gbest_prefix;	usize_t i;	gbest_prefix.scount = gbest_no;	gbest_prefix.symbol = gbest;	gbest_prefix.mtfsym = gbest_mtf;	djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq);	select_bits =	  djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN);	djw_build_codes  (gbest_code, gbest_clen, groups+1, DJW_MAX_GBCLEN);	for (i = 0; i < groups+1; i += 1)	  {	    if ((ret = xd3_encode_bits (stream, & output, & bstate,					DJW_GBCLEN_BITS, gbest_clen[i])))	      {		goto failure;	      }	  }	for (i = 0; i < gbest_prefix.mcount; i += 1)	  {	    usize_t gp_mtf      = gbest_mtf[i];	    usize_t gp_sel_bits = gbest_clen[gp_mtf];	    usize_t gp_sel_code = gbest_code[gp_mtf];	    XD3_ASSERT (gp_mtf < groups+1);	    if ((ret = xd3_encode_bits (stream, & output, & bstate,					gp_sel_bits, gp_sel_code)))	      {		goto failure;	      }	    IF_DEBUG (select_bits -= gp_sel_bits);	  }	XD3_ASSERT (select_bits == 0);      }      /* Efficiency check. */      if (output_bits + select_bits + (8 * output->next) +	  EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)	{	  goto nosecond;	}      /* Encode: data */      {	usize_t evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE];	usize_t sector = 0;	/* Build code tables for each group. */	for (gp = 0; gp < groups; gp += 1)	  {	    djw_build_codes (evolve_code[gp], evolve_clen[gp],			     ALPHABET_SIZE, DJW_MAX_CODELEN);	  }	/* Now loop over the input. */	in = input;	p  = in->base;	do	  {	    /* For each sector. */	    usize_t   gp_best  = gbest[sector];	    usize_t *gp_codes = evolve_code[gp_best];	    uint8_t *gp_clens = evolve_clen[gp_best];	    XD3_ASSERT (sector < gbest_no);	    sector += 1;	    /* Encode the sector data. */	    for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)	      {		usize_t sym  = *p;		usize_t bits = gp_clens[sym];		usize_t code = gp_codes[sym];		IF_DEBUG (output_bits -= bits);		if ((ret = xd3_encode_bits (stream, & output, & bstate,					    bits, code)))		  {		    goto failure;		  }		GP_PAGE ();	      }	  }	while (in != NULL);	XD3_ASSERT (select_bits == 0);	XD3_ASSERT (output_bits == 0);      }    }  ret = xd3_flush_bits (stream, & output, & bstate);  if (0)    {    nosecond:      stream->msg = "secondary compression was inefficient";      ret = XD3_NOSECOND;    } failure:  xd3_free (stream, gbest);  xd3_free (stream, gbest_mtf);  return ret;}#endif /* XD3_ENCODER *//*********************************************************************//*                              DECODE                               *//*********************************************************************/static voiddjw_build_decoder (xd3_stream    *stream,		   usize_t        asize,		   usize_t        abs_max,		   const uint8_t *clen,		   uint8_t       *inorder,		   usize_t       *base,		   usize_t       *limit,		   usize_t       *min_clenp,		   usize_t       *max_clenp){  int i, l;  const uint8_t *ci;  usize_t nr_clen [DJW_TOTAL_CODES];  usize_t tmp_base[DJW_TOTAL_CODES];  int min_clen;  int max_clen;  /* Assumption: the two temporary arrays are large enough to hold abs_max. */  XD3_ASSERT (abs_max <= DJW_MAX_CODELEN);  /* This looks something like the start of zlib's inftrees.c */  memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1));  /* Count number of each code length */  i  = asize;  ci = clen;  do    {      /* Caller _must_ check that values are in-range.  Most of the time the       * caller decodes a specific number of bits, which imply the max value,       * and the other time the caller decodes a huffman value, which must be       * in-range.  Therefore, its an assertion and this function cannot       * otherwise fail. */      XD3_ASSERT (*ci <= abs_max);      nr_clen[*ci++]++;    }  while (--i != 0);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -