pg_lzcompress.c

来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 727 行 · 第 1/2 页

C
727
字号
/* ---------- * pglz_find_match - * *		Lookup the history table if the actual input stream matches *		another sequence of characters, starting somewhere earlier *		in the input buffer. * ---------- */static inline intpglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end,				int *lenp, int *offp, int good_match, int good_drop){	PGLZ_HistEntry *hent;	int32		len = 0;	int32		off = 0;	/*	 * Traverse the linked history list until a good enough match is found.	 */	hent = hstart[pglz_hist_idx(input, end)];	while (hent)	{		const char *ip = input;		const char *hp = hent->pos;		int32		thisoff;		int32		thislen;		/*		 * Stop if the offset does not fit into our tag anymore.		 */		thisoff = ip - hp;		if (thisoff >= 0x0fff)			break;		/*		 * Determine length of match. A better match must be larger than the		 * best so far. And if we already have a match of 16 or more bytes,		 * it's worth the call overhead to use memcmp() to check if this match		 * is equal for the same size. After that we must fallback to		 * character by character comparison to know the exact position where		 * the diff occurred.		 */		thislen = 0;		if (len >= 16)		{			if (memcmp(ip, hp, len) == 0)			{				thislen = len;				ip += len;				hp += len;				while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)				{					thislen++;					ip++;					hp++;				}			}		}		else		{			while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)			{				thislen++;				ip++;				hp++;			}		}		/*		 * Remember this match as the best (if it is)		 */		if (thislen > len)		{			len = thislen;			off = thisoff;		}		/*		 * Advance to the next history entry		 */		hent = hent->next;		/*		 * Be happy with lesser good matches the more entries we visited. But		 * no point in doing calculation if we're at end of list.		 */		if (hent)		{			if (len >= good_match)				break;			good_match -= (good_match * good_drop) / 100;		}	}	/*	 * Return match information only if it results at least in one byte	 * reduction.	 */	if (len > 2)	{		*lenp = len;		*offp = off;		return 1;	}	return 0;}/* ---------- * pglz_compress - * *		Compresses source into dest using strategy. * ---------- */boolpglz_compress(const char *source, int32 slen, PGLZ_Header *dest,			  const PGLZ_Strategy *strategy){	unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);	unsigned char *bstart = bp;	int			hist_next = 0;	bool		hist_recycle = false;	const char *dp = source;	const char *dend = source + slen;	unsigned char ctrl_dummy = 0;	unsigned char *ctrlp = &ctrl_dummy;	unsigned char ctrlb = 0;	unsigned char ctrl = 0;	int32		match_len;	int32		match_off;	int32		good_match;	int32		good_drop;	int32		result_size;	int32		result_max;	int32		need_rate;	/*	 * Our fallback strategy is the default.	 */	if (strategy == NULL)		strategy = PGLZ_strategy_default;	/*	 * If the strategy forbids compression (at all or if source chunk too	 * small), fail.	 */	if (strategy->match_size_good <= 0 ||		slen < strategy->min_input_size)		return false;	/*	 * Save the original source size in the header.	 */	dest->rawsize = slen;	/*	 * Limit the match size to the maximum implementation allowed value	 */	if ((good_match = strategy->match_size_good) > PGLZ_MAX_MATCH)		good_match = PGLZ_MAX_MATCH;	if (good_match < 17)		good_match = 17;	if ((good_drop = strategy->match_size_drop) < 0)		good_drop = 0;	if (good_drop > 100)		good_drop = 100;	/*	 * Initialize the history lists to empty.  We do not need to zero the	 * hist_entries[] array; its entries are initialized as they are used.	 */	memset((void *) hist_start, 0, sizeof(hist_start));	/*	 * Compute the maximum result size allowed by the strategy. If the input	 * size exceeds force_input_size, the max result size is the input size	 * itself. Otherwise, it is the input size minus the minimum wanted	 * compression rate.	 */	if (slen >= strategy->force_input_size)		result_max = slen;	else	{		need_rate = strategy->min_comp_rate;		if (need_rate < 0)			need_rate = 0;		else if (need_rate > 99)			need_rate = 99;		result_max = slen - ((slen * need_rate) / 100);	}	/*	 * Compress the source directly into the output buffer.	 */	while (dp < dend)	{		/*		 * If we already exceeded the maximum result size, fail.		 *		 * We check once per loop; since the loop body could emit as many as 4		 * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better		 * allow 4 slop bytes.		 */		if (bp - bstart >= result_max)			return false;		/*		 * Try to find a match in the history		 */		if (pglz_find_match(hist_start, dp, dend, &match_len,							&match_off, good_match, good_drop))		{			/*			 * Create the tag and add history entries for all matched			 * characters.			 */			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);			while (match_len--)			{				pglz_hist_add(hist_start, hist_entries,							  hist_next, hist_recycle,							  dp, dend);				dp++;			/* Do not do this ++ in the line above!		*/				/* The macro would do it four times - Jan.	*/			}		}		else		{			/*			 * No match found. Copy one literal byte.			 */			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);			pglz_hist_add(hist_start, hist_entries,						  hist_next, hist_recycle,						  dp, dend);			dp++;				/* Do not do this ++ in the line above!		*/			/* The macro would do it four times - Jan.	*/		}	}	/*	 * Write out the last control byte and check that we haven't overrun the	 * output size allowed by the strategy.	 */	*ctrlp = ctrlb;	result_size = bp - bstart;	if (result_size >= result_max)		return false;	/*	 * Success - need only fill in the actual length of the compressed datum.	 */	SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header));	return true;}/* ---------- * pglz_decompress - * *		Decompresses source into dest. * ---------- */voidpglz_decompress(const PGLZ_Header *source, char *dest){	const unsigned char *sp;	const unsigned char *srcend;	unsigned char *dp;	unsigned char *destend;	sp = ((const unsigned char *) source) + sizeof(PGLZ_Header);	srcend = ((const unsigned char *) source) + VARSIZE(source);	dp = (unsigned char *) dest;	destend = dp + source->rawsize;	while (sp < srcend && dp < destend)	{		/*		 * Read one control byte and process the next 8 items (or as many		 * as remain in the compressed input).		 */		unsigned char ctrl = *sp++;		int		ctrlc;		for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc++)		{			if (ctrl & 1)			{				/*				 * Otherwise it contains the match length minus 3 and the				 * upper 4 bits of the offset. The next following byte				 * contains the lower 8 bits of the offset. If the length is				 * coded as 18, another extension tag byte tells how much				 * longer the match really was (0-255).				 */				int32		len;				int32		off;				len = (sp[0] & 0x0f) + 3;				off = ((sp[0] & 0xf0) << 4) | sp[1];				sp += 2;				if (len == 18)					len += *sp++;				/*				 * Check for output buffer overrun, to ensure we don't				 * clobber memory in case of corrupt input.  Note: we must				 * advance dp here to ensure the error is detected below				 * the loop.  We don't simply put the elog inside the loop				 * since that will probably interfere with optimization.				 */				if (dp + len > destend)				{					dp += len;					break;				}				/*				 * Now we copy the bytes specified by the tag from OUTPUT to				 * OUTPUT. It is dangerous and platform dependent to use				 * memcpy() here, because the copied areas could overlap				 * extremely!				 */				while (len--)				{					*dp = dp[-off];					dp++;				}			}			else			{				/*				 * An unset control bit means LITERAL BYTE. So we just copy				 * one from INPUT to OUTPUT.				 */				if (dp >= destend)	/* check for buffer overrun */					break;			/* do not clobber memory */				*dp++ = *sp++;			}			/*			 * Advance the control bit			 */			ctrl >>= 1;		}	}	/*	 * Check we decompressed the right amount.	 */	if (dp != destend || sp != srcend)		elog(ERROR, "compressed data is corrupt");	/*	 * That's it.	 */}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?