/* blocksort.c */
/*
 * bzip2 is written by Julian Seward <jseward@bzip.org>.
 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
 * See README and LICENSE files in this directory for more information.
 */
/*-------------------------------------------------------------*/
/*--- Block sorting machinery                               ---*/
/*---                                           blocksort.c ---*/
/*-------------------------------------------------------------*/
/* ------------------------------------------------------------------
 This file is part of bzip2/libbzip2, a program and library for
 lossless, block-sorting data compression.

 bzip2/libbzip2 version 1.0.4 of 20 December 2006
 Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>

 Please read the WARNING, DISCLAIMER and PATENTS sections in the
 README file.

 This program is released under the terms of the license contained
 in the file LICENSE.
------------------------------------------------------------------ */
/* #include "bzlib_private.h" */
/* NOTE(review): int32_t/uint32_t, AssertH/AssertD, ALWAYS_INLINE,
 * NOINLINE, False and CONFIG_BZIP2_FEATURE_SPEED are presumably supplied
 * by bzlib_private.h / busybox headers when this file is compiled as part
 * of the bzip2 applet -- confirm against the including translation unit. */

/* Swap two int32_t-compatible lvalues.
 * Multiple-evaluation macro: arguments must be free of side effects.
 * (Brace-block macro, not do{}while(0) -- callers below never use it
 * as a lone if-branch statement, so this is safe as written.) */
#define mswap(zz1, zz2) \
{ \
	int32_t zztmp = zz1; \
	zz1 = zz2; \
	zz2 = zztmp; \
}

/* Swap the zzn-element regions of ptr[] starting at zzp1 and zzp2.
 * Regions are assumed non-overlapping by the callers in fallbackQSort3. */
static
/* No measurable speed gain with inlining */
/* ALWAYS_INLINE */
void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
{
	while (zzn > 0) {
		mswap(ptr[zzp1], ptr[zzp2]);
		zzp1++;
		zzp2++;
		zzn--;
	}
}

/* Minimum of two int32_t values. */
static
ALWAYS_INLINE
int32_t mmin(int32_t a, int32_t b)
{
	return (a < b) ? a : b;
}

/*---------------------------------------------*/
/*--- Fallback O(N log(N)^2) sorting        ---*/
/*--- algorithm, for repetitive blocks      ---*/
/*---------------------------------------------*/

/* Sort fmap[lo..hi] (inclusive) by ascending eclass[fmap[.]] key.
 * Insertion sort done in two passes: a stride-4 pass to move elements
 * most of the way, then a stride-1 pass to finish -- i.e. a tiny
 * shellsort with increments {4, 1}.  Used by fallbackQSort3 for small
 * partitions only. */
static
inline
void fallbackSimpleSort(uint32_t* fmap, uint32_t* eclass, int32_t lo, int32_t hi)
{
	int32_t i, j, tmp;
	uint32_t ec_tmp;

	if (lo == hi)
		return;

	if (hi - lo > 3) {
		/* stride-4 insertion pass */
		for (i = hi-4; i >= lo; i--) {
			tmp = fmap[i];
			ec_tmp = eclass[tmp];
			for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
				fmap[j-4] = fmap[j];
			fmap[j-4] = tmp;
		}
	}
	/* final stride-1 insertion pass */
	for (i = hi-1; i >= lo; i--) {
		tmp = fmap[i];
		ec_tmp = eclass[tmp];
		for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
			fmap[j-1] = fmap[j];
		fmap[j-1] = tmp;
	}
}

/*---------------------------------------------*/
/* Explicit work-stack push/pop for the iterative quicksort below.
 * These reference fallbackQSort3's locals (stackLo, stackHi, sp). */
#define fpush(lz,hz) { \
	stackLo[sp] = lz; \
	stackHi[sp] = hz; \
	sp++; \
}
#define fpop(lz,hz) { \
	sp--; \
	lz = stackLo[sp]; \
	hz = stackHi[sp]; \
}

/* Partitions at or below this size go to fallbackSimpleSort instead. */
#define FALLBACK_QSORT_SMALL_THRESH 10
#define FALLBACK_QSORT_STACK_SIZE   100

/* Iterative 3-way-partition quicksort of fmap[loSt..hiSt] by
 * eclass[fmap[.]] key.  Pivot choice is pseudo-random (LCG below)
 * among {lo, mid, hi} to dodge adversarial inputs; equal-to-pivot
 * elements are collected at both ends and swapped into the middle
 * after partitioning (Bentley-McIlroy scheme).  The larger side is
 * pushed first so the smaller is processed next, bounding stack depth. */
static
void fallbackQSort3(uint32_t* fmap, uint32_t* eclass, int32_t loSt, int32_t hiSt)
{
	int32_t unLo, unHi, ltLo, gtHi, n, m;
	int32_t sp, lo, hi;
	uint32_t med, r, r3;
	int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
	int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];

	r = 0;
	sp = 0;
	fpush(loSt, hiSt);

	while (sp > 0) {
		AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);

		fpop(lo, hi);
		if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
			fallbackSimpleSort(fmap, eclass, lo, hi);
			continue;
		}

		/* Random partitioning. Median of 3 sometimes fails to
		 * avoid bad cases. Median of 9 seems to help but
		 * looks rather expensive. This too seems to work but
		 * is cheaper. Guidance for the magic constants
		 * 7621 and 32768 is taken from Sedgewick's algorithms
		 * book, chapter 35.
		 */
		r = ((r * 7621) + 1) % 32768;
		r3 = r % 3;
		if (r3 == 0)
			med = eclass[fmap[lo]];
		else if (r3 == 1)
			med = eclass[fmap[(lo+hi)>>1]];
		else
			med = eclass[fmap[hi]];

		/* [lo..ltLo) == pivot, [ltLo..unLo) < pivot, [unLo..unHi]
		 * unexamined, (unHi..gtHi] > pivot, (gtHi..hi] == pivot */
		unLo = ltLo = lo;
		unHi = gtHi = hi;

		while (1) {
			/* advance unLo past elements < pivot, parking
			 * == pivot elements at the low end */
			while (1) {
				if (unLo > unHi)
					break;
				n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
				if (n == 0) {
					mswap(fmap[unLo], fmap[ltLo]);
					ltLo++;
					unLo++;
					continue;
				};
				if (n > 0)
					break;
				unLo++;
			}
			/* retreat unHi past elements > pivot, parking
			 * == pivot elements at the high end */
			while (1) {
				if (unLo > unHi)
					break;
				n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
				if (n == 0) {
					mswap(fmap[unHi], fmap[gtHi]);
					gtHi--;
					unHi--;
					continue;
				};
				if (n < 0)
					break;
				unHi--;
			}
			if (unLo > unHi)
				break;
			mswap(fmap[unLo], fmap[unHi]);
			unLo++;
			unHi--;
		}

		AssertD(unHi == unLo-1, "fallbackQSort3(2)");

		/* everything equal to the pivot: this partition is done */
		if (gtHi < ltLo)
			continue;

		/* swap the parked == pivot runs into the middle */
		n = mmin(ltLo-lo, unLo-ltLo);
		mvswap(fmap, lo, unLo-n, n);
		m = mmin(hi-gtHi, gtHi-unHi);
		mvswap(fmap, unLo, hi-m+1, m);

		/* [lo..n] and [m..hi] are the two sub-partitions still
		 * strictly <, resp. >, the pivot */
		n = lo + unLo - ltLo - 1;
		m = hi - (gtHi - unHi) + 1;

		/* push the larger side first -> smaller side sorted next,
		 * keeping stack depth O(log N) */
		if (n - lo > hi - m) {
			fpush(lo, n);
			fpush(m, hi);
		} else {
			fpush(m, hi);
			fpush(lo, n);
		}
	}
}

#undef fpush
#undef fpop
#undef FALLBACK_QSORT_SMALL_THRESH
#undef FALLBACK_QSORT_STACK_SIZE

/*---------------------------------------------*/
/* Pre:
 *	nblock > 0
 *	eclass exists for [0 .. nblock-1]
 *	((uint8_t*)eclass) [0 .. nblock-1] holds block
 *	ptr exists for [0 .. nblock-1]
 *
 * Post:
 *	((uint8_t*)eclass) [0 .. nblock-1] holds block
 *	All other areas of eclass destroyed
 *	fmap [0 .. nblock-1] holds sorted order
 *	bhtab[0 .. 2+(nblock/32)] destroyed
 */

/* Bucket-header bit map: bit zz of bhtab marks the start of a bucket.
 * NOTE(review): "1 << ((zz) & 31)" left-shifts a signed int; when the
 * bit index is 31 this is signed-overflow UB in strict C99, though it
 * matches upstream bzip2 and behaves as intended on the targets busybox
 * supports.  "1U << ..." would be the strictly-conforming spelling. */
#define SET_BH(zz)       bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
#define CLEAR_BH(zz)     bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
#define ISSET_BH(zz)     (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
#define WORD_BH(zz)      bhtab[(zz) >> 5]
#define UNALIGNED_BH(zz) ((zz) & 0x01f)

/* Fallback suffix sort (used when the main sort's budget runs out --
 * TODO confirm against the caller, which is outside this chunk):
 * initial byte-wise counting sort, then log(N) rounds of bucket
 * refinement doubling the compared prefix length H each round,
 * in the style of the Manber-Myers suffix-array construction. */
static
void fallbackSort(uint32_t* fmap, uint32_t* eclass, uint32_t* bhtab, int32_t nblock)
{
	int32_t ftab[257];
	int32_t ftabCopy[256];
	int32_t H, i, j, k, l, r, cc, cc1;
	int32_t nNotDone;
	int32_t nBhtab;
	uint8_t* eclass8 = (uint8_t*)eclass;

	/*
	 * Initial 1-char radix sort to generate
	 * initial fmap and initial BH bits.
	 */
	for (i = 0; i < 257; i++)
		ftab[i] = 0;
	for (i = 0; i < nblock; i++)
		ftab[eclass8[i]]++;
	for (i = 0; i < 256; i++)
		ftabCopy[i] = ftab[i];

	j = ftab[0]; /* bbox: optimized */
	for (i = 1; i < 257; i++) {
		j += ftab[i];
		ftab[i] = j;
	}

	/* scatter: ftab[] now holds exclusive bucket end offsets,
	 * walked backwards per byte value */
	for (i = 0; i < nblock; i++) {
		j = eclass8[i];
		k = ftab[j] - 1;
		ftab[j] = k;
		fmap[k] = i;
	}

	nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
	for (i = 0; i < nBhtab; i++)
		bhtab[i] = 0;
	/* mark the start of each initial (1-byte) bucket */
	for (i = 0; i < 256; i++)
		SET_BH(ftab[i]);

	/*
	 * Inductively refine the buckets.  Kind-of an
	 * "exponential radix sort" (!), inspired by the
	 * Manber-Myers suffix array construction algorithm.
	 */

	/*-- set sentinel bits for block-end detection --*/
	for (i = 0; i < 32; i++) {
		SET_BH(nblock + 2*i);
		CLEAR_BH(nblock + 2*i + 1);
	}

	/*-- the log(N) loop --*/
	H = 1;
	while (1) {
		/* eclass[k] := index of the bucket containing suffix k+H,
		 * i.e. the sort key for the next H-doubling round */
		j = 0;
		for (i = 0; i < nblock; i++) {
			if (ISSET_BH(i))
				j = i;
			k = fmap[i] - H;
			if (k < 0)
				k += nblock; /* wrap: rotations, not plain suffixes */
			eclass[k] = j;
		}

		nNotDone = 0;
		r = -1;
		while (1) {
			/*-- find the next non-singleton bucket --*/
			k = r + 1;
			/* scan bit-by-bit to a word boundary, then word-at-a-time */
			while (ISSET_BH(k) && UNALIGNED_BH(k))
				k++;
			if (ISSET_BH(k)) {
				while (WORD_BH(k) == 0xffffffff)
					k += 32;
				while (ISSET_BH(k))
					k++;
			}
			l = k - 1;
			if (l >= nblock)
				break;
			while (!ISSET_BH(k) && UNALIGNED_BH(k))
				k++;
			if (!ISSET_BH(k)) {
				while (WORD_BH(k) == 0x00000000)
					k += 32;
				while (!ISSET_BH(k))
					k++;
			}
			r = k - 1;
			if (r >= nblock)
				break;

			/*-- now [l, r] bracket current bucket --*/
			if (r > l) {
				nNotDone += (r - l + 1);
				fallbackQSort3(fmap, eclass, l, r);

				/*-- scan bucket and generate header bits-- */
				cc = -1;
				for (i = l; i <= r; i++) {
					cc1 = eclass[fmap[i]];
					if (cc != cc1) {
						SET_BH(i);
						cc = cc1;
					};
				}
			}
		}

		H *= 2;
		if (H > nblock || nNotDone == 0)
			break;
	}

	/*
	 * Reconstruct the original block in
	 * eclass8 [0 .. nblock-1], since the
	 * previous phase destroyed it.
	 */
	j = 0;
	for (i = 0; i < nblock; i++) {
		while (ftabCopy[j] == 0)
			j++;
		ftabCopy[j]--;
		eclass8[fmap[i]] = (uint8_t)j;
	}
	AssertH(j < 256, 1005);
}

#undef SET_BH
#undef CLEAR_BH
#undef ISSET_BH
#undef WORD_BH
#undef UNALIGNED_BH

/*---------------------------------------------*/
/*--- The main, O(N^2 log(N)) sorting       ---*/
/*--- algorithm. Faster for "normal"        ---*/
/*--- non-repetitive blocks.                ---*/
/*---------------------------------------------*/

/* Compare the two rotations of block[] beginning at i1 and i2;
 * returns nonzero when rotation i1 sorts greater than rotation i2.
 * quadrant[] supplies precomputed tie-break ranks (set up by the
 * caller, outside this chunk -- confirm).  *budget is decremented
 * once per 8 positions compared beyond the first 12; presumably the
 * caller bails out to fallbackSort when it goes negative -- verify
 * against mainSort. */
static
NOINLINE
int mainGtU(
		uint32_t i1,
		uint32_t i2,
		uint8_t* block,
		uint16_t* quadrant,
		uint32_t nblock,
		int32_t* budget)
{
	int32_t k;
	uint8_t c1, c2;
	uint16_t s1, s2;

/* Loop unrolling here is actually very useful
 * (generated code is much simpler),
 * code size increase is only 270 bytes (i386)
 * but speeds up compression 10% overall
 */
#if CONFIG_BZIP2_FEATURE_SPEED >= 1
#define TIMES_8(code) \
	code; code; code; code; \
	code; code; code; code;
#define TIMES_12(code) \
	code; code; code; code; \
	code; code; code; code; \
	code; code; code; code;
#else
#define TIMES_8(code) \
{ \
	int nn = 8; \
	do { \
		code; \
	} while (--nn); \
}
#define TIMES_12(code) \
{ \
	int nn = 12; \
	do { \
		code; \
	} while (--nn); \
}
#endif

	AssertD(i1 != i2, "mainGtU");

	/* first 12 positions: bytes only, no quadrant tie-break yet */
	TIMES_12(
		c1 = block[i1]; c2 = block[i2];
		if (c1 != c2) return (c1 > c2);
		i1++; i2++;
	)

	k = nblock + 8;

	do {
		/* 8 positions per iteration: byte compare, then quadrant
		 * rank as tie-break for equal bytes */
		TIMES_8(
			c1 = block[i1]; c2 = block[i2];
			if (c1 != c2) return (c1 > c2);
			s1 = quadrant[i1]; s2 = quadrant[i2];
			if (s1 != s2) return (s1 > s2);
			i1++; i2++;
		)
		/* indices wrap around the block (rotations) */
		if (i1 >= nblock) i1 -= nblock;
		if (i2 >= nblock) i2 -= nblock;

		(*budget)--;
		k -= 8;
	} while (k >= 0);

	/* full wrap-around without a difference: rotations are equal */
	return False;
}
#undef TIMES_8
#undef TIMES_12

/*---------------------------------------------*/
/*
 * Knuth's increments seem to work better
 * than Incerpi-Sedgewick here.  Possibly
 * because the number of elems to sort is
 * usually small, typically <= 20.
 */
/* Knuth shellsort increment sequence: h' = 3h + 1 */
static
const int32_t incs[14] = {
	1, 4, 13, 40, 121, 364, 1093, 3280,
	9841, 29524, 88573, 265720,
	797161, 2391484
};

/* Shellsort of ptr[lo..hi] (inclusive) using the Knuth increments
 * above and mainGtU (on indices offset by d) as the comparator.
 * Returns early -- leaving the range partially sorted -- once
 * *budget has gone negative; the caller is expected to detect this
 * and fall back (TODO confirm against mainQSort3, outside this chunk). */
static
void mainSimpleSort(uint32_t* ptr,
		uint8_t* block,
		uint16_t* quadrant,
		int32_t nblock,
		int32_t lo,
		int32_t hi,
		int32_t d,
		int32_t* budget)
{
	int32_t i, j, h, bigN, hp;
	uint32_t v;

	bigN = hi - lo + 1;
	if (bigN < 2)
		return;

	/* largest increment below the range size */
	hp = 0;
	while (incs[hp] < bigN)
		hp++;
	hp--;

	for (; hp >= 0; hp--) {
		h = incs[hp];

		i = lo + h;
		while (1) {
			/*-- copy 1 --*/
			if (i > hi) break;
			v = ptr[i];
			j = i;
			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
				ptr[j] = ptr[j-h];
				j = j - h;
				if (j <= (lo + h - 1))
					break;
			}
			ptr[j] = v;
			i++;

/* 1.5% overall speedup, +290 bytes */
#if CONFIG_BZIP2_FEATURE_SPEED >= 3
			/*-- copy 2 --*/
			if (i > hi) break;
			v = ptr[i];
			j = i;
			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
				ptr[j] = ptr[j-h];
				j = j - h;
				if (j <= (lo + h - 1))
					break;
			}
			ptr[j] = v;
			i++;

			/*-- copy 3 --*/
			if (i > hi) break;
			v = ptr[i];
			j = i;
			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
				ptr[j] = ptr[j-h];
				j = j - h;
				if (j <= (lo + h - 1))
					break;
			}
			ptr[j] = v;
			i++;
#endif
			/* comparison budget exhausted: abandon the sort;
			 * partial order is acceptable to the caller */
			if (*budget < 0) return;
		}
	}
}

/*---------------------------------------------*/
/*
 * The following is an implementation of
 * an elegant 3-way quicksort for strings,
 * described in a paper "Fast Algorithms for
 * Sorting and Searching Strings", by Robert
 * Sedgewick and Jon L. Bentley.
 */
/* (end of chunk -- web-viewer UI residue removed) */