/* blocksort.c */
/*
 * bzip2 is written by Julian Seward <jseward@bzip.org>.
 * Adapted for busybox by Denys Vlasenko <vda.linux@googlemail.com>.
 * See README and LICENSE files in this directory for more information.
 */
/*-------------------------------------------------------------*/
/*--- Block sorting machinery                               ---*/
/*---                                           blocksort.c ---*/
/*-------------------------------------------------------------*/
/* ------------------------------------------------------------------
 This file is part of bzip2/libbzip2, a program and library for
 lossless, block-sorting data compression.

 bzip2/libbzip2 version 1.0.4 of 20 December 2006
 Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>

 Please read the WARNING, DISCLAIMER and PATENTS sections in the
 README file.

 This program is released under the terms of the license contained
 in the file LICENSE.
------------------------------------------------------------------ */
/* #include "bzlib_private.h" */
/* NOTE(review): int32_t/uint32_t, AssertH/AssertD, ALWAYS_INLINE,
 * NOINLINE, False and CONFIG_BZIP2_FEATURE_SPEED are presumably supplied
 * by bzlib_private.h / busybox headers when this file is compiled as part
 * of the bzip2 applet -- confirm against the including translation unit. */

/* Swap two int32_t-compatible lvalues.
 * Multiple-evaluation macro: arguments must be free of side effects.
 * (Brace-block macro, not do{}while(0) -- callers below never use it
 * as a lone if-branch statement, so this is safe as written.) */
#define mswap(zz1, zz2) \
{ \
	int32_t zztmp = zz1; \
	zz1 = zz2; \
	zz2 = zztmp; \
}

/* Swap the zzn-element regions of ptr[] starting at zzp1 and zzp2.
 * Regions are assumed non-overlapping by the callers in fallbackQSort3. */
static
/* No measurable speed gain with inlining */
/* ALWAYS_INLINE */
void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn)
{
	while (zzn > 0) {
		mswap(ptr[zzp1], ptr[zzp2]);
		zzp1++;
		zzp2++;
		zzn--;
	}
}

/* Minimum of two int32_t values. */
static
ALWAYS_INLINE
int32_t mmin(int32_t a, int32_t b)
{
	return (a < b) ? a : b;
}

/*---------------------------------------------*/
/*--- Fallback O(N log(N)^2) sorting        ---*/
/*--- algorithm, for repetitive blocks      ---*/
/*---------------------------------------------*/

/* Sort fmap[lo..hi] (inclusive) by ascending eclass[fmap[.]] key.
 * Insertion sort done in two passes: a stride-4 pass to move elements
 * most of the way, then a stride-1 pass to finish -- i.e. a tiny
 * shellsort with increments {4, 1}.  Used by fallbackQSort3 for small
 * partitions only. */
static
inline
void fallbackSimpleSort(uint32_t* fmap, uint32_t* eclass, int32_t lo, int32_t hi)
{
	int32_t i, j, tmp;
	uint32_t ec_tmp;

	if (lo == hi)
		return;

	if (hi - lo > 3) {
		/* stride-4 insertion pass */
		for (i = hi-4; i >= lo; i--) {
			tmp = fmap[i];
			ec_tmp = eclass[tmp];
			for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4)
				fmap[j-4] = fmap[j];
			fmap[j-4] = tmp;
		}
	}
	/* final stride-1 insertion pass */
	for (i = hi-1; i >= lo; i--) {
		tmp = fmap[i];
		ec_tmp = eclass[tmp];
		for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++)
			fmap[j-1] = fmap[j];
		fmap[j-1] = tmp;
	}
}

/*---------------------------------------------*/
/* Explicit work-stack push/pop for the iterative quicksort below.
 * These reference fallbackQSort3's locals (stackLo, stackHi, sp). */
#define fpush(lz,hz) { \
	stackLo[sp] = lz; \
	stackHi[sp] = hz; \
	sp++; \
}
#define fpop(lz,hz) { \
	sp--; \
	lz = stackLo[sp]; \
	hz = stackHi[sp]; \
}

/* Partitions at or below this size go to fallbackSimpleSort instead. */
#define FALLBACK_QSORT_SMALL_THRESH 10
#define FALLBACK_QSORT_STACK_SIZE   100

/* Iterative 3-way-partition quicksort of fmap[loSt..hiSt] by
 * eclass[fmap[.]] key.  Pivot choice is pseudo-random (LCG below)
 * among {lo, mid, hi} to dodge adversarial inputs; equal-to-pivot
 * elements are collected at both ends and swapped into the middle
 * after partitioning (Bentley-McIlroy scheme).  The larger side is
 * pushed first so the smaller is processed next, bounding stack depth. */
static
void fallbackQSort3(uint32_t* fmap, uint32_t* eclass, int32_t loSt, int32_t hiSt)
{
	int32_t unLo, unHi, ltLo, gtHi, n, m;
	int32_t sp, lo, hi;
	uint32_t med, r, r3;
	int32_t stackLo[FALLBACK_QSORT_STACK_SIZE];
	int32_t stackHi[FALLBACK_QSORT_STACK_SIZE];

	r = 0;
	sp = 0;
	fpush(loSt, hiSt);

	while (sp > 0) {
		AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004);

		fpop(lo, hi);
		if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
			fallbackSimpleSort(fmap, eclass, lo, hi);
			continue;
		}

		/* Random partitioning. Median of 3 sometimes fails to
		 * avoid bad cases. Median of 9 seems to help but
		 * looks rather expensive. This too seems to work but
		 * is cheaper. Guidance for the magic constants
		 * 7621 and 32768 is taken from Sedgewick's algorithms
		 * book, chapter 35.
		 */
		r = ((r * 7621) + 1) % 32768;
		r3 = r % 3;
		if (r3 == 0)
			med = eclass[fmap[lo]];
		else if (r3 == 1)
			med = eclass[fmap[(lo+hi)>>1]];
		else
			med = eclass[fmap[hi]];

		/* [lo..ltLo) == pivot, [ltLo..unLo) < pivot, [unLo..unHi]
		 * unexamined, (unHi..gtHi] > pivot, (gtHi..hi] == pivot */
		unLo = ltLo = lo;
		unHi = gtHi = hi;

		while (1) {
			/* advance unLo past elements < pivot, parking
			 * == pivot elements at the low end */
			while (1) {
				if (unLo > unHi)
					break;
				n = (int32_t)eclass[fmap[unLo]] - (int32_t)med;
				if (n == 0) {
					mswap(fmap[unLo], fmap[ltLo]);
					ltLo++;
					unLo++;
					continue;
				};
				if (n > 0)
					break;
				unLo++;
			}
			/* retreat unHi past elements > pivot, parking
			 * == pivot elements at the high end */
			while (1) {
				if (unLo > unHi)
					break;
				n = (int32_t)eclass[fmap[unHi]] - (int32_t)med;
				if (n == 0) {
					mswap(fmap[unHi], fmap[gtHi]);
					gtHi--;
					unHi--;
					continue;
				};
				if (n < 0)
					break;
				unHi--;
			}
			if (unLo > unHi)
				break;
			mswap(fmap[unLo], fmap[unHi]);
			unLo++;
			unHi--;
		}

		AssertD(unHi == unLo-1, "fallbackQSort3(2)");

		/* everything equal to the pivot: this partition is done */
		if (gtHi < ltLo)
			continue;

		/* swap the parked == pivot runs into the middle */
		n = mmin(ltLo-lo, unLo-ltLo);
		mvswap(fmap, lo, unLo-n, n);
		m = mmin(hi-gtHi, gtHi-unHi);
		mvswap(fmap, unLo, hi-m+1, m);

		/* [lo..n] and [m..hi] are the two sub-partitions still
		 * strictly <, resp. >, the pivot */
		n = lo + unLo - ltLo - 1;
		m = hi - (gtHi - unHi) + 1;

		/* push the larger side first -> smaller side sorted next,
		 * keeping stack depth O(log N) */
		if (n - lo > hi - m) {
			fpush(lo, n);
			fpush(m, hi);
		} else {
			fpush(m, hi);
			fpush(lo, n);
		}
	}
}

#undef fpush
#undef fpop
#undef FALLBACK_QSORT_SMALL_THRESH
#undef FALLBACK_QSORT_STACK_SIZE

/*---------------------------------------------*/
/* Pre:
 *	nblock > 0
 *	eclass exists for [0 .. nblock-1]
 *	((uint8_t*)eclass) [0 .. nblock-1] holds block
 *	ptr exists for [0 .. nblock-1]
 *
 * Post:
 *	((uint8_t*)eclass) [0 .. nblock-1] holds block
 *	All other areas of eclass destroyed
 *	fmap [0 .. nblock-1] holds sorted order
 *	bhtab[0 .. 2+(nblock/32)] destroyed
 */

/* Bucket-header bit map: bit zz of bhtab marks the start of a bucket.
 * NOTE(review): "1 << ((zz) & 31)" left-shifts a signed int; when the
 * bit index is 31 this is signed-overflow UB in strict C99, though it
 * matches upstream bzip2 and behaves as intended on the targets busybox
 * supports.  "1U << ..." would be the strictly-conforming spelling. */
#define SET_BH(zz)       bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
#define CLEAR_BH(zz)     bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
#define ISSET_BH(zz)     (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
#define WORD_BH(zz)      bhtab[(zz) >> 5]
#define UNALIGNED_BH(zz) ((zz) & 0x01f)

/* Fallback suffix sort (used when the main sort's budget runs out --
 * TODO confirm against the caller, which is outside this chunk):
 * initial byte-wise counting sort, then log(N) rounds of bucket
 * refinement doubling the compared prefix length H each round,
 * in the style of the Manber-Myers suffix-array construction. */
static
void fallbackSort(uint32_t* fmap, uint32_t* eclass, uint32_t* bhtab, int32_t nblock)
{
	int32_t ftab[257];
	int32_t ftabCopy[256];
	int32_t H, i, j, k, l, r, cc, cc1;
	int32_t nNotDone;
	int32_t nBhtab;
	uint8_t* eclass8 = (uint8_t*)eclass;

	/*
	 * Initial 1-char radix sort to generate
	 * initial fmap and initial BH bits.
	 */
	for (i = 0; i < 257; i++)
		ftab[i] = 0;
	for (i = 0; i < nblock; i++)
		ftab[eclass8[i]]++;
	for (i = 0; i < 256; i++)
		ftabCopy[i] = ftab[i];

	j = ftab[0]; /* bbox: optimized */
	for (i = 1; i < 257; i++) {
		j += ftab[i];
		ftab[i] = j;
	}

	/* scatter: ftab[] now holds exclusive bucket end offsets,
	 * walked backwards per byte value */
	for (i = 0; i < nblock; i++) {
		j = eclass8[i];
		k = ftab[j] - 1;
		ftab[j] = k;
		fmap[k] = i;
	}

	nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */
	for (i = 0; i < nBhtab; i++)
		bhtab[i] = 0;
	/* mark the start of each initial (1-byte) bucket */
	for (i = 0; i < 256; i++)
		SET_BH(ftab[i]);

	/*
	 * Inductively refine the buckets.  Kind-of an
	 * "exponential radix sort" (!), inspired by the
	 * Manber-Myers suffix array construction algorithm.
	 */

	/*-- set sentinel bits for block-end detection --*/
	for (i = 0; i < 32; i++) {
		SET_BH(nblock + 2*i);
		CLEAR_BH(nblock + 2*i + 1);
	}

	/*-- the log(N) loop --*/
	H = 1;
	while (1) {
		/* eclass[k] := index of the bucket containing suffix k+H,
		 * i.e. the sort key for the next H-doubling round */
		j = 0;
		for (i = 0; i < nblock; i++) {
			if (ISSET_BH(i))
				j = i;
			k = fmap[i] - H;
			if (k < 0)
				k += nblock; /* wrap: rotations, not plain suffixes */
			eclass[k] = j;
		}

		nNotDone = 0;
		r = -1;
		while (1) {
			/*-- find the next non-singleton bucket --*/
			k = r + 1;
			/* scan bit-by-bit to a word boundary, then word-at-a-time */
			while (ISSET_BH(k) && UNALIGNED_BH(k))
				k++;
			if (ISSET_BH(k)) {
				while (WORD_BH(k) == 0xffffffff)
					k += 32;
				while (ISSET_BH(k))
					k++;
			}
			l = k - 1;
			if (l >= nblock)
				break;
			while (!ISSET_BH(k) && UNALIGNED_BH(k))
				k++;
			if (!ISSET_BH(k)) {
				while (WORD_BH(k) == 0x00000000)
					k += 32;
				while (!ISSET_BH(k))
					k++;
			}
			r = k - 1;
			if (r >= nblock)
				break;

			/*-- now [l, r] bracket current bucket --*/
			if (r > l) {
				nNotDone += (r - l + 1);
				fallbackQSort3(fmap, eclass, l, r);

				/*-- scan bucket and generate header bits-- */
				cc = -1;
				for (i = l; i <= r; i++) {
					cc1 = eclass[fmap[i]];
					if (cc != cc1) {
						SET_BH(i);
						cc = cc1;
					};
				}
			}
		}

		H *= 2;
		if (H > nblock || nNotDone == 0)
			break;
	}

	/*
	 * Reconstruct the original block in
	 * eclass8 [0 .. nblock-1], since the
	 * previous phase destroyed it.
	 */
	j = 0;
	for (i = 0; i < nblock; i++) {
		while (ftabCopy[j] == 0)
			j++;
		ftabCopy[j]--;
		eclass8[fmap[i]] = (uint8_t)j;
	}
	AssertH(j < 256, 1005);
}

#undef SET_BH
#undef CLEAR_BH
#undef ISSET_BH
#undef WORD_BH
#undef UNALIGNED_BH

/*---------------------------------------------*/
/*--- The main, O(N^2 log(N)) sorting       ---*/
/*--- algorithm. Faster for "normal"        ---*/
/*--- non-repetitive blocks.                ---*/
/*---------------------------------------------*/

/* Compare the two rotations of block[] beginning at i1 and i2;
 * returns nonzero when rotation i1 sorts greater than rotation i2.
 * quadrant[] supplies precomputed tie-break ranks (set up by the
 * caller, outside this chunk -- confirm).  *budget is decremented
 * once per 8 positions compared beyond the first 12; presumably the
 * caller bails out to fallbackSort when it goes negative -- verify
 * against mainSort. */
static
NOINLINE
int mainGtU(
		uint32_t i1,
		uint32_t i2,
		uint8_t* block,
		uint16_t* quadrant,
		uint32_t nblock,
		int32_t* budget)
{
	int32_t k;
	uint8_t c1, c2;
	uint16_t s1, s2;

/* Loop unrolling here is actually very useful
 * (generated code is much simpler),
 * code size increase is only 270 bytes (i386)
 * but speeds up compression 10% overall
 */
#if CONFIG_BZIP2_FEATURE_SPEED >= 1
#define TIMES_8(code) \
	code; code; code; code; \
	code; code; code; code;
#define TIMES_12(code) \
	code; code; code; code; \
	code; code; code; code; \
	code; code; code; code;
#else
#define TIMES_8(code) \
{ \
	int nn = 8; \
	do { \
		code; \
	} while (--nn); \
}
#define TIMES_12(code) \
{ \
	int nn = 12; \
	do { \
		code; \
	} while (--nn); \
}
#endif

	AssertD(i1 != i2, "mainGtU");

	/* first 12 positions: bytes only, no quadrant tie-break yet */
	TIMES_12(
		c1 = block[i1]; c2 = block[i2];
		if (c1 != c2) return (c1 > c2);
		i1++; i2++;
	)

	k = nblock + 8;

	do {
		/* 8 positions per iteration: byte compare, then quadrant
		 * rank as tie-break for equal bytes */
		TIMES_8(
			c1 = block[i1]; c2 = block[i2];
			if (c1 != c2) return (c1 > c2);
			s1 = quadrant[i1]; s2 = quadrant[i2];
			if (s1 != s2) return (s1 > s2);
			i1++; i2++;
		)
		/* indices wrap around the block (rotations) */
		if (i1 >= nblock) i1 -= nblock;
		if (i2 >= nblock) i2 -= nblock;

		(*budget)--;
		k -= 8;
	} while (k >= 0);

	/* full wrap-around without a difference: rotations are equal */
	return False;
}
#undef TIMES_8
#undef TIMES_12

/*---------------------------------------------*/
/*
 * Knuth's increments seem to work better
 * than Incerpi-Sedgewick here.  Possibly
 * because the number of elems to sort is
 * usually small, typically <= 20.
 */
/* Knuth shellsort increment sequence: h' = 3h + 1 */
static
const int32_t incs[14] = {
	1, 4, 13, 40, 121, 364, 1093, 3280,
	9841, 29524, 88573, 265720,
	797161, 2391484
};

/* Shellsort of ptr[lo..hi] (inclusive) using the Knuth increments
 * above and mainGtU (on indices offset by d) as the comparator.
 * Returns early -- leaving the range partially sorted -- once
 * *budget has gone negative; the caller is expected to detect this
 * and fall back (TODO confirm against mainQSort3, outside this chunk). */
static
void mainSimpleSort(uint32_t* ptr,
		uint8_t* block,
		uint16_t* quadrant,
		int32_t nblock,
		int32_t lo,
		int32_t hi,
		int32_t d,
		int32_t* budget)
{
	int32_t i, j, h, bigN, hp;
	uint32_t v;

	bigN = hi - lo + 1;
	if (bigN < 2)
		return;

	/* largest increment below the range size */
	hp = 0;
	while (incs[hp] < bigN)
		hp++;
	hp--;

	for (; hp >= 0; hp--) {
		h = incs[hp];

		i = lo + h;
		while (1) {
			/*-- copy 1 --*/
			if (i > hi) break;
			v = ptr[i];
			j = i;
			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
				ptr[j] = ptr[j-h];
				j = j - h;
				if (j <= (lo + h - 1))
					break;
			}
			ptr[j] = v;
			i++;

/* 1.5% overall speedup, +290 bytes */
#if CONFIG_BZIP2_FEATURE_SPEED >= 3
			/*-- copy 2 --*/
			if (i > hi) break;
			v = ptr[i];
			j = i;
			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
				ptr[j] = ptr[j-h];
				j = j - h;
				if (j <= (lo + h - 1))
					break;
			}
			ptr[j] = v;
			i++;

			/*-- copy 3 --*/
			if (i > hi) break;
			v = ptr[i];
			j = i;
			while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) {
				ptr[j] = ptr[j-h];
				j = j - h;
				if (j <= (lo + h - 1))
					break;
			}
			ptr[j] = v;
			i++;
#endif
			/* comparison budget exhausted: abandon the sort;
			 * partial order is acceptable to the caller */
			if (*budget < 0) return;
		}
	}
}

/*---------------------------------------------*/
/*
 * The following is an implementation of
 * an elegant 3-way quicksort for strings,
 * described in a paper "Fast Algorithms for
 * Sorting and Searching Strings", by Robert
 * Sedgewick and Jon L. Bentley.
 */
/* (end of chunk -- web-viewer UI residue removed) */