📄 blocksort.c
字号:
swap(zptr[unLo], zptr[unHi]); unLo++; unHi--; } AssertD ( unHi == unLo-1, "bad termination in qSort3" ); if (gtHi < ltLo) { push(lo, hi, d+1 ); continue; } n = min(ltLo-lo, unLo-ltLo); vswap(zptr, lo, unLo-n, n); m = min(hi-gtHi, gtHi-unHi); vswap(zptr, unLo, hi-m+1, m); n = lo + unLo - ltLo - 1; m = hi - (gtHi - unHi) + 1; push ( lo, n, d ); push ( n+1, m-1, d+1 ); push ( m, hi, d ); }}/*---------------------------------------------*/#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])#define SETMASK (1 << 21)#define CLEARMASK (~(SETMASK))static void sortMain ( EState* s ){ Int32 i, j, k, ss, sb; Int32 runningOrder[256]; Int32 copy[256]; Bool bigDone[256]; UChar c1, c2; Int32 numQSorted; UChar* block = s->block; UInt32* zptr = s->zptr; UInt16* quadrant = s->quadrant; Int32* ftab = s->ftab; Int32* workDone = &(s->workDone); Int32 nblock = s->nblock; Int32 workLimit = s->workLimit; Bool firstAttempt = s->firstAttempt; /*-- In the various block-sized structures, live data runs from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area for block. --*/ if (s->verbosity >= 4) VPrintf0( " sort initialise ...\n" ); for (i = 0; i < BZ_NUM_OVERSHOOT_BYTES; i++) block[nblock+i] = block[i % nblock]; for (i = 0; i < nblock+BZ_NUM_OVERSHOOT_BYTES; i++) quadrant[i] = 0; if (nblock <= 4000) { /*-- Use simpleSort(), since the full sorting mechanism has quite a large constant overhead. --*/ if (s->verbosity >= 4) VPrintf0( " simpleSort ...\n" ); for (i = 0; i < nblock; i++) zptr[i] = i; firstAttempt = False; *workDone = workLimit = 0; simpleSort ( s, 0, nblock-1, 0 ); if (s->verbosity >= 4) VPrintf0( " simpleSort done.\n" ); } else { numQSorted = 0; for (i = 0; i <= 255; i++) bigDone[i] = False; if (s->verbosity >= 4) VPrintf0( " bucket sorting ...\n" ); for (i = 0; i <= 65536; i++) ftab[i] = 0; c1 = block[nblock-1]; for (i = 0; i < nblock; i++) { c2 = block[i]; ftab[(c1 << 8) + c2]++; c1 = c2; } for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; c1 = block[0]; for (i = 0; i < nblock-1; i++) { c2 = block[i+1]; j = (c1 << 8) + c2; c1 = c2; ftab[j]--; zptr[ftab[j]] = i; } j = (block[nblock-1] << 8) + block[0]; ftab[j]--; zptr[ftab[j]] = nblock-1; /*-- Now ftab contains the first loc of every small bucket. Calculate the running order, from smallest to largest big bucket. --*/ for (i = 0; i <= 255; i++) runningOrder[i] = i; { Int32 vv; Int32 h = 1; do h = 3 * h + 1; while (h <= 256); do { h = h / 3; for (i = h; i <= 255; i++) { vv = runningOrder[i]; j = i; while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { runningOrder[j] = runningOrder[j-h]; j = j - h; if (j <= (h - 1)) goto zero; } zero: runningOrder[j] = vv; } } while (h != 1); } /*-- The main sorting loop. --*/ for (i = 0; i <= 255; i++) { /*-- Process big buckets, starting with the least full. Basically this is a 4-step process in which we call qSort3 to sort the small buckets [ss, j], but also make a big effort to avoid the calls if we can. --*/ ss = runningOrder[i]; /*-- Step 1: Complete the big bucket [ss] by quicksorting any unsorted small buckets [ss, j], for j != ss. Hopefully previous pointer-scanning phases have already completed many of the small buckets [ss, j], so we don't have to sort them at all. --*/ for (j = 0; j <= 255; j++) { if (j != ss) { sb = (ss << 8) + j; if ( ! (ftab[sb] & SETMASK) ) { Int32 lo = ftab[sb] & CLEARMASK; Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; if (hi > lo) { if (s->verbosity >= 4) VPrintf4( " qsort [0x%x, 0x%x] done %d this %d\n", ss, j, numQSorted, hi - lo + 1 ); qSort3 ( s, lo, hi, 2 ); numQSorted += ( hi - lo + 1 ); if (*workDone > workLimit && firstAttempt) return; } } ftab[sb] |= SETMASK; } } /*-- Step 2: Deal specially with case [ss, ss]. This establishes the sorted order for [ss, ss] without any comparisons. A clever trick, cryptically described as steps Q6b and Q6c in SRC-124 (aka BW94). This makes it entirely practical to not use a preliminary run-length coder, but unfortunately we are now stuck with the .bz2 file format. --*/ { Int32 put0, get0, put1, get1; Int32 sbn = (ss << 8) + ss; Int32 lo = ftab[sbn] & CLEARMASK; Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1; UChar ssc = (UChar)ss; put0 = lo; get0 = ftab[ss << 8] & CLEARMASK; put1 = hi; get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1; while (get0 < put0) { j = zptr[get0]-1; if (j < 0) j += nblock; c1 = block[j]; if (c1 == ssc) { zptr[put0] = j; put0++; }; get0++; } while (get1 > put1) { j = zptr[get1]-1; if (j < 0) j += nblock; c1 = block[j]; if (c1 == ssc) { zptr[put1] = j; put1--; }; get1--; } ftab[sbn] |= SETMASK; } /*-- Step 3: The [ss] big bucket is now done. Record this fact, and update the quadrant descriptors. Remember to update quadrants in the overshoot area too, if necessary. The "if (i < 255)" test merely skips this updating for the last bucket processed, since updating for the last bucket is pointless. The quadrant array provides a way to incrementally cache sort orderings, as they appear, so as to make subsequent comparisons in fullGtU() complete faster. For repetitive blocks this makes a big difference (but not big enough to be able to avoid randomisation for very repetitive data.) The precise meaning is: at all times: for 0 <= i < nblock and 0 <= j <= nblock if block[i] != block[j], then the relative values of quadrant[i] and quadrant[j] are meaningless. else { if quadrant[i] < quadrant[j] then the string starting at i lexicographically precedes the string starting at j else if quadrant[i] > quadrant[j] then the string starting at j lexicographically precedes the string starting at i else the relative ordering of the strings starting at i and j has not yet been determined. } --*/ bigDone[ss] = True; if (i < 255) { Int32 bbStart = ftab[ss << 8] & CLEARMASK; Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; Int32 shifts = 0; while ((bbSize >> shifts) > 65534) shifts++; for (j = 0; j < bbSize; j++) { Int32 a2update = zptr[bbStart + j]; UInt16 qVal = (UInt16)(j >> shifts); quadrant[a2update] = qVal; if (a2update < BZ_NUM_OVERSHOOT_BYTES) quadrant[a2update + nblock] = qVal; } AssertH ( ( ((bbSize-1) >> shifts) <= 65535 ), 1002 ); } /*-- Step 4: Now scan this big bucket [ss] so as to synthesise the sorted order for small buckets [t, ss] for all t != ss. This will avoid doing Real Work in subsequent Step 1's. --*/ for (j = 0; j <= 255; j++) copy[j] = ftab[(j << 8) + ss] & CLEARMASK; for (j = ftab[ss << 8] & CLEARMASK; j < (ftab[(ss+1) << 8] & CLEARMASK); j++) { k = zptr[j]-1; if (k < 0) k += nblock; c1 = block[k]; if ( ! bigDone[c1] ) { zptr[copy[c1]] = k; copy[c1] ++; } } for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; } if (s->verbosity >= 4) VPrintf3( " %d pointers, %d sorted, %d scanned\n", nblock, numQSorted, nblock - numQSorted ); }}/*---------------------------------------------*/static void randomiseBlock ( EState* s ){ Int32 i; BZ_RAND_INIT_MASK; for (i = 0; i < 256; i++) s->inUse[i] = False; for (i = 0; i < s->nblock; i++) { BZ_RAND_UPD_MASK; s->block[i] ^= BZ_RAND_MASK; s->inUse[s->block[i]] = True; }}/*---------------------------------------------*/void blockSort ( EState* s ){ Int32 i; s->workLimit = s->workFactor * (s->nblock - 1); s->workDone = 0; s->blockRandomised = False; s->firstAttempt = True; sortMain ( s ); if (s->verbosity >= 3) VPrintf3( " %d work, %d block, ratio %5.2f\n", s->workDone, s->nblock-1, (float)(s->workDone) / (float)(s->nblock-1) ); if (s->workDone > s->workLimit && s->firstAttempt) { if (s->verbosity >= 2) VPrintf0( " sorting aborted; randomising block\n" ); randomiseBlock ( s ); s->workLimit = s->workDone = 0; s->blockRandomised = True; s->firstAttempt = False; sortMain ( s ); if (s->verbosity >= 3) VPrintf3( " %d work, %d block, ratio %f\n", s->workDone, s->nblock-1, (float)(s->workDone) / (float)(s->nblock-1) ); } s->origPtr = -1; for (i = 0; i < s->nblock; i++) if (s->zptr[i] == 0) { s->origPtr = i; break; }; AssertH( s->origPtr != -1, 1003 );}/*-------------------------------------------------------------*//*--- end blocksort.c ---*//*-------------------------------------------------------------*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -