📄 icerevisitedradix.cpp
字号:
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Contains source code from the article "Radix Sort Revisited".
* \file IceRevisitedRadix.cpp
* \author Pierre Terdiman
* \date April, 4, 2000
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Revisited Radix Sort.
* This is my new radix routine:
* - it uses indices and doesn't recopy the values anymore, hence wasting less ram
* - it creates all the histograms in one run instead of four
* - it sorts words faster than dwords and bytes faster than words
* - it correctly sorts negative floating-point values by patching the offsets
* - it automatically takes advantage of temporal coherence
* - multiple keys support is a side effect of temporal coherence
* - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway]
*
* History:
* - 08.15.98: very first version
* - 04.04.00: recoded for the radix article
* - 12.xx.00: code lifting
* - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here)
* - 10.11.01: added local ram support
* - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting......
* - 01.02.02: - "mIndices" renamed => "mRanks". That's a rank sorter after all.
* - ranks are not "reset" anymore, but implicit on first calls
* - 07.05.02: - offsets rewritten with one less indirection.
* - 11.03.02: - "bool" replaced with RadixHint enum
*
* \class RadixSort
* \author Pierre Terdiman
* \version 1.4
* \date August, 15, 1998
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/*
To do:
- add an offset parameter between two input values (avoid some data recopy sometimes)
- unroll ? asm ?
- 11 bits trick & 3 passes as Michael did
- prefetch stuff the day I have a P3
- make a version with 16-bits indices ?
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Precompiled Header
#include "Opcode/Stdafx.h"
using namespace IceCore;
#define INVALIDATE_RANKS mCurrentSize|=0x80000000
#define VALIDATE_RANKS mCurrentSize&=0x7fffffff
#define CURRENT_SIZE (mCurrentSize&0x7fffffff)
#define INVALID_RANKS (mCurrentSize&0x80000000)
#define CHECK_RESIZE(n) \
if(n!=mPreviousSize) \
{ \
if(n>mCurrentSize) Resize(n); \
else ResetRanks(); \
mPreviousSize = n; \
}
#define CREATE_HISTOGRAMS(type, buffer) \
/* Clear counters/histograms */ \
ZeroMemory(mHistogram, 256*4*sizeof(udword)); \
\
/* Prepare to count */ \
ubyte* p = (ubyte*)input; \
ubyte* pe = &p[nb*4]; \
udword* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \
udword* h1= &mHistogram[256]; /* Histogram for second pass */ \
udword* h2= &mHistogram[512]; /* Histogram for third pass */ \
udword* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \
\
bool AlreadySorted = true; /* Optimism... */ \
\
if(INVALID_RANKS) \
{ \
/* Prepare for temporal coherence */ \
type* Running = (type*)buffer; \
type PrevVal = *Running; \
\
while(p!=pe) \
{ \
/* Read input buffer in previous sorted order */ \
type Val = *Running++; \
/* Check whether already sorted or not */ \
if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */ \
/* Update for next iteration */ \
PrevVal = Val; \
\
/* Create histograms */ \
h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \
} \
\
/* If all input values are already sorted, we just have to return and leave the */ \
/* previous list unchanged. That way the routine may take advantage of temporal */ \
/* coherence, for example when used to sort transparent faces. */ \
if(AlreadySorted) \
{ \
mNbHits++; \
for(udword i=0;i<nb;i++) mRanks[i] = i; \
return *this; \
} \
} \
else \
{ \
/* Prepare for temporal coherence */ \
udword* Indices = mRanks; \
type PrevVal = (type)buffer[*Indices]; \
\
while(p!=pe) \
{ \
/* Read input buffer in previous sorted order */ \
type Val = (type)buffer[*Indices++]; \
/* Check whether already sorted or not */ \
if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */ \
/* Update for next iteration */ \
PrevVal = Val; \
\
/* Create histograms */ \
h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \
} \
\
/* If all input values are already sorted, we just have to return and leave the */ \
/* previous list unchanged. That way the routine may take advantage of temporal */ \
/* coherence, for example when used to sort transparent faces. */ \
if(AlreadySorted) { mNbHits++; return *this; } \
} \
\
/* Else there has been an early out and we must finish computing the histograms */ \
while(p!=pe) \
{ \
/* Create histograms without the previous overhead */ \
h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \
}
#define CHECK_PASS_VALIDITY(pass) \
/* Shortcut to current counters */ \
udword* CurCount = &mHistogram[pass<<8]; \
\
/* Reset flag. The sorting pass is supposed to be performed. (default) */ \
bool PerformPass = true; \
\
/* Check pass validity */ \
\
/* If all values have the same byte, sorting is useless. */ \
/* It may happen when sorting bytes or words instead of dwords. */ \
/* This routine actually sorts words faster than dwords, and bytes */ \
/* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \
/* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
\
/* Get first byte */ \
ubyte UniqueVal = *(((ubyte*)input)+pass); \
\
/* Check that byte's counter */ \
if(CurCount[UniqueVal]==nb) PerformPass=false;
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Constructor.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort::RadixSort() : mRanks(null), mRanks2(null), mCurrentSize(0), mTotalCalls(0), mNbHits(0)
{
#ifndef RADIX_LOCAL_RAM
// Allocate input-independent ram
mHistogram = new udword[256*4];
mOffset = new udword[256];
#endif
// Initialize indices
INVALIDATE_RANKS;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Destructor.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort::~RadixSort()
{
// Release everything
#ifndef RADIX_LOCAL_RAM
DELETEARRAY(mOffset);
DELETEARRAY(mHistogram);
#endif
DELETEARRAY(mRanks2);
DELETEARRAY(mRanks);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Resizes the inner lists.
* \param nb [in] new size (number of dwords)
* \return true if success
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
bool RadixSort::Resize(udword nb)
{
// Free previously used ram
DELETEARRAY(mRanks2);
DELETEARRAY(mRanks);
// Get some fresh one
mRanks = new udword[nb]; CHECKALLOC(mRanks);
mRanks2 = new udword[nb]; CHECKALLOC(mRanks2);
return true;
}
inline_ void RadixSort::CheckResize(udword nb)
{
udword CurSize = CURRENT_SIZE;
if(nb!=CurSize)
{
if(nb>CurSize) Resize(nb);
mCurrentSize = nb;
INVALIDATE_RANKS;
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Main sort routine.
* This one is for integer values. After the call, mRanks contains a list of indices in sorted order, i.e. in the order you may process your data.
* \param input [in] a list of integer values to sort
* \param nb [in] number of values to sort, must be < 2^31
* \param hint [in] RADIX_SIGNED to handle negative values, RADIX_UNSIGNED if you know your input buffer only contains positive values
* \return Self-Reference
*/
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint)
{
// Checkings
if(!input || !nb || nb&0x80000000) return *this;
// Stats
mTotalCalls++;
// Resize lists if needed
CheckResize(nb);
#ifdef RADIX_LOCAL_RAM
// Allocate histograms & offsets on the stack
udword mHistogram[256*4];
// udword mOffset[256];
udword* mLink[256];
#endif
// Create histograms (counters). Counters for all passes are created in one run.
// Pros: read input buffer once instead of four times
// Cons: mHistogram is 4Kb instead of 1Kb
// We must take care of signed/unsigned values for temporal coherence.... I just
// have 2 code paths even if just a single opcode changes. Self-modifying code, someone?
if(hint==RADIX_UNSIGNED) { CREATE_HISTOGRAMS(udword, input); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -