⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xmergesort.h

📁 归并排序的并行算法 intel 多线程优化大赛
💻 H
字号:
/*/////////////////////////////////////////
// 版权所有(C)  2000-2008 邓辉           //
// Email:      denghui0815@hotmail.com  //
// 说明:       Intel线程优化大赛参赛作品//
/////////////////////////////////////////*/

#ifndef XMERGESORT_HEAD
#define XMERGESORT_HEAD

#include "XSortPub.h"

#define XUSE_MACRO_MERGE

enum enXDataSideMode
{
	XDATA_IN_BACK  = 0,
	XDATA_IN_FRONT,
};

//合并_函数版本
template<class Type>
__inline static void XMerge(Type* pSrc1, Type* pSrc2, uint32 nMid, uint32 nSize, BYTE bModeA, BYTE bModeB)
{
	const Type *pBeg1,*pEnd1,*pBeg2,*pEnd2;
	Type *pDst;
	// bModeA为XDATA_IN_FRONT  [0,    nMid )的数据在pSrc1中 否则数据在pSrc2中
	// bModeB为XDATA_IN_FRONT  [nMid, nSize)的数据在pSrc1中 否则数据在pSrc2中
	// 根据bModeA和bModeB决定由pSrc1排序到pSrc2 还是由pSrc2排序到pSrc1
	if(bModeA)
	{
		if(!bModeB)
		{	// 将[nMid, nSize)的数据复制到pSrc1中
			for(int i = nMid; i < nSize; ++i) pSrc1[i] = pSrc2[i];
		}
		pBeg1 = pSrc1;
		pEnd1 = pBeg2 = pSrc1 + nMid;
		pEnd2 = pSrc1 + nSize;
		pDst  = pSrc2;
	}
	else
	{
		if(bModeB)
		{   // 将[nMid, nSize)的数据复制到pSrc2中
			for(int i = nMid; i < nSize; ++i) pSrc2[i] = pSrc1[i];
		}
		pBeg1 = pSrc2;
		pEnd1 = pBeg2 = pSrc2 + nMid;
		pEnd2 = pSrc2 + nSize;
		pDst  = pSrc1;
	}

	// 判断前后两段数据的最后一个值的大小,减少判断
	if( *(pEnd1 - 1) > *(pEnd2 - 1) )
	{
		const Type *pTmpB = pBeg1; pBeg1 = pBeg2; pBeg2 = pTmpB;
		const Type *pTmpE = pEnd1; pEnd1 = pEnd2; pEnd2 = pTmpE;
	}
	// 数据合并
	while(pBeg1 < pEnd1)
	{
		if(*pBeg2 < *pBeg1)
		{
			*pDst = *pBeg2;	++pDst; ++pBeg2;
		}
		else
		{
			*pDst = *pBeg1;	++pDst; ++pBeg1;
		}
	}
	do { *pDst = *pBeg2; ++pDst; ++pBeg2; }	while(pBeg2 < pEnd2);
}

//合并_宏版本
#define XMERGE(Macro_Src1, Macro_Src2, Macro_Mid, Macro_Size, Macro_ModeA, Macro_ModeB) \
{																						\
	const Type *pBeg1,*pEnd1,*pBeg2,*pEnd2;												\
	Type* pDst;																			\
																						\
	if(Macro_ModeA)																		\
	{																					\
		if(!Macro_ModeB)																\
		{																				\
			for(int i = Macro_Mid; i < Macro_Size; ++i) Macro_Src1[i] = Macro_Src2[i];	\
		}																				\
		pBeg1 = Macro_Src1;																\
		pEnd1 = pBeg2 = Macro_Src1 + Macro_Mid;											\
		pEnd2 = Macro_Src1 + Macro_Size;												\
		pDst  = Macro_Src2;																\
	}																					\
	else																				\
	{																					\
		if(Macro_ModeB)																	\
		{																				\
			for(int i = Macro_Mid; i < Macro_Size; ++i) Macro_Src2[i] = Macro_Src1[i];	\
		}																				\
		pBeg1 = Macro_Src2;																\
		pEnd1 = pBeg2 = Macro_Src2 + Macro_Mid;											\
		pEnd2 = Macro_Src2 + Macro_Size;												\
		pDst  = Macro_Src1;																\
	}																					\
																						\
	if( *(pEnd1 - 1) > *(pEnd2 - 1) )													\
	{																					\
		const Type *pTmpB = pBeg1; pBeg1 = pBeg2; pBeg2 = pTmpB;						\
		const Type *pTmpE = pEnd1; pEnd1 = pEnd2; pEnd2 = pTmpE;						\
	}																					\
																						\
	while(pBeg1 < pEnd1)																\
	{																					\
		if(*pBeg2 < *pBeg1)																\
		{																				\
			*pDst = *pBeg2;	++pDst; ++pBeg2;											\
		}																				\
		else																			\
		{																				\
			*pDst = *pBeg1;	++pDst; ++pBeg1;											\
		}																				\
	}																					\
	do { *pDst = *pBeg2; ++pDst; ++pBeg2; }	while(pBeg2 < pEnd2); 						\
}

//合并_宏版本 数据由Macro_Src合并到Macro_Dst
#define XMERGE_EXT(Macro_Src, Macro_Dst, Macro_Mid, Macro_Size)							\
{																						\
	const Type *pBeg1,*pEnd1,*pBeg2,*pEnd2;												\
	Type* pDst = Macro_Dst;																\
																						\
	if(*(Macro_Src + Macro_Mid - 1) > *(Macro_Src + Macro_Size - 1))					\
	{																					\
		pBeg2 = Macro_Src;																\
		pEnd2 = pBeg1 = Macro_Src + Macro_Mid;											\
		pEnd1 = Macro_Src + Macro_Size;													\
	}																					\
	else																				\
	{																					\
		pBeg1 = Macro_Src;																\
		pEnd1 = pBeg2 = Macro_Src + Macro_Mid;											\
		pEnd2 = Macro_Src + Macro_Size;													\
	}																					\
																						\
	while(pBeg1 < pEnd1)																\
	{																					\
		if(*pBeg2 < *pBeg1)																\
		{																				\
			*pDst = *pBeg2;	++pDst; ++pBeg2;											\
		}																				\
		else																			\
		{																				\
			*pDst = *pBeg1;	++pDst; ++pBeg1;											\
		}																				\
	}																					\
	do { *pDst = *pBeg2; ++pDst; ++pBeg2; }	while(pBeg2 < pEnd2);						\
}

//归并排序函数_递归版本
template<class Type>
__inline static BYTE XPartition(Type* pSrc1, Type* pSrc2, uint32 nSize, BYTE bMode)
{
	BYTE    bModeA = bMode, bModeB = bMode;
	uint32  nMid =  nSize >> 1;

	if( 2 == nMid)
	{	// 数据必然在pSrc1中
		if(pSrc1[0] > pSrc1[1])
		{
			const Type t = pSrc1[0]; pSrc1[0] = pSrc1[1]; pSrc1[1] = t;
		}
	}
	else if ( 2 < nMid)
	{	// 对[0, nMid)排序
		bModeA = XPartition(pSrc1, pSrc2, nMid, bMode);
	}
	
	if( nMid + 2 == nSize )
	{ 
		// 数据必然在pSrc1中
		if(pSrc1[nMid] > pSrc1[nMid + 1])
		{
			const Type t = pSrc1[nMid]; pSrc1[nMid] = pSrc1[nMid + 1]; pSrc1[nMid + 1] = t;
		}
	}
	else if( nMid + 2 < nSize )
	{   // 对[nMid, nSize)排序
		bModeB = XPartition(pSrc1 + nMid, pSrc2 + nMid, nSize - nMid, bMode);
	}

	// 合并[0, nMid)和[nMid, m_nSize)的已排序数据
#ifdef XUSE_MACRO_MERGE
	XMERGE(pSrc1, pSrc2, nMid, nSize, bModeA, bModeB);
#else
	XMerge(pSrc1, pSrc2, nMid, nSize, bModeA, bModeB);
#endif

	return !bModeA;
}

// 归并排序函数_非递归版本
template<class Type>
__inline static BYTE XPartitionStack(Type* pSrc1, Type* pSrc2, int nSize)
{
	int i,nBeforeLen;	//合并前序列的长度
	int nAfterLen = 1;  //合并后序列的长度
	BYTE bMode = XDATA_IN_FRONT;

	// 直接展开长度为2的归并操作
	--nSize;
	for (i = 0; i < nSize; i += 2)
	{	
		if(pSrc1[i] > pSrc1[i + 1])
		{
			const Type t = pSrc1[i]; pSrc1[i] = pSrc1[i + 1]; pSrc1[i + 1] = t;
		}
	}
	++nSize;

	for (nBeforeLen = 2; nAfterLen < nSize; nBeforeLen = nAfterLen, bMode = !bMode)
	{
		//开始合并时第一个序列的起始位置下标,每次都是从0开始
		//合并后序列的长度是合并前的两倍
		nAfterLen = nBeforeLen << 1; 

		if(bMode)
		{
			nSize -= nAfterLen;
			for(i = 0; i < nSize; i += nAfterLen)
			{
				XMERGE_EXT(pSrc1 + i, pSrc2 + i, nBeforeLen, nAfterLen);
			}
			nSize += nAfterLen;

			if (i + nBeforeLen < nSize) 
			{
				XMERGE_EXT(pSrc1 + i, pSrc2 + i, nBeforeLen, nSize - i);
			}
			else
			{
				for(; i < nSize; ++i) pSrc2[i] = pSrc1[i];
			}
		}
		else
		{
			nSize -= nAfterLen;
			for(i = 0; i < nSize; i += nAfterLen)
			{
				XMERGE_EXT(pSrc2 + i, pSrc1 + i, nBeforeLen, nAfterLen);
			}
			nSize += nAfterLen;

			if (i + nBeforeLen < nSize) 
			{
				XMERGE_EXT(pSrc2 + i, pSrc1 + i, nBeforeLen, nSize - i);
			}
			else
			{
				for(; i < nSize; ++i) pSrc1[i] = pSrc2[i];
			}
		}
	}
	
	return bMode;
}

// 归并排序Task类
template<class Type>
class CXMergeSortTask: public tbb::task 
{
	Type* const m_pSrc1;
	Type* const m_pSrc2;
	uint32		m_nSize;
	BYTE* const	m_pMode;
	BYTE		m_bModeA;
	BYTE		m_bModeB;
	BOOL		m_bIsContinuation;
public:
	CXMergeSortTask( Type* pSrc1, Type* pSrc2, uint32 nSize, BYTE* pMode)
		: m_pSrc1(pSrc1), m_pSrc2(pSrc2), m_nSize(nSize), m_pMode(pMode), 
		m_bModeA(*m_pMode), m_bModeB(*m_pMode), m_bIsContinuation(FALSE) { }

	tbb::task* execute() 
	{
		tbb::task	*pNextA = NULL,*pNextB = NULL;

		if(m_nSize < 1024 * 32)
		{	
			// 如果需要排序的数组小于1024 * 32直接调用递归归并排序
			//*m_pMode = XPartition(m_pSrc1, m_pSrc2, m_nSize, *m_pMode);
			// 如果需要排序的数组小于1024 * 32直接调用非递归归并排序
			*m_pMode = XPartitionStack<Type>(m_pSrc1, m_pSrc2, (int)m_nSize);
		}
		else
		{
			const uint32 nMid = m_nSize >> 1;

			if( !m_bIsContinuation ) 
			{	
				// 分配两个Task进行[0, nMid)和[nMid, m_nSize)的排序
				pNextA = new( allocate_child() ) CXMergeSortTask(m_pSrc1, m_pSrc2, nMid, &m_bModeA);

				pNextB = new( allocate_child() ) CXMergeSortTask(m_pSrc1 + nMid, m_pSrc2 + nMid, m_nSize - nMid, &m_bModeB);
				
				m_bIsContinuation = TRUE;

				recycle_as_continuation();

				set_ref_count(2);

				spawn(*pNextB);
			}
			else
			{
				// 合并[0, nMid)和[nMid, m_nSize)的已排序数据
#ifdef XUSE_MACRO_MERGE
				XMERGE(m_pSrc1, m_pSrc2, nMid, m_nSize, m_bModeA, m_bModeB);
#else
				XMerge(m_pSrc1, m_pSrc2, nMid, m_nSize, m_bModeA, m_bModeB);
#endif
				*m_pMode = !m_bModeA;
			}
		}

		return pNextA;
	}
};

//合并排序_串行版本
template<class Type>
void XMergeSortSerial(Type* pSrc, uint32 nSize)
{
	if(nSize == 2)
	{
		if(pSrc[0] > pSrc[1])
		{
			const Type t = pSrc[0]; pSrc[0] = pSrc[1]; pSrc[1] = t;
		}
	}
	else if(nSize > 2)
	{
		// 分配临时空间
		Type* pTmp = (Type*)scalable_malloc(sizeof(pSrc[0]) * nSize);

		// 进行归并排序
		if(!XPartition(pSrc, pTmp, nSize, XDATA_IN_FRONT))
		{
			// 如果最终数据存放于pTmp中将其复制到pSrc中
			XMEMCOPY(pTmp, pSrc, nSize * sizeof(pSrc[0]));
		}

		// 释放临时空间
		scalable_free(pTmp);
	}
}

//合并排序_并行版本
template<class Type>
void XMergeSortParallel(Type* pSrc, uint32 nSize) 
{
	if(nSize == 2)
	{
		if(pSrc[0] > pSrc[1])
		{
			const Type t = pSrc[0]; pSrc[0] = pSrc[1]; pSrc[1] = t;
		}
	}
	else if(nSize > 2)
	{
		BYTE bMode = XDATA_IN_FRONT;

		// 分配临时空间
		Type* pTmp = (Type*)scalable_malloc(sizeof(pSrc[0]) * nSize);
		
		// 进行归并排序
		CXMergeSortTask<Type>& xtask = *new(tbb::task::allocate_root()) CXMergeSortTask<Type>(pSrc, pTmp, nSize, &bMode);

		tbb::task::spawn_root_and_wait(xtask);

		if(!bMode)
		{
			// 如果最终数据存放于pTmp中将其复制到pSrc中
			XMEMCOPY(pTmp, pSrc, nSize * sizeof(pSrc[0]));
		}

		// 释放临时空间
		scalable_free(pTmp);
	}
}


/*
#include "XFileMap.h"

//合并排序+IO_并行版本task类
template<class Type>
class CXMergeSort_IO_Task: public tbb::task 
{
	XFILEMAPHANDLE		m_hFileMap;
	uint32		m_nFileSize;
	uint32		m_nPosBeg;
	uint32		m_nPosEnd;
	XSortArray** m_ppAry;
	XSortArray*	m_pAryTmp;
	BOOL		m_bIsContinuation;
public:
	CXMergeSort_IO_Task( XFILEMAPHANDLE hFileMap, uint32 nFileSize, uint32 nPosBeg, uint32 nPosEnd, XSortArray** ppAry)
		: m_hFileMap(hFileMap), m_nFileSize(nFileSize), 
		m_nPosBeg(nPosBeg), m_nPosEnd(nPosEnd), 
		m_ppAry(ppAry), m_pAryTmp(NULL), m_bIsContinuation(FALSE) { }

	__inline static void XMergeAry(XSortArray* pSrc1, XSortArray* pSrc2)
	{
		int nDataCnt = pSrc1->nData + pSrc2->nData;
		XSortArrayResize(pSrc1, nDataCnt);
		XSortArrayResize(pSrc2, nDataCnt);
		XMEMCOPY(pSrc1->pData, pSrc2->pData + pSrc2->nData, pSrc1->nData * sizeof(pSrc1->pData[0]));
		XMerge(pSrc1->pData, pSrc2->pData, pSrc2->nData, nDataCnt, FALSE, FALSE);
		pSrc1->nData = nDataCnt;
	}

	XSortArray* XLoadAndSort(XFILEMAPHANDLE hFileMap, uint32 nFileSize, uint32 nPosBeg, uint32 nPosEnd)
	{
		XSortArray* pArray  = XSortArrayCreate((nPosEnd - nPosBeg) / 6);
		XSortArray* pBuffer = NULL;

		const uint32 nMapSize = min(nPosEnd + 12, nFileSize) - nPosBeg;
		const char *pBeg = (const char*)XFileMapView(hFileMap, XFILEMAP_READ, nPosBeg, nMapSize);
		if(pBeg == XMAPMEMINVALID) Error("XFileMapView Error!");

		// 定位当前块的数据尾
		const char *pEnd = pBeg + nPosEnd - nPosBeg;
		const char *pTmp = pBeg + nMapSize;

		while(pEnd < pTmp && *pEnd != '\n') ++pEnd;

		// 定位当前块的数据头
		pTmp = pBeg;
		while(pTmp < pEnd && *pTmp != '\n') ++pTmp;
		while(pTmp < pEnd && (*pTmp < '0' || *pTmp > '9')) ++pTmp;

		// 开始读取数据
		uint32  nRead = 0;

		while(pTmp < pEnd) 
		{
			XREADNUM(pTmp, pEnd, pArray->pData[nRead])
			// 如果数据多于Array大小 扩展空间
			if(++nRead == pArray->nSize) XSortArrayResize(pArray, nRead * 5 / 4);
		}

		pBuffer = XSortArrayCreate(nRead);
		pArray->nData = pBuffer->nData = nRead;

		BOOL bMode = XPartition(pArray->pData, pBuffer->pData, pArray->nData);

		// 关闭映射
		XFileMapUnView((void*)pBeg, nMapSize);

		if(bMode) 
		{
			XSortArrayDestroy(&pBuffer);
			return pArray;
		}
		else
		{
			XSortArrayDestroy(&pArray);
			return pBuffer;
		}
	}

	tbb::task* execute() 
	{
		tbb::task	*pNextA = NULL, *pNextB = NULL;

		if(m_nPosEnd - m_nPosBeg <  1 << 20)
		{
			*m_ppAry = XLoadAndSort(m_hFileMap, m_nFileSize, m_nPosBeg, m_nPosEnd);
		}
		else
		{
			uint32 nPosMid = (m_nPosEnd + m_nPosBeg) >> 17 << 16;

			if( !m_bIsContinuation ) 
			{	
				pNextA = new( allocate_child() ) CXMergeSort_IO_Task(m_hFileMap, m_nFileSize, m_nPosBeg, nPosMid, m_ppAry);

				pNextB = new( allocate_child() ) CXMergeSort_IO_Task(m_hFileMap, m_nFileSize, nPosMid, m_nPosEnd, &m_pAryTmp);

				m_bIsContinuation = TRUE;

				recycle_as_continuation();

				set_ref_count(2);

				spawn(*pNextB);
			}
			else
			{
				XMergeAry(*m_ppAry, m_pAryTmp);
				XSortArrayDestroy(&m_pAryTmp);
			}
		}

		XSortArrayDestroy(&m_pAryTmp);

		return pNextA;
	}
};


//合并排序+IO_并行版本
template<class Type>
XSortArray* XMergeSortParallel_IO(const char* szFile) 
{
	XSortArray* pArray = NULL;
	
	uint32  nFileSize = 0;

	XFILEMAPHANDLE hFileMap = XFileMapOpen(szFile, XFILEMAP_READ, nFileSize);

	if(hFileMap == NULL) Error("XFileMapOpen Error!");

	CXMergeSort_IO_Task<Type>& xtask = *new(tbb::task::allocate_root()) CXMergeSort_IO_Task<Type>(hFileMap, nFileSize, 0, nFileSize, &pArray);

	tbb::task::spawn_root_and_wait(xtask);

	XFileMapClose(&hFileMap);

	return pArray;
}
*/

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -