⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xamicablenum.cpp

📁 相亲数和亲和数的快速并行求解算法 intel 多线程优化大赛参赛作品
💻 CPP
字号:
/*/////////////////////////////////////////
// 版权所有(C)  2000-2008 邓辉           //
// Email:      denghui0815@hotmail.com  //
// 说明:       Intel线程优化大赛参赛作品//
/////////////////////////////////////////*/

#include "XAmicableNum.h"
#include "XQSort.h"

void usage(void)
{
	printf("Usage: XAmicableNum.exe BegNum EndNum\n");
	exit(1);
}

void Error (char *s)
{
	fprintf(stderr, "ERROR: %s\n",s);
	exit(1);
}

void Warning (char *s)
{
	fprintf(stderr, "WARNING: %s\n",s);
}

// 保存一个数字
#define XSAVENUM(pSave, nSave)													\
{																				\
	char  pBuffer[16];															\
	char* pBufferPtr = pBuffer;													\
	while(nSave >= 10)															\
	{																			\
		*pBufferPtr++ = (nSave % 10) + '0';										\
		nSave /= 10;															\
	}																			\
	*pBufferPtr = (nSave % 10) + '0';											\
	while(pBufferPtr >= pBuffer) { *pSave++ = *pBufferPtr--;}					\
}

// 保存一个字符串
#define XSAVESTR(pSave, pStr)													\
{																				\
	char* pTmp = pStr;															\
	while(*pTmp != '\0') *pSave++ = *pTmp++;									\
}

// 素数表
static uint32 g_xPrimeTab[0xFFFF] = {0};

// 保存数的因数和信息
typedef struct tagXNum
{
	uint32 nIndex;					// 数的值
	union 
	{
		struct  
		{
			uint32 nFacs;			// 真因数的和
			uint32 nNum;			// 数的值
		};
		unsigned __int64 nRate;		// 比率
	};
}XNum;

// 对XNum的比较操作
__inline bool operator < (const XNum& lhs, const XNum& rhs)
{	
	return lhs.nRate < rhs.nRate;
}

__inline bool operator <= (const XNum& lhs, const XNum& rhs)
{	
	return lhs.nRate <= rhs.nRate;
}

__inline bool operator > (const XNum& lhs, const XNum& rhs)
{	
	return lhs.nRate > rhs.nRate;
}

// 初始化素数表
void XInitPrimeTab()
{	//使用筛选法选取小于0xFFFF的所有素数
	uint32 nPrimeTab = sizeof(g_xPrimeTab) / sizeof(g_xPrimeTab[0]);
	uint32 i,j,nMax = sqrt(nPrimeTab) + 1;

	for(i = 3; i < nMax; i += 2)
	{
		if(g_xPrimeTab[i] == 0)
		{
			for(j = i * i ;j < nPrimeTab; j += i) g_xPrimeTab[j] = 1;
		}
	}

	j = 1; g_xPrimeTab[0] = 2;
	for(i = 3; i < nPrimeTab; i += 2)
	{
		if(g_xPrimeTab[i] == 0) g_xPrimeTab[j++] = i;
	}
}

// 计算a,b的最大公约数
__inline uint32 XGcd(uint32 a, uint32 b)
{
	uint32 r;

	do
	{
		if(b == 0) return a;
		r = a % b; //r a b
		if(r == 0) return b;
		a = b % r; //a b r
		if(a == 0) return r;
		b = r % a; //r a b
	}while(1);
}


// 计算n的所有真因数的和以及比率
__inline void XSumFacs(uint32 n, XNum& xNum)
{
	uint32 r = 1, tt = n;

	for(const uint32* pPrime = g_xPrimeTab; (*pPrime) * (*pPrime) <= tt; ++pPrime)
	{
		uint32 t = 1, p = 1;

		while(tt / (*pPrime) * (*pPrime) == tt)
		{	// 如果能够整除
			tt /= *pPrime;
			p  *= *pPrime;			// i^m   若n中有i^m
			t += p;					// i^0 + i^1 + i^2 + ... + i^m
		}

		r *= t;
	}

	if(tt != 1) r *= 1 + tt;
	xNum.nIndex = xNum.nNum = n;
	xNum.nFacs = r - n;
}

void XOutput(concurrent_vector< pair<uint32, uint32> >& vecAmicable, concurrent_vector< pair<uint32, uint32> >& vecFriendly)
{
	// 对数对进行排序
	parallel_sort< concurrent_vector< pair<uint32, uint32> >::iterator >(vecAmicable.begin(), vecAmicable.end());
	parallel_sort< concurrent_vector< pair<uint32, uint32> >::iterator >(vecFriendly.begin(), vecFriendly.end());

	// 分配输出缓存
	char* pBuffer = (char*)scalable_malloc(sizeof(char) * 1024 * 1024);
	char* pCur = pBuffer;
	char* pEnd = pBuffer + 1024 * 1024 - 128;
	if(pBuffer == NULL) Error("Buffer scalable_malloc Error!");
	concurrent_vector< pair<uint32, uint32> >::iterator iVec;

	// 输出结果
	for(iVec = vecAmicable.begin(); iVec != vecAmicable.end(); ++iVec) 
	{
#ifdef XUSE_ENGLISH
		XSAVENUM(pCur, iVec->first);
		XSAVESTR(pCur, " and ");
		XSAVENUM(pCur, iVec->second);
		XSAVESTR(pCur, " are AMICABLE\n");
#else
		XSAVENUM(pCur, iVec->first);
		XSAVESTR(pCur, " 和 ");
		XSAVENUM(pCur, iVec->second);
		XSAVESTR(pCur, " 是相亲数\n");
#endif		
		if(pCur > pEnd)
		{
			fwrite(pBuffer, pCur - pBuffer, 1, stdout);
			pCur = pBuffer;
		}
	}

	for(iVec = vecFriendly.begin(); iVec != vecFriendly.end(); ++iVec) 
	{
#ifdef XUSE_ENGLISH
		XSAVENUM(pCur, iVec->first);
		XSAVESTR(pCur, " and ");
		XSAVENUM(pCur, iVec->second);
		XSAVESTR(pCur, " are FRIENDLY\n");
#else
		XSAVENUM(pCur, iVec->first);
		XSAVESTR(pCur, " 和 ");
		XSAVENUM(pCur, iVec->second);
		XSAVESTR(pCur, " 是亲和数\n");
#endif	
		if(pCur > pEnd)
		{
			fwrite(pBuffer, pCur - pBuffer, 1, stdout);
			pCur = pBuffer;
		}
	}

	if(pCur != pBuffer) fwrite(pBuffer, pCur - pBuffer, 1, stdout);

	// 释放内存
	scalable_free(pBuffer);
}


// 查找[nBeg, nEnd]之间的相亲数对和亲和数对
void XSolveSerial(uint32 nBeg, uint32 nEnd)
{
	tick_count nBegTime,nEndTime;
	int i, nThread = 32;
	uint32 nCount = nEnd - nBeg + 1;

	if(XOUT_TIME) nBegTime = tick_count::now(); 

	XNum* pNums = (XNum*)scalable_malloc( (nEnd - nBeg + 8) * sizeof(pNums[0]));
	if(pNums == NULL) Error("Nums scalable_malloc Error!");

	// 用于保存相亲数对和亲和数对
	concurrent_vector< pair<uint32, uint32> > vecAmicable;
	concurrent_vector< pair<uint32, uint32> > vecFriendly;

	// 因数分解版本, 初始化质数表
	XInitPrimeTab();

	// 计算[nBeg, nEnd]的真因数和
	for(i = 0; i < nCount; ++i) XSumFacs(i + nBeg, pNums[i]);

	// 寻找相亲数
	for(i = 0; i < nCount; ++i)
	{
		const uint32 nCur = pNums[i].nFacs;
		if(i + nBeg < nCur && nCur <= nEnd && i + nBeg == pNums[nCur - nBeg].nFacs)
		{
			vecAmicable.push_back(make_pair(i + nBeg, nCur));
		}
	}

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("寻找相亲数时间:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}

	for(i = 0; i < nCount; ++i)
	{
		pNums[i].nRate /= XGcd(pNums[i].nNum, pNums[i].nFacs + pNums[i].nNum);
	}

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("计算比率时间:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}

	// 对比率信息进行排序
	XQSortParallel<XNum>(pNums, nCount);

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("按比率排序时间:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}

	// 寻找亲和数
	pNums[nCount].nRate = 0xFFFFFFFFFFFFFFFF;
	for(i = 0; i < nCount;)
	{
		const unsigned __int64 nRate = pNums[i].nRate;
		uint32 nBeg = i++;
		while(pNums[i].nRate == nRate) ++i;
		if(i != nBeg + 1)
		{
			for(;nBeg < i; ++nBeg)
			{
				for(uint32 j = nBeg + 1; j < i; ++j)
				{
					if(pNums[nBeg].nIndex < pNums[j].nIndex)
						vecFriendly.push_back(make_pair(pNums[nBeg].nIndex, pNums[j].nIndex));
					else
						vecFriendly.push_back(make_pair(pNums[j].nIndex,  pNums[nBeg].nIndex));
				}
			}
		}
	}

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("寻找亲和数时间:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}
	
	scalable_free(pNums);

	// 数据结果
	XOutput(vecAmicable, vecFriendly);
	
	if(XOUT_TIME) 
	{
		nEndTime  = tick_count::now(); 
		printf("输出时间:%f秒\n", (nEndTime - nBegTime).seconds());
	}
}

// 并行计算因数和Task
class CXClacFacsTask
{
	XNum*	m_pNums;
	uint32	m_nBeg;
	uint32  m_nCnt;
	uint32  m_nThread;
public:
	CXClacFacsTask(XNum* pNums, uint32 nBeg, uint32 nCnt, uint32 nThread)
		: m_pNums(pNums), m_nBeg(nBeg), m_nCnt(nCnt), m_nThread(nThread) {};

	void operator () (const blocked_range<uint32> & r) const
	{
		XNum*   pNums = m_pNums;
		uint32	nBeg = m_nBeg;
		uint32  nCnt = m_nCnt;

		for (uint32 i = r.begin(); i != r.end(); ++i) 
		{
			// 对分块[nCurBeg, nCurEnd]进行处理
			const uint32 nCurBeg = nCnt / m_nThread * i + nBeg;
			const uint32 nCurEnd = i == m_nThread - 1 ? nBeg + nCnt: nCurBeg + nCnt / m_nThread;
			const uint32 nCurCnt = nCurEnd - nBeg;
			// 数n = a × b  且 a < b  最大的a一定小于sqrt(n);
			const uint32 nEndMax = sqrt(nCurEnd);
			
			for(uint32 j = 2; j <= nEndMax; ++j)
			{
				// 对[2,nEndMax]每个因数 累计入[nCurBeg, nCurEnd]的真因数和中。
				if(j * j < nCurBeg)
				{	// 计算出[nCurBeg, nCurEnd]中最小的可整除j的数
					uint32 k = nCurBeg / j * j;
					if(k < nCurBeg)
					{
						if(nCurEnd - k >= j) 
							k += j;
						else
							continue;
					}
					uint32 t = k / j;

					// 对[nCurBeg, nCurEnd]中的数k = j * t 累加真因数 k + t
					for( k -= nBeg; k < nCurCnt; k += j) pNums[k].nFacs += j + (t++);
				}
				else
				{	//对 k = j * j 做特殊处理 累加真因数 j
					uint32 k = j * j - nBeg;
					uint32 t = j;
					pNums[k].nFacs += j;
					// 对[nCurBeg, nCurEnd]中的数k = j * t累加真因数 k + t
					for( k += j; k < nCurCnt; k += j) pNums[k].nFacs += j + (++t);
				}
			}
		}
	}
};

// 查找[nBeg, nEnd]之间的相亲数对和亲和数对
void XSolveParallel(uint32 nBeg, uint32 nEnd)
{
	tick_count nBegTime,nEndTime;
	int i, nThread = 32;
	uint32 nCount = nEnd - nBeg + 1;

	if(XOUT_TIME) nBegTime = tick_count::now(); 

	XNum* pNums = (XNum*)scalable_malloc( (nEnd - nBeg + 8) * sizeof(pNums[0]));
	if(pNums == NULL) Error("Nums scalable_malloc Error!");

	// 用于保存相亲数对和亲和数对
	concurrent_vector< pair<uint32, uint32> > vecAmicable;
	concurrent_vector< pair<uint32, uint32> > vecFriendly;

	// 初始化
#pragma omp parallel for schedule(guided, 1)
	for(i = 0; i < nCount; ++i)
	{
		pNums[i].nNum  = i + nBeg;
		pNums[i].nFacs = 1;
	}

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("初始化:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}

	// 规划线程数量
	nThread = nCount > 1024 * 1024 ? nCount / 32 / 1024 : 32;

	// 计算[nBeg, nEnd]的真因数和
	parallel_for(blocked_range<size_t>(0, nThread), CXClacFacsTask(pNums, nBeg, nCount, nThread));

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("计算因数和:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}

	// 寻找相亲数
#pragma omp parallel for  schedule(guided, 1)
	for(i = 0; i < nCount; ++i)
	{
		const uint32 nCur = pNums[i].nFacs;
		if(i + nBeg < nCur && nCur <= nEnd && i + nBeg == pNums[nCur - nBeg].nFacs)
		{
			vecAmicable.push_back(make_pair(i + nBeg, nCur));
		}
	}

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("寻找相亲数时间:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}

	// 计算亲和数的比率信息
	nThread = 32; 
	int nCnt[32] = {0};
#pragma omp parallel for schedule(guided, 1)
	for(i = 0; i < nThread; ++i)
	{
		int nCurBeg = nCount / nThread * i;
		int nCurEnd = i == nThread - 1 ? nCount : nCurBeg + nCount / nThread;
		int nCurCnt = nCurBeg;
		for(int j = nCurBeg; j < nCurEnd; ++j)
		{
			uint32 nGcd = XGcd(pNums[j].nNum, pNums[j].nFacs + pNums[j].nNum);
			if(nGcd != 1)
			{	//如果分子和分母的最大公约数为1,那么该数为孤独数.
				pNums[nCurCnt].nIndex = pNums[j].nNum;
				pNums[nCurCnt++].nRate = pNums[j].nRate / nGcd;
			}
		}

		nCnt[i] = nCurCnt - nCurBeg;
	}

	// 将所有的非孤独数整理到连续的内存中
	XNum* pCurNums = pNums + nCnt[0];
	for(i = 1; i < nThread; ++i)
	{
		if(nCnt[i] > 0)
		{
			XMEMMOVE(pCurNums, pNums + nCount / nThread * i, nCnt[i] * sizeof(pNums[0]));
			pCurNums += nCnt[i];
		}
	}

	nCount = pCurNums - pNums;

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("计算比率时间:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}

	// 对比率信息进行排序
	XQSortParallel<XNum>(pNums, nCount);

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("按比率排序时间:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}

	// 寻找亲和数
	pNums[nCount].nRate = 0xFFFFFFFFFFFFFFFF;
	for(i = 0; i < nCount;)
	{
		const unsigned __int64 nRate = pNums[i].nRate;
		uint32 nBeg = i++;
		while(pNums[i].nRate == nRate) ++i;
		if(i != nBeg + 1)
		{
			for(;nBeg < i; ++nBeg)
			{
				for(uint32 j = nBeg + 1; j < i; ++j)
				{
					if(pNums[nBeg].nIndex < pNums[j].nIndex)
						vecFriendly.push_back(make_pair(pNums[nBeg].nIndex, pNums[j].nIndex));
					else
						vecFriendly.push_back(make_pair(pNums[j].nIndex,  pNums[nBeg].nIndex));
				}
			}
		}
	}

	if(XOUT_TIME) 
	{
		nEndTime = tick_count::now(); 
		printf("寻找亲和数时间:%f秒\n", (nEndTime - nBegTime).seconds());
		nBegTime = tick_count::now(); 
	}

	scalable_free(pNums);

	// 数据结果
	XOutput(vecAmicable, vecFriendly);

	if(XOUT_TIME) 
	{
		nEndTime  = tick_count::now(); 
		printf("输出时间:%f秒\n", (nEndTime - nBegTime).seconds());
	}
}

int main(int argc, char* argv[])
{
	// 创建任务scheduler
	task_scheduler_init init;
	tick_count nBegTime,nEndTime;
	uint32 nBeg = 2, nEnd = 5000000;
	BOOL bParallel = TRUE;

	// 参数解析
	if(argc >= 3)
	{
		sscanf(argv[1], "%ul", &nBeg);
		sscanf(argv[2], "%ul", &nEnd);
		if(nBeg == 0) nBeg = 2;
		if(argc >= 4 && argv[3][0] == 's') bParallel = FALSE;
	}

	if(nBeg >= nEnd) usage();

	nBegTime = tick_count::now(); 

	if(bParallel) 
	{	//并行算法
		XSolveParallel(nBeg, nEnd);
	}
	else
	{	//串行算法
		XSolveSerial(nBeg, nEnd);
	}

	nEndTime = tick_count::now(); 

	if(XOUT_TIME_ALL) printf("总时间:%f秒\n", (nEndTime - nBegTime).seconds());

	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -