📄 xamicablenum.cpp
字号:
/*/////////////////////////////////////////
// 版权所有(C) 2000-2008 邓辉 //
// Email: denghui0815@hotmail.com //
// 说明: Intel线程优化大赛参赛作品//
/////////////////////////////////////////*/
#include "XAmicableNum.h"
#include "XQSort.h"
void usage(void)
{
printf("Usage: XAmicableNum.exe BegNum EndNum\n");
exit(1);
}
void Error (char *s)
{
fprintf(stderr, "ERROR: %s\n",s);
exit(1);
}
void Warning (char *s)
{
fprintf(stderr, "WARNING: %s\n",s);
}
// 保存一个数字
#define XSAVENUM(pSave, nSave) \
{ \
char pBuffer[16]; \
char* pBufferPtr = pBuffer; \
while(nSave >= 10) \
{ \
*pBufferPtr++ = (nSave % 10) + '0'; \
nSave /= 10; \
} \
*pBufferPtr = (nSave % 10) + '0'; \
while(pBufferPtr >= pBuffer) { *pSave++ = *pBufferPtr--;} \
}
// 保存一个字符串
#define XSAVESTR(pSave, pStr) \
{ \
char* pTmp = pStr; \
while(*pTmp != '\0') *pSave++ = *pTmp++; \
}
// 素数表
static uint32 g_xPrimeTab[0xFFFF] = {0};
// 保存数的因数和信息
typedef struct tagXNum
{
uint32 nIndex; // 数的值
union
{
struct
{
uint32 nFacs; // 真因数的和
uint32 nNum; // 数的值
};
unsigned __int64 nRate; // 比率
};
}XNum;
// 对XNum的比较操作
__inline bool operator < (const XNum& lhs, const XNum& rhs)
{
return lhs.nRate < rhs.nRate;
}
__inline bool operator <= (const XNum& lhs, const XNum& rhs)
{
return lhs.nRate <= rhs.nRate;
}
__inline bool operator > (const XNum& lhs, const XNum& rhs)
{
return lhs.nRate > rhs.nRate;
}
// 初始化素数表
void XInitPrimeTab()
{ //使用筛选法选取小于0xFFFF的所有素数
uint32 nPrimeTab = sizeof(g_xPrimeTab) / sizeof(g_xPrimeTab[0]);
uint32 i,j,nMax = sqrt(nPrimeTab) + 1;
for(i = 3; i < nMax; i += 2)
{
if(g_xPrimeTab[i] == 0)
{
for(j = i * i ;j < nPrimeTab; j += i) g_xPrimeTab[j] = 1;
}
}
j = 1; g_xPrimeTab[0] = 2;
for(i = 3; i < nPrimeTab; i += 2)
{
if(g_xPrimeTab[i] == 0) g_xPrimeTab[j++] = i;
}
}
// 计算a,b的最大公约数
__inline uint32 XGcd(uint32 a, uint32 b)
{
uint32 r;
do
{
if(b == 0) return a;
r = a % b; //r a b
if(r == 0) return b;
a = b % r; //a b r
if(a == 0) return r;
b = r % a; //r a b
}while(1);
}
// 计算n的所有真因数的和以及比率
__inline void XSumFacs(uint32 n, XNum& xNum)
{
uint32 r = 1, tt = n;
for(const uint32* pPrime = g_xPrimeTab; (*pPrime) * (*pPrime) <= tt; ++pPrime)
{
uint32 t = 1, p = 1;
while(tt / (*pPrime) * (*pPrime) == tt)
{ // 如果能够整除
tt /= *pPrime;
p *= *pPrime; // i^m 若n中有i^m
t += p; // i^0 + i^1 + i^2 + ... + i^m
}
r *= t;
}
if(tt != 1) r *= 1 + tt;
xNum.nIndex = xNum.nNum = n;
xNum.nFacs = r - n;
}
void XOutput(concurrent_vector< pair<uint32, uint32> >& vecAmicable, concurrent_vector< pair<uint32, uint32> >& vecFriendly)
{
// 对数对进行排序
parallel_sort< concurrent_vector< pair<uint32, uint32> >::iterator >(vecAmicable.begin(), vecAmicable.end());
parallel_sort< concurrent_vector< pair<uint32, uint32> >::iterator >(vecFriendly.begin(), vecFriendly.end());
// 分配输出缓存
char* pBuffer = (char*)scalable_malloc(sizeof(char) * 1024 * 1024);
char* pCur = pBuffer;
char* pEnd = pBuffer + 1024 * 1024 - 128;
if(pBuffer == NULL) Error("Buffer scalable_malloc Error!");
concurrent_vector< pair<uint32, uint32> >::iterator iVec;
// 输出结果
for(iVec = vecAmicable.begin(); iVec != vecAmicable.end(); ++iVec)
{
#ifdef XUSE_ENGLISH
XSAVENUM(pCur, iVec->first);
XSAVESTR(pCur, " and ");
XSAVENUM(pCur, iVec->second);
XSAVESTR(pCur, " are AMICABLE\n");
#else
XSAVENUM(pCur, iVec->first);
XSAVESTR(pCur, " 和 ");
XSAVENUM(pCur, iVec->second);
XSAVESTR(pCur, " 是相亲数\n");
#endif
if(pCur > pEnd)
{
fwrite(pBuffer, pCur - pBuffer, 1, stdout);
pCur = pBuffer;
}
}
for(iVec = vecFriendly.begin(); iVec != vecFriendly.end(); ++iVec)
{
#ifdef XUSE_ENGLISH
XSAVENUM(pCur, iVec->first);
XSAVESTR(pCur, " and ");
XSAVENUM(pCur, iVec->second);
XSAVESTR(pCur, " are FRIENDLY\n");
#else
XSAVENUM(pCur, iVec->first);
XSAVESTR(pCur, " 和 ");
XSAVENUM(pCur, iVec->second);
XSAVESTR(pCur, " 是亲和数\n");
#endif
if(pCur > pEnd)
{
fwrite(pBuffer, pCur - pBuffer, 1, stdout);
pCur = pBuffer;
}
}
if(pCur != pBuffer) fwrite(pBuffer, pCur - pBuffer, 1, stdout);
// 释放内存
scalable_free(pBuffer);
}
// 查找[nBeg, nEnd]之间的相亲数对和亲和数对
void XSolveSerial(uint32 nBeg, uint32 nEnd)
{
tick_count nBegTime,nEndTime;
int i, nThread = 32;
uint32 nCount = nEnd - nBeg + 1;
if(XOUT_TIME) nBegTime = tick_count::now();
XNum* pNums = (XNum*)scalable_malloc( (nEnd - nBeg + 8) * sizeof(pNums[0]));
if(pNums == NULL) Error("Nums scalable_malloc Error!");
// 用于保存相亲数对和亲和数对
concurrent_vector< pair<uint32, uint32> > vecAmicable;
concurrent_vector< pair<uint32, uint32> > vecFriendly;
// 因数分解版本, 初始化质数表
XInitPrimeTab();
// 计算[nBeg, nEnd]的真因数和
for(i = 0; i < nCount; ++i) XSumFacs(i + nBeg, pNums[i]);
// 寻找相亲数
for(i = 0; i < nCount; ++i)
{
const uint32 nCur = pNums[i].nFacs;
if(i + nBeg < nCur && nCur <= nEnd && i + nBeg == pNums[nCur - nBeg].nFacs)
{
vecAmicable.push_back(make_pair(i + nBeg, nCur));
}
}
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("寻找相亲数时间:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
for(i = 0; i < nCount; ++i)
{
pNums[i].nRate /= XGcd(pNums[i].nNum, pNums[i].nFacs + pNums[i].nNum);
}
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("计算比率时间:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
// 对比率信息进行排序
XQSortParallel<XNum>(pNums, nCount);
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("按比率排序时间:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
// 寻找亲和数
pNums[nCount].nRate = 0xFFFFFFFFFFFFFFFF;
for(i = 0; i < nCount;)
{
const unsigned __int64 nRate = pNums[i].nRate;
uint32 nBeg = i++;
while(pNums[i].nRate == nRate) ++i;
if(i != nBeg + 1)
{
for(;nBeg < i; ++nBeg)
{
for(uint32 j = nBeg + 1; j < i; ++j)
{
if(pNums[nBeg].nIndex < pNums[j].nIndex)
vecFriendly.push_back(make_pair(pNums[nBeg].nIndex, pNums[j].nIndex));
else
vecFriendly.push_back(make_pair(pNums[j].nIndex, pNums[nBeg].nIndex));
}
}
}
}
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("寻找亲和数时间:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
scalable_free(pNums);
// 数据结果
XOutput(vecAmicable, vecFriendly);
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("输出时间:%f秒\n", (nEndTime - nBegTime).seconds());
}
}
// 并行计算因数和Task
class CXClacFacsTask
{
XNum* m_pNums;
uint32 m_nBeg;
uint32 m_nCnt;
uint32 m_nThread;
public:
CXClacFacsTask(XNum* pNums, uint32 nBeg, uint32 nCnt, uint32 nThread)
: m_pNums(pNums), m_nBeg(nBeg), m_nCnt(nCnt), m_nThread(nThread) {};
void operator () (const blocked_range<uint32> & r) const
{
XNum* pNums = m_pNums;
uint32 nBeg = m_nBeg;
uint32 nCnt = m_nCnt;
for (uint32 i = r.begin(); i != r.end(); ++i)
{
// 对分块[nCurBeg, nCurEnd]进行处理
const uint32 nCurBeg = nCnt / m_nThread * i + nBeg;
const uint32 nCurEnd = i == m_nThread - 1 ? nBeg + nCnt: nCurBeg + nCnt / m_nThread;
const uint32 nCurCnt = nCurEnd - nBeg;
// 数n = a × b 且 a < b 最大的a一定小于sqrt(n);
const uint32 nEndMax = sqrt(nCurEnd);
for(uint32 j = 2; j <= nEndMax; ++j)
{
// 对[2,nEndMax]每个因数 累计入[nCurBeg, nCurEnd]的真因数和中。
if(j * j < nCurBeg)
{ // 计算出[nCurBeg, nCurEnd]中最小的可整除j的数
uint32 k = nCurBeg / j * j;
if(k < nCurBeg)
{
if(nCurEnd - k >= j)
k += j;
else
continue;
}
uint32 t = k / j;
// 对[nCurBeg, nCurEnd]中的数k = j * t 累加真因数 k + t
for( k -= nBeg; k < nCurCnt; k += j) pNums[k].nFacs += j + (t++);
}
else
{ //对 k = j * j 做特殊处理 累加真因数 j
uint32 k = j * j - nBeg;
uint32 t = j;
pNums[k].nFacs += j;
// 对[nCurBeg, nCurEnd]中的数k = j * t累加真因数 k + t
for( k += j; k < nCurCnt; k += j) pNums[k].nFacs += j + (++t);
}
}
}
}
};
// 查找[nBeg, nEnd]之间的相亲数对和亲和数对
void XSolveParallel(uint32 nBeg, uint32 nEnd)
{
tick_count nBegTime,nEndTime;
int i, nThread = 32;
uint32 nCount = nEnd - nBeg + 1;
if(XOUT_TIME) nBegTime = tick_count::now();
XNum* pNums = (XNum*)scalable_malloc( (nEnd - nBeg + 8) * sizeof(pNums[0]));
if(pNums == NULL) Error("Nums scalable_malloc Error!");
// 用于保存相亲数对和亲和数对
concurrent_vector< pair<uint32, uint32> > vecAmicable;
concurrent_vector< pair<uint32, uint32> > vecFriendly;
// 初始化
#pragma omp parallel for schedule(guided, 1)
for(i = 0; i < nCount; ++i)
{
pNums[i].nNum = i + nBeg;
pNums[i].nFacs = 1;
}
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("初始化:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
// 规划线程数量
nThread = nCount > 1024 * 1024 ? nCount / 32 / 1024 : 32;
// 计算[nBeg, nEnd]的真因数和
parallel_for(blocked_range<size_t>(0, nThread), CXClacFacsTask(pNums, nBeg, nCount, nThread));
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("计算因数和:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
// 寻找相亲数
#pragma omp parallel for schedule(guided, 1)
for(i = 0; i < nCount; ++i)
{
const uint32 nCur = pNums[i].nFacs;
if(i + nBeg < nCur && nCur <= nEnd && i + nBeg == pNums[nCur - nBeg].nFacs)
{
vecAmicable.push_back(make_pair(i + nBeg, nCur));
}
}
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("寻找相亲数时间:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
// 计算亲和数的比率信息
nThread = 32;
int nCnt[32] = {0};
#pragma omp parallel for schedule(guided, 1)
for(i = 0; i < nThread; ++i)
{
int nCurBeg = nCount / nThread * i;
int nCurEnd = i == nThread - 1 ? nCount : nCurBeg + nCount / nThread;
int nCurCnt = nCurBeg;
for(int j = nCurBeg; j < nCurEnd; ++j)
{
uint32 nGcd = XGcd(pNums[j].nNum, pNums[j].nFacs + pNums[j].nNum);
if(nGcd != 1)
{ //如果分子和分母的最大公约数为1,那么该数为孤独数.
pNums[nCurCnt].nIndex = pNums[j].nNum;
pNums[nCurCnt++].nRate = pNums[j].nRate / nGcd;
}
}
nCnt[i] = nCurCnt - nCurBeg;
}
// 将所有的非孤独数整理到连续的内存中
XNum* pCurNums = pNums + nCnt[0];
for(i = 1; i < nThread; ++i)
{
if(nCnt[i] > 0)
{
XMEMMOVE(pCurNums, pNums + nCount / nThread * i, nCnt[i] * sizeof(pNums[0]));
pCurNums += nCnt[i];
}
}
nCount = pCurNums - pNums;
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("计算比率时间:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
// 对比率信息进行排序
XQSortParallel<XNum>(pNums, nCount);
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("按比率排序时间:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
// 寻找亲和数
pNums[nCount].nRate = 0xFFFFFFFFFFFFFFFF;
for(i = 0; i < nCount;)
{
const unsigned __int64 nRate = pNums[i].nRate;
uint32 nBeg = i++;
while(pNums[i].nRate == nRate) ++i;
if(i != nBeg + 1)
{
for(;nBeg < i; ++nBeg)
{
for(uint32 j = nBeg + 1; j < i; ++j)
{
if(pNums[nBeg].nIndex < pNums[j].nIndex)
vecFriendly.push_back(make_pair(pNums[nBeg].nIndex, pNums[j].nIndex));
else
vecFriendly.push_back(make_pair(pNums[j].nIndex, pNums[nBeg].nIndex));
}
}
}
}
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("寻找亲和数时间:%f秒\n", (nEndTime - nBegTime).seconds());
nBegTime = tick_count::now();
}
scalable_free(pNums);
// 数据结果
XOutput(vecAmicable, vecFriendly);
if(XOUT_TIME)
{
nEndTime = tick_count::now();
printf("输出时间:%f秒\n", (nEndTime - nBegTime).seconds());
}
}
int main(int argc, char* argv[])
{
// 创建任务scheduler
task_scheduler_init init;
tick_count nBegTime,nEndTime;
uint32 nBeg = 2, nEnd = 5000000;
BOOL bParallel = TRUE;
// 参数解析
if(argc >= 3)
{
sscanf(argv[1], "%ul", &nBeg);
sscanf(argv[2], "%ul", &nEnd);
if(nBeg == 0) nBeg = 2;
if(argc >= 4 && argv[3][0] == 's') bParallel = FALSE;
}
if(nBeg >= nEnd) usage();
nBegTime = tick_count::now();
if(bParallel)
{ //并行算法
XSolveParallel(nBeg, nEnd);
}
else
{ //串行算法
XSolveSerial(nBeg, nEnd);
}
nEndTime = tick_count::now();
if(XOUT_TIME_ALL) printf("总时间:%f秒\n", (nEndTime - nBegTime).seconds());
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -