📄 xfloyed.h
字号:
/*/////////////////////////////////////////
// 版权所有(C) 2000-2008 邓辉 //
// Email: denghui0815@hotmail.com //
// 说明: Intel线程优化大赛参赛作品//
/////////////////////////////////////////*/
#ifndef XFLOYED_HEAD
#define XFLOYED_HEAD
#include "XShortestPub.h"
// Floyed算法
template <class Type>
void XFloyedSerial(XGraph<Type>* pGraph)
{
int nNode = pGraph->nNode;
Type** ppDistance = pGraph->ppDistance;
for (int i = 0; i < nNode; ++i)
{
for (int j = 0; j < nNode; ++j)
{
if(ppDistance[j][i] != XDISTANCE_INFINITY)
{
for (int k = 0; k < nNode; ++k)
{
if(ppDistance[i][k] + ppDistance[j][i] < ppDistance[j][k])
{
ppDistance[j][k] = ppDistance[j][i] + ppDistance[i][k];
}
}
}
}
}
}
template <class Type>
void XFloyedParallel(XGraph<Type>* pGraph)
{
int nNode = pGraph->nNode;
Type** ppDistance = pGraph->ppDistance;
for (int i = 0; i < nNode; ++i)
{
#pragma omp parallel for schedule(guided, 1)
for (int j = 0; j < nNode; ++j)
{
if(ppDistance[j][i] != XDISTANCE_INFINITY)
{
for (int k = 0; k < nNode; ++k)
{
if(ppDistance[i][k] + ppDistance[j][i] < ppDistance[j][k])
{
ppDistance[j][k] = ppDistance[j][i] + ppDistance[i][k];
}
}
}
}
}
}
// Floyed算法 并行版本
template <>
void XFloyedParallel<uint32>( XGraph<uint32>* pGraph )
{
int nNode = pGraph->nNode;
int nNodeSSE = (nNode + 3) >> 2;
uint32** ppDistance = pGraph->ppDistance;
for (int i = 0; i != nNode; ++ i)
{
#pragma omp parallel for schedule(guided, 1)
for (int j = 0; j < nNode; ++j)
{
if(ppDistance[j][i] != XDISTANCE_INFINITY)
{
__m128i* pTmpJ = (__m128i*)ppDistance[j];
__m128i* pTmpI = (__m128i*)ppDistance[i];
const __m128i xIJ = _mm_set1_epi32(ppDistance[j][i]);
for (int k = 0; k < nNodeSSE; ++k)
{
#ifdef XUSE_SSE4
pTmpJ[k] = _mm_min_epi32(_mm_add_epi32(xIJ , pTmpI[k]), pTmpJ[k]);
#else
__m128i xAdd = _mm_add_epi32(xIJ, pTmpI[k]);
__m128i xMask = _mm_cmplt_epi32(xAdd, pTmpJ[k]);
pTmpJ[k] = _mm_or_si128(_mm_andnot_si128(xMask, pTmpJ[k]), _mm_and_si128(xMask, xAdd));
#endif
}
}
}
}
}
// Floyed算法 串行版本
template <>
void XFloyedSerial<uint32>( XGraph<uint32>* pGraph )
{
int nNode = pGraph->nNode;
int nNodeSSE = (nNode + 3) >> 2;
uint32** ppDistance = pGraph->ppDistance;
for (int i = 0; i != nNode; ++ i)
{
for (int j = 0; j < nNode; ++j)
{
if(ppDistance[j][i] != XDISTANCE_INFINITY)
{
__m128i* pTmpJ = (__m128i*)ppDistance[j];
__m128i* pTmpI = (__m128i*)ppDistance[i];
const __m128i xIJ = _mm_set1_epi32(ppDistance[j][i]);
for (int k = 0; k < nNodeSSE; ++k)
{
#ifdef XUSE_SSE4
pTmpJ[k] = _mm_min_epi32(_mm_add_epi32(xIJ , pTmpI[k]), pTmpJ[k]);
#else
__m128i xAdd = _mm_add_epi32(xIJ, pTmpI[k]);
__m128i xMask = _mm_cmplt_epi32(xAdd, pTmpJ[k]);
pTmpJ[k] = _mm_or_si128(_mm_andnot_si128(xMask, pTmpJ[k]), _mm_and_si128(xMask, xAdd));
#endif
}
}
}
}
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -