⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 xfloyed.h

📁 最短路径的并行算法 采用TBB 需要intel编译器 速度很快
💻 H
字号:
/*/////////////////////////////////////////
// 版权所有(C)  2000-2008 邓辉           //
// Email:      denghui0815@hotmail.com  //
// 说明:       Intel线程优化大赛参赛作品//
/////////////////////////////////////////*/

#ifndef XFLOYED_HEAD
#define XFLOYED_HEAD

#include "XShortestPub.h"

// Floyed算法
template <class Type>
void XFloyedSerial(XGraph<Type>*	pGraph)
{
	int nNode = pGraph->nNode;
	Type** ppDistance = pGraph->ppDistance;

	for (int i = 0; i < nNode; ++i)
	{
		for (int j = 0; j < nNode; ++j)
		{ 
			if(ppDistance[j][i] != XDISTANCE_INFINITY)
			{
				for (int k = 0; k < nNode; ++k)
				{
					if(ppDistance[i][k] + ppDistance[j][i] < ppDistance[j][k])
					{
						ppDistance[j][k] = ppDistance[j][i] + ppDistance[i][k];
					}
				}
			}
		}
	}
}

template <class Type>
void XFloyedParallel(XGraph<Type>* pGraph)
{
	int nNode = pGraph->nNode;
	Type** ppDistance = pGraph->ppDistance;

	for (int i = 0; i < nNode; ++i)
	{
		#pragma omp parallel for schedule(guided, 1)
		for (int j = 0; j < nNode; ++j)
		{ 
			if(ppDistance[j][i] != XDISTANCE_INFINITY)
			{
				for (int k = 0; k < nNode; ++k)
				{
					if(ppDistance[i][k] + ppDistance[j][i] < ppDistance[j][k])
					{
						ppDistance[j][k] = ppDistance[j][i] + ppDistance[i][k];
					}
				}
			}
		}
	}
}

// Floyed算法 并行版本
template <>
void XFloyedParallel<uint32>( XGraph<uint32>* pGraph ) 
{
	int nNode = pGraph->nNode;
	int nNodeSSE = (nNode + 3) >> 2;
	uint32** ppDistance = pGraph->ppDistance;

	for (int i = 0; i != nNode; ++ i) 
	{
		#pragma omp parallel for schedule(guided, 1)
		for (int j = 0; j < nNode; ++j)
		{
			if(ppDistance[j][i] != XDISTANCE_INFINITY)
			{
				__m128i* pTmpJ = (__m128i*)ppDistance[j];
				__m128i* pTmpI = (__m128i*)ppDistance[i];
				const __m128i xIJ = _mm_set1_epi32(ppDistance[j][i]);

				for (int k = 0; k < nNodeSSE; ++k)
				{
#ifdef XUSE_SSE4
					pTmpJ[k] = _mm_min_epi32(_mm_add_epi32(xIJ , pTmpI[k]), pTmpJ[k]);
#else
					__m128i xAdd  = _mm_add_epi32(xIJ, pTmpI[k]);
					__m128i xMask = _mm_cmplt_epi32(xAdd, pTmpJ[k]);
					pTmpJ[k] = _mm_or_si128(_mm_andnot_si128(xMask, pTmpJ[k]), _mm_and_si128(xMask, xAdd));
#endif
				}
			}
		}
	}
}

// Floyed算法 串行版本
template <>
void XFloyedSerial<uint32>( XGraph<uint32>* pGraph ) 
{
	int nNode = pGraph->nNode;
	int nNodeSSE = (nNode + 3) >> 2;
	uint32** ppDistance = pGraph->ppDistance;
	for (int i = 0; i != nNode; ++ i) 
	{
		for (int j = 0; j < nNode; ++j)
		{
			if(ppDistance[j][i] != XDISTANCE_INFINITY)
			{
				__m128i* pTmpJ = (__m128i*)ppDistance[j];
				__m128i* pTmpI = (__m128i*)ppDistance[i];
				const __m128i xIJ = _mm_set1_epi32(ppDistance[j][i]);
				for (int k = 0; k < nNodeSSE; ++k)
				{
#ifdef XUSE_SSE4
					pTmpJ[k] = _mm_min_epi32(_mm_add_epi32(xIJ , pTmpI[k]), pTmpJ[k]);
#else
					__m128i xAdd  = _mm_add_epi32(xIJ, pTmpI[k]);
					__m128i xMask = _mm_cmplt_epi32(xAdd, pTmpJ[k]);
					pTmpJ[k] = _mm_or_si128(_mm_andnot_si128(xMask, pTmpJ[k]), _mm_and_si128(xMask, xAdd));
#endif
				}
			}
		}
	}
}


#endif		

																				

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -