📄 parallel_radixsort.h

📁 C/C++ 多任务下的数据结构与算法（周伟明）华中科技大学出版社
💻 H
字号:
/*
 * Copyright (c) 2006-2008
 * Author: Weiming Zhou
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies and
 * that both that copyright notice and this permission notice appear
 * in supporting documentation.  
 */

/* 
 *	Parallel_RadixSort.h
 *
 *	DESCRIPTION
 *		Interface to a Load Balanced Parallel RadixSort function.
 *	HISTORY
 *		10-11-2007	create by zhouweiming.
 *
 */
#ifndef __PARALLEL_RADIXSORT_H__
#define __PARALLEL_RADIXSORT_H__

#include <omp.h>
#include "CapiGlobal.h"
#include "CapiCommon.h"
#include "SerialRadixSort.h"
#include "CQuickSortRange.h"


/**	负载平衡的并行基数排序
    对数组进行并行基数排序

    @param	T *ppData - 待排序数据	
    @param	UINT uDataLen - 数据长度	
    @param	UINT uRadix - 基数	
    @param	UINT uMaxKeyLen - 最大关键词长度	
    @param	GETKEY GetKeyFunc - 关键词取位回调函数	
    @return	void - 无	
*/
template <class T, class GETKEY>
void Parallel_RadixSort_LBR(T *pData, UINT uDataLen,
                               UINT uRadix, UINT uMaxKeyLen, GETKEY GetKeyFunc)
{
    int i;
    int nProcessors = omp_get_num_procs();
    UINT uCount = uDataLen / nProcessors;

    if ( uDataLen - uCount * nProcessors > 2 )
    {
        uCount++;
    }

    T *pOutData = new T[uDataLen];
    DATA_ARRAY_TEMP<T>  **ppDataArray = new DATA_ARRAY_TEMP<T> *[nProcessors];
    UINT **ppuBoxDataCount = new UINT *[nProcessors];

    for ( i = 0; i < nProcessors; i++ )
    {
        ppDataArray[i] = new DATA_ARRAY_TEMP<T>[uRadix];
        ppuBoxDataCount[i] = new UINT[uRadix];
    }

    UINT uKeyIndex;

    for ( uKeyIndex = 0; uKeyIndex < uMaxKeyLen; uKeyIndex++ )
    {
        clock_t     t1, t2;
        //下面代码完成全局的一轮将数据分配到箱子里的操作

#pragma omp parallel for num_threads(nProcessors) schedule(static, 1)
        for ( i = 0; i < nProcessors; i++ )
        {
            //每个线程计算自己的数据起始位置,其中end是开区间位置
            UINT begin = i * uCount;
            UINT end = (i+1) * uCount;
            if ( end > uDataLen )
            {
                end = uDataLen;
            }

            T * pOut = pOutData + begin;

            T *p = pData + begin;
        t1 = clock();
           Serial_Partitioned<T>(p, end-begin, uRadix, uKeyIndex, GetKeyFunc, 
                ppuBoxDataCount[i], pOut, ppDataArray[i]);
        t2 = clock();

        printf( "Serial_Partitioned, ThreadId = %ld, time = %ld\n", omp_get_thread_num(), t2 - t1);

        }//for ( i = 0; i < nProcessors; i++ )

        UINT *puCount = ppuBoxDataCount[0];
#pragma omp parallel for num_threads(dtn(uRadix, 4096)) //最小循环次数设为多少需要测试
        for ( i = 0; i < (int)uRadix; i++ )
        {
            int k;
            for ( k = 1; k < nProcessors; k++ )
            {
                UINT * pk = ppuBoxDataCount[k];
                puCount[i] += pk[i];
            }
        }

        //计算前缀和
        for ( i = 1; i < (int)uRadix; i++ )
        {
            puCount[i] += puCount[i-1];
        }  
       
        //得到各段数据的起始位置
        for ( i = uRadix - 1; i > 0; i-- )
        {
            puCount[i] = puCount[i-1];
        }
        puCount[0] = 0;

        //收集操作，将pOutData里的数据重新放回ppData里
        int m;
        t1 = clock();

#pragma omp parallel for private(i) schedule(dynamic)
        for ( m = 0; m < (int)uRadix; m++ )
        {
            int nIndex = puCount[m];
            for ( i = 0; i < nProcessors; i++ )
            {
                DATA_ARRAY_TEMP<T> *pDataArray = ppDataArray[i];
                T *pDataTemp = pDataArray[m].pData;
                int k;
                for ( k = 0; k < (int)pDataArray[m].uCount; k++ )
                {
                    pData[nIndex] = pDataTemp[k];
                    ++nIndex;
                }
            }
        }
        //进行下一轮的分配与收集操作
        t2 = clock();

        printf( "Parallel Collected, time = %ld\n", t2 - t1);

    }	//for ( uKeyIndex = 0; uKeyIndex < uMaxKeyLen; uKeyIndex++ )

    //释放表
    for ( i = 0; i < nProcessors; i++ )
    {
        delete [] ppDataArray[i];
        delete [] ppuBoxDataCount[i];
    }

    delete [] ppDataArray;
    delete [] ppuBoxDataCount;
    delete [] pOutData;
    return;
}


/**	分区的并行基数排序
    对数组进行并行基数排序

    @param	T *ppData - 待排序数据	
    @param	UINT uDataLen - 数据长度	
    @param	UINT uRadix - 基数	
    @param	UINT uMaxKeyLen - 最大关键词长度	
    @param	GETKEY GetKeyFunc - 关键词取位回调函数	
    @return	void - 无	
*/
template <class T, class GETKEY>
void Parallel_RadixSort_PPR(T *pData, UINT uDataLen,
                               UINT uRadix, UINT uMaxKeyLen, GETKEY GetKeyFunc)
{
    int i;
    int nProcessors = omp_get_num_procs();
    UINT uCount = uDataLen / nProcessors;

    if ( uDataLen - uCount * nProcessors > 2 )
    {
        uCount++;
    }

    T *pOutData = new T[uDataLen];
    DATA_ARRAY_TEMP<T>  **ppDataArray = new DATA_ARRAY_TEMP<T> *[nProcessors];
    UINT **ppuBoxDataCount = new UINT *[nProcessors];

    for ( i = 0; i < nProcessors; i++ )
    {
        ppDataArray[i] = new DATA_ARRAY_TEMP<T>[uRadix];
        ppuBoxDataCount[i] = new UINT[uRadix+1];
    }


        //下面代码完成全局的一轮将数据分配到箱子里的操作

#pragma omp parallel for num_threads(nProcessors) schedule(static, 1)
        for ( i = 0; i < nProcessors; i++ )
        {
            //每个线程计算自己的数据起始位置,其中end是开区间位置
            UINT begin = i * uCount;
            UINT end = (i+1) * uCount;
            if ( end > uDataLen )
            {
                end = uDataLen;
            }

            T * pOut = pOutData + begin;

            T *p = pData + begin;
           Serial_Partitioned<T>(p, end-begin, uRadix, uMaxKeyLen-1, GetKeyFunc, 
                ppuBoxDataCount[i], pOut, ppDataArray[i]);

        }//for ( i = 0; i < nProcessors; i++ )

        UINT *puCount = ppuBoxDataCount[0];
#pragma omp parallel for num_threads(dtn(uRadix, 4096)) //最小循环次数设为多少需要测试
        for ( i = 0; i < (int)uRadix; i++ )
        {
            int k;
            for ( k = 1; k < nProcessors; k++ )
            {
                UINT * pk = ppuBoxDataCount[k];
                puCount[i] += pk[i];
            }
        }

        //计算前缀和
        for ( i = 1; i < (int)uRadix; i++ )
        {
            puCount[i] += puCount[i-1];
        }
       
        //得到各段数据的起始位置
        for ( i = uRadix - 1; i > 0; i-- )
        {
            puCount[i] = puCount[i-1];
        }
        puCount[0] = 0;

        //收集操作，将pOutData里的数据重新放回pData里
        int m;

#pragma omp parallel for private(i) schedule(dynamic)
        for ( m = 0; m < (int)uRadix; m++ )
        {
            int nIndex = puCount[m];
            for ( i = 0; i < nProcessors; i++ )
            {
                DATA_ARRAY_TEMP<T> *pDataArray = ppDataArray[i];
                T *pDataTemp = pDataArray[m].pData;
                int k;
                for ( k = 0; k < (int)pDataArray[m].uCount; k++ )
                {
                    pData[nIndex] = pDataTemp[k];
                    ++nIndex;
                }
            }
        }

    puCount[uRadix] = uDataLen;

//#pragma omp parallel for
    for( m = 0; m < (int)uRadix; m++ )
    {
        T *pInData = pData + puCount[m];
        T *pOut = pOutData + puCount[m];
        T *pTempData;
        UINT *puBoxData = new UINT[uRadix];
        DATA_ARRAY_TEMP<T> *pDataArray = new DATA_ARRAY_TEMP<T>[uRadix];

        for ( i = 0; i < (int)uMaxKeyLen - 1; i++ )
        {
            Serial_Partitioned(pInData, puCount[m+1] - puCount[m], uRadix, i, GetKeyFunc, 
                puBoxData, pOut, pDataArray);
            pTempData = pInData;
            pInData = pOut;
            pOut = pTempData;
        }
        if ( pOut != (pData + puCount[m]) )
        {
            for ( i = (int)puCount[m]; i < (int)puCount[m+1]; i++ )
            {
                pData[i] = pOutData[i];
            }
        }
        delete[] puBoxData;
        delete[] pDataArray;
    }
    

    //释放表
    for ( i = 0; i < nProcessors; i++ )
    {
        delete [] ppDataArray[i];
        delete [] ppuBoxDataCount[i];
    }

    delete [] ppDataArray;
    delete [] ppuBoxDataCount;
    delete [] pOutData;
    return;
}


#endif //__PARALLEL_RADIXSORT_H__
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -