⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 is.c

📁 openmp版的benchmark
💻 C
📖 第 1 页 / 共 2 页
字号:
/*--------------------------------------------------------------------

  NAS Parallel Benchmarks 2.3 OpenMP C versions - IS

  This benchmark is an OpenMP C version of the NPB IS code.

  The OpenMP C versions are developed by RWCP and derived from the serial
  Fortran versions in "NPB 2.3-serial" developed by NAS.

  Permission to use, copy, distribute and modify this software for any
  purpose with or without fee is hereby granted.
  This software is provided "as is" without express or implied warranty.

  Send comments on the OpenMP C versions to pdp-openmp@rwcp.or.jp

  Information on OpenMP activities at RWCP is available at:

           http://pdplab.trc.rwcp.or.jp/pdperf/Omni/

  Information on NAS Parallel Benchmarks 2.3 is available at:

           http://www.nas.nasa.gov/NAS/NPB/

--------------------------------------------------------------------*/
/*--------------------------------------------------------------------

  Author: M. Yarrow

  OpenMP C version: S. Satoh

--------------------------------------------------------------------*/

#include "npbparams.h"
#include <stdlib.h>
#include <stdio.h>
#if defined(_OPENMP)
#include <omp.h>
#endif /* _OPENMP */

/*****************************************************************/
/* For serial IS, buckets are not really req'd to solve NPB1 IS  */
/* spec, but their use on some machines improves performance, on */
/* other machines the use of buckets compromises performance,    */
/* probably because it is extra computation which is not req'd.  */
/* (Note: Mechanism not understood, probably cache related)      */
/* Example:  SP2-66MhzWN:  50% speedup with buckets              */
/* Example:  SGI Indy5000: 50% slowdown with buckets             */
/* Example:  SGI O2000:   400% slowdown with buckets (Wow!)      */
/*****************************************************************/
/* #define USE_BUCKETS  */
/* buckets are not used in the OpenMP C version */


/******************/
/* default values */
/******************/
#ifndef CLASS
#define CLASS 'S'
#endif


/*************/
/*  CLASS S  */
/*************/
#if CLASS == 'S'
#define  TOTAL_KEYS_LOG_2    16
#define  MAX_KEY_LOG_2       11
#define  NUM_BUCKETS_LOG_2   9
#endif


/*************/
/*  CLASS W  */
/*************/
#if CLASS == 'W'
#define  TOTAL_KEYS_LOG_2    20
#define  MAX_KEY_LOG_2       16
#define  NUM_BUCKETS_LOG_2   10
#endif

/*************/
/*  CLASS A  */
/*************/
#if CLASS == 'A'
#define  TOTAL_KEYS_LOG_2    23
#define  MAX_KEY_LOG_2       19
#define  NUM_BUCKETS_LOG_2   10
#endif


/*************/
/*  CLASS B  */
/*************/
#if CLASS == 'B'
#define  TOTAL_KEYS_LOG_2    25
#define  MAX_KEY_LOG_2       21
#define  NUM_BUCKETS_LOG_2   10
#endif


/*************/
/*  CLASS C  */
/*************/
#if CLASS == 'C'
#define  TOTAL_KEYS_LOG_2    27
#define  MAX_KEY_LOG_2       23
#define  NUM_BUCKETS_LOG_2   10
#endif

/* Fail early with a readable message when CLASS selected none of the  */
/* parameter sets above; otherwise the array sizes below would expand  */
/* to undeclared identifiers.                                          */
#if !defined(TOTAL_KEYS_LOG_2) || !defined(MAX_KEY_LOG_2)
#error "Unsupported CLASS: expected one of 'S', 'W', 'A', 'B', 'C'"
#endif


#define  TOTAL_KEYS          (1 << TOTAL_KEYS_LOG_2)
#define  MAX_KEY             (1 << MAX_KEY_LOG_2)
#define  NUM_BUCKETS         (1 << NUM_BUCKETS_LOG_2)
#define  NUM_KEYS            TOTAL_KEYS
#define  SIZE_OF_BUFFERS     NUM_KEYS


#define  MAX_ITERATIONS      10
#define  TEST_ARRAY_SIZE     5


/*************************************/
/* Typedef: if necessary, change the */
/* size of int here by changing the  */
/* int type to, say, long            */
/*************************************/
typedef  int  INT_TYPE;


/********************/
/* Some global info */
/********************/
INT_TYPE *key_buff_ptr_global;         /* used by full_verify to get */
                                       /* copies of rank info        */

int      passed_verification;


/************************************/
/* These are the three main arrays. */
/* See SIZE_OF_BUFFERS def above    */
/************************************/
INT_TYPE key_array[SIZE_OF_BUFFERS],
         key_buff1[SIZE_OF_BUFFERS],
         key_buff2[SIZE_OF_BUFFERS],
         partial_verify_vals[TEST_ARRAY_SIZE];

#ifdef USE_BUCKETS
INT_TYPE bucket_size[NUM_BUCKETS],
         bucket_ptrs[NUM_BUCKETS];
#endif


/**********************/
/* Partial verif info */
/**********************/
INT_TYPE test_index_array[TEST_ARRAY_SIZE],
         test_rank_array[TEST_ARRAY_SIZE],

         S_test_index_array[TEST_ARRAY_SIZE] =
                             {48427,17148,23627,62548,4431},
         S_test_rank_array[TEST_ARRAY_SIZE] =
                             {0,18,346,64917,65463},

         W_test_index_array[TEST_ARRAY_SIZE] =
                             {357773,934767,875723,898999,404505},
         W_test_rank_array[TEST_ARRAY_SIZE] =
                             {1249,11698,1039987,1043896,1048018},

         A_test_index_array[TEST_ARRAY_SIZE] =
                             {2112377,662041,5336171,3642833,4250760},
         A_test_rank_array[TEST_ARRAY_SIZE] =
                             {104,17523,123928,8288932,8388264},

         B_test_index_array[TEST_ARRAY_SIZE] =
                             {41869,812306,5102857,18232239,26860214},
         B_test_rank_array[TEST_ARRAY_SIZE] =
                             {33422937,10244,59149,33135281,99},

         C_test_index_array[TEST_ARRAY_SIZE] =
                             {44172927,72999161,74326391,129606274,21736814},
         C_test_rank_array[TEST_ARRAY_SIZE] =
                             {61147,882988,266290,133997595,133525895};


/***********************/
/* function prototypes */
/***********************/
double	randlc( double *X, double *A );

void full_verify( void );

/*
 *    FUNCTION RANDLC (X, A)
 *
 *  This routine returns a uniform pseudorandom double precision number in the
 *  range (0, 1) by using the linear congruential generator
 *
 *  x_{k+1} = a x_k  (mod 2^46)
 *
 *  where 0 < x_k < 2^46 and 0 < a < 2^46.  This scheme generates 2^44 numbers
 *  before repeating.  The argument A is the same as 'a' in the above formula,
 *  and X is the same as x_0.  A and X must be odd double precision integers
 *  in the range (1, 2^46).  The returned value RANDLC is normalized to be
 *  between 0 and 1, i.e. RANDLC = 2^(-46) * x_1.  X is updated to contain
 *  the new seed x_1, so that subsequent calls to RANDLC using the same
 *  arguments will generate a continuous sequence.
 *
 *  This routine should produce the same results on any computer with at least
 *  48 mantissa bits in double precision floating point data.  On Cray systems,
 *  double precision should be disabled.
 *
 *  David H. Bailey     October 26, 1990
 *
 *     IMPLICIT DOUBLE PRECISION (A-H, O-Z)
 *     SAVE KS, R23, R46, T23, T46
 *     DATA KS/0/
 *
 *  If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46,
 *  T23 = 2 ^ 23, and T46 = 2 ^ 46.  These are computed in loops, rather than
 *  by merely using the ** operator, in order to insure that the results are
 *  exact on all systems.  This code assumes that 0.5D0 is represented exactly.
 */


/*****************************************************************/
/*************           R  A  N  D  L  C             ************/
/*************                                        ************/
/*************    portable random number generator    ************/
/*****************************************************************/

/*
 *  Parameters:
 *    X  in/out: current seed x_k; overwritten with the next seed x_{k+1}.
 *    A  in:     multiplier 'a'; read only, never written.
 *
 *  Returns 2^(-46) * x_{k+1}.
 *
 *  The function keeps the constants R23/R46/T23/T46 in static storage and
 *  computes them on the first call only (guarded by KS).
 */
double	randlc( double *X, double *A )
{
      static int        KS=0;
      static double	R23, R46, T23, T46;
      double		T1, T2, T3, T4;
      double		A1;
      double		A2;
      double		X1;
      double		X2;
      double		Z;
      int     		i, j;

      if (KS == 0)
      {
        /* First call: build 2^-23, 2^-46, 2^23 and 2^46 by repeated   */
        /* halving/doubling so the values are exact.                   */
        R23 = 1.0;
        R46 = 1.0;
        T23 = 1.0;
        T46 = 1.0;

        for (i=1; i<=23; i++)
        {
          R23 = 0.50 * R23;
          T23 = 2.0 * T23;
        }
        for (i=1; i<=46; i++)
        {
          R46 = 0.50 * R46;
          T46 = 2.0 * T46;
        }
        KS = 1;
      }

/*  Break A into two parts such that A = 2^23 * A1 + A2.  */

      T1 = R23 * *A;
      j  = T1;                 /* double -> int conversion truncates */
      A1 = j;
      A2 = *A - T23 * A1;

/*  Break X into two parts such that X = 2^23 * X1 + X2, compute
    Z = A1 * X2 + A2 * X1  (mod 2^23), and then
    X = 2^23 * Z + A2 * X2  (mod 2^46).                            */

      T1 = R23 * *X;
      j  = T1;
      X1 = j;
      X2 = *X - T23 * X1;
      T1 = A1 * X2 + A2 * X1;

      j  = R23 * T1;
      T2 = j;
      Z = T1 - T23 * T2;       /* Z = T1 mod 2^23 */
      T3 = T23 * Z + A2 * X2;
      j  = R46 * T3;
      T4 = j;
      *X = T3 - T46 * T4;      /* new seed = T3 mod 2^46 */
      return(R46 * *X);
}



/*****************************************************************/
/*************      C  R  E  A  T  E  _  S  E  Q      ************/
/*****************************************************************/

/*
 *  Fill key_array[0 .. NUM_KEYS-1] with pseudorandom keys.
 *
 *  Each key is the sum of four consecutive randlc() values multiplied by
 *  MAX_KEY/4 and converted to INT_TYPE on assignment.
 *
 *  Parameters:
 *    seed  initial seed handed to randlc(); passed by value, so the
 *          caller's variable is not modified.
 *    a     multiplier handed to randlc().
 */
void	create_seq( double seed, double a )
{
	double x;
	int    i, k;

        k = MAX_KEY/4;

	for (i=0; i<NUM_KEYS; i++)
	{
	    x = randlc(&seed, &a);
	    x += randlc(&seed, &a);
	    x += randlc(&seed, &a);
	    x += randlc(&seed, &a);

            key_array[i] = k*x;
	}
}



/*****************************************************************/
/*************    F  U  L  L  _  V  E  R  I  F  Y     ************/
/*****************************************************************/


void full_verify()
{
    INT_TYPE    i, j;
    INT_TYPE    k;
    INT_TYPE    m, unique_keys;

    /*  Now, finally, sort the keys:  */
    for( i=0; i<NUM_KEYS; i++ )
        key_array[--key_buff_ptr_global[key_buff2[i]]] = key_buff2[i];


/*  Confirm keys correctly sorted: count incorrectly sorted keys, if any */

    j = 0;
    for( i=1; i<NUM_KEYS; i++ )
        if( key_array[i-1] > key_array[i] )
            j++;


    if( j != 0 )
    {
        printf( "Full_verify: number of keys out of sort: %d\n",
                j );
    }
    else
        passed_verification++;

    /* NOTE(review): this listing is page 1 of 2 and stops here; the rest */
    /* of full_verify (at minimum its closing brace) is not visible.      */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -