ppc_altivec_util.h

来自「最著名最快的分子模拟软件」· C头文件 代码 · 共 1,756 行 · 第 1/5 页

H
1,756
字号
/* -*- mode: c; tab-width: 4; indent-tabs-mode: n; c-basic-offset: 4 -*-  * * $Id: ppc_altivec_util.h,v 1.4 2005/08/31 20:03:09 lindahl Exp $ *  * This file is part of Gromacs        Copyright (c) 1991-2004 * David van der Spoel, Erik Lindahl, University of Groningen. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org *  * And Hey: * Gnomes, ROck Monsters And Chili Sauce */#ifndef _ALTIVEC_UTIL_H_#define _ALTIVEC_UTIL_H_/** @file ppc_altivec_util.h * *  @brief Altivec utility functions for optimized kernels. * * This file contains static inline utility functions that accomplish * tasks like loading/storing coordinates and forces, generating constaints, * and loading table data. * * Due to all the static functions it might take a while to compile files * that include this header, but from a performance point of view it makes * a tremendous difference. */#include<stdio.h>/* altivec.h must be included on vanilla gcc-4.0, * but not on Apple gcc or the IBM compilers. */#ifdef HAVE_ALTIVEC_H#include <altivec.h>#endif/** Write contents of a SIMD FP variable on standard out. * * @internal *  * @param v    SIMD floating-point variable to print. */static void printvec(vector float v){	int i;	printf(" ");	for(i=0;i<4;i++)		printf("%8.5f ",*(((float *)&v)+i));	printf("\n");}/** Set SIMD unit to use non-java rounding mode. * * @internal *  * On most PowerPC processors, FP operations take an additional clock  * cycle when the default java rounding mode is used. We couldn't care less, * so we can save a couple of percent of runtime by using classical IEEE mode. 
*/static void set_non_java_mode(void){	vector unsigned short vsr1,vsr2;	vector unsigned int tmp;	vsr1=vec_mfvscr();	tmp=vec_sl(vec_splat_u32(1),vec_splat_u32(8));	vsr2=(vector unsigned short)vec_sl(tmp,vec_splat_u32(8));	vsr1=vec_or(vsr1,vsr2);	vec_mtvscr(vsr1);}  /** Create the SIMD FP constant 0.0 * *  This routine returns a SIMD variable filled with 0.0 in all four elements, *  without loading any data from memory. * *  @return SIMD FP 0.0 */static inline vector float vec_zero(void){	return vec_ctf(vec_splat_u32(0),0);}/** Create the SIMD FP constant 0.5**  This routine returns a SIMD variable filled with 0.5 in all four elements,*  without loading any data from memory.**  @return SIMD FP 0.5*/static inline vector float vec_half(void){	return vec_ctf(vec_splat_u32(1),1);}/** Create the SIMD FP constant 1.0**  This routine returns a SIMD variable filled with 1.0 in all four elements,*  without loading any data from memory.**  @return SIMD FP 1.0*/static inline vector float vec_one(void){	return vec_ctf(vec_splat_u32(1),0);}/** Create the SIMD FP constant 2.0**  This routine returns a SIMD variable filled with 2.0 in all four elements,*  without loading any data from memory.**  @return SIMD FP 2.0*/static inline vector float vec_two(void){	return vec_ctf(vec_splat_u32(2),0);}/** Create the SIMD FP constant 3.0**  This routine returns a SIMD variable filled with 3.0 in all four elements,*  without loading any data from memory.**  @return SIMD FP 3.0*/static inline vector float vec_three(void){	return vec_ctf(vec_splat_u32(3),0);}/** Create the SIMD FP constant 6.0**  This routine returns a SIMD variable filled with 6.0 in all four elements,*  without loading any data from memory.**  @return SIMD FP 6.0*/static inline vector float vec_six(void){	return vec_ctf(vec_splat_u32(6),0);}/** Create the SIMD FP constant 12.0**  This routine returns a SIMD variable filled with 12.0 in all four elements,*  without loading any data from memory.**  @return SIMD FP 12.0*/static 
inline vector float vec_twelve(void){	return vec_ctf(vec_splat_u32(12),0);}/** Load 3 floats from memory into elements 0-2 of a SIMD variable. * *  This routine loads 3 floating-point values from memory, which does not *  have to be aligned, and returns a SIMD variable with the values in the *  lower three elements. * *  @param address Pointer to values in memory. * *  @return SIMD FP variable with values in lower three elements. */static inline vector float load_xyz(float *address){	vector float c1,c2,c3;	vector unsigned char perm;  	perm              = vec_lvsl( 0, address ); 	c1                = vec_lde( 0, address );	c2                = vec_lde( 4, address );	c3                = vec_lde( 8, address );	c1                = vec_perm(c1,c1,perm);	c2                = vec_perm(c2,c2,perm);	c3                = vec_perm(c3,c3,perm);	c2                = vec_sld(c2,c2,4);	c3                = vec_sld(c3,c3,8);	c1                = vec_mergeh(c1,c3);  	return vec_mergeh(c1,c2);}/** Load four floats from unaligned memory **  This routine loads 4 floating-point values from memory, which does not*  have to be aligned, and returns a SIMD variable with the values.**  @param address Pointer to values in memory.**  @return SIMD FP variable with values.*/static inline vector float load_vector_unaligned(float *address){	vector unsigned char perm;	vector float low,high;  	perm              = vec_lvsl( 0, (int *) address ); 	low               = vec_ld(  0, address ); 	high              = vec_ld( 16, address );   	return vec_perm(low,high,perm);}/** Load FP variable from memory, spread into all elements of SIMD variable.**  This routine loads a single floating-point values from memory, which does not*  have to be aligned, and returns a SIMD variable with this values in all*  four elements.**  @param address Pointer to value in memory.**  @return SIMD FP variable with values in all elements.*/static inline vector float load_float_and_splat(float *address){	vector unsigned char perm;	
vector float tmp;	tmp               = vec_lde(0,address);	perm              = vec_lvsl(0,address);	tmp               = vec_perm(tmp,tmp,perm);	return vec_splat(tmp,0);}/** Load 4 non-consecutive floats into a single SIMD variable.**  This routine loads four floating-point values from different memory *  locations, which do not have to be aligned, and returns a SIMD variable *  with the four elements.**  @param float1  Pointer to first value.*  @param float2  Pointer to first value.*  @param float3  Pointer to first value.*  @param float4  Pointer to first value.**  @return SIMD FP variable with the four values.*/static inline vector float load_4_float(float *float1,             float *float2,             float *float3,             float *float4){	vector unsigned char xshift = vec_lvsl( 12, float1 ); 	vector unsigned char yshift = vec_lvsl( 12, float2 ); 	vector unsigned char zshift = vec_lvsl( 0, float3 ); 	vector unsigned char wshift = vec_lvsl( 0, float4 );   	vector float X = vec_lde( 0, float1 ); 	vector float Y = vec_lde( 0, float2 ); 	vector float Z = vec_lde( 0, float3 ); 	vector float W = vec_lde( 0, float4 );   	X = vec_perm( X, X, xshift); 	Y = vec_perm( Y, Y, yshift); 	Z = vec_perm( Z, Z, zshift); 	W = vec_perm( W, W, wshift);   	X = vec_mergeh( X, Y ); 	Z = vec_mergeh( Z, W ); 	return vec_sld( X, Z, 8 );}/** Load 3 non-consecutive floats into a single SIMD variable.**  This routine loads three floating-point values from different memory *  locations, which do not have to be aligned, and returns a SIMD variable *  with the values.**  @param float1  Pointer to first value.*  @param float2  Pointer to first value.*  @param float3  Pointer to first value.**  @return SIMD FP variable with values in lower three elements. 
The fourth
 *          element is undefined.
 */
static inline vector float load_3_float(float *float1,
             float *float2,
             float *float3)
{
	/* The offset-12 permutes leave the first two values in element 1,
	 * the offset-0 permute leaves the third value in element 0.
	 */
	vector unsigned char xshift = vec_lvsl( 12, float1 ); 
	vector unsigned char yshift = vec_lvsl( 12, float2 ); 
	vector unsigned char zshift = vec_lvsl( 0, float3 ); 
	vector float X = vec_lde( 0, float1 ); 
	vector float Y = vec_lde( 0, float2 ); 
	vector float Z = vec_lde( 0, float3 );   
	X = vec_perm( X, X, xshift); 
	Y = vec_perm( Y, Y, yshift); 
	Z = vec_perm( Z, Z, zshift);   
	/* X becomes (..,..,x,y); its upper half followed by the lower half
	 * of Z gives (x,y,z,..).
	 */
	X = vec_mergeh( X, Y ); 
	return vec_sld( X, Z, 8 );
}

/** Load 2 non-consecutive floats into a single SIMD variable.
 *
 *  This routine loads two floating-point values from different memory 
 *  locations, which do not have to be aligned, and returns a SIMD variable 
 *  with the values.
 *
 *  @param float1  Pointer to first value.
 *  @param float2  Pointer to second value.
 *
 *  @return SIMD FP variable with values in lower two elements. 
 *          Elements 3 and 4 are undefined.
 */
static inline vector float load_2_float(float *float1,
             float *float2)
{
	vector unsigned char xshift = vec_lvsl( 8, float1 ); 
	vector unsigned char yshift = vec_lvsl( 8, float2 );   
	vector float X = vec_lde( 0, float1 ); 
	vector float Y = vec_lde( 0, float2 ); 

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?