ppc_altivec_util.h
来自「最著名最快的分子模拟软件」· C头文件 代码 · 共 1,756 行 · 第 1/5 页
H
1,756 行
/* -*- mode: c; tab-width: 4; indent-tabs-mode: n; c-basic-offset: 4 -*- * * $Id: ppc_altivec_util.h,v 1.4 2005/08/31 20:03:09 lindahl Exp $ * * This file is part of Gromacs Copyright (c) 1991-2004 * David van der Spoel, Erik Lindahl, University of Groningen. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org * * And Hey: * Gnomes, ROck Monsters And Chili Sauce */#ifndef _ALTIVEC_UTIL_H_#define _ALTIVEC_UTIL_H_/** @file ppc_altivec_util.h * * @brief Altivec utility functions for optimized kernels. * * This file contains static inline utility functions that accomplish * tasks like loading/storing coordinates and forces, generating constaints, * and loading table data. * * Due to all the static functions it might take a while to compile files * that include this header, but from a performance point of view it makes * a tremendous difference. */#include<stdio.h>/* altivec.h must be included on vanilla gcc-4.0, * but not on Apple gcc or the IBM compilers. */#ifdef HAVE_ALTIVEC_H#include <altivec.h>#endif/** Write contents of a SIMD FP variable on standard out. * * @internal * * @param v SIMD floating-point variable to print. */static void printvec(vector float v){ int i; printf(" "); for(i=0;i<4;i++) printf("%8.5f ",*(((float *)&v)+i)); printf("\n");}/** Set SIMD unit to use non-java rounding mode. * * @internal * * On most PowerPC processors, FP operations take an additional clock * cycle when the default java rounding mode is used. We couldn't care less, * so we can save a couple of percent of runtime by using classical IEEE mode. */static void set_non_java_mode(void){ vector unsigned short vsr1,vsr2; vector unsigned int tmp; vsr1=vec_mfvscr(); tmp=vec_sl(vec_splat_u32(1),vec_splat_u32(8)); vsr2=(vector unsigned short)vec_sl(tmp,vec_splat_u32(8)); vsr1=vec_or(vsr1,vsr2); vec_mtvscr(vsr1);} /** Create the SIMD FP constant 0.0 * * This routine returns a SIMD variable filled with 0.0 in all four elements, * without loading any data from memory. * * @return SIMD FP 0.0 */static inline vector float vec_zero(void){ return vec_ctf(vec_splat_u32(0),0);}/** Create the SIMD FP constant 0.5** This routine returns a SIMD variable filled with 0.5 in all four elements,* without loading any data from memory.** @return SIMD FP 0.5*/static inline vector float vec_half(void){ return vec_ctf(vec_splat_u32(1),1);}/** Create the SIMD FP constant 1.0** This routine returns a SIMD variable filled with 1.0 in all four elements,* without loading any data from memory.** @return SIMD FP 1.0*/static inline vector float vec_one(void){ return vec_ctf(vec_splat_u32(1),0);}/** Create the SIMD FP constant 2.0** This routine returns a SIMD variable filled with 2.0 in all four elements,* without loading any data from memory.** @return SIMD FP 2.0*/static inline vector float vec_two(void){ return vec_ctf(vec_splat_u32(2),0);}/** Create the SIMD FP constant 3.0** This routine returns a SIMD variable filled with 3.0 in all four elements,* without loading any data from memory.** @return SIMD FP 3.0*/static inline vector float vec_three(void){ return vec_ctf(vec_splat_u32(3),0);}/** Create the SIMD FP constant 6.0** This routine returns a SIMD variable filled with 6.0 in all four elements,* without loading any data from memory.** @return SIMD FP 6.0*/static inline vector float vec_six(void){ return vec_ctf(vec_splat_u32(6),0);}/** Create the SIMD FP constant 12.0** This routine returns a SIMD variable filled with 12.0 in all four elements,* without loading any data from memory.** @return SIMD FP 12.0*/static inline vector float vec_twelve(void){ return vec_ctf(vec_splat_u32(12),0);}/** Load 3 floats from memory into elements 0-2 of a SIMD variable. * * This routine loads 3 floating-point values from memory, which does not * have to be aligned, and returns a SIMD variable with the values in the * lower three elements. * * @param address Pointer to values in memory. * * @return SIMD FP variable with values in lower three elements. */static inline vector float load_xyz(float *address){ vector float c1,c2,c3; vector unsigned char perm; perm = vec_lvsl( 0, address ); c1 = vec_lde( 0, address ); c2 = vec_lde( 4, address ); c3 = vec_lde( 8, address ); c1 = vec_perm(c1,c1,perm); c2 = vec_perm(c2,c2,perm); c3 = vec_perm(c3,c3,perm); c2 = vec_sld(c2,c2,4); c3 = vec_sld(c3,c3,8); c1 = vec_mergeh(c1,c3); return vec_mergeh(c1,c2);}/** Load four floats from unaligned memory ** This routine loads 4 floating-point values from memory, which does not* have to be aligned, and returns a SIMD variable with the values.** @param address Pointer to values in memory.** @return SIMD FP variable with values.*/static inline vector float load_vector_unaligned(float *address){ vector unsigned char perm; vector float low,high; perm = vec_lvsl( 0, (int *) address ); low = vec_ld( 0, address ); high = vec_ld( 16, address ); return vec_perm(low,high,perm);}/** Load FP variable from memory, spread into all elements of SIMD variable.** This routine loads a single floating-point values from memory, which does not* have to be aligned, and returns a SIMD variable with this values in all* four elements.** @param address Pointer to value in memory.** @return SIMD FP variable with values in all elements.*/static inline vector float load_float_and_splat(float *address){ vector unsigned char perm; vector float tmp; tmp = vec_lde(0,address); perm = vec_lvsl(0,address); tmp = vec_perm(tmp,tmp,perm); return vec_splat(tmp,0);}/** Load 4 non-consecutive floats into a single SIMD variable.** This routine loads four floating-point values from different memory * locations, which do not have to be aligned, and returns a SIMD variable * with the four elements.** @param float1 Pointer to first value.* @param float2 Pointer to first value.* @param float3 Pointer to first value.* @param float4 Pointer to first value.** @return SIMD FP variable with the four values.*/static inline vector float load_4_float(float *float1, float *float2, float *float3, float *float4){ vector unsigned char xshift = vec_lvsl( 12, float1 ); vector unsigned char yshift = vec_lvsl( 12, float2 ); vector unsigned char zshift = vec_lvsl( 0, float3 ); vector unsigned char wshift = vec_lvsl( 0, float4 ); vector float X = vec_lde( 0, float1 ); vector float Y = vec_lde( 0, float2 ); vector float Z = vec_lde( 0, float3 ); vector float W = vec_lde( 0, float4 ); X = vec_perm( X, X, xshift); Y = vec_perm( Y, Y, yshift); Z = vec_perm( Z, Z, zshift); W = vec_perm( W, W, wshift); X = vec_mergeh( X, Y ); Z = vec_mergeh( Z, W ); return vec_sld( X, Z, 8 );}/** Load 3 non-consecutive floats into a single SIMD variable.** This routine loads three floating-point values from different memory * locations, which do not have to be aligned, and returns a SIMD variable * with the values.** @param float1 Pointer to first value.* @param float2 Pointer to first value.* @param float3 Pointer to first value.** @return SIMD FP variable with values in lower three elements. The fourth* element is undefined.*/static inline vector float load_3_float(float *float1, float *float2, float *float3){ vector unsigned char xshift = vec_lvsl( 12, float1 ); vector unsigned char yshift = vec_lvsl( 12, float2 ); vector unsigned char zshift = vec_lvsl( 0, float3 ); vector float X = vec_lde( 0, float1 ); vector float Y = vec_lde( 0, float2 ); vector float Z = vec_lde( 0, float3 ); X = vec_perm( X, X, xshift); Y = vec_perm( Y, Y, yshift); Z = vec_perm( Z, Z, zshift); X = vec_mergeh( X, Y ); return vec_sld( X, Z, 8 );}/** Load 2 non-consecutive floats into a single SIMD variable.** This routine loads two floating-point values from different memory * locations, which do not have to be aligned, and returns a SIMD variable * with the values.** @param float1 Pointer to first value.* @param float2 Pointer to first value.** @return SIMD FP variable with values in lower two elements. * Elements 3 and 4 are undefined.*/static inline vector float load_2_float(float *float1, float *float2){ vector unsigned char xshift = vec_lvsl( 8, float1 ); vector unsigned char yshift = vec_lvsl( 8, float2 ); vector float X = vec_lde( 0, float1 ); vector float Y = vec_lde( 0, float2 );
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?