📄 ppc_vec.c
字号:
/* * PearPC * ppc_vec.cc * * Copyright (C) 2004 Daniel Foesch (dfoesch@cs.nsmu.edu) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Pages marked: v.??? * From: IBM PowerPC MicroProcessor Family: Altivec(tm) Technology... * Programming Environments Manual */#include <math.h>/* * FIXME: put somewhere appropriate */#ifndef HAS_LOG2#define log2(x) log(x)/log(2)#endif /* HAS_LOG2 */ #ifndef HAS_EXP2#define exp2(x) pow(2, x)#endif /* HAS_EXP2 *///#include "debug/tracers.h"#include "ppc_cpu.h"#include "ppc_dec.h"#include "ppc_fpu.h"#include "ppc_vec.h"#define SIGN32 0x80000000/* PACK_PIXEL Packs a uint32 pixel to uint16 pixel * v.219 */static inline uint16 PACK_PIXEL(uint32 clr){ return (((clr & 0x000000f8) >> 3) | \ ((clr & 0x0000f800) >> 6) | \ ((clr & 0x01f80000) >> 9));}/* UNPACK_PIXEL Unpacks a uint16 pixel to uint32 pixel * v.276 & v.279 */static inline uint32 UNPACK_PIXEL(uint16 clr){ return (((uint32)(clr & 0x001f)) | \ ((uint32)(clr & 0x03E0) << 3) | \ ((uint32)(clr & 0x7c00) << 6) | \ (((clr) & 0x8000) ? 0xff000000 : 0));}static inline uint8 SATURATE_UB(uint16 val){ if (val & 0xff00) { gCPU.vscr |= VSCR_SAT; return 0xff; } return val;}static inline uint8 SATURATE_0B(uint16 val){ if (val & 0xff00) { gCPU.vscr |= VSCR_SAT; return 0; } return val;}static inline uint16 SATURATE_UH(uint32 val){ if (val & 0xffff0000) { gCPU.vscr |= VSCR_SAT; return 0xffff; } return val;}static inline uint16 SATURATE_0H(uint32 val){ if (val & 0xffff0000) { gCPU.vscr |= VSCR_SAT; return 0; } return val;}static inline sint8 SATURATE_SB(sint16 val){ if (val > 127) { // 0x7F gCPU.vscr |= VSCR_SAT; return 127; } else if (val < -128) { // 0x80 gCPU.vscr |= VSCR_SAT; return -128; } return val;}static inline uint8 SATURATE_USB(sint16 val){ if (val > 0xff) { gCPU.vscr |= VSCR_SAT; return 0xff; } else if (val < 0) { gCPU.vscr |= VSCR_SAT; return 0; } return (uint8)val;}static inline sint16 SATURATE_SH(sint32 val){ if (val > 32767) { // 0x7fff gCPU.vscr |= VSCR_SAT; return 32767; } else if (val < -32768) { // 0x8000 gCPU.vscr |= VSCR_SAT; return -32768; } return val;}static inline uint16 SATURATE_USH(sint32 val){ if (val > 0xffff) { gCPU.vscr |= VSCR_SAT; return 0xffff; } else if (val < 0) { gCPU.vscr |= VSCR_SAT; return 0; } return (uint16)val;}static inline sint32 SATURATE_UW(sint64 val){ if (val > 0xffffffffLL) { gCPU.vscr |= VSCR_SAT; return 0xffffffffLL; } return val;}static inline sint32 SATURATE_SW(sint64 val){ if (val > 2147483647LL) { // 0x7fffffff gCPU.vscr |= VSCR_SAT; return 2147483647LL; } else if (val < -2147483648LL) { // 0x80000000 gCPU.vscr |= VSCR_SAT; return -2147483648LL; } return val;}/* vperm Vector Permutation * v.218 */void ppc_opc_vperm(){ VECTOR_DEBUG_COMMON; int vrD, vrA, vrB, vrC; int sel; Vector_t r; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); int i; for (i=0; i<16; i++) { sel = gCPU.vr[vrC].b[i]; if (sel & 0x10) r.b[i] = VECT_B(gCPU.vr[vrB], sel & 0xf); else r.b[i] = VECT_B(gCPU.vr[vrA], sel & 0xf); } gCPU.vr[vrD] = r;}/* vsel Vector Select * v.238 */void ppc_opc_vsel(){ VECTOR_DEBUG; int vrD, vrA, vrB, vrC; uint64 mask, val; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); mask = gCPU.vr[vrC].d[0]; val = gCPU.vr[vrB].d[0] & mask; val |= gCPU.vr[vrA].d[0] & ~mask; gCPU.vr[vrD].d[0] = val; mask = gCPU.vr[vrC].d[1]; val = gCPU.vr[vrB].d[1] & mask; val |= gCPU.vr[vrA].d[1] & ~mask; gCPU.vr[vrD].d[1] = val;}/* vsrb Vector Shift Right Byte * v.256 */void ppc_opc_vsrb(){ VECTOR_DEBUG; int vrD, vrA, vrB; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for ( i=0; i<16; i++) { gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] >> (gCPU.vr[vrB].b[i] & 0x7); }}/* vsrh Vector Shift Right Half Word * v.257 */void ppc_opc_vsrh(){ VECTOR_DEBUG; int vrD, vrA, vrB; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<8; i++) { gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] >> (gCPU.vr[vrB].h[i] & 0xf); }}/* vsrw Vector Shift Right Word * v.259 */void ppc_opc_vsrw(){ VECTOR_DEBUG; int vrD, vrA, vrB; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] >> (gCPU.vr[vrB].w[i] & 0x1f); }}/* vsrab Vector Shift Right Arithmetic Byte * v.253 */void ppc_opc_vsrab(){ VECTOR_DEBUG; int vrD, vrA, vrB; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<16; i++) { gCPU.vr[vrD].sb[i] = gCPU.vr[vrA].sb[i] >> (gCPU.vr[vrB].b[i] & 0x7); }}/* vsrah Vector Shift Right Arithmetic Half Word * v.254 */void ppc_opc_vsrah(){ VECTOR_DEBUG; int vrD, vrA, vrB; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<8; i++) { gCPU.vr[vrD].sh[i] = gCPU.vr[vrA].sh[i] >> (gCPU.vr[vrB].h[i] & 0xf); }}/* vsraw Vector Shift Right Arithmetic Word * v.255 */void ppc_opc_vsraw(){ VECTOR_DEBUG; int vrD, vrA, vrB; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { gCPU.vr[vrD].sw[i] = gCPU.vr[vrA].sw[i] >> (gCPU.vr[vrB].w[i] & 0x1f); }}/* vslb Vector Shift Left Byte * v.240 */void ppc_opc_vslb(){ VECTOR_DEBUG; int vrD, vrA, vrB; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<16; i++) { gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] << (gCPU.vr[vrB].b[i] & 0x7); }}/* vslh Vector Shift Left Half Word * v.242 */void ppc_opc_vslh(){ VECTOR_DEBUG; int vrD, vrA, vrB; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<8; i++) { gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] << (gCPU.vr[vrB].h[i] & 0xf); }}/* vslw Vector Shift Left Word * v.244 */void ppc_opc_vslw(){ VECTOR_DEBUG; int vrD, vrA, vrB; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] << (gCPU.vr[vrB].w[i] & 0x1f); }}/* vsr Vector Shift Right * v.251 */void ppc_opc_vsr(){ VECTOR_DEBUG; int vrD, vrA, vrB; Vector_t r; int shift; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); /* Specs say that the low-order 3 bits of all byte elements in vB * must be the same, or the result is undefined. So we can just * use the same low-order 3 bits for all of our shifts. */ shift = gCPU.vr[vrB].w[0] & 0x7; r.d[0] = gCPU.vr[vrA].d[0] >> shift; r.d[1] = gCPU.vr[vrA].d[1] >> shift; VECT_D(r, 1) |= VECT_D(gCPU.vr[vrA], 0) << (64 - shift); gCPU.vr[vrD] = r;}/* vsro Vector Shift Right Octet * v.258 */void ppc_opc_vsro(){ VECTOR_DEBUG; int vrD, vrA, vrB; Vector_t r; int shift, i; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf;#if HOST_ENDIANESS == HOST_ENDIANESS_LE for (i=0; i<(16-shift); i++) { r.b[i] = gCPU.vr[vrA].b[i+shift]; } for (; i<16; i++) { r.b[i] = 0; }#elif HOST_ENDIANESS == HOST_ENDIANESS_BE for (i=0; i<shift; i++) { r.b[i] = 0; } for (; i<16; i++) { r.b[i] = gCPU.vr[vrA].b[i-shift]; }#else#error Endianess not supported!#endif gCPU.vr[vrD] = r;}/* vsl Vector Shift Left * v.239 */void ppc_opc_vsl(){ VECTOR_DEBUG; int vrD, vrA, vrB; Vector_t r; int shift; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); /* Specs say that the low-order 3 bits of all byte elements in vB * must be the same, or the result is undefined. So we can just * use the same low-order 3 bits for all of our shifts. */ shift = gCPU.vr[vrB].w[0] & 0x7; r.d[0] = gCPU.vr[vrA].d[0] << shift; r.d[1] = gCPU.vr[vrA].d[1] << shift; VECT_D(r, 0) |= VECT_D(gCPU.vr[vrA], 1) >> (64 - shift); gCPU.vr[vrD] = r;}/* vslo Vector Shift Left Octet * v.243 */void ppc_opc_vslo(){ VECTOR_DEBUG; int vrD, vrA, vrB; Vector_t r; int shift, i; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf;#if HOST_ENDIANESS == HOST_ENDIANESS_LE for (i=0; i<shift; i++) { r.b[i] = 0; } for (; i<16; i++) { r.b[i] = gCPU.vr[vrA].b[i-shift]; }#elif HOST_ENDIANESS == HOST_ENDIANESS_BE for (i=0; i<(16-shift); i++) { r.b[i] = gCPU.vr[vrA].b[i+shift]; } for (; i<16; i++) { r.b[i] = 0; }#else#error Endianess not supported!#endif gCPU.vr[vrD] = r;}/* vsldoi Vector Shift Left Double by Octet Immediate * v.241 */void ppc_opc_vsldoi(){ VECTOR_DEBUG_COMMON; int vrD, vrA, vrB, shift, ashift; int i; Vector_t r; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, shift); shift &= 0xf; ashift = 16 - shift;#if HOST_ENDIANESS == HOST_ENDIANESS_LE for (i=0; i<shift; i++) { r.b[i] = gCPU.vr[vrB].b[i+ashift]; } for (; i<16; i++) { r.b[i] = gCPU.vr[vrA].b[i-shift]; }#elif HOST_ENDIANESS == HOST_ENDIANESS_BE for (i=0; i<ashift; i++) { r.b[i] = gCPU.vr[vrA].b[i+shift]; } for (; i<16; i++) { r.b[i] = gCPU.vr[vrB].b[i-ashift]; }#else#error Endianess not supported!#endif gCPU.vr[vrD] = r;}/* vrlb Vector Rotate Left Byte * v.234 */void ppc_opc_vrlb(){ VECTOR_DEBUG; int vrD, vrA, vrB, shift; Vector_t r; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<16; i++) { shift = (gCPU.vr[vrB].b[i] & 0x7); r.b[i] = gCPU.vr[vrA].b[i] << shift; r.b[i] |= gCPU.vr[vrA].b[i] >> (8 - shift); } gCPU.vr[vrD] = r;}/* vrlh Vector Rotate Left Half Word * v.235 */void ppc_opc_vrlh(){ VECTOR_DEBUG; int vrD, vrA, vrB, shift; Vector_t r; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<8; i++) { shift = (gCPU.vr[vrB].h[i] & 0xf); r.h[i] = gCPU.vr[vrA].h[i] << shift; r.h[i] |= gCPU.vr[vrA].h[i] >> (16 - shift); } gCPU.vr[vrD] = r;}/* vrlw Vector Rotate Left Word * v.236 */void ppc_opc_vrlw(){ VECTOR_DEBUG; int vrD, vrA, vrB, shift; Vector_t r; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { shift = (gCPU.vr[vrB].w[i] & 0x1F); r.w[i] = gCPU.vr[vrA].w[i] << shift; r.w[i] |= gCPU.vr[vrA].w[i] >> (32 - shift); } gCPU.vr[vrD] = r;}/* With the merges, I just don't see any point in risking that a compiler * might generate actual alu code to calculate anything when it's * compile-time known. Plus, it's easier to validate it like this. *//* vmrghb Vector Merge High Byte * v.195 */void ppc_opc_vmrghb(){ VECTOR_DEBUG; int vrD, vrA, vrB; Vector_t r; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 0); VECT_B(r, 1) = VECT_B(gCPU.vr[vrB], 0); VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 1); VECT_B(r, 3) = VECT_B(gCPU.vr[vrB], 1); VECT_B(r, 4) = VECT_B(gCPU.vr[vrA], 2); VECT_B(r, 5) = VECT_B(gCPU.vr[vrB], 2); VECT_B(r, 6) = VECT_B(gCPU.vr[vrA], 3); VECT_B(r, 7) = VECT_B(gCPU.vr[vrB], 3); VECT_B(r, 8) = VECT_B(gCPU.vr[vrA], 4); VECT_B(r, 9) = VECT_B(gCPU.vr[vrB], 4); VECT_B(r,10) = VECT_B(gCPU.vr[vrA], 5); VECT_B(r,11) = VECT_B(gCPU.vr[vrB], 5); VECT_B(r,12) = VECT_B(gCPU.vr[vrA], 6); VECT_B(r,13) = VECT_B(gCPU.vr[vrB], 6); VECT_B(r,14) = VECT_B(gCPU.vr[vrA], 7); VECT_B(r,15) = VECT_B(gCPU.vr[vrB], 7); gCPU.vr[vrD] = r;}/* vmrghh Vector Merge High Half Word * v.196 */void ppc_opc_vmrghh(){ VECTOR_DEBUG; int vrD, vrA, vrB; Vector_t r; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 0); VECT_H(r, 1) = VECT_H(gCPU.vr[vrB], 0); VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 1); VECT_H(r, 3) = VECT_H(gCPU.vr[vrB], 1); VECT_H(r, 4) = VECT_H(gCPU.vr[vrA], 2); VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 2); VECT_H(r, 6) = VECT_H(gCPU.vr[vrA], 3); VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 3); gCPU.vr[vrD] = r;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -