⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ppc_vec.c

📁 SkyEye是一个可以运行嵌入式操作系统的硬件仿真工具
💻 C
📖 第 1 页 / 共 5 页
字号:
/* *	PearPC *	ppc_vec.cc * *	Copyright (C) 2004 Daniel Foesch (dfoesch@cs.nsmu.edu) * *	This program is free software; you can redistribute it and/or modify *	it under the terms of the GNU General Public License version 2 as *	published by the Free Software Foundation. * *	This program is distributed in the hope that it will be useful, *	but WITHOUT ANY WARRANTY; without even the implied warranty of *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *	GNU General Public License for more details. * *	You should have received a copy of the GNU General Public License *	along with this program; if not, write to the Free Software *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /*	Pages marked: v.??? *	From: IBM PowerPC MicroProcessor Family: Altivec(tm) Technology... *		Programming Environments Manual */#include <math.h>/* *	FIXME: put somewhere appropriate */#ifndef HAS_LOG2#define log2(x) log(x)/log(2)#endif /* HAS_LOG2 */ #ifndef HAS_EXP2#define exp2(x)	pow(2, x)#endif /* HAS_EXP2 *///#include "debug/tracers.h"#include "ppc_cpu.h"#include "ppc_dec.h"#include "ppc_fpu.h"#include "ppc_vec.h"#define	SIGN32 0x80000000/*	PACK_PIXEL	Packs a uint32 pixel to uint16 pixel *	v.219 */static inline uint16 PACK_PIXEL(uint32 clr){	return	(((clr & 0x000000f8) >> 3) | \		 ((clr & 0x0000f800) >> 6) | \		 ((clr & 0x01f80000) >> 9));}/*	UNPACK_PIXEL	Unpacks a uint16 pixel to uint32 pixel *	v.276 & v.279 */static inline uint32 UNPACK_PIXEL(uint16 clr){	return	(((uint32)(clr & 0x001f)) | \		 ((uint32)(clr & 0x03E0) << 3) | \		 ((uint32)(clr & 0x7c00) << 6) | \		 (((clr) & 0x8000) ? 0xff000000 : 0));}static inline uint8 SATURATE_UB(uint16 val){	if (val & 0xff00) {		gCPU.vscr |= VSCR_SAT;		return 0xff;	}	return val;}static inline uint8 SATURATE_0B(uint16 val){	if (val & 0xff00) {		gCPU.vscr |= VSCR_SAT;		return 0;	}	return val;}static inline uint16 SATURATE_UH(uint32 val){	if (val & 0xffff0000) {		gCPU.vscr |= VSCR_SAT;		return 0xffff;	}	return val;}static inline uint16 SATURATE_0H(uint32 val){	if (val & 0xffff0000) {		gCPU.vscr |= VSCR_SAT;		return 0;	}	return val;}static inline sint8 SATURATE_SB(sint16 val){	if (val > 127) {			// 0x7F		gCPU.vscr |= VSCR_SAT;		return 127;	} else if (val < -128) {		// 0x80		gCPU.vscr |= VSCR_SAT;		return -128;	}	return val;}static inline uint8 SATURATE_USB(sint16 val){	if (val > 0xff) {		gCPU.vscr |= VSCR_SAT;		return 0xff;	} else if (val < 0) {		gCPU.vscr |= VSCR_SAT;		return 0;	}	return (uint8)val;}static inline sint16 SATURATE_SH(sint32 val){	if (val > 32767) {			// 0x7fff		gCPU.vscr |= VSCR_SAT;		return 32767;	} else if (val < -32768) {		// 0x8000		gCPU.vscr |= VSCR_SAT;		return -32768;	}	return val;}static inline uint16 SATURATE_USH(sint32 val){	if (val > 0xffff) {		gCPU.vscr |= VSCR_SAT;		return 0xffff;	} else if (val < 0) {		gCPU.vscr |= VSCR_SAT;		return 0;	}	return (uint16)val;}static inline sint32 SATURATE_UW(sint64 val){	if (val > 0xffffffffLL) {		gCPU.vscr |= VSCR_SAT;		return 0xffffffffLL;	}	return val;}static inline sint32 SATURATE_SW(sint64 val){	if (val > 2147483647LL) {			// 0x7fffffff		gCPU.vscr |= VSCR_SAT;		return 2147483647LL;	} else if (val < -2147483648LL) {		// 0x80000000		gCPU.vscr |= VSCR_SAT;		return -2147483648LL;	}	return val;}/*	vperm		Vector Permutation *	v.218 */void ppc_opc_vperm(){	VECTOR_DEBUG_COMMON;	int vrD, vrA, vrB, vrC;	int sel;	Vector_t r;	PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);	int i;	for (i=0; i<16; i++) {		sel = gCPU.vr[vrC].b[i];		if (sel & 0x10)			r.b[i] = VECT_B(gCPU.vr[vrB], sel & 0xf);		else			r.b[i] = VECT_B(gCPU.vr[vrA], sel & 0xf);	}	gCPU.vr[vrD] = r;}/*	vsel		Vector Select *	v.238 */void ppc_opc_vsel(){	VECTOR_DEBUG;	int vrD, vrA, vrB, vrC;	uint64 mask, val;	PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC);	mask = gCPU.vr[vrC].d[0];	val = gCPU.vr[vrB].d[0] & mask;	val |= gCPU.vr[vrA].d[0] & ~mask;	gCPU.vr[vrD].d[0] = val;	mask = gCPU.vr[vrC].d[1];	val = gCPU.vr[vrB].d[1] & mask;	val |= gCPU.vr[vrA].d[1] & ~mask;	gCPU.vr[vrD].d[1] = val;}/*	vsrb		Vector Shift Right Byte *	v.256 */void ppc_opc_vsrb(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for ( i=0; i<16; i++) {		gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] >> (gCPU.vr[vrB].b[i] & 0x7);	}}/*	vsrh		Vector Shift Right Half Word *	v.257 */void ppc_opc_vsrh(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<8; i++) {		gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] >> (gCPU.vr[vrB].h[i] & 0xf);	}}/*	vsrw		Vector Shift Right Word *	v.259 */void ppc_opc_vsrw(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<4; i++) {		gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] >> (gCPU.vr[vrB].w[i] & 0x1f);	}}/*	vsrab		Vector Shift Right Arithmetic Byte *	v.253 */void ppc_opc_vsrab(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<16; i++) {		gCPU.vr[vrD].sb[i] = gCPU.vr[vrA].sb[i] >> (gCPU.vr[vrB].b[i] & 0x7);	}}/*	vsrah		Vector Shift Right Arithmetic Half Word *	v.254 */void ppc_opc_vsrah(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<8; i++) {		gCPU.vr[vrD].sh[i] = gCPU.vr[vrA].sh[i] >> (gCPU.vr[vrB].h[i] & 0xf);	}}/*	vsraw		Vector Shift Right Arithmetic Word *	v.255 */void ppc_opc_vsraw(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<4; i++) {		gCPU.vr[vrD].sw[i] = gCPU.vr[vrA].sw[i] >> (gCPU.vr[vrB].w[i] & 0x1f);	}}/*	vslb		Vector Shift Left Byte *	v.240 */void ppc_opc_vslb(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<16; i++) {		gCPU.vr[vrD].b[i] = gCPU.vr[vrA].b[i] << (gCPU.vr[vrB].b[i] & 0x7);	}}/*	vslh		Vector Shift Left Half Word *	v.242 */void ppc_opc_vslh(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<8; i++) {		gCPU.vr[vrD].h[i] = gCPU.vr[vrA].h[i] << (gCPU.vr[vrB].h[i] & 0xf);	}}/*	vslw		Vector Shift Left Word *	v.244 */void ppc_opc_vslw(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<4; i++) {		gCPU.vr[vrD].w[i] = gCPU.vr[vrA].w[i] << (gCPU.vr[vrB].w[i] & 0x1f);	}}/*	vsr		Vector Shift Right *	v.251 */void ppc_opc_vsr(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	Vector_t r;	int shift;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	/* Specs say that the low-order 3 bits of all byte elements in vB	 *   must be the same, or the result is undefined.  So we can just	 *   use the same low-order 3 bits for all of our shifts.	 */	shift = gCPU.vr[vrB].w[0] & 0x7;	r.d[0] = gCPU.vr[vrA].d[0] >> shift;	r.d[1] = gCPU.vr[vrA].d[1] >> shift;	VECT_D(r, 1) |= VECT_D(gCPU.vr[vrA], 0) << (64 - shift);	gCPU.vr[vrD] = r;}/*	vsro		Vector Shift Right Octet *	v.258 */void ppc_opc_vsro(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	Vector_t r;	int shift, i;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf;#if HOST_ENDIANESS == HOST_ENDIANESS_LE	for (i=0; i<(16-shift); i++) {		r.b[i] = gCPU.vr[vrA].b[i+shift];	}	for (; i<16; i++) {		r.b[i] = 0;	}#elif HOST_ENDIANESS == HOST_ENDIANESS_BE	for (i=0; i<shift; i++) {		r.b[i] = 0;	}	for (; i<16; i++) {		r.b[i] = gCPU.vr[vrA].b[i-shift];	}#else#error Endianess not supported!#endif	gCPU.vr[vrD] = r;}/*	vsl		Vector Shift Left *	v.239 */void ppc_opc_vsl(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	Vector_t r;	int shift;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	/* Specs say that the low-order 3 bits of all byte elements in vB	 *   must be the same, or the result is undefined.  So we can just	 *   use the same low-order 3 bits for all of our shifts.	 */	shift = gCPU.vr[vrB].w[0] & 0x7;	r.d[0] = gCPU.vr[vrA].d[0] << shift;	r.d[1] = gCPU.vr[vrA].d[1] << shift;	VECT_D(r, 0) |= VECT_D(gCPU.vr[vrA], 1) >> (64 - shift);	gCPU.vr[vrD] = r;}/*	vslo		Vector Shift Left Octet *	v.243 */void ppc_opc_vslo(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	Vector_t r;	int shift, i;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	shift = (gCPU.vr[vrB].w[0] >> 3) & 0xf;#if HOST_ENDIANESS == HOST_ENDIANESS_LE	for (i=0; i<shift; i++) {		r.b[i] = 0;	}	for (; i<16; i++) {		r.b[i] = gCPU.vr[vrA].b[i-shift];	}#elif HOST_ENDIANESS == HOST_ENDIANESS_BE	for (i=0; i<(16-shift); i++) {		r.b[i] = gCPU.vr[vrA].b[i+shift];	}	for (; i<16; i++) {		r.b[i] = 0;	}#else#error Endianess not supported!#endif	gCPU.vr[vrD] = r;}/*	vsldoi		Vector Shift Left Double by Octet Immediate *	v.241 */void ppc_opc_vsldoi(){	VECTOR_DEBUG_COMMON;	int vrD, vrA, vrB, shift, ashift;	int i;	Vector_t r;	PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, shift);	shift &= 0xf;	ashift = 16 - shift;#if HOST_ENDIANESS == HOST_ENDIANESS_LE	for (i=0; i<shift; i++) {		r.b[i] = gCPU.vr[vrB].b[i+ashift];	}	for (; i<16; i++) {		r.b[i] = gCPU.vr[vrA].b[i-shift];	}#elif HOST_ENDIANESS == HOST_ENDIANESS_BE	for (i=0; i<ashift; i++) {		r.b[i] = gCPU.vr[vrA].b[i+shift];	}	for (; i<16; i++) {		r.b[i] = gCPU.vr[vrB].b[i-ashift];	}#else#error Endianess not supported!#endif	gCPU.vr[vrD] = r;}/*	vrlb		Vector Rotate Left Byte *	v.234 */void ppc_opc_vrlb(){	VECTOR_DEBUG;	int vrD, vrA, vrB, shift;	Vector_t r;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<16; i++) {		shift = (gCPU.vr[vrB].b[i] & 0x7);		r.b[i] = gCPU.vr[vrA].b[i] << shift;		r.b[i] |= gCPU.vr[vrA].b[i] >> (8 - shift);	}	gCPU.vr[vrD] = r;}/*	vrlh		Vector Rotate Left Half Word *	v.235 */void ppc_opc_vrlh(){	VECTOR_DEBUG;	int vrD, vrA, vrB, shift;	Vector_t r;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<8; i++) {		shift = (gCPU.vr[vrB].h[i] & 0xf);		r.h[i] = gCPU.vr[vrA].h[i] << shift;		r.h[i] |= gCPU.vr[vrA].h[i] >> (16 - shift);	}	gCPU.vr[vrD] = r;}/*	vrlw		Vector Rotate Left Word *	v.236 */void ppc_opc_vrlw(){	VECTOR_DEBUG;	int vrD, vrA, vrB, shift;	Vector_t r;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	int i;	for (i=0; i<4; i++) {		shift = (gCPU.vr[vrB].w[i] & 0x1F);		r.w[i] = gCPU.vr[vrA].w[i] << shift;		r.w[i] |= gCPU.vr[vrA].w[i] >> (32 - shift);	}	gCPU.vr[vrD] = r;}/* With the merges, I just don't see any point in risking that a compiler *   might generate actual alu code to calculate anything when it's *   compile-time known.  Plus, it's easier to validate it like this. *//*	vmrghb		Vector Merge High Byte *	v.195 */void ppc_opc_vmrghb(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	Vector_t r;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	VECT_B(r, 0) = VECT_B(gCPU.vr[vrA], 0);	VECT_B(r, 1) = VECT_B(gCPU.vr[vrB], 0);	VECT_B(r, 2) = VECT_B(gCPU.vr[vrA], 1);	VECT_B(r, 3) = VECT_B(gCPU.vr[vrB], 1);	VECT_B(r, 4) = VECT_B(gCPU.vr[vrA], 2);	VECT_B(r, 5) = VECT_B(gCPU.vr[vrB], 2);	VECT_B(r, 6) = VECT_B(gCPU.vr[vrA], 3);	VECT_B(r, 7) = VECT_B(gCPU.vr[vrB], 3);	VECT_B(r, 8) = VECT_B(gCPU.vr[vrA], 4);	VECT_B(r, 9) = VECT_B(gCPU.vr[vrB], 4);	VECT_B(r,10) = VECT_B(gCPU.vr[vrA], 5);	VECT_B(r,11) = VECT_B(gCPU.vr[vrB], 5);	VECT_B(r,12) = VECT_B(gCPU.vr[vrA], 6);	VECT_B(r,13) = VECT_B(gCPU.vr[vrB], 6);	VECT_B(r,14) = VECT_B(gCPU.vr[vrA], 7);	VECT_B(r,15) = VECT_B(gCPU.vr[vrB], 7);	gCPU.vr[vrD] = r;}/*	vmrghh		Vector Merge High Half Word *	v.196 */void ppc_opc_vmrghh(){	VECTOR_DEBUG;	int vrD, vrA, vrB;	Vector_t r;	PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB);	VECT_H(r, 0) = VECT_H(gCPU.vr[vrA], 0);	VECT_H(r, 1) = VECT_H(gCPU.vr[vrB], 0);	VECT_H(r, 2) = VECT_H(gCPU.vr[vrA], 1);	VECT_H(r, 3) = VECT_H(gCPU.vr[vrB], 1);	VECT_H(r, 4) = VECT_H(gCPU.vr[vrA], 2);	VECT_H(r, 5) = VECT_H(gCPU.vr[vrB], 2);	VECT_H(r, 6) = VECT_H(gCPU.vr[vrA], 3);	VECT_H(r, 7) = VECT_H(gCPU.vr[vrB], 3);	gCPU.vr[vrD] = r;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -