📄 ppc_vec.c
字号:
uint32 prod; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); int i; for ( i=0; i<8; i++) { prod = (uint32)gCPU.vr[vrA].h[i] * (uint32)gCPU.vr[vrB].h[i]; prod = prod + (uint32)gCPU.vr[vrC].h[i]; gCPU.vr[vrD].h[i] = prod; }}/* vmhraddshs Vector Multiply High Round and Add Signed Half Word Saturate * v.186 */void ppc_opc_vmhraddshs(){ VECTOR_DEBUG; int vrD, vrA, vrB, vrC; sint32 prod; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); int i; for (i=0; i<8; i++) { prod = (sint32)gCPU.vr[vrA].sh[i] * (sint32)gCPU.vr[vrB].sh[i]; prod += 0x4000; prod = (prod >> 15) + (sint32)gCPU.vr[vrC].sh[i]; gCPU.vr[vrD].sh[i] = SATURATE_SH(prod); }}/* vmsumubm Vector Multiply Sum Unsigned Byte Modulo * v.204 */void ppc_opc_vmsumubm(){ VECTOR_DEBUG; int vrD, vrA, vrB, vrC; uint32 temp; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); int i; for (i=0; i<4; i++) { temp = gCPU.vr[vrC].w[i]; temp += (uint16)gCPU.vr[vrA].b[i<<2] * (uint16)gCPU.vr[vrB].b[i<<2]; temp += (uint16)gCPU.vr[vrA].b[(i<<2)+1] * (uint16)gCPU.vr[vrB].b[(i<<2)+1]; temp += (uint16)gCPU.vr[vrA].b[(i<<2)+2] * (uint16)gCPU.vr[vrB].b[(i<<2)+2]; temp += (uint16)gCPU.vr[vrA].b[(i<<2)+3] * (uint16)gCPU.vr[vrB].b[(i<<2)+3]; gCPU.vr[vrD].w[i] = temp; }}/* vmsumuhm Vector Multiply Sum Unsigned Half Word Modulo * v.205 */void ppc_opc_vmsumuhm(){ VECTOR_DEBUG; int vrD, vrA, vrB, vrC; uint32 temp; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); int i; for (i=0; i<4; i++) { temp = gCPU.vr[vrC].w[i]; temp += (uint32)gCPU.vr[vrA].h[i<<1] * (uint32)gCPU.vr[vrB].h[i<<1]; temp += (uint32)gCPU.vr[vrA].h[(i<<1)+1] * (uint32)gCPU.vr[vrB].h[(i<<1)+1]; gCPU.vr[vrD].w[i] = temp; }}/* vmsummbm Vector Multiply Sum Mixed-Sign Byte Modulo * v.201 */void ppc_opc_vmsummbm(){ VECTOR_DEBUG; int vrD, vrA, vrB, vrC; sint32 temp; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); int i; for (i=0; i<4; i++) { temp = gCPU.vr[vrC].sw[i]; temp += (sint16)gCPU.vr[vrA].sb[i<<2] * (uint16)gCPU.vr[vrB].b[i<<2]; temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+1] * (uint16)gCPU.vr[vrB].b[(i<<2)+1]; temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+2] * (uint16)gCPU.vr[vrB].b[(i<<2)+2]; temp += (sint16)gCPU.vr[vrA].sb[(i<<2)+3] * (uint16)gCPU.vr[vrB].b[(i<<2)+3]; gCPU.vr[vrD].sw[i] = temp; }}/* vmsumshm Vector Multiply Sum Signed Half Word Modulo * v.202 */void ppc_opc_vmsumshm(){ VECTOR_DEBUG; int vrD, vrA, vrB, vrC; sint32 temp; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); int i; for (i=0; i<4; i++) { temp = gCPU.vr[vrC].sw[i]; temp += (sint32)gCPU.vr[vrA].sh[i<<1] * (sint32)gCPU.vr[vrB].sh[i<<1]; temp += (sint32)gCPU.vr[vrA].sh[(i<<1)+1] * (sint32)gCPU.vr[vrB].sh[(i<<1)+1]; gCPU.vr[vrD].sw[i] = temp; }}/* vmsumuhs Vector Multiply Sum Unsigned Half Word Saturate * v.206 */void ppc_opc_vmsumuhs(){ VECTOR_DEBUG; int vrD, vrA, vrB, vrC; uint64 temp; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); /* For this, there's no way to get around 64-bit math. If we use * the hacks used before, then we have to do it so often, that * we'll outpace the 64-bit math in execution time. */ int i; for (i=0; i<4; i++) { temp = gCPU.vr[vrC].w[i]; temp += (uint32)gCPU.vr[vrA].h[i<<1] * (uint32)gCPU.vr[vrB].h[i<<1]; temp += (uint32)gCPU.vr[vrA].h[(i<<1)+1] * (uint32)gCPU.vr[vrB].h[(i<<1)+1]; gCPU.vr[vrD].w[i] = SATURATE_UW(temp); }}/* vmsumshs Vector Multiply Sum Signed Half Word Saturate * v.203 */void ppc_opc_vmsumshs(){ VECTOR_DEBUG; int vrD, vrA, vrB, vrC; sint64 temp; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); /* For this, there's no way to get around 64-bit math. If we use * the hacks used before, then we have to do it so often, that * we'll outpace the 64-bit math in execution time. */ int i; for (i=0; i<4; i++) { temp = gCPU.vr[vrC].sw[i]; temp += (sint32)gCPU.vr[vrA].sh[i<<1] * (sint32)gCPU.vr[vrB].sh[i<<1]; temp += (sint32)gCPU.vr[vrA].sh[(i<<1)+1] * (sint32)gCPU.vr[vrB].sh[(i<<1)+1]; gCPU.vr[vrD].sw[i] = SATURATE_SW(temp); }}/* vsum4ubs Vector Sum Across Partial (1/4) Unsigned Byte Saturate * v.275 */void ppc_opc_vsum4ubs(){ VECTOR_DEBUG; int vrD, vrA, vrB; uint64 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); /* For this, there's no way to get around 64-bit math. If we use * the hacks used before, then we have to do it so often, that * we'll outpace the 64-bit math in execution time. */ int i; for (i=0; i<4; i++) { res = (uint64)gCPU.vr[vrB].w[i]; res += (uint64)gCPU.vr[vrA].b[(i<<2)]; res += (uint64)gCPU.vr[vrA].b[(i<<2)+1]; res += (uint64)gCPU.vr[vrA].b[(i<<2)+2]; res += (uint64)gCPU.vr[vrA].b[(i<<2)+3]; gCPU.vr[vrD].w[i] = SATURATE_UW(res); }}/* vsum4sbs Vector Sum Across Partial (1/4) Signed Byte Saturate * v.273 */void ppc_opc_vsum4sbs(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint64 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { res = (sint64)gCPU.vr[vrB].sw[i]; res += (sint64)gCPU.vr[vrA].sb[(i<<2)]; res += (sint64)gCPU.vr[vrA].sb[(i<<2)+1]; res += (sint64)gCPU.vr[vrA].sb[(i<<2)+2]; res += (sint64)gCPU.vr[vrA].sb[(i<<2)+3]; gCPU.vr[vrD].sw[i] = SATURATE_SW(res); }}/* vsum4shs Vector Sum Across Partial (1/4) Signed Half Word Saturate * v.274 */void ppc_opc_vsum4shs(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint64 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { res = (sint64)gCPU.vr[vrB].sw[i]; res += (sint64)gCPU.vr[vrA].sh[(i<<1)]; res += (sint64)gCPU.vr[vrA].sh[(i<<1)+1]; gCPU.vr[vrD].sw[i] = SATURATE_SW(res); }}/* vsum2sws Vector Sum Across Partial (1/2) Signed Word Saturate * v.272 */void ppc_opc_vsum2sws(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint64 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); res = (sint64)gCPU.vr[vrA].sw[0] + (sint64)gCPU.vr[vrA].sw[1]; res += (sint64)gCPU.vr[vrB].sw[VECT_ODD(0)]; gCPU.vr[vrD].w[VECT_ODD(0)] = SATURATE_SW(res); gCPU.vr[vrD].w[VECT_EVEN(0)] = 0; res = (sint64)gCPU.vr[vrA].sw[2] + (sint64)gCPU.vr[vrA].sw[3]; res += (sint64)gCPU.vr[vrB].sw[VECT_ODD(1)]; gCPU.vr[vrD].w[VECT_ODD(1)] = SATURATE_SW(res); gCPU.vr[vrD].w[VECT_EVEN(1)] = 0;}/* vsumsws Vector Sum Across Signed Word Saturate * v.271 */void ppc_opc_vsumsws(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint64 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); res = (sint64)gCPU.vr[vrA].sw[0] + (sint64)gCPU.vr[vrA].sw[1]; res += (sint64)gCPU.vr[vrA].sw[2] + (sint64)gCPU.vr[vrA].sw[3]; res += (sint64)VECT_W(gCPU.vr[vrB], 3); VECT_W(gCPU.vr[vrD], 3) = SATURATE_SW(res); VECT_W(gCPU.vr[vrD], 2) = 0; VECT_W(gCPU.vr[vrD], 1) = 0; VECT_W(gCPU.vr[vrD], 0) = 0;}/* vnmsubfp Vector Negative Multiply-Subtract Floating Point * v.215 */void ppc_opc_vnmsubfp(){ VECTOR_DEBUG; int vrD, vrA, vrB, vrC; double res; PPC_OPC_TEMPL_A(gCPU.current_opc, vrD, vrA, vrB, vrC); int i; for (i=0; i<4; i++) { //FIXME: This might not comply with Java FP res = (double)gCPU.vr[vrA].f[i] * (double)gCPU.vr[vrC].f[i]; res = (double)gCPU.vr[vrB].f[i] - res; gCPU.vr[vrD].f[i] = (float)res; }}/* vavgub Vector Average Unsigned Byte * v.152 */void ppc_opc_vavgub(){ VECTOR_DEBUG; int vrD, vrA, vrB; uint16 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<16; i++) { res = (uint16)gCPU.vr[vrA].b[i] + (uint16)gCPU.vr[vrB].b[i] + 1; gCPU.vr[vrD].b[i] = (res >> 1); }}/* vavguh Vector Average Unsigned Half Word * v.153 */void ppc_opc_vavguh(){ VECTOR_DEBUG; int vrD, vrA, vrB; uint32 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<8; i++) { res = (uint32)gCPU.vr[vrA].h[i] + (uint32)gCPU.vr[vrB].h[i] + 1; gCPU.vr[vrD].h[i] = (res >> 1); }}/* vavguw Vector Average Unsigned Word * v.154 */void ppc_opc_vavguw(){ VECTOR_DEBUG; int vrD, vrA, vrB; uint64 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { res = (uint64)gCPU.vr[vrA].w[i] + (uint64)gCPU.vr[vrB].w[i] + 1; gCPU.vr[vrD].w[i] = (res >> 1); }}/* vavgsb Vector Average Signed Byte * v.149 */void ppc_opc_vavgsb(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint16 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<16; i++) { res = (sint16)gCPU.vr[vrA].sb[i] + (sint16)gCPU.vr[vrB].sb[i] + 1; gCPU.vr[vrD].sb[i] = (res >> 1); }}/* vavgsh Vector Average Signed Half Word * v.150 */void ppc_opc_vavgsh(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint32 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<8; i++) { res = (sint32)gCPU.vr[vrA].sh[i] + (sint32)gCPU.vr[vrB].sh[i] + 1; gCPU.vr[vrD].sh[i] = (res >> 1); }}/* vavgsw Vector Average Signed Word * v.151 */void ppc_opc_vavgsw(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint64 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { res = (sint64)gCPU.vr[vrA].sw[i] + (sint64)gCPU.vr[vrB].sw[i] + 1; gCPU.vr[vrD].sw[i] = (res >> 1); }}/* vmaxub Vector Maximum Unsigned Byte * v.182 */void ppc_opc_vmaxub(){ VECTOR_DEBUG; int vrD, vrA, vrB; uint8 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<16; i++) { res = gCPU.vr[vrA].b[i]; if (res < gCPU.vr[vrB].b[i]) res = gCPU.vr[vrB].b[i]; gCPU.vr[vrD].b[i] = res; }}/* vmaxuh Vector Maximum Unsigned Half Word * v.183 */void ppc_opc_vmaxuh(){ VECTOR_DEBUG; int vrD, vrA, vrB; uint16 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<8; i++) { res = gCPU.vr[vrA].h[i]; if (res < gCPU.vr[vrB].h[i]) res = gCPU.vr[vrB].h[i]; gCPU.vr[vrD].h[i] = res; }}/* vmaxuw Vector Maximum Unsigned Word * v.184 */void ppc_opc_vmaxuw(){ VECTOR_DEBUG; int vrD, vrA, vrB; uint32 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { res = gCPU.vr[vrA].w[i]; if (res < gCPU.vr[vrB].w[i]) res = gCPU.vr[vrB].w[i]; gCPU.vr[vrD].w[i] = res; }}/* vmaxsb Vector Maximum Signed Byte * v.179 */void ppc_opc_vmaxsb(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint8 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<16; i++) { res = gCPU.vr[vrA].sb[i]; if (res < gCPU.vr[vrB].sb[i]) res = gCPU.vr[vrB].sb[i]; gCPU.vr[vrD].sb[i] = res; }}/* vmaxsh Vector Maximum Signed Half Word * v.180 */void ppc_opc_vmaxsh(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint16 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<8; i++) { res = gCPU.vr[vrA].sh[i]; if (res < gCPU.vr[vrB].sh[i]) res = gCPU.vr[vrB].sh[i]; gCPU.vr[vrD].sh[i] = res; }}/* vmaxsw Vector Maximum Signed Word * v.181 */void ppc_opc_vmaxsw(){ VECTOR_DEBUG; int vrD, vrA, vrB; sint32 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { res = gCPU.vr[vrA].sw[i]; if (res < gCPU.vr[vrB].sw[i]) res = gCPU.vr[vrB].sw[i]; gCPU.vr[vrD].sw[i] = res; }}/* vmaxfp Vector Maximum Floating Point * v.178 */void ppc_opc_vmaxfp(){ VECTOR_DEBUG; int vrD, vrA, vrB; float res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<4; i++) { //FIXME: This might not comply with Java FP res = gCPU.vr[vrA].f[i]; if (res < gCPU.vr[vrB].f[i]) res = gCPU.vr[vrB].f[i]; gCPU.vr[vrD].f[i] = res; }}/* vminub Vector Minimum Unsigned Byte * v.191 */void ppc_opc_vminub(){ VECTOR_DEBUG; int vrD, vrA, vrB; uint8 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<16; i++) { res = gCPU.vr[vrA].b[i]; if (res > gCPU.vr[vrB].b[i]) res = gCPU.vr[vrB].b[i]; gCPU.vr[vrD].b[i] = res; }}/* vminuh Vector Minimum Unsigned Half Word * v.192 */void ppc_opc_vminuh(){ VECTOR_DEBUG; int vrD, vrA, vrB; uint16 res; PPC_OPC_TEMPL_X(gCPU.current_opc, vrD, vrA, vrB); int i; for (i=0; i<8; i++) { res = gCPU.vr[vrA].h[i]; if (res > gCPU.vr[vrB].h[i]) res = gCPU.vr[vrB].h[i]; gCPU.vr[vrD].h[i] = res; }}/* vminuw Vector Minimum Unsigned Word * v.193 */void ppc_opc_vminuw(){ VECTOR_DEBUG; int vrD, vrA, vrB;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -