📄 radeon_program_alu.c
字号:
inst->SrcReg[1], negate(inst->SrcReg[2])); emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);}static void transform_POW(struct radeon_transform_context* t, struct prog_instruction* inst){ int tempreg = radeonFindFreeTemporary(t); struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); tempdst.WriteMask = WRITEMASK_W; tempsrc.Swizzle = SWIZZLE_WWWW; emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0])); emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1])); emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);}static void transform_RSQ(struct radeon_transform_context* t, struct prog_instruction* inst){ emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));}static void transform_SGE(struct radeon_transform_context* t, struct prog_instruction* inst){ int tempreg = radeonFindFreeTemporary(t); emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);}static void transform_SLT(struct radeon_transform_context* t, struct prog_instruction* inst){ int tempreg = radeonFindFreeTemporary(t); emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);}static void transform_SUB(struct radeon_transform_context* t, struct prog_instruction* inst){ emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));}static void transform_SWZ(struct radeon_transform_context* t, struct prog_instruction* inst){ emit1(t->Program, 
OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);}static void transform_XPD(struct radeon_transform_context* t, struct prog_instruction* inst){ int tempreg = radeonFindFreeTemporary(t); emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg), swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg, swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), negate(srcreg(PROGRAM_TEMPORARY, tempreg)));}/** * Can be used as a transformation for @ref radeonClauseLocalTransform, * no userData necessary. * * Eliminates the following ALU instructions: * ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD * using: * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * * Transforms RSQ to Radeon's native RSQ by explicitly setting * absolute value. * * @note should be applicable to R300 and R500 fragment programs. 
*/GLboolean radeonTransformALU(struct radeon_transform_context* t, struct prog_instruction* inst, void* unused){ switch(inst->Opcode) { case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE; case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE; case OPCODE_DST: transform_DST(t, inst); return GL_TRUE; case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE; case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE; case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE; case OPCODE_POW: transform_POW(t, inst); return GL_TRUE; case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE; case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE; case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE; case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE; case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE; case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE; default: return GL_FALSE; }}static void sincos_constants(struct radeon_transform_context* t, GLuint *constants){ static const GLfloat SinCosConsts[2][4] = { { 1.273239545, // 4/PI -0.405284735, // -4/(PI*PI) 3.141592654, // PI 0.2225 // weight }, { 0.75, 0.5, 0.159154943, // 1/(2*PI) 6.283185307 // 2*PI } }; int i; for(i = 0; i < 2; ++i) { GLuint swz; constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz); ASSERT(swz == SWIZZLE_NOOP); }}/** * Approximate sin(x), where x is clamped to (-pi/2, pi/2). 
* * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } * MAD tmp.x, tmp.y, |src|, tmp.x * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x * MAD dest, tmp.y, weight, tmp.x */static void sin_approx(struct radeon_transform_context* t, struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants){ GLuint tempreg = radeonFindFreeTemporary(t); emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY), swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), srcreg(PROGRAM_CONSTANT, constants[0])); emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)), negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X))); emit3(t->Program, OPCODE_MAD, 0, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));}/** * Translate the trigonometric functions COS, SIN, and SCS * using only the basic instructions * MOV, ADD, MUL, MAD, FRC */GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t, struct prog_instruction* inst, void* unused){ if (inst->Opcode != OPCODE_COS && inst->Opcode != OPCODE_SIN && inst->Opcode != OPCODE_SCS) return GL_FALSE; GLuint constants[2]; GLuint tempreg = radeonFindFreeTemporary(t); sincos_constants(t, constants); if (inst->Opcode == OPCODE_COS) { // MAD tmp.x, src, 
1/(2*PI), 0.75 // FRC tmp.x, tmp.x // MAD tmp.z, tmp.x, 2*PI, -PI emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)); emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); sin_approx(t, inst->DstReg, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), constants); } else if (inst->Opcode == OPCODE_SIN) { emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)); emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W)); emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W), swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); sin_approx(t, inst->DstReg, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), 
SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), constants); } else { emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)); emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY), srcreg(PROGRAM_TEMPORARY, tempreg)); emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY), srcreg(PROGRAM_TEMPORARY, tempreg), swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W), negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z))); struct prog_dst_register dst = inst->DstReg; dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X; sin_approx(t, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), constants); dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y; sin_approx(t, dst, swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), constants); } return GL_TRUE;}/** * Transform the trigonometric functions COS, SIN, and SCS * to include pre-scaling by 1/(2*PI) and taking the fractional * part, so that the input to COS and SIN is always in the range [0,1). * SCS is replaced by one COS and one SIN instruction. * * @warning This transformation implicitly changes the semantics of SIN and COS! 
*/
GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
	struct prog_instruction* inst,
	void* unused)
{
	static const GLfloat RCP_2PI[] = { 0.15915494309189535 };

	/* Only COS, SIN and SCS are handled; everything else is left alone. */
	switch (inst->Opcode) {
	case OPCODE_COS:
	case OPCODE_SIN:
	case OPCODE_SCS:
		break;
	default:
		return GL_FALSE;
	}

	GLuint scratch = radeonFindFreeTemporary(t);
	GLuint rcp_swizzle;
	GLuint rcp_index = _mesa_add_unnamed_constant(t->Program->Parameters,
		RCP_2PI, 1, &rcp_swizzle);

	/*
	 * MUL scratch.w, src.x, 1/(2*PI)
	 * FRC scratch.w, scratch
	 */
	emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(scratch, WRITEMASK_W),
		swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
		srcregswz(PROGRAM_CONSTANT, rcp_index, rcp_swizzle));
	emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(scratch, WRITEMASK_W),
		srcreg(PROGRAM_TEMPORARY, scratch));

	if (inst->Opcode != OPCODE_SCS) {
		/* COS and SIN are re-emitted under their own opcode on the scaled input. */
		emit1(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
			srcregswz(PROGRAM_TEMPORARY, scratch, SWIZZLE_WWWW));
		return GL_TRUE;
	}

	/* SCS: one COS for the X channel and one SIN for the Y channel, if written. */
	struct prog_dst_register channel = inst->DstReg;

	if (inst->DstReg.WriteMask & WRITEMASK_X) {
		channel.WriteMask = WRITEMASK_X;
		emit1(t->Program, OPCODE_COS, inst->SaturateMode, channel,
			srcregswz(PROGRAM_TEMPORARY, scratch, SWIZZLE_WWWW));
	}
	if (inst->DstReg.WriteMask & WRITEMASK_Y) {
		channel.WriteMask = WRITEMASK_Y;
		emit1(t->Program, OPCODE_SIN, inst->SaturateMode, channel,
			srcregswz(PROGRAM_TEMPORARY, scratch, SWIZZLE_WWWW));
	}

	return GL_TRUE;
}

/**
 * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
 * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
 * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
 *
 * @warning This explicitly changes the form of DDX and DDY!
*/
GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
	struct prog_instruction* inst,
	void* unused)
{
	struct prog_src_register minus_one;

	/* Anything other than DDX/DDY is not ours to rewrite. */
	switch (inst->Opcode) {
	case OPCODE_DDX:
	case OPCODE_DDY:
		break;
	default:
		return GL_FALSE;
	}

	/*
	 * Second operand: start from SrcReg[1], select the constant ONE on
	 * every channel and negate all four channels.
	 */
	minus_one = inst->SrcReg[1];
	minus_one.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE);
	minus_one.NegateBase = NEGATE_XYZW;

	/* Same opcode, now in two-operand form. */
	emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
		inst->SrcReg[0], minus_one);

	return GL_TRUE;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -