⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 radeon_program_alu.c

📁 Mesa is an open-source implementation of the OpenGL specification - a system for rendering interacti
💻 C
📖 第 1 页 / 共 2 页
字号:
		inst->SrcReg[1], negate(inst->SrcReg[2]));	emit3(t->Program, OPCODE_MAD, inst->SaturateMode,		inst->DstReg,		inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);}static void transform_POW(struct radeon_transform_context* t,	struct prog_instruction* inst){	int tempreg = radeonFindFreeTemporary(t);	struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);	struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);	tempdst.WriteMask = WRITEMASK_W;	tempsrc.Swizzle = SWIZZLE_WWWW;	emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));	emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));	emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);}static void transform_RSQ(struct radeon_transform_context* t,	struct prog_instruction* inst){	emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));}static void transform_SGE(struct radeon_transform_context* t,	struct prog_instruction* inst){	int tempreg = radeonFindFreeTemporary(t);	emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));	emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,		srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);}static void transform_SLT(struct radeon_transform_context* t,	struct prog_instruction* inst){	int tempreg = radeonFindFreeTemporary(t);	emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));	emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,		srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);}static void transform_SUB(struct radeon_transform_context* t,	struct prog_instruction* inst){	emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));}static void transform_SWZ(struct radeon_transform_context* t,	struct prog_instruction* inst){	emit1(t->Program, 
OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);}static void transform_XPD(struct radeon_transform_context* t,	struct prog_instruction* inst){	int tempreg = radeonFindFreeTemporary(t);	emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),		swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),		swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));	emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,		swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),		swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),		negate(srcreg(PROGRAM_TEMPORARY, tempreg)));}/** * Can be used as a transformation for @ref radeonClauseLocalTransform, * no userData necessary. * * Eliminates the following ALU instructions: *  ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD * using: *  MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP * * Transforms RSQ to Radeon's native RSQ by explicitly setting * absolute value. * * @note should be applicable to R300 and R500 fragment programs. 
*/GLboolean radeonTransformALU(struct radeon_transform_context* t,	struct prog_instruction* inst,	void* unused){	switch(inst->Opcode) {	case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;	case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;	case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;	case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;	case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;	case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;	case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;	case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;	case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;	case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;	case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;	case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;	case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;	default:		return GL_FALSE;	}}static void sincos_constants(struct radeon_transform_context* t, GLuint *constants){	static const GLfloat SinCosConsts[2][4] = {		{			1.273239545,		// 4/PI			-0.405284735,		// -4/(PI*PI)			3.141592654,		// PI			0.2225			// weight		},		{			0.75,			0.5,			0.159154943,		// 1/(2*PI)			6.283185307		// 2*PI		}	};	int i;	for(i = 0; i < 2; ++i) {		GLuint swz;		constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);		ASSERT(swz == SWIZZLE_NOOP);	}}/** * Approximate sin(x), where x is clamped to (-pi/2, pi/2). 
* * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } * MAD tmp.x, tmp.y, |src|, tmp.x * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x * MAD dest, tmp.y, weight, tmp.x */static void sin_approx(struct radeon_transform_context* t,	struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants){	GLuint tempreg = radeonFindFreeTemporary(t);	emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),		swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),		srcreg(PROGRAM_CONSTANT, constants[0]));	emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),		swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),		absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),		swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));	emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),		swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),		absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),		negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));	emit3(t->Program, OPCODE_MAD, 0, dst,		swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),		swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),		swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));}/** * Translate the trigonometric functions COS, SIN, and SCS * using only the basic instructions *  MOV, ADD, MUL, MAD, FRC */GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,	struct prog_instruction* inst,	void* unused){	if (inst->Opcode != OPCODE_COS &&	    inst->Opcode != OPCODE_SIN &&	    inst->Opcode != OPCODE_SCS)		return GL_FALSE;	GLuint constants[2];	GLuint tempreg = radeonFindFreeTemporary(t);	sincos_constants(t, constants);	if (inst->Opcode == OPCODE_COS) {		// 
MAD tmp.x, src, 1/(2*PI), 0.75		// FRC tmp.x, tmp.x		// MAD tmp.z, tmp.x, 2*PI, -PI		emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),			swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),			swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),			swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));		emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),			swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));		emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),			swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),			swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),			negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));		sin_approx(t, inst->DstReg,			swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),			constants);	} else if (inst->Opcode == OPCODE_SIN) {		emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),			swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),			swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),			swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));		emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),			swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));		emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),			swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),			swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),			negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));		sin_approx(t, 
inst->DstReg,			swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),			constants);	} else {		emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),			swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),			swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),			swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));		emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),			srcreg(PROGRAM_TEMPORARY, tempreg));		emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),			srcreg(PROGRAM_TEMPORARY, tempreg),			swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),			negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));		struct prog_dst_register dst = inst->DstReg;		dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;		sin_approx(t, dst,			swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),			constants);		dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;		sin_approx(t, dst,			swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),			constants);	}	return GL_TRUE;}/** * Transform the trigonometric functions COS, SIN, and SCS * to include pre-scaling by 1/(2*PI) and taking the fractional * part, so that the input to COS and SIN is always in the range [0,1). * SCS is replaced by one COS and one SIN instruction. * * @warning This transformation implicitly changes the semantics of SIN and COS! 
*/
GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
	struct prog_instruction* inst,
	void* unused)
{
	/* Only the three trigonometric opcodes are handled here. */
	switch (inst->Opcode) {
	case OPCODE_COS:
	case OPCODE_SIN:
	case OPCODE_SCS:
		break;
	default:
		return GL_FALSE;
	}

	static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
	GLuint scratch;
	GLuint rcp_index;
	GLuint rcp_swizzle;

	scratch = radeonFindFreeTemporary(t);
	rcp_index = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &rcp_swizzle);

	/* scratch.w = src.x * 1/(2*PI) */
	emit2(t->Program, OPCODE_MUL, 0,
		dstregtmpmask(scratch, WRITEMASK_W),
		swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
		srcregswz(PROGRAM_CONSTANT, rcp_index, rcp_swizzle));

	/* scratch.w = fractional part of scratch.w */
	emit1(t->Program, OPCODE_FRC, 0,
		dstregtmpmask(scratch, WRITEMASK_W),
		srcreg(PROGRAM_TEMPORARY, scratch));

	switch (inst->Opcode) {
	case OPCODE_COS:
		emit1(t->Program, OPCODE_COS, inst->SaturateMode,
			inst->DstReg,
			srcregswz(PROGRAM_TEMPORARY, scratch, SWIZZLE_WWWW));
		break;

	case OPCODE_SIN:
		emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
			inst->DstReg,
			srcregswz(PROGRAM_TEMPORARY, scratch, SWIZZLE_WWWW));
		break;

	default: {
		/*
		 * OPCODE_SCS (the only opcode left after the filter above):
		 * one COS for the X component and one SIN for the Y component,
		 * each emitted only if the original write mask asks for it.
		 */
		struct prog_dst_register component = inst->DstReg;

		if (inst->DstReg.WriteMask & WRITEMASK_X) {
			component.WriteMask = WRITEMASK_X;
			emit1(t->Program, OPCODE_COS, inst->SaturateMode,
				component,
				srcregswz(PROGRAM_TEMPORARY, scratch, SWIZZLE_WWWW));
		}
		if (inst->DstReg.WriteMask & WRITEMASK_Y) {
			component.WriteMask = WRITEMASK_Y;
			emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
				component,
				srcregswz(PROGRAM_TEMPORARY, scratch, SWIZZLE_WWWW));
		}
		break;
	}
	}

	return GL_TRUE;
}

/**
 * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
 * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
 * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
 *
 * @warning This explicitly changes the form of DDX and DDY!
*/
GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
	struct prog_instruction* inst,
	void* unused)
{
	/* Anything other than DDX/DDY is left for other transformations. */
	switch (inst->Opcode) {
	case OPCODE_DDX:
	case OPCODE_DDY:
		break;
	default:
		return GL_FALSE;
	}

	/*
	 * Build the second operand from the instruction's SrcReg[1] slot:
	 * every channel is swizzled to the constant one and then negated,
	 * giving -1 in all four components.
	 */
	struct prog_src_register minus_one = inst->SrcReg[1];

	minus_one.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
					  SWIZZLE_ONE, SWIZZLE_ONE);
	minus_one.NegateBase = NEGATE_XYZW;

	/* Same opcode, destination and saturate mode; now with two sources. */
	emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
		inst->SrcReg[0], minus_one);

	return GL_TRUE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -