⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 radeon_program_pair.c

📁 Mesa is an open-source implementation of the OpenGL specification - a system for rendering interacti
💻 C
📖 第 1 页 / 共 2 页
字号:
/*
 * Copyright (C) 2008 Nicolai Haehnle.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * @file
 *
 * Perform temporary register allocation and attempt to pair off instructions
 * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
 * vs. ALU instruction scheduling.
 */

#include "radeon_program_pair.h"

#include "radeon_context.h"

#include "shader/prog_print.h"

#include <stddef.h>

/**
 * Report a problem via _mesa_problem and mark the pairing state as failed.
 *
 * The macro expects a variable named @c s with @c Ctx and @c Error members
 * to be in scope at the point of use. It appends a newline to @p fmt itself,
 * so format strings passed in must not end with one.
 */
#define error(fmt, args...) do { \
	_mesa_problem(s->Ctx, "%s::%s(): " fmt "\n",	\
		__FILE__, __FUNCTION__, ##args);	\
	s->Error = GL_TRUE;				\
} while(0)

/**
 * Per-instruction scheduling information, stored in parallel to the
 * program's instruction array.
 */
struct pair_state_instruction {
	GLuint IsTex:1; /**< Is a texture instruction */
	GLuint NeedRGB:1; /**< Needs the RGB ALU */
	GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
	GLuint IsTranscendent:1; /**< Is a special transcendent instruction */

	/**
	 * Number of (read and write) dependencies that must be resolved before
	 * this instruction can be scheduled.
	 */
	GLuint NumDependencies:5;

	/**
	 * Next instruction in the linked list of ready instructions.
	 */
	struct pair_state_instruction *NextReady;

	/**
	 * Values that this instruction writes
	 */
	struct reg_value *Values[4];
};


/**
 * Used to keep track of which instructions read a value.
 */
struct reg_value_reader {
	GLuint IP; /**< IP of the instruction that performs this access */
	struct reg_value_reader *Next;
};

/**
 * Used to keep track which values are stored in each component of a
 * PROGRAM_TEMPORARY.
 */
struct reg_value {
	GLuint IP; /**< IP of the instruction that writes this value */
	struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */

	/**
	 * Unordered linked list of instructions that read from this value.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is calculated during @ref scan_instructions
	 * and continually decremented during code emission.
	 * When this count reaches zero, the instruction that writes the @ref Next value
	 * can be scheduled.
	 */
	GLuint NumReaders;
};

/**
 * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
 * to the proper hardware temporary.
 */
struct pair_register_translation {
	GLuint Allocated:1;
	GLuint HwIndex:8;
	GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */

	/**
	 * Notes the value that is currently contained in each component
	 * (only used for PROGRAM_TEMPORARY registers).
	 */
	struct reg_value *Value[4];
};

/**
 * Complete working state of one pairing/scheduling run.
 */
struct pair_state {
	GLcontext *Ctx;
	struct gl_program *Program;
	const struct radeon_pair_handler *Handler;
	GLboolean Error;
	GLboolean Debug;
	GLboolean Verbose;
	void *UserData;

	/**
	 * Translate Mesa registers to hardware registers
	 */
	struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
	struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];

	/**
	 * Derived information about program instructions.
	 */
	struct pair_state_instruction *Instructions;

	struct {
		GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
	} HwTemps[128];

	/**
	 * Linked list of instructions that can be scheduled right now,
	 * based on which ALU/TEX resources they require.
	 */
	struct pair_state_instruction *ReadyFullALU;
	struct pair_state_instruction *ReadyRGB;
	struct pair_state_instruction *ReadyAlpha;
	struct pair_state_instruction *ReadyTEX;

	/**
	 * Pool of @ref reg_value structures for fast allocation.
	 */
	struct reg_value *ValuePool;
	GLuint ValuePoolUsed;
	struct reg_value_reader *ReaderPool;
	GLuint ReaderPoolUsed;
};


/**
 * Look up the translation record of a Mesa register.
 *
 * @return the record for PROGRAM_TEMPORARY and PROGRAM_INPUT registers,
 * NULL for every other register file. @p index is not range-checked.
 */
static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index)
{
	switch(file) {
	case PROGRAM_TEMPORARY: return &s->Temps[index];
	case PROGRAM_INPUT: return &s->Inputs[index];
	default: return NULL;
	}
}

/**
 * Bind the Mesa register file[index] to the hardware temporary hwindex.
 *
 * The hardware temporary inherits the reference count accumulated on the
 * Mesa register. Callers must pass a register for which get_register()
 * returns a record; the record is dereferenced without a NULL check.
 */
static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex)
{
	struct pair_register_translation *t = get_register(s, file, index);
	ASSERT(!s->HwTemps[hwindex].RefCount);
	ASSERT(!t->Allocated);
	s->HwTemps[hwindex].RefCount = t->RefCount;
	t->Allocated = 1;
	t->HwIndex = hwindex;
}

/**
 * Return the hardware temporary assigned to the Mesa register file[index],
 * assigning the lowest-numbered hardware temporary with a zero reference
 * count if the register has none yet.
 *
 * @return the hardware index; 0 with s->Error set when the register file
 * has no translation record or when no hardware temporary is free.
 */
static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
{
	GLuint hwindex;

	struct pair_register_translation *t = get_register(s, file, index);
	if (!t) {
		/* Flag the failure like the out-of-temporaries path below does,
		 * so the caller does not silently keep going with register 0. */
		error("Invalid register %i[%i]", file, index);
		return 0;
	}

	if (t->Allocated)
		return t->HwIndex;

	for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex)
		if (!s->HwTemps[hwindex].RefCount)
			break;

	if (hwindex >= s->Handler->MaxHwTemps) {
		error("Ran out of hardware temporaries");
		return 0;
	}

	alloc_hw_reg(s, file, index, hwindex);
	return hwindex;
}

/**
 * Drop one reference from a hardware temporary.
 *
 * Reports an error instead of underflowing when the count is already zero.
 */
static void deref_hw_reg(struct pair_state *s, GLuint hwindex)
{
	if (!s->HwTemps[hwindex].RefCount) {
		error("Hwindex %i refcount error", hwindex);
		return;
	}

	s->HwTemps[hwindex].RefCount--;
}

/**
 * Push an instruction onto the front of a ready list.
 */
static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst)
{
	pairinst->NextReady = *list;
	*list = pairinst;
}

/**
 * The instruction at the given IP has become ready. Link it into the ready
 * instructions.
 *
 * Texture instructions go to ReadyTEX; ALU instructions go to ReadyRGB,
 * ReadyAlpha or ReadyFullALU depending on which ALUs they need.
 */
static void instruction_ready(struct pair_state *s, int ip)
{
	struct pair_state_instruction *pairinst = s->Instructions + ip;

	if (s->Verbose)
		_mesa_printf("instruction_ready(%i)\n", ip);

	if (pairinst->IsTex)
		add_pairinst_to_list(&s->ReadyTEX, pairinst);
	else if (!pairinst->NeedAlpha)
		add_pairinst_to_list(&s->ReadyRGB, pairinst);
	else if (!pairinst->NeedRGB)
		add_pairinst_to_list(&s->ReadyAlpha, pairinst);
	else
		add_pairinst_to_list(&s->ReadyFullALU, pairinst);
}


/**
 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
 * and reverse the order of arguments for CMP.
 *
 * ADD a, b becomes MAD a, 1, b; MOV a becomes MAD a, 1, 0; MUL a, b becomes
 * MAD a, b, 0. The constants are expressed as PROGRAM_BUILTIN sources with
 * SWIZZLE_1111 / SWIZZLE_0000. @p s is currently unused.
 */
static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
{
	struct prog_src_register tmp;

	switch(inst->Opcode) {
	case OPCODE_ADD:
		inst->SrcReg[2] = inst->SrcReg[1];
		inst->SrcReg[1].File = PROGRAM_BUILTIN;
		inst->SrcReg[1].Swizzle = SWIZZLE_1111;
		/* The old second operand's negation must not apply to the
		 * constant that replaces it. */
		inst->SrcReg[1].NegateBase = 0;
		inst->SrcReg[1].NegateAbs = 0;
		inst->Opcode = OPCODE_MAD;
		break;
	case OPCODE_CMP:
		tmp = inst->SrcReg[2];
		inst->SrcReg[2] = inst->SrcReg[0];
		inst->SrcReg[0] = tmp;
		break;
	case OPCODE_MOV:
		/* AMD say we should use CMP.
		 * However, when we transform
		 *  KIL -r0;
		 * into
		 *  CMP tmp, -r0, -r0, 0;
		 *  KIL tmp;
		 * we get incorrect behaviour on R500 when r0 == 0.0.
		 * It appears that the R500 KIL hardware treats -0.0 as less
		 * than zero.
		 */
		inst->SrcReg[1].File = PROGRAM_BUILTIN;
		inst->SrcReg[1].Swizzle = SWIZZLE_1111;
		inst->SrcReg[2].File = PROGRAM_BUILTIN;
		inst->SrcReg[2].Swizzle = SWIZZLE_0000;
		inst->Opcode = OPCODE_MAD;
		break;
	case OPCODE_MUL:
		inst->SrcReg[2].File = PROGRAM_BUILTIN;
		inst->SrcReg[2].Swizzle = SWIZZLE_0000;
		inst->Opcode = OPCODE_MAD;
		break;
	default:
		/* nothing to do */
		break;
	}
}


/**
 * Classify an instruction according to which ALUs etc. it needs
 *
 * NeedRGB / NeedAlpha start out from the destination write mask and are
 * then widened per opcode. IsTex and IsTranscendent are only ever set here,
 * never cleared (presumably @p pairinst arrives zero-initialized; confirm
 * against the allocation site). Unknown opcodes are reported via error().
 */
static void classify_instruction(struct pair_state *s,
	struct prog_instruction *inst, struct pair_state_instruction *pairinst)
{
	pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
	pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;

	switch(inst->Opcode) {
	case OPCODE_ADD:
	case OPCODE_CMP:
	case OPCODE_DDX:
	case OPCODE_DDY:
	case OPCODE_FRC:
	case OPCODE_MAD:
	case OPCODE_MAX:
	case OPCODE_MIN:
	case OPCODE_MOV:
	case OPCODE_MUL:
		break;
	case OPCODE_COS:
	case OPCODE_EX2:
	case OPCODE_LG2:
	case OPCODE_RCP:
	case OPCODE_RSQ:
	case OPCODE_SIN:
		pairinst->IsTranscendent = 1;
		pairinst->NeedAlpha = 1;
		break;
	case OPCODE_DP4:
		pairinst->NeedAlpha = 1;
		/* fall through */
	case OPCODE_DP3:
		pairinst->NeedRGB = 1;
		break;
	case OPCODE_KIL:
	case OPCODE_TEX:
	case OPCODE_TXB:
	case OPCODE_TXP:
	case OPCODE_END:
		pairinst->IsTex = 1;
		break;
	default:
		/* error() appends the newline itself */
		error("Unknown opcode %d", inst->Opcode);
		break;
	}
}


/**
 * Count which (input, temporary) register is read and written how often,
 * and scan the instruction stream to find dependencies.
 *
 * Walks the program up to (not including) OPCODE_END. Each instruction is
 * rewritten by final_rewrite() and classified, its register references are
 * counted, and for PROGRAM_TEMPORARY components reader and value records are
 * taken from s->ReaderPool / s->ValuePool (no capacity check is done here).
 * Instructions that end up without dependencies are queued as ready.
 */
static void scan_instructions(struct pair_state *s)
{
	struct prog_instruction *inst;
	struct pair_state_instruction *pairinst;
	GLuint ip;

	for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
	    inst->Opcode != OPCODE_END;
	    ++inst, ++pairinst, ++ip) {
		final_rewrite(s, inst);
		classify_instruction(s, inst, pairinst);

		int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
		int j;
		for(j = 0; j < nsrc; j++) {
			struct pair_register_translation *t =
				get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
			if (!t)
				continue;

			t->RefCount++;

			if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
				int i;
				for(i = 0; i < 4; ++i) {
					GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
					if (swz >= 4)
						continue; /* constant or NIL swizzle */
					if (!t->Value[swz])
						continue; /* this is an undefined read */

					/* Do not add a dependency if this instruction
					 * also rewrites the value. The code below adds
					 * a dependency for the DstReg, which is a superset
					 * of the SrcReg dependency. */
					if (inst->DstReg.File == PROGRAM_TEMPORARY &&
					    inst->DstReg.Index == inst->SrcReg[j].Index &&
					    GET_BIT(inst->DstReg.WriteMask, swz))
						continue;

					/* Record this instruction as a reader of the
					 * value currently held by the component. */
					struct reg_value_reader *r = &s->ReaderPool[s->ReaderPoolUsed++];
					pairinst->NumDependencies++;
					t->Value[swz]->NumReaders++;
					r->IP = ip;
					r->Next = t->Value[swz]->Readers;
					t->Value[swz]->Readers = r;
				}
			}
		}

		int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
		if (ndst) {
			struct pair_register_translation *dst =
				get_register(s, inst->DstReg.File, inst->DstReg.Index);
			if (dst) {
				dst->RefCount++;

				if (inst->DstReg.File == PROGRAM_TEMPORARY) {
					int chan;
					for(chan = 0; chan < 4; ++chan) {
						if (!GET_BIT(inst->DstReg.WriteMask, chan))
							continue;

						struct reg_value *v = &s->ValuePool[s->ValuePoolUsed++];
						v->IP = ip;
						if (dst->Value[chan]) {
							/* Overwriting an earlier value: this
							 * write depends on it, and the old value
							 * is chained to its successor. */
							pairinst->NumDependencies++;
							dst->Value[chan]->Next = v;
						}
						dst->Value[chan] = v;
						pairinst->Values[chan] = v;
					}
				}
			}
		}

		if (s->Verbose)
			_mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);

		if (!pairinst->NumDependencies)
			instruction_ready(s, ip);
	}

	/* Clear the PROGRAM_TEMPORARY state */
	int temp, comp;
	for(temp = 0; temp < MAX_PROGRAM_TEMPS; ++temp) {
		for(comp = 0; comp < 4; ++comp)
			s->Temps[temp].Value[comp] = NULL;
	}
}


/**
 * Reserve hardware temporary registers for the program inputs.
 *
 * Hardware indices are handed out consecutively from 0 in this order:
 * texcoords, fragment position, primary colour, secondary colour. Any other
 * input that the program reads is reported via error().
 *
 * @note This allocation is performed explicitly, because the order of inputs
 * is determined by the RS hardware.
 */
static void allocate_input_registers(struct pair_state *s)
{
	GLuint InputsRead = s->Program->InputsRead;
	int i;
	GLuint hwindex = 0;

	/* Texcoords come first */
	for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
		if (InputsRead & (FRAG_BIT_TEX0 << i))
			alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
	}
	InputsRead &= ~FRAG_BITS_TEX_ANY;

	/* fragment position treated as a texcoord */
	if (InputsRead & FRAG_BIT_WPOS)
		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
	InputsRead &= ~FRAG_BIT_WPOS;

	/* Then primary colour */
	if (InputsRead & FRAG_BIT_COL0)
		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
	InputsRead &= ~FRAG_BIT_COL0;

	/* Secondary color */
	if (InputsRead & FRAG_BIT_COL1)
		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
	InputsRead &= ~FRAG_BIT_COL1;

	/* Anything else */
	if (InputsRead)
		error("Don't know how to handle inputs 0x%x", InputsRead);
}


/**
 * Resolve one dependency of the instruction at @p ip and queue the
 * instruction as ready once its last dependency is gone.
 */
static void decrement_dependencies(struct pair_state *s, int ip)
{
	struct pair_state_instruction *pairinst = s->Instructions + ip;
	ASSERT(pairinst->NumDependencies > 0);
	if (!--pairinst->NumDependencies)
		instruction_ready(s, ip);
}

/**
 * Update the dependency tracking state based on what the instruction
 * at the given IP does.
 */
static void commit_instruction(struct pair_state *s, int ip)
{
	struct prog_instruction *inst = s->Program->Instructions + ip;
	struct pair_state_instruction *pairinst = s->Instructions + ip;

	if (s->Verbose)
		_mesa_printf("commit_instruction(%i)\n", ip);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -