📄 radeon_program_pair.c
字号:
/*
 * Copyright (C) 2008 Nicolai Haehnle.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * @file
 *
 * Perform temporary register allocation and attempt to pair off instructions
 * in RGB and Alpha pairs. Also attempts to optimize the TEX instruction
 * vs. ALU instruction scheduling.
 */

#include "radeon_program_pair.h"

#include "radeon_context.h"

#include "shader/prog_print.h"

/**
 * Report a problem through _mesa_problem (prefixed with file and function
 * name) and flag the current pair_state as failed.
 *
 * The macro expands to code that uses a variable named @c s, so it can only
 * be used where a <tt>struct pair_state *s</tt> is in scope. A newline is
 * appended to @p fmt by the macro itself; callers must not add their own.
 */
#define error(fmt, args...) do { \
	_mesa_problem(s->Ctx, "%s::%s(): " fmt "\n", \
		__FILE__, __FUNCTION__, ##args); \
	s->Error = GL_TRUE; \
} while(0)

/**
 * Per-instruction scheduling information, stored in an array that runs
 * parallel to the program's instruction array (both are indexed by IP).
 */
struct pair_state_instruction {
	GLuint IsTex:1; /**< Is a texture instruction */
	GLuint NeedRGB:1; /**< Needs the RGB ALU */
	GLuint NeedAlpha:1; /**< Needs the Alpha ALU */
	GLuint IsTranscendent:1; /**< Is a special transcendent instruction */

	/**
	 * Number of (read and write) dependencies that must be resolved before
	 * this instruction can be scheduled.
	 */
	GLuint NumDependencies:5;

	/**
	 * Next instruction in the linked list of ready instructions.
	 */
	struct pair_state_instruction *NextReady;

	/**
	 * Values that this instruction writes
	 */
	struct reg_value *Values[4];
};


/**
 * Used to keep track of which instructions read a value.
 */
struct reg_value_reader {
	GLuint IP; /**< IP of the instruction that performs this access */
	struct reg_value_reader *Next;
};

/**
 * Used to keep track which values are stored in each component of a
 * PROGRAM_TEMPORARY.
 */
struct reg_value {
	GLuint IP; /**< IP of the instruction that writes this value */
	struct reg_value *Next; /**< Pointer to the next value to be written to the same PROGRAM_TEMPORARY component */

	/**
	 * Unordered linked list of instructions that read from this value.
	 */
	struct reg_value_reader *Readers;

	/**
	 * Number of readers of this value. This is calculated during @ref scan_instructions
	 * and continually decremented during code emission.
	 * When this count reaches zero, the instruction that writes the @ref Next value
	 * can be scheduled.
	 */
	GLuint NumReaders;
};

/**
 * Used to translate a PROGRAM_INPUT or PROGRAM_TEMPORARY Mesa register
 * to the proper hardware temporary.
 */
struct pair_register_translation {
	GLuint Allocated:1;
	GLuint HwIndex:8;
	GLuint RefCount:23; /**< # of times this occurs in an unscheduled instruction SrcReg or DstReg */

	/**
	 * Notes the value that is currently contained in each component
	 * (only used for PROGRAM_TEMPORARY registers).
	 */
	struct reg_value *Value[4];
};

/**
 * Complete working state of the pairing / register allocation pass.
 */
struct pair_state {
	GLcontext *Ctx;
	struct gl_program *Program;
	const struct radeon_pair_handler *Handler;
	GLboolean Error;
	GLboolean Debug;
	GLboolean Verbose;
	void *UserData;

	/**
	 * Translate Mesa registers to hardware registers
	 */
	struct pair_register_translation Inputs[FRAG_ATTRIB_MAX];
	struct pair_register_translation Temps[MAX_PROGRAM_TEMPS];

	/**
	 * Derived information about program instructions.
	 */
	struct pair_state_instruction *Instructions;

	struct {
		GLuint RefCount; /**< # of times this occurs in an unscheduled SrcReg or DstReg */
	} HwTemps[128];

	/**
	 * Linked list of instructions that can be scheduled right now,
	 * based on which ALU/TEX resources they require.
	 */
	struct pair_state_instruction *ReadyFullALU;
	struct pair_state_instruction *ReadyRGB;
	struct pair_state_instruction *ReadyAlpha;
	struct pair_state_instruction *ReadyTEX;

	/**
	 * Pool of @ref reg_value structures for fast allocation.
	 */
	struct reg_value *ValuePool;
	GLuint ValuePoolUsed;
	struct reg_value_reader *ReaderPool;
	GLuint ReaderPoolUsed;
};


/**
 * Look up the translation record of a Mesa register.
 *
 * @return a pointer into s->Temps for PROGRAM_TEMPORARY, into s->Inputs for
 * PROGRAM_INPUT, and a null pointer for every other register file.
 *
 * @note @p index is not range-checked here.
 */
static struct pair_register_translation *get_register(struct pair_state *s, GLuint file, GLuint index)
{
	switch(file) {
	case PROGRAM_TEMPORARY: return &s->Temps[index];
	case PROGRAM_INPUT: return &s->Inputs[index];
	default: return 0;
	}
}

/**
 * Bind the Mesa register (file, index) to the hardware temporary @p hwindex.
 *
 * The hardware temporary inherits the register's current RefCount, and the
 * register is marked as allocated. Both the hardware temporary being unused
 * and the register being unallocated are asserted.
 *
 * NOTE(review): the result of get_register is dereferenced without a null
 * check, so callers must only pass PROGRAM_TEMPORARY or PROGRAM_INPUT.
 */
static void alloc_hw_reg(struct pair_state *s, GLuint file, GLuint index, GLuint hwindex)
{
	struct pair_register_translation *t = get_register(s, file, index);
	ASSERT(!s->HwTemps[hwindex].RefCount);
	ASSERT(!t->Allocated);
	s->HwTemps[hwindex].RefCount = t->RefCount;
	t->Allocated = 1;
	t->HwIndex = hwindex;
}

/**
 * Return the hardware temporary index backing the Mesa register
 * (file, index), allocating one on first use.
 *
 * Allocation picks the lowest-numbered hardware temporary below
 * Handler->MaxHwTemps whose RefCount is zero.
 *
 * @return the hardware index; 0 if the register file cannot be translated
 * or if no free hardware temporary is left (the latter also sets s->Error).
 *
 * NOTE(review): the untranslatable-register path reports through
 * _mesa_problem directly and therefore does not set s->Error, unlike the
 * out-of-temporaries path. Confirm whether that is intentional.
 */
static GLuint get_hw_reg(struct pair_state *s, GLuint file, GLuint index)
{
	GLuint hwindex;

	struct pair_register_translation *t = get_register(s, file, index);
	if (!t) {
		_mesa_problem(s->Ctx, "get_hw_reg: %i[%i]\n", file, index);
		return 0;
	}

	if (t->Allocated)
		return t->HwIndex;

	for(hwindex = 0; hwindex < s->Handler->MaxHwTemps; ++hwindex)
		if (!s->HwTemps[hwindex].RefCount)
			break;

	if (hwindex >= s->Handler->MaxHwTemps) {
		error("Ran out of hardware temporaries");
		return 0;
	}

	alloc_hw_reg(s, file, index, hwindex);
	return hwindex;
}


/**
 * Drop one reference from the hardware temporary @p hwindex.
 * Flags an error (and leaves the count untouched) if it is already zero.
 */
static void deref_hw_reg(struct pair_state *s, GLuint hwindex)
{
	if (!s->HwTemps[hwindex].RefCount) {
		error("Hwindex %i refcount error", hwindex);
		return;
	}

	s->HwTemps[hwindex].RefCount--;
}

/**
 * Push @p pairinst onto the front of the singly linked ready list @p list.
 */
static void add_pairinst_to_list(struct pair_state_instruction **list, struct pair_state_instruction *pairinst)
{
	pairinst->NextReady = *list;
	*list = pairinst;
}

/**
 * The instruction at the given IP has become ready. Link it into the ready
 * instructions.
 *
 * The list is chosen from the classification flags: TEX instructions go to
 * ReadyTEX, instructions that do not need the Alpha ALU to ReadyRGB,
 * instructions that do not need the RGB ALU to ReadyAlpha, and everything
 * else to ReadyFullALU.
 */
static void instruction_ready(struct pair_state *s, int ip)
{
	struct pair_state_instruction *pairinst = s->Instructions + ip;

	if (s->Verbose)
		_mesa_printf("instruction_ready(%i)\n", ip);

	if (pairinst->IsTex)
		add_pairinst_to_list(&s->ReadyTEX, pairinst);
	else if (!pairinst->NeedAlpha)
		add_pairinst_to_list(&s->ReadyRGB, pairinst);
	else if (!pairinst->NeedRGB)
		add_pairinst_to_list(&s->ReadyAlpha, pairinst);
	else
		add_pairinst_to_list(&s->ReadyFullALU, pairinst);
}


/**
 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
 * and reverse the order of arguments for CMP.
 *
 * ADD, MOV and MUL all become OPCODE_MAD; the operands they lack are filled
 * in with PROGRAM_BUILTIN sources using SWIZZLE_1111 or SWIZZLE_0000
 * (presumably the constants one and zero):
 *   ADD a, b  ->  MAD a, 1111, b
 *   MOV a     ->  MAD a, 1111, 0000
 *   MUL a, b  ->  MAD a, b, 0000
 * The instruction is modified in place.
 */
static void final_rewrite(struct pair_state *s, struct prog_instruction *inst)
{
	struct prog_src_register tmp;

	switch(inst->Opcode) {
	case OPCODE_ADD:
		inst->SrcReg[2] = inst->SrcReg[1];
		inst->SrcReg[1].File = PROGRAM_BUILTIN;
		inst->SrcReg[1].Swizzle = SWIZZLE_1111;
		inst->SrcReg[1].NegateBase = 0;
		inst->SrcReg[1].NegateAbs = 0;
		inst->Opcode = OPCODE_MAD;
		break;
	case OPCODE_CMP:
		/* Swap the first and the third source operand */
		tmp = inst->SrcReg[2];
		inst->SrcReg[2] = inst->SrcReg[0];
		inst->SrcReg[0] = tmp;
		break;
	case OPCODE_MOV:
		/* AMD say we should use CMP.
		 * However, when we transform
		 *  KIL -r0;
		 * into
		 *  CMP tmp, -r0, -r0, 0;
		 *  KIL tmp;
		 * we get incorrect behaviour on R500 when r0 == 0.0.
		 * It appears that the R500 KIL hardware treats -0.0 as less
		 * than zero.
		 */
		inst->SrcReg[1].File = PROGRAM_BUILTIN;
		inst->SrcReg[1].Swizzle = SWIZZLE_1111;
		inst->SrcReg[2].File = PROGRAM_BUILTIN;
		inst->SrcReg[2].Swizzle = SWIZZLE_0000;
		inst->Opcode = OPCODE_MAD;
		break;
	case OPCODE_MUL:
		inst->SrcReg[2].File = PROGRAM_BUILTIN;
		inst->SrcReg[2].Swizzle = SWIZZLE_0000;
		inst->Opcode = OPCODE_MAD;
		break;
	default:
		/* nothing to do */
		break;
	}
}


/**
 * Classify an instruction according to which ALUs etc. it needs
 *
 * NeedRGB / NeedAlpha start out from the destination write mask (XYZ and W
 * respectively) and are then widened per opcode: transcendent opcodes always
 * need the Alpha ALU, DP3 always needs the RGB ALU and DP4 needs both.
 * KIL, TEX, TXB, TXP and END are flagged as texture instructions.
 * Any other opcode that is not a plain ALU opcode raises an error.
 */
static void classify_instruction(struct pair_state *s,
	struct prog_instruction *inst, struct pair_state_instruction *pairinst)
{
	pairinst->NeedRGB = (inst->DstReg.WriteMask & WRITEMASK_XYZ) ? 1 : 0;
	pairinst->NeedAlpha = (inst->DstReg.WriteMask & WRITEMASK_W) ? 1 : 0;

	switch(inst->Opcode) {
	case OPCODE_ADD:
	case OPCODE_CMP:
	case OPCODE_DDX:
	case OPCODE_DDY:
	case OPCODE_FRC:
	case OPCODE_MAD:
	case OPCODE_MAX:
	case OPCODE_MIN:
	case OPCODE_MOV:
	case OPCODE_MUL:
		break;
	case OPCODE_COS:
	case OPCODE_EX2:
	case OPCODE_LG2:
	case OPCODE_RCP:
	case OPCODE_RSQ:
	case OPCODE_SIN:
		pairinst->IsTranscendent = 1;
		pairinst->NeedAlpha = 1;
		break;
	case OPCODE_DP4:
		pairinst->NeedAlpha = 1;
		/* fall through */
	case OPCODE_DP3:
		pairinst->NeedRGB = 1;
		break;
	case OPCODE_KIL:
	case OPCODE_TEX:
	case OPCODE_TXB:
	case OPCODE_TXP:
	case OPCODE_END:
		pairinst->IsTex = 1;
		break;
	default:
		/* error() appends the newline itself */
		error("Unknown opcode %d", inst->Opcode);
		break;
	}
}


/**
 * Count which (input, temporary) register is read and written how often,
 * and scan the instruction stream to find dependencies.
 *
 * Walks the program up to (not including) OPCODE_END. Every instruction is
 * first passed through final_rewrite and classify_instruction. Instructions
 * that end up with no dependencies are linked into the ready lists
 * immediately. The per-component "current value" pointers of all
 * temporaries are reset to null before returning.
 *
 * NOTE(review): entries are taken from ReaderPool and ValuePool without a
 * capacity check; presumably the caller sizes both pools for the worst
 * case. Confirm against the code that allocates them.
 */
static void scan_instructions(struct pair_state *s)
{
	struct prog_instruction *inst;
	struct pair_state_instruction *pairinst;
	GLuint ip;

	for(inst = s->Program->Instructions, pairinst = s->Instructions, ip = 0;
	    inst->Opcode != OPCODE_END;
	    ++inst, ++pairinst, ++ip) {
		final_rewrite(s, inst);
		classify_instruction(s, inst, pairinst);

		int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
		int j;
		for(j = 0; j < nsrc; j++) {
			struct pair_register_translation *t =
				get_register(s, inst->SrcReg[j].File, inst->SrcReg[j].Index);
			if (!t)
				continue;

			t->RefCount++;

			if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
				int i;
				for(i = 0; i < 4; ++i) {
					GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, i);
					if (swz >= 4)
						continue; /* constant or NIL swizzle */
					if (!t->Value[swz])
						continue; /* this is an undefined read */

					/* Do not add a dependency if this instruction
					 * also rewrites the value. The code below adds
					 * a dependency for the DstReg, which is a superset
					 * of the SrcReg dependency. */
					if (inst->DstReg.File == PROGRAM_TEMPORARY &&
					    inst->DstReg.Index == inst->SrcReg[j].Index &&
					    GET_BIT(inst->DstReg.WriteMask, swz))
						continue;

					/* Register this instruction as a reader of the value */
					struct reg_value_reader* r = &s->ReaderPool[s->ReaderPoolUsed++];
					pairinst->NumDependencies++;
					t->Value[swz]->NumReaders++;
					r->IP = ip;
					r->Next = t->Value[swz]->Readers;
					t->Value[swz]->Readers = r;
				}
			}
		}

		int ndst = _mesa_num_inst_dst_regs(inst->Opcode);
		if (ndst) {
			struct pair_register_translation *t =
				get_register(s, inst->DstReg.File, inst->DstReg.Index);
			if (t) {
				t->RefCount++;

				if (inst->DstReg.File == PROGRAM_TEMPORARY) {
					int j;
					for(j = 0; j < 4; ++j) {
						if (!GET_BIT(inst->DstReg.WriteMask, j))
							continue;

						/* A new value is born in this component; if it
						 * overwrites an earlier one, chain it behind that
						 * value and depend on it. */
						struct reg_value* v = &s->ValuePool[s->ValuePoolUsed++];
						v->IP = ip;
						if (t->Value[j]) {
							pairinst->NumDependencies++;
							t->Value[j]->Next = v;
						}
						t->Value[j] = v;
						pairinst->Values[j] = v;
					}
				}
			}
		}

		if (s->Verbose)
			_mesa_printf("scan(%i): NumDeps = %i\n", ip, pairinst->NumDependencies);

		if (!pairinst->NumDependencies)
			instruction_ready(s, ip);
	}

	/* Clear the PROGRAM_TEMPORARY state */
	int i, j;
	for(i = 0; i < MAX_PROGRAM_TEMPS; ++i) {
		for(j = 0; j < 4; ++j)
			s->Temps[i].Value[j] = 0;
	}
}


/**
 * Reserve hardware temporary registers for the program inputs.
 *
 * Hardware indices are handed out consecutively from 0 in this order:
 * texture coordinates, fragment position, primary colour, secondary colour.
 * Any other input that the program reads raises an error.
 *
 * @note This allocation is performed explicitly, because the order of inputs
 * is determined by the RS hardware.
 */
static void allocate_input_registers(struct pair_state *s)
{
	GLuint InputsRead = s->Program->InputsRead;
	int i;
	GLuint hwindex = 0;

	/* Texcoords come first */
	for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
		if (InputsRead & (FRAG_BIT_TEX0 << i))
			alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
	}
	InputsRead &= ~FRAG_BITS_TEX_ANY;

	/* fragment position treated as a texcoord */
	if (InputsRead & FRAG_BIT_WPOS)
		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
	InputsRead &= ~FRAG_BIT_WPOS;

	/* Then primary colour */
	if (InputsRead & FRAG_BIT_COL0)
		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
	InputsRead &= ~FRAG_BIT_COL0;

	/* Secondary color */
	if (InputsRead & FRAG_BIT_COL1)
		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
	InputsRead &= ~FRAG_BIT_COL1;

	/* Anything else */
	if (InputsRead)
		error("Don't know how to handle inputs 0x%x", InputsRead); /* error() appends the newline itself */
}


/**
 * Resolve one dependency of the instruction at @p ip; once none are left,
 * the instruction is linked into the ready lists.
 */
static void decrement_dependencies(struct pair_state *s, int ip)
{
	struct pair_state_instruction *pairinst = s->Instructions + ip;
	ASSERT(pairinst->NumDependencies > 0);
	if (!--pairinst->NumDependencies)
		instruction_ready(s, ip);
}

/**
 * Update the dependency tracking state based on what the instruction
 * at the given IP does.
 */
static void commit_instruction(struct pair_state *s, int ip)
{
	struct prog_instruction *inst = s->Program->Instructions + ip;
	struct pair_state_instruction *pairinst = s->Instructions + ip;

	if (s->Verbose)
		_mesa_printf("commit_instruction(%i)\n", ip);
	/* NOTE(review): the remainder of this function is cut off in the text under review. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -