📄 radeon_program_pair.c
字号:
if (inst->DstReg.File == PROGRAM_TEMPORARY) { struct pair_register_translation *t = &s->Temps[inst->DstReg.Index]; deref_hw_reg(s, t->HwIndex); int i; for(i = 0; i < 4; ++i) { if (!GET_BIT(inst->DstReg.WriteMask, i)) continue; t->Value[i] = pairinst->Values[i]; if (t->Value[i]->NumReaders) { struct reg_value_reader *r; for(r = pairinst->Values[i]->Readers; r; r = r->Next) decrement_dependencies(s, r->IP); } else if (t->Value[i]->Next) { /* This happens when the only reader writes * the register at the same time */ decrement_dependencies(s, t->Value[i]->Next->IP); } } } int nsrc = _mesa_num_inst_src_regs(inst->Opcode); int i; for(i = 0; i < nsrc; i++) { struct pair_register_translation *t = get_register(s, inst->SrcReg[i].File, inst->SrcReg[i].Index); if (!t) continue; deref_hw_reg(s, get_hw_reg(s, inst->SrcReg[i].File, inst->SrcReg[i].Index)); if (inst->SrcReg[i].File != PROGRAM_TEMPORARY) continue; int j; for(j = 0; j < 4; ++j) { GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); if (swz >= 4) continue; if (!t->Value[swz]) continue; /* Do not free a dependency if this instruction * also rewrites the value. See scan_instructions. */ if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == inst->SrcReg[i].Index && GET_BIT(inst->DstReg.WriteMask, swz)) continue; if (!--t->Value[swz]->NumReaders) { if (t->Value[swz]->Next) decrement_dependencies(s, t->Value[swz]->Next->IP); } } }}/** * Emit all ready texture instructions in a single block. * * Emit as a single block to (hopefully) sample many textures in parallel, * and to avoid hardware indirections on R300. * * In R500, we don't really know when the result of a texture instruction * arrives. So allocate all destinations first, to make sure they do not * arrive early and overwrite a texture coordinate we're going to use later * in the block. 
*/static void emit_all_tex(struct pair_state *s){ struct pair_state_instruction *readytex; struct pair_state_instruction *pairinst; ASSERT(s->ReadyTEX); // Don't let the ready list change under us! readytex = s->ReadyTEX; s->ReadyTEX = 0; // Allocate destination hardware registers in one block to avoid conflicts. for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { int ip = pairinst - s->Instructions; struct prog_instruction *inst = s->Program->Instructions + ip; if (inst->Opcode != OPCODE_KIL) get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); } if (s->Debug) _mesa_printf(" BEGIN_TEX\n"); if (s->Handler->BeginTexBlock) s->Error = s->Error || !s->Handler->BeginTexBlock(s->UserData); for(pairinst = readytex; pairinst; pairinst = pairinst->NextReady) { int ip = pairinst - s->Instructions; struct prog_instruction *inst = s->Program->Instructions + ip; commit_instruction(s, ip); if (inst->Opcode != OPCODE_KIL) inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); if (s->Debug) { _mesa_printf(" "); _mesa_print_instruction(inst); } s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); } if (s->Debug) _mesa_printf(" END_TEX\n");}static int alloc_pair_source(struct pair_state *s, struct radeon_pair_instruction *pair, struct prog_src_register src, GLboolean rgb, GLboolean alpha){ int candidate = -1; int candidate_quality = -1; int i; if (!rgb && !alpha) return 0; GLuint constant; GLuint index; if (src.File == PROGRAM_TEMPORARY || src.File == PROGRAM_INPUT) { constant = 0; index = get_hw_reg(s, src.File, src.Index); } else { constant = 1; s->Error |= !s->Handler->EmitConst(s->UserData, src.File, src.Index, &index); } for(i = 0; i < 3; ++i) { int q = 0; if (rgb) { if (pair->RGB.Src[i].Used) { if (pair->RGB.Src[i].Constant != constant || pair->RGB.Src[i].Index != index) continue; q++; } } if (alpha) { if (pair->Alpha.Src[i].Used) { if 
(pair->Alpha.Src[i].Constant != constant || pair->Alpha.Src[i].Index != index) continue; q++; } } if (q > candidate_quality) { candidate_quality = q; candidate = i; } } if (candidate >= 0) { if (rgb) { pair->RGB.Src[candidate].Used = 1; pair->RGB.Src[candidate].Constant = constant; pair->RGB.Src[candidate].Index = index; } if (alpha) { pair->Alpha.Src[candidate].Used = 1; pair->Alpha.Src[candidate].Constant = constant; pair->Alpha.Src[candidate].Index = index; } } return candidate;}/** * Fill the given ALU instruction's opcodes and source operands into the given pair, * if possible. */static GLboolean fill_instruction_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip){ struct pair_state_instruction *pairinst = s->Instructions + ip; struct prog_instruction *inst = s->Program->Instructions + ip; ASSERT(!pairinst->NeedRGB || pair->RGB.Opcode == OPCODE_NOP); ASSERT(!pairinst->NeedAlpha || pair->Alpha.Opcode == OPCODE_NOP); if (pairinst->NeedRGB) { if (pairinst->IsTranscendent) pair->RGB.Opcode = OPCODE_REPL_ALPHA; else pair->RGB.Opcode = inst->Opcode; if (inst->SaturateMode == SATURATE_ZERO_ONE) pair->RGB.Saturate = 1; } if (pairinst->NeedAlpha) { pair->Alpha.Opcode = inst->Opcode; if (inst->SaturateMode == SATURATE_ZERO_ONE) pair->Alpha.Saturate = 1; } int nargs = _mesa_num_inst_src_regs(inst->Opcode); int i; /* Special case for DDX/DDY (MDH/MDV). 
*/ if (inst->Opcode == OPCODE_DDX || inst->Opcode == OPCODE_DDY) { if (pair->RGB.Src[0].Used || pair->Alpha.Src[0].Used) return GL_FALSE; else nargs++; } for(i = 0; i < nargs; ++i) { int source; if (pairinst->NeedRGB && !pairinst->IsTranscendent) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; GLuint negatebase = 0; int j; for(j = 0; j < 3; ++j) { GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); if (swz < 3) srcrgb = GL_TRUE; else if (swz < 4) srcalpha = GL_TRUE; if (swz != SWIZZLE_NIL && GET_BIT(inst->SrcReg[i].NegateBase, j)) negatebase = 1; } source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); if (source < 0) return GL_FALSE; pair->RGB.Arg[i].Source = source; pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; pair->RGB.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; } if (pairinst->NeedAlpha) { GLboolean srcrgb = GL_FALSE; GLboolean srcalpha = GL_FALSE; GLuint negatebase = GET_BIT(inst->SrcReg[i].NegateBase, pairinst->IsTranscendent ? 0 : 3); GLuint swz = GET_SWZ(inst->SrcReg[i].Swizzle, pairinst->IsTranscendent ? 0 : 3); if (swz < 3) srcrgb = GL_TRUE; else if (swz < 4) srcalpha = GL_TRUE; source = alloc_pair_source(s, pair, inst->SrcReg[i], srcrgb, srcalpha); if (source < 0) return GL_FALSE; pair->Alpha.Arg[i].Source = source; pair->Alpha.Arg[i].Swizzle = swz; pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; pair->Alpha.Arg[i].Negate = (negatebase & ~pair->RGB.Arg[i].Abs) ^ inst->SrcReg[i].NegateAbs; } } return GL_TRUE;}/** * Fill in the destination register information. * * This is split from filling in source registers because we want * to avoid allocating hardware temporaries for destinations until * we are absolutely certain that we're going to emit a certain * instruction pairing. 
*/
static void fill_dest_into_pair(struct pair_state *s, struct radeon_pair_instruction *pair, int ip)
{
	struct prog_instruction *inst = &s->Program->Instructions[ip];
	struct pair_state_instruction *pairinst = &s->Instructions[ip];

	if (inst->DstReg.File != PROGRAM_OUTPUT) {
		/* Every non-output destination is mapped to a hardware
		 * register; only the halves this instruction occupies
		 * get their destination and write mask set. */
		GLuint hwindex = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index);

		if (pairinst->NeedRGB) {
			pair->RGB.DestIndex = hwindex;
			pair->RGB.WriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
		}
		if (pairinst->NeedAlpha) {
			pair->Alpha.DestIndex = hwindex;
			pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
		}
		return;
	}

	/* Program outputs: only color and depth results are recorded;
	 * any other output index leaves the pair untouched. */
	if (inst->DstReg.Index == FRAG_RESULT_COLR) {
		pair->RGB.OutputWriteMask |= inst->DstReg.WriteMask & WRITEMASK_XYZ;
		pair->Alpha.OutputWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
		return;
	}
	if (inst->DstReg.Index == FRAG_RESULT_DEPR) {
		/* Depth is taken from the W channel of the write mask. */
		pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
	}
}

/**
 * Find a good ALU instruction or pair of ALU instructions and emit it.
 *
 * Prefer emitting full ALU instructions, so that when we reach a point
 * where no full ALU instruction can be emitted, we have more candidates
 * for RGB/Alpha pairing.
*/static void emit_alu(struct pair_state *s){ struct radeon_pair_instruction pair; if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) { int ip; if (s->ReadyFullALU) { ip = s->ReadyFullALU - s->Instructions; s->ReadyFullALU = s->ReadyFullALU->NextReady; } else if (s->ReadyRGB) { ip = s->ReadyRGB - s->Instructions; s->ReadyRGB = s->ReadyRGB->NextReady; } else { ip = s->ReadyAlpha - s->Instructions; s->ReadyAlpha = s->ReadyAlpha->NextReady; } _mesa_bzero(&pair, sizeof(pair)); fill_instruction_into_pair(s, &pair, ip); fill_dest_into_pair(s, &pair, ip); commit_instruction(s, ip); } else { struct pair_state_instruction **prgb; struct pair_state_instruction **palpha; /* Some pairings might fail because they require too * many source slots; try all possible pairings if necessary */ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) { for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) { int rgbip = *prgb - s->Instructions; int alphaip = *palpha - s->Instructions; _mesa_bzero(&pair, sizeof(pair)); fill_instruction_into_pair(s, &pair, rgbip); if (!fill_instruction_into_pair(s, &pair, alphaip)) continue; *prgb = (*prgb)->NextReady; *palpha = (*palpha)->NextReady; fill_dest_into_pair(s, &pair, rgbip); fill_dest_into_pair(s, &pair, alphaip); commit_instruction(s, rgbip); commit_instruction(s, alphaip); goto success; } } /* No success in pairing; just take the first RGB instruction */ int ip = s->ReadyRGB - s->Instructions; s->ReadyRGB = s->ReadyRGB->NextReady; _mesa_bzero(&pair, sizeof(pair)); fill_instruction_into_pair(s, &pair, ip); fill_dest_into_pair(s, &pair, ip); commit_instruction(s, ip); success: ; } if (s->Debug) radeonPrintPairInstruction(&pair); s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair);}GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, const struct radeon_pair_handler* handler, void *userdata){ struct pair_state s; _mesa_bzero(&s, sizeof(s)); s.Ctx = ctx; s.Program = program; s.Handler = 
handler; s.UserData = userdata; s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; s.Verbose = GL_FALSE && s.Debug; s.Instructions = (struct pair_state_instruction*)_mesa_calloc( sizeof(struct pair_state_instruction)*s.Program->NumInstructions); s.ValuePool = (struct reg_value*)_mesa_calloc(sizeof(struct reg_value)*s.Program->NumInstructions*4); s.ReaderPool = (struct reg_value_reader*)_mesa_calloc( sizeof(struct reg_value_reader)*s.Program->NumInstructions*12); if (s.Debug) _mesa_printf("Emit paired program\n"); scan_instructions(&s); allocate_input_registers(&s); while(!s.Error && (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) { if (s.ReadyTEX) emit_all_tex(&s); while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) emit_alu(&s); } if (s.Debug) _mesa_printf(" END\n"); _mesa_free(s.Instructions); _mesa_free(s.ValuePool); _mesa_free(s.ReaderPool); return !s.Error;}static void print_pair_src(int i, struct radeon_pair_instruction_source* src){ _mesa_printf(" Src%i = %s[%i]", i, src->Constant ? 
"CNST" : "TEMP", src->Index);}static const char* opcode_string(GLuint opcode){ if (opcode == OPCODE_REPL_ALPHA) return "SOP"; else return _mesa_opcode_string(opcode);}static int num_pairinst_args(GLuint opcode){ if (opcode == OPCODE_REPL_ALPHA) return 0; else return _mesa_num_inst_src_regs(opcode);}static char swizzle_char(GLuint swz){ switch(swz) { case SWIZZLE_X: return 'x'; case SWIZZLE_Y: return 'y'; case SWIZZLE_Z: return 'z'; case SWIZZLE_W: return 'w'; case SWIZZLE_ZERO: return '0'; case SWIZZLE_ONE: return '1'; case SWIZZLE_NIL: return '_'; default: return '?'; }}void radeonPrintPairInstruction(struct radeon_pair_instruction *inst){ int nargs; int i; _mesa_printf(" RGB: "); for(i = 0; i < 3; ++i) { if (inst->RGB.Src[i].Used) print_pair_src(i, inst->RGB.Src + i); } _mesa_printf("\n"); _mesa_printf(" Alpha:"); for(i = 0; i < 3; ++i) { if (inst->Alpha.Src[i].Used) print_pair_src(i, inst->Alpha.Src + i); } _mesa_printf("\n"); _mesa_printf(" %s%s", opcode_string(inst->RGB.Opcode), inst->RGB.Saturate ? "_SAT" : ""); if (inst->RGB.WriteMask) _mesa_printf(" TEMP[%i].%s%s%s", inst->RGB.DestIndex, (inst->RGB.WriteMask & 1) ? "x" : "", (inst->RGB.WriteMask & 2) ? "y" : "", (inst->RGB.WriteMask & 4) ? "z" : ""); if (inst->RGB.OutputWriteMask) _mesa_printf(" COLOR.%s%s%s", (inst->RGB.OutputWriteMask & 1) ? "x" : "", (inst->RGB.OutputWriteMask & 2) ? "y" : "", (inst->RGB.OutputWriteMask & 4) ? "z" : ""); nargs = num_pairinst_args(inst->RGB.Opcode); for(i = 0; i < nargs; ++i) { const char* abs = inst->RGB.Arg[i].Abs ? "|" : ""; const char* neg = inst->RGB.Arg[i].Negate ? "-" : ""; _mesa_printf(", %s%sSrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[i].Source, swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 0)), swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 1)), swizzle_char(GET_SWZ(inst->RGB.Arg[i].Swizzle, 2)), abs); } _mesa_printf("\n"); _mesa_printf(" %s%s", opcode_string(inst->Alpha.Opcode), inst->Alpha.Saturate ? 
"_SAT" : ""); if (inst->Alpha.WriteMask) _mesa_printf(" TEMP[%i].w", inst->Alpha.DestIndex); if (inst->Alpha.OutputWriteMask) _mesa_printf(" COLOR.w"); if (inst->Alpha.DepthWriteMask) _mesa_printf(" DEPTH.w"); nargs = num_pairinst_args(inst->Alpha.Opcode); for(i = 0; i < nargs; ++i) { const char* abs = inst->Alpha.Arg[i].Abs ? "|" : ""; const char* neg = inst->Alpha.Arg[i].Negate ? "-" : ""; _mesa_printf(", %s%sSrc%i.%c%s", neg, abs, inst->Alpha.Arg[i].Source, swizzle_char(inst->Alpha.Arg[i].Swizzle), abs); } _mesa_printf("\n");}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -