📄 r300_fragprog.c
字号:
flags = 0; if (fpi->Opcode != OPCODE_KIL) { dest = t_dst(rp, fpi->DstReg); mask = fpi->DstReg.WriteMask; } switch (fpi->Opcode) { case OPCODE_ABS: src[0] = t_src(rp, fpi->SrcReg[0]); emit_arith(rp, PFS_OP_MAD, dest, mask, absolute(src[0]), pfs_one, pfs_zero, flags); break; case OPCODE_ADD: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); emit_arith(rp, PFS_OP_MAD, dest, mask, src[0], pfs_one, src[1], flags); break; case OPCODE_CMP: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); src[2] = t_src(rp, fpi->SrcReg[2]); /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c * r300 - if src2.c < 0.0 ? src1.c : src0.c */ emit_arith(rp, PFS_OP_CMP, dest, mask, src[2], src[1], src[0], flags); break; case OPCODE_COS: ERROR("COS not implemented\n"); break; case OPCODE_DP3: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); emit_arith(rp, PFS_OP_DP3, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_DP4: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); emit_arith(rp, PFS_OP_DP4, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_DPH: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); /* src0.xyz1 -> temp * DP4 dest, temp, src1 */#if 0 temp = get_temp_reg(rp); src[0].s_swz = SWIZZLE_ONE; emit_arith(rp, PFS_OP_MAD, temp, mask, src[0], pfs_one, pfs_zero, 0); emit_arith(rp, PFS_OP_DP4, dest, mask, temp, src[1], undef, flags); free_temp(rp, temp);#else emit_arith(rp, PFS_OP_DP4, dest, mask, swizzle(src[0], X, Y, Z, ONE), src[1], undef, flags); #endif break; case OPCODE_DST: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); /* dest.y = src0.y * src1.y */ if (mask & WRITEMASK_Y) emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Y, keep(src[0]), keep(src[1]), pfs_zero, flags); /* dest.z = src0.z */ if (mask & WRITEMASK_Z) emit_arith(rp, PFS_OP_MAD, dest, WRITEMASK_Z, src[0], pfs_one, pfs_zero, flags); /* result.x = 1.0 * result.w = src1.w */ if (mask & WRITEMASK_XW) { src[1].v_swz = SWIZZLE_111; /* Cheat.. */ emit_arith(rp, PFS_OP_MAD, dest, mask & WRITEMASK_XW, src[1], pfs_one, pfs_zero, flags); } break; case OPCODE_EX2: src[0] = t_scalar_src(rp, fpi->SrcReg[0]); emit_arith(rp, PFS_OP_EX2, dest, mask, src[0], undef, undef, flags); break; case OPCODE_FLR: src[0] = t_src(rp, fpi->SrcReg[0]); temp = get_temp_reg(rp); /* FRC temp, src0 * MAD dest, src0, 1.0, -temp */ emit_arith(rp, PFS_OP_FRC, temp, mask, keep(src[0]), undef, undef, 0); emit_arith(rp, PFS_OP_MAD, dest, mask, src[0], pfs_one, negate(temp), flags); free_temp(rp, temp); break; case OPCODE_FRC: src[0] = t_src(rp, fpi->SrcReg[0]); emit_arith(rp, PFS_OP_FRC, dest, mask, src[0], undef, undef, flags); break; case OPCODE_KIL: emit_tex(rp, fpi, R300_FPITX_OP_KIL); break; case OPCODE_LG2: src[0] = t_scalar_src(rp, fpi->SrcReg[0]); emit_arith(rp, PFS_OP_LG2, dest, mask, src[0], undef, undef, flags); break; case OPCODE_LIT: ERROR("LIT not implemented\n"); break; case OPCODE_LRP: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); src[2] = t_src(rp, fpi->SrcReg[2]); /* result = tmp0tmp1 + (1 - tmp0)tmp2 * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 * MAD temp, -tmp0, tmp2, tmp2 * MAD result, tmp0, tmp1, temp */ temp = get_temp_reg(rp); emit_arith(rp, PFS_OP_MAD, temp, mask, negate(keep(src[0])), keep(src[2]), src[2], 0); emit_arith(rp, PFS_OP_MAD, dest, mask, src[0], src[1], temp, flags); free_temp(rp, temp); break; case OPCODE_MAD: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); src[2] = t_src(rp, fpi->SrcReg[2]); emit_arith(rp, PFS_OP_MAD, dest, mask, src[0], src[1], src[2], flags); break; case OPCODE_MAX: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); emit_arith(rp, PFS_OP_MAX, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_MIN: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); emit_arith(rp, PFS_OP_MIN, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_MOV: case OPCODE_SWZ: src[0] = t_src(rp, fpi->SrcReg[0]); emit_arith(rp, PFS_OP_MAD, dest, mask, src[0], pfs_one, pfs_zero, flags); break; case OPCODE_MUL: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); emit_arith(rp, PFS_OP_MAD, dest, mask, src[0], src[1], pfs_zero, flags); break; case OPCODE_POW: src[0] = t_scalar_src(rp, fpi->SrcReg[0]); src[1] = t_scalar_src(rp, fpi->SrcReg[1]); temp = get_temp_reg(rp); emit_arith(rp, PFS_OP_LG2, temp, WRITEMASK_W, src[0], undef, undef, 0); emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_W, temp, src[1], pfs_zero, 0); emit_arith(rp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, temp, undef, undef, 0); free_temp(rp, temp); break; case OPCODE_RCP: src[0] = t_scalar_src(rp, fpi->SrcReg[0]); emit_arith(rp, PFS_OP_RCP, dest, mask, src[0], undef, undef, flags); break; case OPCODE_RSQ: src[0] = t_scalar_src(rp, fpi->SrcReg[0]); emit_arith(rp, PFS_OP_RSQ, dest, mask, absolute(src[0]), pfs_zero, pfs_zero, flags); break; case OPCODE_SCS: ERROR("SCS not implemented\n"); break; case OPCODE_SGE: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); temp = get_temp_reg(rp); /* temp = src0 - src1 * dest.c = (temp.c < 0.0) ? 0 : 1 */ emit_arith(rp, PFS_OP_MAD, temp, mask, src[0], pfs_one, negate(src[1]), 0); emit_arith(rp, PFS_OP_CMP, dest, mask, pfs_one, pfs_zero, temp, 0); free_temp(rp, temp); break; case OPCODE_SIN: ERROR("SIN not implemented\n"); break; case OPCODE_SLT: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); temp = get_temp_reg(rp); /* temp = src0 - src1 * dest.c = (temp.c < 0.0) ? 1 : 0 */ emit_arith(rp, PFS_OP_MAD, temp, mask, src[0], pfs_one, negate(src[1]), 0); emit_arith(rp, PFS_OP_CMP, dest, mask, pfs_zero, pfs_one, temp, 0); free_temp(rp, temp); break; case OPCODE_SUB: src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); emit_arith(rp, PFS_OP_MAD, dest, mask, src[0], pfs_one, negate(src[1]), flags); break; case OPCODE_TEX: emit_tex(rp, fpi, R300_FPITX_OP_TEX); break; case OPCODE_TXB: emit_tex(rp, fpi, R300_FPITX_OP_TXB); break; case OPCODE_TXP: emit_tex(rp, fpi, R300_FPITX_OP_TXP); break; case OPCODE_XPD: { src[0] = t_src(rp, fpi->SrcReg[0]); src[1] = t_src(rp, fpi->SrcReg[1]); temp = get_temp_reg(rp); /* temp = src0.zxy * src1.yzx */ emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZ, swizzle(keep(src[0]), Z, X, Y, W), swizzle(keep(src[1]), Y, Z, X, W), pfs_zero, 0); /* dest.xyz = src0.yzx * src1.zxy - temp * dest.w = undefined * */ emit_arith(rp, PFS_OP_MAD, dest, mask & WRITEMASK_XYZ, swizzle(src[0], Y, Z, X, W), swizzle(src[1], Z, X, Y, W), negate(temp), flags); /* cleanup */ free_temp(rp, temp); break; } default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); break; } if (rp->error) return GL_FALSE; } return GL_TRUE;}/* - Init structures * - Determine what hwregs each input corresponds to */static void init_program(struct r300_fragment_program *rp){ struct r300_pfs_compile_state *cs = NULL; struct fragment_program *mp = &rp->mesa_program; struct prog_instruction *fpi; GLuint InputsRead = mp->Base.InputsRead; GLuint temps_used = 0; /* for rp->temps[] */ int i,j; /* New compile, reset tracking data */ rp->translated = GL_FALSE; rp->error = GL_FALSE; rp->cs = cs = &(R300_CONTEXT(rp->ctx)->state.pfs_compile); rp->tex.length = 0; rp->cur_node = 0; rp->first_node_has_tex = 0; rp->const_nr = 0; rp->param_nr = 0; rp->params_uptodate = GL_FALSE; rp->max_temp_idx = 0; rp->node[0].alu_end = -1; rp->node[0].tex_end = -1; _mesa_memset(cs, 0, sizeof(*rp->cs)); for (i=0;i<PFS_MAX_ALU_INST;i++) { for (j=0;j<3;j++) { cs->slot[i].vsrc[j] = SRC_CONST; cs->slot[i].ssrc[j] = SRC_CONST; } } /* Work out what temps the Mesa inputs correspond to, this must match * what setup_rs_unit does, which shouldn't be a problem as rs_unit * configures itself based on the fragprog's InputsRead * * NOTE: this depends on get_hw_temp() allocating registers in order, * starting from register 0. */ /* Texcoords come first */ for (i=0;i<rp->ctx->Const.MaxTextureUnits;i++) { if (InputsRead & (FRAG_BIT_TEX0 << i)) { cs->inputs[FRAG_ATTRIB_TEX0+i].refcount = 0; cs->inputs[FRAG_ATTRIB_TEX0+i].reg = get_hw_temp(rp); } } InputsRead &= ~FRAG_BITS_TEX_ANY; /* Then primary colour */ if (InputsRead & FRAG_BIT_COL0) { cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(rp); } InputsRead &= ~FRAG_BIT_COL0; /* Secondary color */ if (InputsRead & FRAG_BIT_COL1) { cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(rp); } InputsRead &= ~FRAG_BIT_COL1; /* Anything else */ if (InputsRead) { WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); /* force read from hwreg 0 for now */ for (i=0;i<32;i++) if (InputsRead & (1<<i)) cs->inputs[i].reg = 0; } /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. * That way, we can free up the reg when it's no longer needed */ if (!mp->Base.Instructions) { ERROR("No instructions found in program\n"); return; } for (fpi=mp->Base.Instructions;fpi->Opcode != OPCODE_END; fpi++) { int idx; for (i=0;i<3;i++) { idx = fpi->SrcReg[i].Index; switch (fpi->SrcReg[i].File) { case PROGRAM_TEMPORARY: if (!(temps_used & (1<<idx))) { cs->temps[idx].reg = -1; cs->temps[idx].refcount = 1; temps_used |= (1 << idx); } else cs->temps[idx].refcount++; break; case PROGRAM_INPUT: cs->inputs[idx].refcount++; break; default: break; } } idx = fpi->DstReg.Index; if (fpi->DstReg.File == PROGRAM_TEMPORARY) { if (!(temps_used & (1<<idx))) { cs->temps[idx].reg = -1; cs->temps[idx].refcount = 1; temps_used |= (1 << idx); } else cs->temps[idx].refcount++; } } cs->temp_in_use = temps_used;}static void update_params(struct r300_fragment_program *rp){ struct fragment_program *mp = &rp->mesa_program; int i; /* Ask Mesa nicely to fill in ParameterValues for us */ if (rp->param_nr) _mesa_load_state_parameters(rp->ctx, mp->Base.Parameters); for (i=0;i<rp->param_nr;i++) COPY_4V(rp->constant[rp->param[i].idx], rp->param[i].values); rp->params_uptodate = GL_TRUE;}void r300_translate_fragment_shader(struct r300_fragment_program *rp){ struct r300_pfs_compile_state *cs = NULL; if (!rp->translated) { init_program(rp); cs = rp->cs; if (parse_program(rp) == GL_FALSE) { dump_program(rp); return; } /* Finish off */ cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); rp->node[rp->cur_node].alu_end = cs->v_pos - rp->node[rp->cur_node].alu_offset - 1; if (rp->node[rp->cur_node].tex_end < 0) rp->node[rp->cur_node].tex_end = 0; rp->alu_offset = 0; rp->alu_end = cs->v_pos - 1; rp->tex_offset = 0; rp->tex_end = rp->tex.length ? rp->tex.length - 1 : 0; assert(rp->node[rp->cur_node].alu_end >= 0); assert(rp->alu_end >= 0); rp->translated = GL_TRUE; if (0) dump_program(rp); } update_params(rp);}/* just some random things... */static void dump_program(struct r300_fragment_program *rp){ int i; static int pc = 0; fprintf(stderr, "pc=%d*************************************\n", pc++); fprintf(stderr, "Mesa program:\n"); fprintf(stderr, "-------------\n"); _mesa_print_program(&rp->mesa_program.Base); fflush(stdout); fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); fprintf(stderr, "tex:\n"); for(i=0;i<rp->tex.length;i++) { fprintf(stderr, "%08x\n", rp->tex.inst[i]); } for (i=0;i<(rp->cur_node+1);i++) { fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "\ "alu_end: %d, tex_end: %d\n", i, rp->node[i].alu_offset, rp->node[i].tex_offset, rp->node[i].alu_end, rp->node[i].tex_end); } fprintf(stderr, "%08x\n", ((rp->tex_end << 16) | (R300_PFS_TEXI_0 >> 2))); for (i=0;i<=rp->tex_end;i++) fprintf(stderr, "%08x\n", rp->tex.inst[i]); /* dump program in pretty_print_command_stream.tcl-readable format */ fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR0_0 >> 2))); for (i=0;i<=rp->alu_end;i++) fprintf(stderr, "%08x\n", rp->alu.inst[i].inst0); fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR1_0 >> 2))); for (i=0;i<=rp->alu_end;i++) fprintf(stderr, "%08x\n", rp->alu.inst[i].inst1); fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR2_0 >> 2))); for (i=0;i<=rp->alu_end;i++) fprintf(stderr, "%08x\n", rp->alu.inst[i].inst2); fprintf(stderr, "%08x\n", ((rp->alu_end << 16) | (R300_PFS_INSTR3_0 >> 2))); for (i=0;i<=rp->alu_end;i++) fprintf(stderr, "%08x\n", rp->alu.inst[i].inst3); fprintf(stderr, "00000000\n");}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -