📄 r300_fragprog.c
字号:
ERROR("unknown SrcReg->File %x\n", fpsrc.File); return r; } /* no point swizzling ONE/ZERO/HALF constants... */ if (r.v_swz < SWIZZLE_111 && r.s_swz < SWIZZLE_ZERO) r = do_swizzle(rp, r, fpsrc.Swizzle, fpsrc.NegateBase);#if 0 /* WRONG! Need to be able to do individual component negation, * should probably handle this in the swizzling code unless * all components are negated, then we can do this natively */ if ((fpsrc.NegateBase & 0xf) == 0xf) r.negate = GL_TRUE; r.negate_s = (fpsrc.NegateBase >> 3) & 1; if ((fpsrc.NegateBase & 0x7) == 0x0) { r.negate_v = 0; } else if ((fpsrc.NegateBase & 0x7) == 0x7) { r.negate_v = 1; } else { if (r.type != REG_TYPE_TEMP) { n = get_temp_reg(rp); emit_arith(rp, PFS_OP_MAD, n, 0x7 ^ fpsrc.NegateBase, keep(r), pfs_one, pfs_zero, 0); r.negate_v = 1; emit_arith(rp, PFS_OP_MAD, n, fpsrc.NegateBase & 0x7 | WRITEMASK_W, r, pfs_one, pfs_zero, 0); r.negate_v = 0; r = n; } else { r.negate_v = 1; emit_arith(rp, PFS_OP_MAD, r, fpsrc.NegateBase & 0x7 | WRITEMASK_W, r, pfs_one, pfs_zero, 0); r.negate_v = 0; } }#endif return r;}static pfs_reg_t t_scalar_src(struct r300_fragment_program *rp, struct prog_src_register fpsrc){ struct prog_src_register src = fpsrc; int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */ src.Swizzle = ((sc<<0)|(sc<<3)|(sc<<6)|(sc<<9)); return t_src(rp, src);}static pfs_reg_t t_dst(struct r300_fragment_program *rp, struct prog_dst_register dest) { pfs_reg_t r = undef; switch (dest.File) { case PROGRAM_TEMPORARY: r.index = dest.Index; r.valid = GL_TRUE; return r; case PROGRAM_OUTPUT: r.type = REG_TYPE_OUTPUT; switch (dest.Index) { case FRAG_RESULT_COLR: case FRAG_RESULT_DEPR: r.index = dest.Index; r.valid = GL_TRUE; return r; default: ERROR("Bad DstReg->Index 0x%x\n", dest.Index); return r; } default: ERROR("Bad DstReg->File 0x%x\n", dest.File); return r; }}static int t_hw_src(struct r300_fragment_program *rp, pfs_reg_t src, GLboolean tex){ COMPILE_STATE; int idx; switch (src.type) { case REG_TYPE_TEMP: /* NOTE: if reg==-1 here, a source is being read that * hasn't been written to. Undefined results */ if (cs->temps[src.index].reg == -1) cs->temps[src.index].reg = get_hw_temp(rp); idx = cs->temps[src.index].reg; if (!src.no_use && (--cs->temps[src.index].refcount == 0)) free_temp(rp, src); break; case REG_TYPE_INPUT: idx = cs->inputs[src.index].reg; if (!src.no_use && (--cs->inputs[src.index].refcount == 0)) free_hw_temp(rp, cs->inputs[src.index].reg); break; case REG_TYPE_CONST: return (src.index | SRC_CONST); default: ERROR("Invalid type for source reg\n"); return (0 | SRC_CONST); } if (!tex) cs->used_in_node |= (1 << idx); return idx;}static int t_hw_dst(struct r300_fragment_program *rp, pfs_reg_t dest, GLboolean tex){ COMPILE_STATE; int idx; assert(dest.valid); switch (dest.type) { case REG_TYPE_TEMP: if (cs->temps[dest.index].reg == -1) { if (!tex) cs->temps[dest.index].reg = get_hw_temp(rp); else cs->temps[dest.index].reg = get_hw_temp_tex(rp); } idx = cs->temps[dest.index].reg; if (!dest.no_use && (--cs->temps[dest.index].refcount == 0)) free_temp(rp, dest); cs->dest_in_node |= (1 << idx); cs->used_in_node |= (1 << idx); break; case REG_TYPE_OUTPUT: switch (dest.index) { case FRAG_RESULT_COLR: rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_COLOR; break; case FRAG_RESULT_DEPR: rp->node[rp->cur_node].flags |= R300_PFS_NODE_OUTPUT_DEPTH; break; } return dest.index; break; default: ERROR("invalid dest reg type %d\n", dest.type); return 0; } return idx;}static void emit_nop(struct r300_fragment_program *rp, GLuint mask, GLboolean sync){ COMPILE_STATE; if (sync) cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); if (mask & WRITEMASK_XYZ) { rp->alu.inst[cs->v_pos].inst0 = NOP_INST0; rp->alu.inst[cs->v_pos].inst1 = NOP_INST1; cs->v_pos++; } if (mask & WRITEMASK_W) { rp->alu.inst[cs->s_pos].inst2 = NOP_INST2; rp->alu.inst[cs->s_pos].inst3 = NOP_INST3; cs->s_pos++; }}static void emit_tex(struct r300_fragment_program *rp, struct prog_instruction *fpi, int opcode){ COMPILE_STATE; pfs_reg_t coord = t_src(rp, fpi->SrcReg[0]); pfs_reg_t dest = undef, rdest = undef; GLuint din = cs->dest_in_node, uin = cs->used_in_node; int unit = fpi->TexSrcUnit; int hwsrc, hwdest; /* Resolve source/dest to hardware registers */ hwsrc = t_hw_src(rp, coord, GL_TRUE); if (opcode != R300_FPITX_OP_KIL) { dest = t_dst(rp, fpi->DstReg); /* r300 doesn't seem to be able to do TEX->output reg */ if (dest.type == REG_TYPE_OUTPUT) { rdest = dest; dest = get_temp_reg_tex(rp); } hwdest = t_hw_dst(rp, dest, GL_TRUE); /* Use a temp that hasn't been used in this node, rather * than causing an indirection */ if (uin & (1 << hwdest)) { free_hw_temp(rp, hwdest); hwdest = get_hw_temp_tex(rp); cs->temps[dest.index].reg = hwdest; } } else { hwdest = 0; unit = 0; } /* Indirection if source has been written in this node, or if the * dest has been read/written in this node */ if ((coord.type != REG_TYPE_CONST && (din & (1<<hwsrc))) || (uin & (1<<hwdest))) { /* Finish off current node */ cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); if (rp->node[rp->cur_node].alu_offset == cs->v_pos) { /* No alu instructions in the node? Emit a NOP. */ emit_nop(rp, WRITEMASK_XYZW, GL_TRUE); cs->v_pos = cs->s_pos = MAX2(cs->v_pos, cs->s_pos); } rp->node[rp->cur_node].alu_end = cs->v_pos - rp->node[rp->cur_node].alu_offset - 1; assert(rp->node[rp->cur_node].alu_end >= 0); if (++rp->cur_node >= PFS_MAX_TEX_INDIRECT) { ERROR("too many levels of texture indirection\n"); return; } /* Start new node */ rp->node[rp->cur_node].tex_offset = rp->tex.length; rp->node[rp->cur_node].alu_offset = cs->v_pos; rp->node[rp->cur_node].tex_end = -1; rp->node[rp->cur_node].alu_end = -1; rp->node[rp->cur_node].flags = 0; cs->used_in_node = 0; cs->dest_in_node = 0; } if (rp->cur_node == 0) rp->first_node_has_tex = 1; rp->tex.inst[rp->tex.length++] = 0 | (hwsrc << R300_FPITX_SRC_SHIFT) | (hwdest << R300_FPITX_DST_SHIFT) | (unit << R300_FPITX_IMAGE_SHIFT) | (opcode << R300_FPITX_OPCODE_SHIFT); /* not entirely sure about this */ cs->dest_in_node |= (1 << hwdest); if (coord.type != REG_TYPE_CONST) cs->used_in_node |= (1 << hwsrc); rp->node[rp->cur_node].tex_end++; /* Copy from temp to output if needed */ if (rdest.valid) { emit_arith(rp, PFS_OP_MAD, rdest, WRITEMASK_XYZW, dest, pfs_one, pfs_zero, 0); free_temp(rp, dest); }}/* Add sources to FPI1/FPI3 lists. If source is already on list, * reuse the index instead of wasting a source. */static int add_src(struct r300_fragment_program *rp, int reg, int pos, int srcmask){ COMPILE_STATE; int csm, i; /* Look for matches */ for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) { /* If sources have been allocated in this position(s)... */ if ((cs->slot[pos].umask & csm) == csm) { /* ... and the register number(s) match, re-use the source */ if (srcmask == SLOT_VECTOR && cs->slot[pos].vsrc[i] == reg) return i; if (srcmask == SLOT_SCALAR && cs->slot[pos].ssrc[i] == reg) return i; if (srcmask == SLOT_BOTH && cs->slot[pos].vsrc[i] == reg && cs->slot[pos].ssrc[i] == reg) return i; } } /* Look for free spaces */ for (i=0,csm=srcmask; i<3; i++,csm=csm<<1) { /* If the position(s) haven't been allocated */ if ((cs->slot[pos].umask & csm) == 0) { cs->slot[pos].umask |= csm; if (srcmask & SLOT_VECTOR) cs->slot[pos].vsrc[i] = reg; if (srcmask & SLOT_SCALAR) cs->slot[pos].ssrc[i] = reg; return i; } } //ERROR("Failed to allocate sources in FPI1/FPI3!\n"); return 0;}/* Determine whether or not to position opcode in the same ALU slot for both * vector and scalar portions of an instruction. * * It's not necessary to force the first case, but it makes disassembled * shaders easier to read. */static GLboolean force_same_slot(int vop, int sop, GLboolean emit_vop, GLboolean emit_sop, int argc, pfs_reg_t *src){ int i; if (emit_vop && emit_sop) return GL_TRUE; if (emit_vop && vop == R300_FPI0_OUTC_REPL_ALPHA) return GL_TRUE; if (emit_vop) { for (i=0;i<argc;i++) if (src[i].v_swz == SWIZZLE_WZY) return GL_TRUE; } return GL_FALSE;}static void emit_arith(struct r300_fragment_program *rp, int op, pfs_reg_t dest, int mask, pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2, int flags){ COMPILE_STATE; pfs_reg_t src[3] = { src0, src1, src2 }; int hwsrc[3], sswz[3], vswz[3]; int hwdest; GLboolean emit_vop = GL_FALSE, emit_sop = GL_FALSE; int vop, sop, argc; int vpos, spos; int i; vop = r300_fpop[op].v_op; sop = r300_fpop[op].s_op; argc = r300_fpop[op].argc; if ((mask & WRITEMASK_XYZ) || vop == R300_FPI0_OUTC_DP3) emit_vop = GL_TRUE; if ((mask & WRITEMASK_W) || vop == R300_FPI0_OUTC_REPL_ALPHA) emit_sop = GL_TRUE; if (dest.type == REG_TYPE_OUTPUT && dest.index == FRAG_RESULT_DEPR) emit_vop = GL_FALSE; if (force_same_slot(vop, sop, emit_vop, emit_sop, argc, src)) { vpos = spos = MAX2(cs->v_pos, cs->s_pos); } else { vpos = cs->v_pos; spos = cs->s_pos; /* Here is where we'd decide on where a safe place is to * combine this instruction with a previous one. * * This is extremely simple for now.. if a source depends * on the opposite stream, force the same instruction. */ for (i=0;i<3;i++) { if (emit_vop && (v_swiz[src[i].v_swz].flags & SLOT_SCALAR)) { vpos = spos = MAX2(vpos, spos); break; } if (emit_sop && (s_swiz[src[i].s_swz].flags & SLOT_VECTOR)) { vpos = spos = MAX2(vpos, spos); break; } } } /* - Convert src->hwsrc, record for FPI1/FPI3 * - Determine ARG parts of FPI0/FPI2, unused args are filled * with ARG_ZERO. */ for (i=0;i<3;i++) { int srcpos; if (i >= argc) { vswz[i] = R300_FPI0_ARGC_ZERO; sswz[i] = R300_FPI2_ARGA_ZERO; continue; } hwsrc[i] = t_hw_src(rp, src[i], GL_FALSE); if (emit_vop && vop != R300_FPI0_OUTC_REPL_ALPHA) { srcpos = add_src(rp, hwsrc[i], vpos, v_swiz[src[i].v_swz].flags); vswz[i] = (v_swiz[src[i].v_swz].base + (srcpos * v_swiz[src[i].v_swz].stride)) | (src[i].negate_v ? ARG_NEG : 0) | (src[i].absolute ? ARG_ABS : 0); } else vswz[i] = R300_FPI0_ARGC_ZERO; if (emit_sop) { srcpos = add_src(rp, hwsrc[i], spos, s_swiz[src[i].s_swz].flags); sswz[i] = (s_swiz[src[i].s_swz].base + (srcpos * s_swiz[src[i].s_swz].stride)) | (src[i].negate_s ? ARG_NEG : 0) | (src[i].absolute ? ARG_ABS : 0); } else sswz[i] = R300_FPI2_ARGA_ZERO; } hwdest = t_hw_dst(rp, dest, GL_FALSE); if (flags & PFS_FLAG_SAT) { vop |= R300_FPI0_OUTC_SAT; sop |= R300_FPI2_OUTA_SAT; } /* Throw the pieces together and get FPI0/1 */ rp->alu.inst[vpos].inst1 = ((cs->slot[vpos].vsrc[0] << R300_FPI1_SRC0C_SHIFT) | (cs->slot[vpos].vsrc[1] << R300_FPI1_SRC1C_SHIFT) | (cs->slot[vpos].vsrc[2] << R300_FPI1_SRC2C_SHIFT)); if (emit_vop) { rp->alu.inst[vpos].inst0 = vop | (vswz[0] << R300_FPI0_ARG0C_SHIFT) | (vswz[1] << R300_FPI0_ARG1C_SHIFT) | (vswz[2] << R300_FPI0_ARG2C_SHIFT); rp->alu.inst[vpos].inst1 |= hwdest << R300_FPI1_DSTC_SHIFT; if (dest.type == REG_TYPE_OUTPUT) { if (dest.index == FRAG_RESULT_COLR) { rp->alu.inst[vpos].inst1 |= (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_OUTPUT_MASK_SHIFT; } else assert(0); } else { rp->alu.inst[vpos].inst1 |= (mask & WRITEMASK_XYZ) << R300_FPI1_DSTC_REG_MASK_SHIFT; } cs->v_pos = vpos+1; } else if (spos >= vpos) rp->alu.inst[spos].inst0 = NOP_INST0; /* And now FPI2/3 */ rp->alu.inst[spos].inst3 = ((cs->slot[spos].ssrc[0] << R300_FPI3_SRC0A_SHIFT) | (cs->slot[spos].ssrc[1] << R300_FPI3_SRC1A_SHIFT) | (cs->slot[spos].ssrc[2] << R300_FPI3_SRC2A_SHIFT)); if (emit_sop) { rp->alu.inst[spos].inst2 = sop | sswz[0] << R300_FPI2_ARG0A_SHIFT | sswz[1] << R300_FPI2_ARG1A_SHIFT | sswz[2] << R300_FPI2_ARG2A_SHIFT; if (mask & WRITEMASK_W) { if (dest.type == REG_TYPE_OUTPUT) { if (dest.index == FRAG_RESULT_COLR) { rp->alu.inst[spos].inst3 |= (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_OUTPUT; } else if (dest.index == FRAG_RESULT_DEPR) { rp->alu.inst[spos].inst3 |= R300_FPI3_DSTA_DEPTH; } else assert(0); } else { rp->alu.inst[spos].inst3 |= (hwdest << R300_FPI3_DSTA_SHIFT) | R300_FPI3_DSTA_REG; } } cs->s_pos = spos+1; } else if (vpos >= spos) rp->alu.inst[vpos].inst2 = NOP_INST2; return;};#if 0static pfs_reg_t get_attrib(struct r300_fragment_program *rp, GLuint attr){ struct fragment_program *mp = &rp->mesa_program; pfs_reg_t r = undef; if (!(mp->Base.InputsRead & (1<<attr))) { ERROR("Attribute %d was not provided!\n", attr); return undef; } r.type = REG_TYPE_INPUT; r.index = attr; r.valid = GL_TRUE; return r;}#endifstatic GLboolean parse_program(struct r300_fragment_program *rp){ struct fragment_program *mp = &rp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; pfs_reg_t src[3], dest, temp; int flags, mask; if (!inst || inst[0].Opcode == OPCODE_END) { ERROR("empty program?\n"); return GL_FALSE; } for (fpi=mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { if (fpi->SaturateMode == SATURATE_ZERO_ONE) flags = PFS_FLAG_SAT; else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -