📄 brw_vs_emit.c
字号:
tmp_d, brw_imm_d(23)); release_tmp(c, tmp); } if (dst.dw1.bits.writemask & WRITEMASK_Y) { /* result[1] = arg0.x - floor(arg0.x) */ brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0)); } if (dst.dw1.bits.writemask & WRITEMASK_Z) { /* As with the LOG instruction, we might be better off just * doing a taylor expansion here, seeing as we have to do all * the prep work. * * If mathbox partial precision is too low, consider also: * result[3] = result[0] * EXP(result[1]) */ emit_math1(c, BRW_MATH_FUNCTION_EXP, brw_writemask(dst, WRITEMASK_Z), brw_swizzle1(arg0, 0), BRW_MATH_PRECISION_PARTIAL); } if (dst.dw1.bits.writemask & WRITEMASK_W) { /* result[3] = 1.0; */ brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1)); }}static void emit_log_noalias( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ){ struct brw_compile *p = &c->func; struct brw_reg tmp = dst; struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD); GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || dst.file != BRW_GENERAL_REGISTER_FILE); if (need_tmp) { tmp = get_tmp(c); tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); } /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt * according to spec: * * These almost look likey they could be joined up, but not really * practical: * * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 * result[1].i = (x.i & ((1<<23)-1) + (127<<23) */ if (dst.dw1.bits.writemask & WRITEMASK_XZ) { brw_AND(p, brw_writemask(tmp_ud, WRITEMASK_X), brw_swizzle1(arg0_ud, 0), brw_imm_ud((1U<<31)-1)); brw_SHR(p, brw_writemask(tmp_ud, WRITEMASK_X), tmp_ud, brw_imm_ud(23)); brw_ADD(p, brw_writemask(tmp, WRITEMASK_X), retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */ brw_imm_d(-127)); } if (dst.dw1.bits.writemask & WRITEMASK_YZ) { brw_AND(p, brw_writemask(tmp_ud, WRITEMASK_Y), brw_swizzle1(arg0_ud, 0), brw_imm_ud((1<<23)-1)); brw_OR(p, brw_writemask(tmp_ud, WRITEMASK_Y), tmp_ud, brw_imm_ud(127<<23)); } if (dst.dw1.bits.writemask & WRITEMASK_Z) { /* result[2] = result[0] + LOG2(result[1]); */ /* Why bother? The above is just a hint how to do this with a * taylor series. Maybe we *should* use a taylor series as by * the time all the above has been done it's almost certainly * quicker than calling the mathbox, even with low precision. * * Options are: * - result[0] + mathbox.LOG2(result[1]) * - mathbox.LOG2(arg0.x) * - result[0] + inline_taylor_approx(result[1]) */ emit_math1(c, BRW_MATH_FUNCTION_LOG, brw_writemask(tmp, WRITEMASK_Z), brw_swizzle1(tmp, 1), BRW_MATH_PRECISION_FULL); brw_ADD(p, brw_writemask(tmp, WRITEMASK_Z), brw_swizzle1(tmp, 2), brw_swizzle1(tmp, 0)); } if (dst.dw1.bits.writemask & WRITEMASK_W) { /* result[3] = 1.0; */ brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1)); } if (need_tmp) { brw_MOV(p, dst, tmp); release_tmp(c, tmp); }} /* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1 */static void emit_dst_noalias( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0, struct brw_reg arg1){ struct brw_compile *p = &c->func; /* There must be a better way to do this: */ if (dst.dw1.bits.writemask & WRITEMASK_X) brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0)); if (dst.dw1.bits.writemask & WRITEMASK_Y) brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1); if (dst.dw1.bits.writemask & WRITEMASK_Z) brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0); if (dst.dw1.bits.writemask & WRITEMASK_W) brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1);}static void emit_xpd( struct brw_compile *p, struct brw_reg dst, struct brw_reg t, struct brw_reg u){ brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3)); brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));}static void emit_lit_noalias( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ){ struct brw_compile *p = &c->func; struct brw_instruction *if_insn; struct brw_reg tmp = dst; GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); if (need_tmp) tmp = get_tmp(c); brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0)); brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1)); /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order * to get all channels active inside the IF. In the clipping code * we run with NoMask, so it's not an option and we can use * BRW_EXECUTE_1 for all comparisions. */ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); if_insn = brw_IF(p, BRW_EXECUTE_8); { brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z), brw_swizzle1(arg0,1)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); emit_math2(c, BRW_MATH_FUNCTION_POW, brw_writemask(dst, WRITEMASK_Z), brw_swizzle1(tmp, 2), brw_swizzle1(arg0, 3), BRW_MATH_PRECISION_PARTIAL); } brw_ENDIF(p, if_insn);}/* TODO: relative addressing! */static struct brw_reg get_reg( struct brw_vs_compile *c, GLuint file, GLuint index ){ switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: assert(c->regs[file][index].nr != 0); return c->regs[file][index]; case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: case PROGRAM_UNIFORM: assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); return c->regs[PROGRAM_STATE_VAR][index]; case PROGRAM_ADDRESS: assert(index == 0); return c->regs[file][index]; case PROGRAM_UNDEFINED: /* undef values */ return brw_null_reg(); case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_WRITE_ONLY: default: assert(0); return brw_null_reg(); }}static struct brw_reg deref( struct brw_vs_compile *c, struct brw_reg arg, GLint offset){ struct brw_compile *p = &c->func; struct brw_reg tmp = vec4(get_tmp(c)); struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; struct brw_reg indirect = brw_vec4_indirect(0,0); { brw_push_insn_state(p); brw_set_access_mode(p, BRW_ALIGN_1); /* This is pretty clunky - load the address register twice and * fetch each 4-dword value in turn. There must be a way to do * this in a single pass, but I couldn't get it to work. */ brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); brw_MOV(p, tmp, indirect); brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); brw_MOV(p, suboffset(tmp, 4), indirect); brw_pop_insn_state(p); } return vec8(tmp);}static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ){ struct brw_compile *p = &c->func; struct brw_reg tmp = dst; GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); if (need_tmp) tmp = get_tmp(c); brw_RNDD(p, tmp, arg0); brw_MUL(p, dst, tmp, brw_imm_d(16)); if (need_tmp) release_tmp(c, tmp);}/* Will return mangled results for SWZ op. The emit_swz() function * ignores this result and recalculates taking extended swizzles into * account. */static struct brw_reg get_arg( struct brw_vs_compile *c, struct prog_src_register *src ){ struct brw_reg reg; if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); if (src->RelAddr) reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); else reg = get_reg(c, src->File, src->Index); /* Convert 3-bit swizzle to 2-bit. */ reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), GET_SWZ(src->Swizzle, 1), GET_SWZ(src->Swizzle, 2), GET_SWZ(src->Swizzle, 3)); /* Note this is ok for non-swizzle instructions: */ reg.negate = src->NegateBase ? 1 : 0; return reg;}static struct brw_reg get_dst( struct brw_vs_compile *c, struct prog_dst_register dst ){ struct brw_reg reg = get_reg(c, dst.File, dst.Index); reg.dw1.bits.writemask = dst.WriteMask; return reg;}static void emit_swz( struct brw_vs_compile *c, struct brw_reg dst, struct prog_src_register src ){ struct brw_compile *p = &c->func; GLuint zeros_mask = 0; GLuint ones_mask = 0; GLuint src_mask = 0; GLubyte src_swz[4]; GLboolean need_tmp = (src.NegateBase && dst.file != BRW_GENERAL_REGISTER_FILE); struct brw_reg tmp = dst; GLuint i; if (need_tmp) tmp = get_tmp(c); for (i = 0; i < 4; i++) { if (dst.dw1.bits.writemask & (1<<i)) { GLubyte s = GET_SWZ(src.Swizzle, i); switch (s) { case SWIZZLE_X: case SWIZZLE_Y: case SWIZZLE_Z: case SWIZZLE_W: src_mask |= 1<<i; src_swz[i] = s; break; case SWIZZLE_ZERO: zeros_mask |= 1<<i; break; case SWIZZLE_ONE: ones_mask |= 1<<i; break; } } } /* Do src first, in case dst aliases src: */ if (src_mask) { struct brw_reg arg0; if (src.RelAddr) arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); else arg0 = get_reg(c, src.File, src.Index); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], src_swz[2], src_swz[3]); brw_MOV(p, brw_writemask(tmp, src_mask), arg0); } if (zeros_mask) brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); if (ones_mask) brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); if (src.NegateBase) brw_MOV(p, brw_writemask(tmp, src.NegateBase), negate(tmp)); if (need_tmp) { brw_MOV(p, dst, tmp); release_tmp(c, tmp); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -