📄 r300_fragprog.c
字号:
/*
 * Copyright (C) 2005 Ben Skeggs.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/*
 * Authors:
 *   Ben Skeggs <darktama@iinet.net.au>
 */

/*TODO'S
 *
 * - COS/SIN/SCS/LIT instructions
 * - Depth write, WPOS/FOGC inputs
 * - FogOption
 * - Negate on individual components (implement in swizzle code?)
 * - Verify results of opcodes for accuracy, I've only checked them
 *   in specific cases.
 * - and more...
 */

#include "glheader.h"
#include "macros.h"
#include "enums.h"

#include "program.h"
#include "program_instruction.h"
#include "r300_context.h"
#include "r300_fragprog.h"
#include "r300_reg.h"

/* Sentinel used to terminate the lookup tables below. */
#define PFS_INVAL 0xFFFFFFFF

/* Declares `cs` as a shorthand for the compile state hanging off `rp`. */
#define COMPILE_STATE struct r300_pfs_compile_state *cs = rp->cs

static void dump_program(struct r300_fragment_program *rp);
static void emit_arith(struct r300_fragment_program *rp, int op,
		       pfs_reg_t dest, int mask,
		       pfs_reg_t src0, pfs_reg_t src1, pfs_reg_t src2,
		       int flags);

/***************************************
 * begin: useful data structures for fragment program generation
 ***************************************/

/* description of r300 native hw instructions */
static const struct {
	const char *name;
	int argc;
	int v_op;
	int s_op;
} r300_fpop[] = {
	{ "MAD", 3, R300_FPI0_OUTC_MAD, R300_FPI2_OUTA_MAD },
	{ "DP3", 2, R300_FPI0_OUTC_DP3, R300_FPI2_OUTA_DP4 },
	{ "DP4", 2, R300_FPI0_OUTC_DP4, R300_FPI2_OUTA_DP4 },
	{ "MIN", 2, R300_FPI0_OUTC_MIN, R300_FPI2_OUTA_MIN },
	{ "MAX", 2, R300_FPI0_OUTC_MAX, R300_FPI2_OUTA_MAX },
	{ "CMP", 3, R300_FPI0_OUTC_CMP, R300_FPI2_OUTA_CMP },
	{ "FRC", 1, R300_FPI0_OUTC_FRC, R300_FPI2_OUTA_FRC },
	{ "EX2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_EX2 },
	{ "LG2", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_LG2 },
	{ "RCP", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RCP },
	{ "RSQ", 1, R300_FPI0_OUTC_REPL_ALPHA, R300_FPI2_OUTA_RSQ },
	{ "REPL_ALPHA", 1, R300_FPI0_OUTC_REPL_ALPHA, PFS_INVAL }
};

/* Builds a 4-component swizzle from three components; W is always ZERO. */
#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \
					  SWIZZLE_##y, \
					  SWIZZLE_##z, \
					  SWIZZLE_ZERO))

#define SLOT_VECTOR	(1<<0)
#define SLOT_SCALAR	(1<<3)
#define SLOT_BOTH	(SLOT_VECTOR|SLOT_SCALAR)

/* vector swizzles r300 can support natively, with a couple of
 * cases we handle specially
 *
 * pfs_reg_t.v_swz/pfs_reg_t.s_swz is an index into this table
 **/
static const struct r300_pfs_swizzle {
	GLuint hash;	/* swizzle value this matches */
	GLuint base;	/* base value for hw swizzle */
	GLuint stride;	/* difference in base between arg0/1/2 */
	GLuint flags;
} v_swiz[] = {
/* native swizzles */
	{ MAKE_SWZ3(X, Y, Z), R300_FPI0_ARGC_SRC0C_XYZ, 4, SLOT_VECTOR },
	{ MAKE_SWZ3(X, X, X), R300_FPI0_ARGC_SRC0C_XXX, 4, SLOT_VECTOR },
	{ MAKE_SWZ3(Y, Y, Y), R300_FPI0_ARGC_SRC0C_YYY, 4, SLOT_VECTOR },
	{ MAKE_SWZ3(Z, Z, Z), R300_FPI0_ARGC_SRC0C_ZZZ, 4, SLOT_VECTOR },
	{ MAKE_SWZ3(W, W, W), R300_FPI0_ARGC_SRC0A, 1, SLOT_SCALAR },
	{ MAKE_SWZ3(Y, Z, X), R300_FPI0_ARGC_SRC0C_YZX, 1, SLOT_VECTOR },
	{ MAKE_SWZ3(Z, X, Y), R300_FPI0_ARGC_SRC0C_ZXY, 1, SLOT_VECTOR },
	{ MAKE_SWZ3(W, Z, Y), R300_FPI0_ARGC_SRC0CA_WZY, 1, SLOT_BOTH },
	{ MAKE_SWZ3(ONE, ONE, ONE), R300_FPI0_ARGC_ONE, 0, 0},
	{ MAKE_SWZ3(ZERO, ZERO, ZERO), R300_FPI0_ARGC_ZERO, 0, 0},
	/* The first PFS_INVAL hash ends the search loop in do_swizzle(), so
	 * the HALF entry below is only reachable by explicit index.
	 */
	{ PFS_INVAL, R300_FPI0_ARGC_HALF, 0, 0},
	{ PFS_INVAL, 0, 0, 0},
};

/* Indices into v_swiz[] */
#define SWIZZLE_XYZ		0
#define SWIZZLE_XXX		1
#define SWIZZLE_YYY		2
#define SWIZZLE_ZZZ		3
#define SWIZZLE_WWW		4
#define SWIZZLE_YZX		5
#define SWIZZLE_ZXY		6
#define SWIZZLE_WZY		7
#define SWIZZLE_111		8
#define SWIZZLE_000		9
#define SWIZZLE_HHH		10

/* Each component of a packed swizzle occupies 3 bits. */
#define SWZ_X_MASK (7 << 0)
#define SWZ_Y_MASK (7 << 3)
#define SWZ_Z_MASK (7 << 6)
#define SWZ_W_MASK (7 << 9)

/* used during matching of non-native swizzles */
static const struct {
	GLuint hash;	/* used to mask matching swizzle components */
	int mask;	/* actual outmask */
	int count;	/* count of components matched */
} s_mask[] = {
	{ SWZ_X_MASK|SWZ_Y_MASK|SWZ_Z_MASK, 1|2|4, 3},
	{ SWZ_X_MASK|SWZ_Y_MASK, 1|2, 2},
	{ SWZ_X_MASK|SWZ_Z_MASK, 1|4, 2},
	{ SWZ_Y_MASK|SWZ_Z_MASK, 2|4, 2},
	{ SWZ_X_MASK, 1, 1},
	{ SWZ_Y_MASK, 2, 1},
	{ SWZ_Z_MASK, 4, 1},
	{ PFS_INVAL, PFS_INVAL, PFS_INVAL}
};

/* mapping from SWIZZLE_* to r300 native values for scalar insns */
static const struct {
	int base;	/* hw value of swizzle */
	int stride;	/* difference between SRC0/1/2 */
	GLuint flags;
} s_swiz[] = {
	{ R300_FPI2_ARGA_SRC0C_X, 3, SLOT_VECTOR },
	{ R300_FPI2_ARGA_SRC0C_Y, 3, SLOT_VECTOR },
	{ R300_FPI2_ARGA_SRC0C_Z, 3, SLOT_VECTOR },
	{ R300_FPI2_ARGA_SRC0A  , 1, SLOT_SCALAR },
	{ R300_FPI2_ARGA_ZERO   , 0, 0 },
	{ R300_FPI2_ARGA_ONE    , 0, 0 },
	{ R300_FPI2_ARGA_HALF   , 0, 0 }
};
#define SWIZZLE_HALF 6

/* boiler-plate reg, for convenience */
static const pfs_reg_t undef = {
	.type = REG_TYPE_TEMP,
	.index = 0,
	.v_swz = SWIZZLE_XYZ,
	.s_swz = SWIZZLE_W,
	.negate_v = 0,
	.negate_s = 0,
	.absolute = 0,
	.no_use = GL_FALSE,
	.valid = GL_FALSE
};

/* constant one source */
static const pfs_reg_t pfs_one = {
	.type = REG_TYPE_CONST,
	.index = 0,
	.v_swz = SWIZZLE_111,
	.s_swz = SWIZZLE_ONE,
	.valid = GL_TRUE
};

/* constant zero source */
static const pfs_reg_t pfs_zero = {
	.type = REG_TYPE_CONST,
	.index = 0,
	.v_swz = SWIZZLE_000,
	.s_swz = SWIZZLE_ZERO,
	.valid = GL_TRUE
};

/***************************************
 * end: data structures
 ***************************************/

/* Prints "<file>::<function>(): <message>" to stderr and flags the program
 * as failed.  Requires a variable named `rp` in the calling scope.
 */
#define ERROR(fmt, args...) do { \
		fprintf(stderr, "%s::%s(): " fmt "\n",\
			__FILE__, __func__, ##args);  \
		rp->error = GL_TRUE; \
} while(0)

/**
 * Allocate a hardware temporary.
 *
 * Picks the lowest clear bit of cs->hwreg_in_use, marks it used and raises
 * rp->max_temp_idx when the new index exceeds it.
 *
 * \return the allocated index; when every bit is taken the program is
 *         flagged as failed and 0 is returned.
 */
static int get_hw_temp(struct r300_fragment_program *rp)
{
	COMPILE_STATE;
	int r = ffs(~cs->hwreg_in_use);

	if (!r) {
		ERROR("Out of hardware temps\n");
		return 0;
	}

	/* ffs() is 1-based; convert to a bit index before marking it. */
	cs->hwreg_in_use |= (1 << --r);
	if (r > rp->max_temp_idx)
		rp->max_temp_idx = r;

	return r;
}

/**
 * Allocate a hardware temporary, preferring one whose bit is clear in both
 * cs->hwreg_in_use and cs->used_in_node.
 *
 * When no such register exists this falls back to get_hw_temp().
 *
 * \return the allocated index (see get_hw_temp() for the failure case).
 */
static int get_hw_temp_tex(struct r300_fragment_program *rp)
{
	COMPILE_STATE;
	int r;

	r = ffs(~(cs->hwreg_in_use | cs->used_in_node));
	if (!r)
		return get_hw_temp(rp); /* Will cause an indirection */

	cs->hwreg_in_use |= (1 << --r);
	if (r > rp->max_temp_idx)
		rp->max_temp_idx = r;

	return r;
}

/**
 * Release hardware temporary \p idx by clearing its bit in
 * cs->hwreg_in_use.
 *
 * Negative indices are ignored: get_temp_reg() and free_temp() store -1 in
 * the `reg` field, and shifting by a negative count is undefined behaviour.
 */
static void free_hw_temp(struct r300_fragment_program *rp, int idx)
{
	COMPILE_STATE;

	if (idx < 0)
		return;

	cs->hwreg_in_use &= ~(1<<idx);
}

/**
 * Allocate a program temporary.
 *
 * Takes the lowest clear bit of cs->temp_in_use, sets the slot's refcount
 * to 0xFFFFFFFF and its hardware register to -1 (free_temp() writes the
 * same value after releasing a register, so -1 appears to mean "none").
 *
 * \return a valid TEMP register, or an invalid one (valid == GL_FALSE)
 *         with the program flagged as failed when no slot is free.
 */
static pfs_reg_t get_temp_reg(struct r300_fragment_program *rp)
{
	COMPILE_STATE;
	pfs_reg_t r = undef;

	r.index = ffs(~cs->temp_in_use);
	if (!r.index) {
		ERROR("Out of program temps\n");
		return r;
	}

	cs->temp_in_use |= (1 << --r.index);
	cs->temps[r.index].refcount = 0xFFFFFFFF;
	cs->temps[r.index].reg = -1;
	r.valid = GL_TRUE;

	return r;
}

/**
 * Same as get_temp_reg(), except that a hardware register is assigned
 * immediately through get_hw_temp_tex().
 */
static pfs_reg_t get_temp_reg_tex(struct r300_fragment_program *rp)
{
	COMPILE_STATE;
	pfs_reg_t r = undef;

	r.index = ffs(~cs->temp_in_use);
	if (!r.index) {
		ERROR("Out of program temps\n");
		return r;
	}

	cs->temp_in_use |= (1 << --r.index);
	cs->temps[r.index].refcount = 0xFFFFFFFF;
	cs->temps[r.index].reg = get_hw_temp_tex(rp);
	r.valid = GL_TRUE;

	return r;
}

/**
 * Release the hardware register backing \p r.
 *
 * Nothing happens unless bit r.index is set in cs->temp_in_use.  For TEMP
 * registers the program temp slot is released as well; for INPUT registers
 * only the hardware register is released.  Other types are ignored.
 *
 * NOTE(review): the temp_in_use test is applied to INPUT registers too,
 * using the input index against the temp bitmap - confirm that is intended.
 */
static void free_temp(struct r300_fragment_program *rp, pfs_reg_t r)
{
	COMPILE_STATE;

	if (!(cs->temp_in_use & (1<<r.index)))
		return;

	if (r.type == REG_TYPE_TEMP) {
		free_hw_temp(rp, cs->temps[r.index].reg);
		cs->temps[r.index].reg = -1;
		cs->temp_in_use &= ~(1<<r.index);
	} else if (r.type == REG_TYPE_INPUT) {
		free_hw_temp(rp, cs->inputs[r.index].reg);
		cs->inputs[r.index].reg = -1;
	}
}

/**
 * Reserve a constant slot whose contents are taken from \p values.
 *
 * Consumes one entry of rp->param[] and one constant index, records the
 * \p values pointer (the data is not copied here) and clears
 * rp->params_uptodate.
 *
 * Limits are checked before anything is consumed, so a failed call leaves
 * rp->param_nr and rp->const_nr within PFS_NUM_CONST_REGS.
 *
 * \return a valid CONST register, or an invalid one (valid == GL_FALSE)
 *         with the program flagged as failed when no slot is left.
 */
static pfs_reg_t emit_param4fv(struct r300_fragment_program *rp,
			       GLfloat *values)
{
	pfs_reg_t r = undef;
	int pidx;

	r.type = REG_TYPE_CONST;

	if (rp->param_nr >= PFS_NUM_CONST_REGS ||
	    rp->const_nr >= PFS_NUM_CONST_REGS) {
		ERROR("Out of const/param slots!\n");
		return r;
	}

	pidx = rp->param_nr++;
	r.index = rp->const_nr++;

	rp->param[pidx].idx = r.index;
	rp->param[pidx].values = values;
	rp->params_uptodate = GL_FALSE;

	r.valid = GL_TRUE;
	return r;
}

#if 0
static pfs_reg_t emit_const4fv(struct r300_fragment_program *rp, GLfloat *cp)
{
	pfs_reg_t r = undef;
	r.type = REG_TYPE_CONST;

	r.index = rp->const_nr++;
	if (r.index >= PFS_NUM_CONST_REGS) {
		ERROR("Out of hw constants!\n");
		return r;
	}

	COPY_4V(rp->constant[r.index], cp);
	r.valid = GL_TRUE;
	return r;
}
#endif

/* Returns a copy of r with both negate flags set (set, not toggled). */
static __inline pfs_reg_t negate(pfs_reg_t r)
{
	r.negate_v = 1;
	r.negate_s = 1;
	return r;
}

/* Hack, to prevent clobbering sources used multiple times when
 * emulating non-native instructions
 */
static __inline pfs_reg_t keep(pfs_reg_t r)
{
	r.no_use = GL_TRUE;
	return r;
}

/* Returns a copy of r with the absolute-value flag set. */
static __inline pfs_reg_t absolute(pfs_reg_t r)
{
	r.absolute = 1;
	return r;
}

/**
 * Handle a swizzle whose XYZ part matches a native swizzle outright.
 *
 * \param src     source with v_swz/s_swz already selected
 * \param r       in/out result register
 * \param arbneg  per-component negate bits (bits 0-2 = XYZ, bit 3 = W)
 *
 * When XYZ are all negated or all un-negated the result is just \p src with
 * its negate flags set accordingly.  Otherwise two MADs (src * 1 + 0) are
 * emitted into a temp: one negated, write-masked to the negated components,
 * and one un-negated covering the remaining components plus W.
 *
 * \return 3, the number of vector components matched.
 */
static int swz_native(struct r300_fragment_program *rp,
		      pfs_reg_t src, pfs_reg_t *r, GLuint arbneg)
{
	/* Native swizzle, nothing to see here */
	src.negate_s = (arbneg >> 3) & 1;

	if ((arbneg & 0x7) == 0x0) {
		src.negate_v = 0;
		*r = src;
	} else if ((arbneg & 0x7) == 0x7) {
		src.negate_v = 1;
		*r = src;
	} else {
		if (!r->valid)
			*r = get_temp_reg(rp);
		src.negate_v = 1;
		emit_arith(rp, PFS_OP_MAD, *r, arbneg & 0x7,
			   keep(src), pfs_one, pfs_zero, 0);
		src.negate_v = 0;
		emit_arith(rp, PFS_OP_MAD, *r,
			   (arbneg ^ 0x7) | WRITEMASK_W,
			   src, pfs_one, pfs_zero, 0);
	}

	return 3;
}

/**
 * Emit the part of a non-native swizzle covered by s_mask[mask].
 *
 * \param src     source with the candidate v_swz selected
 * \param r       in/out result register; a temp is allocated if invalid
 * \param mask    index into s_mask[] naming the matched components
 * \param mc      number of vector components matched before this call
 * \param arbneg  per-component negate bits (bits 0-2 = XYZ, bit 3 = W)
 *
 * Once this call completes the XYZ match (mc + count == 3) the W component
 * is written as well.  Until then the source is marked no_use on the final
 * MAD of the call.
 *
 * \return the number of vector components written by this call.
 */
static int swz_emit_partial(struct r300_fragment_program *rp, pfs_reg_t src,
			    pfs_reg_t *r, int mask, int mc, GLuint arbneg)
{
	GLuint tmp;
	GLuint wmask = 0;

	if (!r->valid)
		*r = get_temp_reg(rp);

	/* A partial match, src.v_swz/mask define what parts of the
	 * desired swizzle we match
	 */
	if (mc + s_mask[mask].count == 3) {
		wmask = WRITEMASK_W;
		src.negate_s = (arbneg >> 3) & 1;
	}

	tmp = arbneg & s_mask[mask].mask;
	if (tmp) {
		tmp = tmp ^ s_mask[mask].mask;
		if (tmp) {
			/* Mixed: negated components first, then the rest. */
			src.negate_v = 1;
			emit_arith(rp, PFS_OP_MAD, *r,
				   arbneg & s_mask[mask].mask,
				   keep(src), pfs_one, pfs_zero, 0);
			src.negate_v = 0;
			if (!wmask)
				src.no_use = GL_TRUE;
			else
				src.no_use = GL_FALSE;
			emit_arith(rp, PFS_OP_MAD, *r, tmp | wmask,
				   src, pfs_one, pfs_zero, 0);
		} else {
			/* Every matched component is negated. */
			src.negate_v = 1;
			if (!wmask)
				src.no_use = GL_TRUE;
			else
				src.no_use = GL_FALSE;
			emit_arith(rp, PFS_OP_MAD, *r,
				   (arbneg & s_mask[mask].mask) | wmask,
				   src, pfs_one, pfs_zero, 0);
			src.negate_v = 0;
		}
	} else {
		/* No matched component is negated. */
		if (!wmask)
			src.no_use = GL_TRUE;
		else
			src.no_use = GL_FALSE;
		emit_arith(rp, PFS_OP_MAD, *r,
			   s_mask[mask].mask | wmask,
			   src, pfs_one, pfs_zero, 0);
	}

	return s_mask[mask].count;
}

/* Convenience wrapper: swizzle `r` with no negation.  Needs `rp` in scope. */
#define swizzle(r, x, y, z, w) do_swizzle(rp, r, \
					  ((SWIZZLE_##x<<0)| \
					   (SWIZZLE_##y<<3)| \
					   (SWIZZLE_##z<<6)| \
					   (SWIZZLE_##w<<9)), \
					  0)

/**
 * Apply an arbitrary swizzle/negate to \p src.
 *
 * \param arbswz  packed swizzle, 3 bits per component (X at bit 0)
 * \param arbneg  per-component negate bits (bits 0-2 = XYZ, bit 3 = W)
 *
 * Walks s_mask[] from the widest component set to the narrowest and, for
 * each set, every entry of v_swiz[] up to the first PFS_INVAL hash.  A
 * match on all three components goes to swz_native(); smaller matches go
 * to swz_emit_partial().  Matched components are overwritten with 1-bits in
 * arbswz so they cannot match again.
 *
 * \return the register holding the swizzled value.  If the tables are
 *         exhausted before three components are matched, the program is
 *         flagged as failed and the partial result is returned.
 */
static pfs_reg_t do_swizzle(struct r300_fragment_program *rp,
			    pfs_reg_t src, GLuint arbswz, GLuint arbneg)
{
	pfs_reg_t r = undef;

	int c_mask = 0;
	int v_matched = 0;

	/* If swizzling from something without an XYZW native swizzle,
	 * emit result to a temp, and do new swizzle from the temp.
	 */
	if (src.v_swz != SWIZZLE_XYZ || src.s_swz != SWIZZLE_W) {
		pfs_reg_t temp = get_temp_reg(rp);
		emit_arith(rp, PFS_OP_MAD, temp, WRITEMASK_XYZW,
			   src, pfs_one, pfs_zero, 0);
		src = temp;
	}
	src.s_swz = GET_SWZ(arbswz, 3);

	do {
		do {
#define CUR_HASH (v_swiz[src.v_swz].hash & s_mask[c_mask].hash)
			if (CUR_HASH == (arbswz & s_mask[c_mask].hash)) {
				if (s_mask[c_mask].count == 3)
					v_matched += swz_native(rp, src, &r,
								arbneg);
				else
					v_matched += swz_emit_partial(rp, src,
								      &r,
								      c_mask,
								      v_matched,
								      arbneg);

				if (v_matched == 3)
					return r;

				/* Fill with something invalid.. all 0's was
				 * wrong before, matched SWIZZLE_X.  So all
				 * 1's will be okay for now
				 */
				arbswz |= (PFS_INVAL & s_mask[c_mask].hash);
			}
		} while(v_swiz[++src.v_swz].hash != PFS_INVAL);
		src.v_swz = SWIZZLE_XYZ;
	} while (s_mask[++c_mask].hash != PFS_INVAL);

	ERROR("should NEVER get here\n");
	return r;
}

static pfs_reg_t t_src(struct r300_fragment_program *rp,
		       struct prog_src_register fpsrc)
{
	pfs_reg_t r = undef;
#if 0
	pfs_reg_t n = undef;
#endif

	switch (fpsrc.File) {
	case PROGRAM_TEMPORARY:
		r.index = fpsrc.Index;
		r.valid = GL_TRUE;
		break;
	case PROGRAM_INPUT:
		r.index = fpsrc.Index;
		r.type = REG_TYPE_INPUT;
		r.valid = GL_TRUE;
		break;
	case PROGRAM_LOCAL_PARAM:
		r = emit_param4fv(rp,
			rp->mesa_program.Base.LocalParams[fpsrc.Index]);
		break;
	case PROGRAM_ENV_PARAM:
		r = emit_param4fv(rp,
			rp->ctx->FragmentProgram.Parameters[fpsrc.Index]);
		break;
	case PROGRAM_STATE_VAR:
	case PROGRAM_NAMED_PARAM:
		r = emit_param4fv(rp,
			rp->mesa_program.Base.Parameters->ParameterValues[fpsrc.Index]);
		break;
	default:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -