t_vb_arbprogram.c
来自「mesa-6.5-minigui源码」· C语言 代码 · 共 1,571 行 · 第 1/3 页
C
1,571 行
/*
 * Mesa 3-D graphics library
 * Version:  6.5
 *
 * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \file t_vb_arbprogram.c
 * Compile vertex programs to an intermediate representation.
 * Execute vertex programs over a buffer of vertices.
 * \author Keith Whitwell, Brian Paul
 */

#include "glheader.h"
#include "context.h"
#include "imports.h"
#include "macros.h"
#include "mtypes.h"
#include "arbprogparse.h"
#include "light.h"
#include "program.h"
#include "math/m_matrix.h"
#include "math/m_translate.h"
#include "t_context.h"
#include "t_pipeline.h"
#include "t_vb_arbprogram.h"
#include "tnl.h"
#include "program_instruction.h"

#define DISASSEM 0

/**
 * State carried while compiling a program.
 * NOTE(review): the users of this struct are not in this part of the
 * file; reg_active is presumably a bitmask of registers in use and csr
 * a cursor into the instruction stream being emitted - confirm against
 * the compiler code.
 */
struct compilation {
   GLuint reg_active;
   union instruction *csr;
};

/** Fetch a pipeline stage's private data as a struct arb_vp_machine. */
#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)((stage)->privatePtr))

/**
 * Replicate element 0 of a 4-component vector into elements 1..3.
 * The argument is expanded four times, so it must be free of side effects.
 */
#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])

/* Lower precision functions for the EXP, LOG and LIT opcodes.  The
 * LOG2() implementation is probably not accurate enough.  An integer
 * bit-twiddling shortcut for Exp2 (convert t to an integer, shift it
 * into the float exponent field, add 0x3f800000) was tried and dropped:
 * it is definitely not accurate enough - it discards all of t's
 * fractional bits!
 */

/** Low precision log base 2 of \p t; currently just LOG2(). */
static GLfloat RoughApproxLog2(GLfloat t)
{
   return LOG2(t);
}

/** Low precision 2^t, computed with _mesa_pow(). */
static GLfloat RoughApproxExp2(GLfloat t)
{
   return (GLfloat) _mesa_pow(2.0, t);
}

/**
 * Low precision x^y, computed as 2^(y * log2(x)).
 * 0^0 is defined to be 1.
 */
static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
{
   if (x == 0.0 && y == 0.0)
      return 1.0;  /* spec requires this */

   return RoughApproxExp2(y * RoughApproxLog2(x));
}

/* Higher precision functions for the EX2, LG2 and POW opcodes:
 */

/** Log base 2 of \p t, from LOGF() scaled by 1.442695 (= 1 / ln 2). */
static GLfloat ApproxLog2(GLfloat t)
{
   return (GLfloat) (LOGF(t) * 1.442695F);
}

/** 2^t, computed with _mesa_pow(). */
static GLfloat ApproxExp2(GLfloat t)
{
   return (GLfloat) _mesa_pow(2.0, t);
}

/** x^y, computed with _mesa_pow(). */
static GLfloat ApproxPower(GLfloat x, GLfloat y)
{
   return (GLfloat) _mesa_pow(x, y);
}

/**
 * Log base 2 of \p x via LOG2().
 * NOTE(review): the name suggests it is meant for arguments in [0, 1] -
 * confirm with the callers.
 */
static GLfloat rough_approx_log2_0_1(GLfloat x)
{
   return LOG2(x);
}

/**
 * Perform a reduced swizzle:
 *
 * Each destination component is the source component selected by
 * GET_RSW(op.rsw.swz, i), negated when bit i of op.rsw.neg is set.
 * The destination register lives in File[0]; the source register is
 * File[op.rsw.file0][op.rsw.idx0].
 */
static void do_RSW( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.rsw.dst];
   const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0];
   const GLuint swz = op.rsw.swz;
   const GLuint neg = op.rsw.neg;
   GLfloat x, y, z, w;

   /* Read every selected component before writing anything, to be
    * correct in the case where result == arg0.
    */
   x = arg0[GET_RSW(swz, 0)];
   y = arg0[GET_RSW(swz, 1)];
   z = arg0[GET_RSW(swz, 2)];
   w = arg0[GET_RSW(swz, 3)];

   result[0] = (neg & 0x1) ? -x : x;
   result[1] = (neg & 0x2) ? -y : y;
   result[2] = (neg & 0x4) ? -z : z;
   result[3] = (neg & 0x8) ? -w : w;
}

/* Used to implement write masking.  To make things easier for the sse
 * generator I've gone back to a 1 argument version of this function
 * (dst.msk = arg), rather than the semantically cleaner (dst = SEL
 * arg0, arg1, msk)
 *
 * That means this is the only instruction which doesn't write a full
 * 4 dwords out.
This would make such a program harder to analyse, * but it looks like analysis is going to take place on a higher level * anyway. */static void do_MSK( struct arb_vp_machine *m, union instruction op ){ GLfloat *dst = m->File[0][op.msk.dst]; const GLfloat *arg = m->File[op.msk.file][op.msk.idx]; if (op.msk.mask & 0x1) dst[0] = arg[0]; if (op.msk.mask & 0x2) dst[1] = arg[1]; if (op.msk.mask & 0x4) dst[2] = arg[2]; if (op.msk.mask & 0x8) dst[3] = arg[3];}static void do_PRT( struct arb_vp_machine *m, union instruction op ){ const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr, arg0[0], arg0[1], arg0[2], arg0[3]);}/** * The traditional ALU and texturing instructions. All operate on * internal registers and ignore write masks and swizzling issues. */static void do_ABS( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0]; result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1]; result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2]; result[3] = (arg0[3] < 0.0) ? 
-arg0[3] : arg0[3];}static void do_ADD( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = arg0[0] + arg1[0]; result[1] = arg0[1] + arg1[1]; result[2] = arg0[2] + arg1[2]; result[3] = arg0[3] + arg1[3];}static void do_DP3( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] * arg1[0] + arg0[1] * arg1[1] + arg0[2] * arg1[2]); PUFF(result);}static void do_DP4( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] * arg1[0] + arg0[1] * arg1[1] + arg0[2] * arg1[2] + arg0[3] * arg1[3]); PUFF(result);}static void do_DPH( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] * arg1[0] + arg0[1] * arg1[1] + arg0[2] * arg1[2] + 1.0 * arg1[3]); PUFF(result);}static void do_DST( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; /* This should be ok even if result == arg0 or result == arg1. 
*/ result[0] = 1.0F; result[1] = arg0[1] * arg1[1]; result[2] = arg0[2]; result[3] = arg1[3];}/* Intended to be high precision: */static void do_EX2( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = (GLfloat)ApproxExp2(arg0[0]); PUFF(result);}/* Allowed to be lower precision: */static void do_EXP( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; GLfloat tmp = arg0[0]; GLfloat flr_tmp = FLOORF(tmp); GLfloat frac_tmp = tmp - flr_tmp; result[0] = LDEXPF(1.0, (int)flr_tmp); result[1] = frac_tmp; result[2] = LDEXPF(rough_approx_log2_0_1(frac_tmp), (int)flr_tmp); result[3] = 1.0F;}static void do_FLR( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = FLOORF(arg0[0]); result[1] = FLOORF(arg0[1]); result[2] = FLOORF(arg0[2]); result[3] = FLOORF(arg0[3]);}static void do_FRC( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = arg0[0] - FLOORF(arg0[0]); result[1] = arg0[1] - FLOORF(arg0[1]); result[2] = arg0[2] - FLOORF(arg0[2]); result[3] = arg0[3] - FLOORF(arg0[3]);}/* High precision log base 2: */static void do_LG2( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = ApproxLog2(arg0[0]); PUFF(result);}static void do_LIT( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; GLfloat tmp[4]; tmp[0] = 1.0; tmp[1] = arg0[0]; if (arg0[0] > 0.0) { tmp[2] = RoughApproxPower(arg0[1], arg0[3]); } else { tmp[2] = 0.0; } tmp[3] = 1.0; COPY_4V(result, 
tmp);}/* Intended to allow a lower precision than required for LG2 above. */static void do_LOG( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; GLfloat tmp = FABSF(arg0[0]); int exponent; GLfloat mantissa = FREXPF(tmp, &exponent); result[0] = (GLfloat) (exponent - 1); result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */ result[2] = exponent + LOG2(mantissa); result[3] = 1.0;}static void do_MAX( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0]; result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1]; result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2]; result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];}static void do_MIN( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0]; result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1]; result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2]; result[3] = (arg0[3] < arg1[3]) ? 
arg0[3] : arg1[3];}static void do_MOV( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = arg0[0]; result[1] = arg0[1]; result[2] = arg0[2]; result[3] = arg0[3];}static void do_MUL( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = arg0[0] * arg1[0]; result[1] = arg0[1] * arg1[1]; result[2] = arg0[2] * arg1[2]; result[3] = arg0[3] * arg1[3];}/* Intended to be "high" precision */static void do_POW( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (GLfloat)ApproxPower(arg0[0], arg1[0]); PUFF(result);}static void do_REL( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; GLuint idx = (op.alu.idx0 + (GLint)m->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1); const GLfloat *arg0 = m->File[op.alu.file0][idx]; result[0] = arg0[0]; result[1] = arg0[1]; result[2] = arg0[2]; result[3] = arg0[3];}static void do_RCP( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = 1.0F / arg0[0]; PUFF(result);}static void do_RSQ( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; result[0] = INV_SQRTF(FABSF(arg0[0])); PUFF(result);}static void do_SGE( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] >= arg1[0]) ? 
1.0F : 0.0F; result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F; result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F; result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;}static void do_SLT( struct arb_vp_machine *m, union instruction op ){ GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F; result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F; result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F; result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;}static void do_SUB( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; result[0] = arg0[0] - arg1[0]; result[1] = arg0[1] - arg1[1]; result[2] = arg0[2] - arg1[2]; result[3] = arg0[3] - arg1[3];}static void do_XPD( struct arb_vp_machine *m, union instruction op ) { GLfloat *result = m->File[0][op.alu.dst]; const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0]; const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1]; GLfloat tmp[3]; tmp[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]; tmp[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]; tmp[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; /* Need a temporary to be correct in the case where result == arg0 * or result == arg1. */ result[0] = tmp[0]; result[1] = tmp[1]; result[2] = tmp[2];}static void do_NOP( struct arb_vp_machine *m, union instruction op ) {}/* Some useful debugging functions: */static void print_mask( GLuint mask ){ _mesa_printf("."); if (mask&0x1) _mesa_printf("x");
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?