t_vb_arbprogram.c

来自「mesa-6.5-minigui源码」· C语言 代码 · 共 1,571 行 · 第 1/3 页

C
1,571
字号
/* * Mesa 3-D graphics library * Version:  6.5 * * Copyright (C) 1999-2006  Brian Paul   All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. *//** * \file t_arb_program.c * Compile vertex programs to an intermediate representation. * Execute vertex programs over a buffer of vertices. * \author Keith Whitwell, Brian Paul */#include "glheader.h"#include "context.h"#include "imports.h"#include "macros.h"#include "mtypes.h"#include "arbprogparse.h"#include "light.h"#include "program.h"#include "math/m_matrix.h"#include "math/m_translate.h"#include "t_context.h"#include "t_pipeline.h"#include "t_vb_arbprogram.h"#include "tnl.h"#include "program_instruction.h"#define DISASSEM 0struct compilation {   GLuint reg_active;   union instruction *csr;};#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr))#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])/* Lower precision functions for the EXP, LOG and LIT opcodes.  The * LOG2() implementation is probably not accurate enough, and the * attempted optimization for Exp2 is definitely not accurate * enough - it discards all of t's fractional bits! */static GLfloat RoughApproxLog2(GLfloat t){   return LOG2(t);}static GLfloat RoughApproxExp2(GLfloat t){   #if 0   fi_type fi;   fi.i = (GLint) t;   fi.i = (fi.i << 23) + 0x3f800000;   return fi.f;#else   return (GLfloat) _mesa_pow(2.0, t);#endif}static GLfloat RoughApproxPower(GLfloat x, GLfloat y){   if (x == 0.0 && y == 0.0)      return 1.0;  /* spec requires this */   else      return RoughApproxExp2(y * RoughApproxLog2(x));}/* Higher precision functions for the EX2, LG2 and POW opcodes: */static GLfloat ApproxLog2(GLfloat t){   return (GLfloat) (LOGF(t) * 1.442695F);}static GLfloat ApproxExp2(GLfloat t){      return (GLfloat) _mesa_pow(2.0, t);}static GLfloat ApproxPower(GLfloat x, GLfloat y){   return (GLfloat) _mesa_pow(x, y);}static GLfloat rough_approx_log2_0_1(GLfloat x){   return LOG2(x);}/** * Perform a reduced swizzle: */static void do_RSW( struct arb_vp_machine *m, union instruction op ) {   GLfloat *result = m->File[0][op.rsw.dst];   const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0];   GLuint swz = op.rsw.swz;   GLuint neg = op.rsw.neg;   GLfloat tmp[4];   /* Need a temporary to be correct in the case where result == arg0.    */   COPY_4V(tmp, arg0);      result[0] = tmp[GET_RSW(swz, 0)];   result[1] = tmp[GET_RSW(swz, 1)];   result[2] = tmp[GET_RSW(swz, 2)];   result[3] = tmp[GET_RSW(swz, 3)];      if (neg) {      if (neg & 0x1) result[0] = -result[0];      if (neg & 0x2) result[1] = -result[1];      if (neg & 0x4) result[2] = -result[2];      if (neg & 0x8) result[3] = -result[3];   }}/* Used to implement write masking.  To make things easier for the sse * generator I've gone back to a 1 argument version of this function * (dst.msk = arg), rather than the semantically cleaner (dst = SEL * arg0, arg1, msk) * * That means this is the only instruction which doesn't write a full * 4 dwords out.  This would make such a program harder to analyse, * but it looks like analysis is going to take place on a higher level * anyway. */static void do_MSK( struct arb_vp_machine *m, union instruction op ){   GLfloat *dst = m->File[0][op.msk.dst];   const GLfloat *arg = m->File[op.msk.file][op.msk.idx];    if (op.msk.mask & 0x1) dst[0] = arg[0];   if (op.msk.mask & 0x2) dst[1] = arg[1];   if (op.msk.mask & 0x4) dst[2] = arg[2];   if (op.msk.mask & 0x8) dst[3] = arg[3];}static void do_PRT( struct arb_vp_machine *m, union instruction op ){   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];      _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr, 		arg0[0], arg0[1], arg0[2], arg0[3]);}/** * The traditional ALU and texturing instructions.  All operate on * internal registers and ignore write masks and swizzling issues. */static void do_ABS( struct arb_vp_machine *m, union instruction op ) {   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];   result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];   result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2];   result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3];}static void do_ADD( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = arg0[0] + arg1[0];   result[1] = arg0[1] + arg1[1];   result[2] = arg0[2] + arg1[2];   result[3] = arg0[3] + arg1[3];}static void do_DP3( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = (arg0[0] * arg1[0] + 		arg0[1] * arg1[1] + 		arg0[2] * arg1[2]);   PUFF(result);}static void do_DP4( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = (arg0[0] * arg1[0] + 		arg0[1] * arg1[1] + 		arg0[2] * arg1[2] + 		arg0[3] * arg1[3]);   PUFF(result);}static void do_DPH( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = (arg0[0] * arg1[0] + 		arg0[1] * arg1[1] + 		arg0[2] * arg1[2] + 		1.0     * arg1[3]);      PUFF(result);}static void do_DST( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   /* This should be ok even if result == arg0 or result == arg1.    */   result[0] = 1.0F;   result[1] = arg0[1] * arg1[1];   result[2] = arg0[2];   result[3] = arg1[3];}/* Intended to be high precision: */static void do_EX2( struct arb_vp_machine *m, union instruction op ) {   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   result[0] = (GLfloat)ApproxExp2(arg0[0]);   PUFF(result);}/* Allowed to be lower precision: */static void do_EXP( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   GLfloat tmp = arg0[0];   GLfloat flr_tmp = FLOORF(tmp);   GLfloat frac_tmp = tmp - flr_tmp;   result[0] = LDEXPF(1.0, (int)flr_tmp);   result[1] = frac_tmp;   result[2] = LDEXPF(rough_approx_log2_0_1(frac_tmp), (int)flr_tmp);   result[3] = 1.0F;}static void do_FLR( struct arb_vp_machine *m, union instruction op ) {   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   result[0] = FLOORF(arg0[0]);   result[1] = FLOORF(arg0[1]);   result[2] = FLOORF(arg0[2]);   result[3] = FLOORF(arg0[3]);}static void do_FRC( struct arb_vp_machine *m, union instruction op ) {   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   result[0] = arg0[0] - FLOORF(arg0[0]);   result[1] = arg0[1] - FLOORF(arg0[1]);   result[2] = arg0[2] - FLOORF(arg0[2]);   result[3] = arg0[3] - FLOORF(arg0[3]);}/* High precision log base 2: */static void do_LG2( struct arb_vp_machine *m, union instruction op ) {   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   result[0] = ApproxLog2(arg0[0]);   PUFF(result);}static void do_LIT( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   GLfloat tmp[4];   tmp[0] = 1.0;   tmp[1] = arg0[0];   if (arg0[0] > 0.0) {      tmp[2] = RoughApproxPower(arg0[1], arg0[3]);   }   else {      tmp[2] = 0.0;   }   tmp[3] = 1.0;   COPY_4V(result, tmp);}/* Intended to allow a lower precision than required for LG2 above. */static void do_LOG( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   GLfloat tmp = FABSF(arg0[0]);   int exponent;   GLfloat mantissa = FREXPF(tmp, &exponent);   result[0] = (GLfloat) (exponent - 1);   result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */   result[2] = exponent + LOG2(mantissa);   result[3] = 1.0;}static void do_MAX( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];   result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];   result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2];   result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];}static void do_MIN( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];   result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];   result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2];   result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3];}static void do_MOV( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   result[0] = arg0[0];   result[1] = arg0[1];   result[2] = arg0[2];   result[3] = arg0[3];}static void do_MUL( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = arg0[0] * arg1[0];   result[1] = arg0[1] * arg1[1];   result[2] = arg0[2] * arg1[2];   result[3] = arg0[3] * arg1[3];}/* Intended to be "high" precision */static void do_POW( struct arb_vp_machine *m, union instruction op ) {   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = (GLfloat)ApproxPower(arg0[0], arg1[0]);   PUFF(result);}static void do_REL( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   GLuint idx = (op.alu.idx0 + (GLint)m->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1);   const GLfloat *arg0 = m->File[op.alu.file0][idx];   result[0] = arg0[0];   result[1] = arg0[1];   result[2] = arg0[2];   result[3] = arg0[3];}static void do_RCP( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   result[0] = 1.0F / arg0[0];     PUFF(result);}static void do_RSQ( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   result[0] = INV_SQRTF(FABSF(arg0[0]));   PUFF(result);}static void do_SGE( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;   result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;   result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F;   result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;}static void do_SLT( struct arb_vp_machine *m, union instruction op ){   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;   result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;   result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F;   result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;}static void do_SUB( struct arb_vp_machine *m, union instruction op ) {   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   result[0] = arg0[0] - arg1[0];   result[1] = arg0[1] - arg1[1];   result[2] = arg0[2] - arg1[2];   result[3] = arg0[3] - arg1[3];}static void do_XPD( struct arb_vp_machine *m, union instruction op ) {   GLfloat *result = m->File[0][op.alu.dst];   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];   GLfloat tmp[3];   tmp[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1];   tmp[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2];   tmp[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0];   /* Need a temporary to be correct in the case where result == arg0    * or result == arg1.    */   result[0] = tmp[0];   result[1] = tmp[1];   result[2] = tmp[2];}static void do_NOP( struct arb_vp_machine *m, union instruction op ) {}/* Some useful debugging functions: */static void print_mask( GLuint mask ){   _mesa_printf(".");   if (mask&0x1) _mesa_printf("x");

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?