📄 t_vb_arbprogram.c
字号:
/*
* Mesa 3-D graphics library
* Version: 6.3
*
* Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file t_vb_arbprogram.c
* Compile vertex programs to an intermediate representation.
* Execute vertex programs over a buffer of vertices.
* \author Keith Whitwell, Brian Paul
*/
#include "glheader.h"
#include "context.h"
#include "imports.h"
#include "macros.h"
#include "mtypes.h"
#include "arbprogparse.h"
#include "light.h"
#include "program.h"
#include "math/m_matrix.h"
#include "math/m_translate.h"
#include "t_context.h"
#include "t_pipeline.h"
#include "t_vb_arbprogram.h"
/* NOTE(review): no use of DISASSEM is visible in this part of the file;
 * presumably a compile-time switch (0 = off) for disassembly/debug output -
 * confirm against the code that tests it.
 */
#define DISASSEM 0
/*--------------------------------------------------------------------------- */
/**
 * Descriptive record for one opcode.
 *
 * NOTE(review): the table that instantiates this struct is outside this
 * view; the field meanings below are inferred from names and types only.
 */
struct opcode_info {
/* Argument count - presumably the number of source operands; confirm. */
GLuint nr_args;
/* Text associated with the opcode - presumably its mnemonic; confirm. */
const char *string;
/* Callback receiving an instruction (by value) and an opcode_info pointer;
 * presumably used to print/disassemble the instruction - confirm.
 */
void (*print)( union instruction , const struct opcode_info * );
};
/**
 * Compilation state.
 *
 * NOTE(review): the code that uses this struct is outside this view; the
 * field meanings below are inferred from names and types only.
 */
struct compilation {
/* Presumably a bitmask of registers currently in use - confirm. */
GLuint reg_active;
/* Pointer into an instruction array - presumably the emit cursor; confirm. */
union instruction *csr;
};
/**
 * Fetch a stage's privatePtr as a struct arb_vp_machine pointer.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. &stage_object) expands correctly.
 */
#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)((stage)->privatePtr))

/**
 * Replicate element 0 of a 4-element array into elements 1..3.
 *
 * The argument is evaluated four times, so it must not have side effects.
 */
#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])
/* Lower precision functions for the EXP, LOG and LIT opcodes. The
* LOG2() implementation is probably not accurate enough, and the
* attempted optimization for Exp2 is definitely not accurate
* enough - it discards all of t's fractional bits!
*/
/**
 * Low-precision base-2 logarithm; a thin wrapper around the LOG2() macro.
 *
 * \param t  value whose logarithm is wanted
 * \return LOG2(t) as a GLfloat
 */
static GLfloat RoughApproxLog2(GLfloat t)
{
   const GLfloat log2_t = LOG2(t);

   return log2_t;
}
/**
 * Low-precision 2^t.
 *
 * The active branch simply calls _mesa_pow(2.0, t).  The disabled branch is
 * an exponent-bits shortcut that truncates t to an integer before building
 * the float, which is why it is compiled out.
 *
 * \param t  exponent
 * \return 2 raised to the power t
 */
static GLfloat RoughApproxExp2(GLfloat t)
{
#if 1
   const GLfloat two_to_t = (GLfloat) _mesa_pow(2.0, t);

   return two_to_t;
#else
   /* Disabled: (GLint) t drops the fractional part of the exponent. */
   fi_type fi;
   fi.i = (GLint) t;
   fi.i = (fi.i << 23) + 0x3f800000;
   return fi.f;
#endif
}
/**
 * Low-precision x^y, evaluated as 2^(y * log2(x)) using the rough helpers.
 *
 * \param x  base
 * \param y  exponent
 * \return RoughApproxExp2(y * RoughApproxLog2(x))
 */
static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
{
   const GLfloat log2_x = RoughApproxLog2(x);
   const GLfloat scaled = y * log2_x;

   return RoughApproxExp2(scaled);
}
/* Higher precision functions for the EX2, LG2 and POW opcodes:
*/
/**
 * Base-2 logarithm computed from the natural logarithm.
 *
 * \param t  value whose logarithm is wanted
 * \return log(t) scaled by 1.442695 (approximately 1/ln 2)
 */
static GLfloat ApproxLog2(GLfloat t)
{
   /* Approximately 1/ln(2): converts a natural log into a base-2 log. */
   const GLfloat inv_ln2 = 1.442695F;

   return (GLfloat) (log(t) * inv_ln2);
}
/**
 * 2^t via _mesa_pow().
 *
 * \param t  exponent
 * \return 2 raised to the power t, as a GLfloat
 */
static GLfloat ApproxExp2(GLfloat t)
{
   const GLfloat two_to_t = (GLfloat) _mesa_pow(2.0, t);

   return two_to_t;
}
/**
 * x^y via _mesa_pow().
 *
 * \param x  base
 * \param y  exponent
 * \return x raised to the power y, as a GLfloat
 */
static GLfloat ApproxPower(GLfloat x, GLfloat y)
{
   const GLfloat x_to_y = (GLfloat) _mesa_pow(x, y);

   return x_to_y;
}
/**
 * Base-2 logarithm via the LOG2() macro.
 *
 * NOTE(review): the name suggests the argument is expected to lie in
 * [0, 1]; nothing in this function enforces that - confirm with callers.
 *
 * \param x  value whose logarithm is wanted
 * \return LOG2(x) as a GLfloat
 */
static GLfloat rough_approx_log2_0_1(GLfloat x)
{
   const GLfloat log2_x = LOG2(x);

   return log2_x;
}
/**
 * Perform a reduced swizzle (RSW).
 *
 * Destination component i is loaded from the source component selected by
 * GET_RSW(swz, i) and then negated if bit i of the negate mask is set.  The
 * destination register is always taken from register file 0.
 *
 * The source vector is copied into a local temporary before any component
 * is written: the destination and the source may be the same register, and
 * swizzling in place would then read components that have already been
 * overwritten (for example with a .yxzw swizzle).
 *
 * \param m   machine holding the register files
 * \param op  instruction; its rsw fields (dst, file0, idx0, swz, neg) are used
 */
static void do_RSW( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.rsw.dst];
   const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0];
   const GLuint swz = op.rsw.swz;
   const GLuint neg = op.rsw.neg;
   GLfloat src[4];

   /* Snapshot the source so that result == arg0 is handled correctly. */
   src[0] = arg0[0];
   src[1] = arg0[1];
   src[2] = arg0[2];
   src[3] = arg0[3];

   result[0] = src[GET_RSW(swz, 0)];
   result[1] = src[GET_RSW(swz, 1)];
   result[2] = src[GET_RSW(swz, 2)];
   result[3] = src[GET_RSW(swz, 3)];

   if (neg) {
      if (neg & 0x1) result[0] = -result[0];
      if (neg & 0x2) result[1] = -result[1];
      if (neg & 0x4) result[2] = -result[2];
      if (neg & 0x8) result[3] = -result[3];
   }
}
/* Used to implement write masking. To make things easier for the sse
* generator I've gone back to a 1 argument version of this function
* (dst.msk = arg), rather than the semantically cleaner (dst = SEL
* arg0, arg1, msk)
*
* That means this is the only instruction which doesn't write a full
* 4 dwords out. This would make such a program harder to analyse,
* but it looks like analysis is going to take place on a higher level
* anyway.
*/
static void do_MSK( struct arb_vp_machine *m, union instruction op )
{
GLfloat *dst = m->File[0][op.msk.dst];
const GLfloat *arg = m->File[op.msk.file][op.msk.idx];
if (op.msk.mask & 0x1) dst[0] = arg[0];
if (op.msk.mask & 0x2) dst[1] = arg[1];
if (op.msk.mask & 0x4) dst[2] = arg[2];
if (op.msk.mask & 0x8) dst[3] = arg[3];
}
/**
 * Debug print: emit the machine's vtx_nr followed by the four components of
 * the source register through _mesa_printf().
 *
 * \param m   machine holding the register files and vtx_nr
 * \param op  instruction; alu.file0 / alu.idx0 select the register to print
 */
static void do_PRT( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *v = m->File[op.alu.file0][op.alu.idx0];

   _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr, v[0], v[1], v[2], v[3]);
}
/**
* The traditional ALU and texturing instructions. All operate on
* internal registers and ignore write masks and swizzling issues.
*/
static void do_ABS( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];
result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];
result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2];
result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3];
}
static void do_ADD( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = arg0[0] + arg1[0];
result[1] = arg0[1] + arg1[1];
result[2] = arg0[2] + arg1[2];
result[3] = arg0[3] + arg1[3];
}
/**
 * DP3: three-component dot product of the two sources, replicated to all
 * four destination components.
 *
 * \param m   machine holding the register files
 * \param op  instruction; alu.dst is the destination in file 0,
 *            alu.file0/idx0 and alu.file1/idx1 select the sources
 */
static void do_DP3( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *a = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *b = m->File[op.alu.file1][op.alu.idx1];
   GLfloat *out = m->File[0][op.alu.dst];
   const GLfloat dot = (a[0] * b[0] +
                        a[1] * b[1] +
                        a[2] * b[2]);

   out[0] = dot;
   out[3] = dot;
   out[2] = dot;
   out[1] = dot;
}
/**
 * DP4: four-component dot product of the two sources, replicated to all
 * four destination components.
 *
 * \param m   machine holding the register files
 * \param op  instruction; alu.dst is the destination in file 0,
 *            alu.file0/idx0 and alu.file1/idx1 select the sources
 */
static void do_DP4( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *a = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *b = m->File[op.alu.file1][op.alu.idx1];
   GLfloat *out = m->File[0][op.alu.dst];
   const GLfloat dot = (a[0] * b[0] +
                        a[1] * b[1] +
                        a[2] * b[2] +
                        a[3] * b[3]);

   out[0] = dot;
   out[3] = dot;
   out[2] = dot;
   out[1] = dot;
}
/**
 * DPH: homogeneous dot product.  The xyz components of the two sources are
 * multiplied and summed, and the second source's w is added as if the first
 * source's w were 1.  The scalar is replicated to all four destination
 * components.
 *
 * \param m   machine holding the register files
 * \param op  instruction; alu.dst is the destination in file 0,
 *            alu.file0/idx0 and alu.file1/idx1 select the sources
 */
static void do_DPH( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *a = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *b = m->File[op.alu.file1][op.alu.idx1];
   GLfloat *out = m->File[0][op.alu.dst];
   /* The first operand's w is taken to be 1.0, hence the bare b[3] term. */
   const GLfloat dot = (a[0] * b[0] +
                        a[1] * b[1] +
                        a[2] * b[2] +
                        1.0 * b[3]);

   out[0] = dot;
   out[3] = dot;
   out[2] = dot;
   out[1] = dot;
}
/**
 * DST: builds the vector (1, arg0.y * arg1.y, arg0.z, arg1.w).
 *
 * \param m   machine holding the register files
 * \param op  instruction; alu.dst is the destination in file 0,
 *            alu.file0/idx0 and alu.file1/idx1 select the sources
 */
static void do_DST( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *a = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *b = m->File[op.alu.file1][op.alu.idx1];
   GLfloat *out = m->File[0][op.alu.dst];
   /* Gather every input first, then store. */
   const GLfloat y = a[1] * b[1];
   const GLfloat z = a[2];
   const GLfloat w = b[3];

   out[0] = 1.0F;
   out[1] = y;
   out[2] = z;
   out[3] = w;
}
/* Intended to be high precision:
*/
static void do_EX2( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = (GLfloat)ApproxExp2(arg0[0]);
PUFF(result);
}
/**
 * EXP: exponential base 2, allowed to be lower precision than EX2.
 *
 * With x = the source's first component, the destination receives:
 *   .x = 2^floor(x)
 *   .y = x - floor(x)          (fractional part of x)
 *   .z = rough approximation of 2^x
 *   .w = 1
 *
 * \param m   machine holding the register files
 * \param op  instruction; alu.dst is the destination in file 0,
 *            alu.file0 / alu.idx0 select the source
 */
static void do_EXP( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   /* Read the input before writing anything: result may alias arg0. */
   const GLfloat tmp = arg0[0];
   const GLfloat flr_tmp = FLOORF(tmp);
   const GLfloat frac_tmp = tmp - flr_tmp;

   result[0] = LDEXPF(1.0, (int)flr_tmp);
   result[1] = frac_tmp;
   /* z is an exponential, not a logarithm: approximate 2^x directly. */
   result[2] = RoughApproxExp2(tmp);
   result[3] = 1.0F;
}
static void do_FLR( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = FLOORF(arg0[0]);
result[1] = FLOORF(arg0[1]);
result[2] = FLOORF(arg0[2]);
result[3] = FLOORF(arg0[3]);
}
static void do_FRC( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = arg0[0] - FLOORF(arg0[0]);
result[1] = arg0[1] - FLOORF(arg0[1]);
result[2] = arg0[2] - FLOORF(arg0[2]);
result[3] = arg0[3] - FLOORF(arg0[3]);
}
/**
 * LG2: high-precision base-2 logarithm of the source's first component
 * (via ApproxLog2()), replicated to all four destination components.
 *
 * \param m   machine holding the register files
 * \param op  instruction; alu.dst is the destination in file 0,
 *            alu.file0 / alu.idx0 select the source
 */
static void do_LG2( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *src = m->File[op.alu.file0][op.alu.idx0];
   GLfloat *out = m->File[0][op.alu.dst];
   const GLfloat value = ApproxLog2(src[0]);

   out[0] = value;
   out[3] = value;
   out[2] = value;
   out[1] = value;
}
/**
 * LIT: lighting coefficients.
 *
 * With (x, y, _, w) = the source register, the destination receives:
 *   .x = 1
 *   .y = x  if x > 0, otherwise 0
 *   .z = RoughApproxPower(y, w)  if x > 0 and y > 0, otherwise 0
 *   .w = 1
 *
 * NOTE(review): the exponent w is passed through unclamped - confirm
 * whether the instruction set definition expects it to be range-limited.
 *
 * \param m   machine holding the register files
 * \param op  instruction; alu.dst is the destination in file 0,
 *            alu.file0 / alu.idx0 select the source
 */
static void do_LIT( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *src = m->File[op.alu.file0][op.alu.idx0];
   GLfloat *out = m->File[0][op.alu.dst];
   GLfloat y_out = 0.0;
   GLfloat z_out = 0.0;

   /* All inputs are consumed before the first store: out may alias src. */
   if (src[0] > 0.0) {
      y_out = src[0];
      if (src[1] > 0.0) {
         z_out = RoughApproxPower(src[1], src[3]);
      }
   }

   out[0] = 1.0;
   out[1] = y_out;
   out[2] = z_out;
   out[3] = 1.0;
}
/* Intended to allow a lower precision than required for LG2 above.
*/
static void do_LOG( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
GLfloat tmp = FABSF(arg0[0]);
int exponent;
GLfloat mantissa = FREXPF(tmp, &exponent);
result[0] = (GLfloat) (exponent - 1);
result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
result[2] = exponent + LOG2(mantissa);
result[3] = 1.0;
}
static void do_MAX( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];
result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];
result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2];
result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];
}
static void do_MIN( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];
result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];
result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2];
result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3];
}
static void do_MOV( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
result[0] = arg0[0];
result[1] = arg0[1];
result[2] = arg0[2];
result[3] = arg0[3];
}
static void do_MUL( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = arg0[0] * arg1[0];
result[1] = arg0[1] * arg1[1];
result[2] = arg0[2] * arg1[2];
result[3] = arg0[3] * arg1[3];
}
/* Intended to be "high" precision
*/
static void do_POW( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (GLfloat)ApproxPower(arg0[0], arg1[0]);
PUFF(result);
}
static void do_REL( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
GLuint idx = (op.alu.idx0 + (GLint)m->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1);
const GLfloat *arg0 = m->File[op.alu.file0][idx];
result[0] = arg0[0];
result[1] = arg0[1];
result[2] = arg0[2];
result[3] = arg0[3];
}
/**
 * RCP: reciprocal of the source's first component, replicated to all four
 * destination components.
 *
 * \param m   machine holding the register files
 * \param op  instruction; alu.dst is the destination in file 0,
 *            alu.file0 / alu.idx0 select the source
 */
static void do_RCP( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *src = m->File[op.alu.file0][op.alu.idx0];
   GLfloat *out = m->File[0][op.alu.dst];
   const GLfloat value = 1.0F / src[0];

   out[0] = value;
   out[3] = value;
   out[2] = value;
   out[1] = value;
}
/**
 * RSQ: INV_SQRTF() applied to the absolute value of the source's first
 * component, replicated to all four destination components.
 *
 * \param m   machine holding the register files
 * \param op  instruction; alu.dst is the destination in file 0,
 *            alu.file0 / alu.idx0 select the source
 */
static void do_RSQ( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *src = m->File[op.alu.file0][op.alu.idx0];
   GLfloat *out = m->File[0][op.alu.dst];
   const GLfloat value = INV_SQRTF(FABSF(src[0]));

   out[0] = value;
   out[3] = value;
   out[2] = value;
   out[1] = value;
}
static void do_SGE( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;
result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;
result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F;
result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;
}
static void do_SLT( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;
result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;
result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F;
result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;
}
static void do_SUB( struct arb_vp_machine *m, union instruction op )
{
GLfloat *result = m->File[0][op.alu.dst];
const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
result[0] = arg0[0] - arg1[0];
result[1] = arg0[1] - arg1[1];
result[2] = arg0[2] - arg1[2];
result[3] = arg0[3] - arg1[3];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -