📄 nvvertexec.c
字号:
dst[3] = u[3];
store_vector4( &inst->DstReg, state, dst );
}
break;
case VP_OPCODE_MIN:
{
GLfloat t[4], u[4], min[4];
fetch_vector4( &inst->SrcReg[0], state, t );
fetch_vector4( &inst->SrcReg[1], state, u );
min[0] = (t[0] < u[0]) ? t[0] : u[0];
min[1] = (t[1] < u[1]) ? t[1] : u[1];
min[2] = (t[2] < u[2]) ? t[2] : u[2];
min[3] = (t[3] < u[3]) ? t[3] : u[3];
store_vector4( &inst->DstReg, state, min );
}
break;
case VP_OPCODE_MAX:
{
GLfloat t[4], u[4], max[4];
fetch_vector4( &inst->SrcReg[0], state, t );
fetch_vector4( &inst->SrcReg[1], state, u );
max[0] = (t[0] > u[0]) ? t[0] : u[0];
max[1] = (t[1] > u[1]) ? t[1] : u[1];
max[2] = (t[2] > u[2]) ? t[2] : u[2];
max[3] = (t[3] > u[3]) ? t[3] : u[3];
store_vector4( &inst->DstReg, state, max );
}
break;
case VP_OPCODE_SLT:
{
GLfloat t[4], u[4], slt[4];
fetch_vector4( &inst->SrcReg[0], state, t );
fetch_vector4( &inst->SrcReg[1], state, u );
slt[0] = (t[0] < u[0]) ? 1.0F : 0.0F;
slt[1] = (t[1] < u[1]) ? 1.0F : 0.0F;
slt[2] = (t[2] < u[2]) ? 1.0F : 0.0F;
slt[3] = (t[3] < u[3]) ? 1.0F : 0.0F;
store_vector4( &inst->DstReg, state, slt );
}
break;
case VP_OPCODE_SGE:
{
GLfloat t[4], u[4], sge[4];
fetch_vector4( &inst->SrcReg[0], state, t );
fetch_vector4( &inst->SrcReg[1], state, u );
sge[0] = (t[0] >= u[0]) ? 1.0F : 0.0F;
sge[1] = (t[1] >= u[1]) ? 1.0F : 0.0F;
sge[2] = (t[2] >= u[2]) ? 1.0F : 0.0F;
sge[3] = (t[3] >= u[3]) ? 1.0F : 0.0F;
store_vector4( &inst->DstReg, state, sge );
}
break;
case VP_OPCODE_MAD:
{
GLfloat t[4], u[4], v[4], sum[4];
fetch_vector4( &inst->SrcReg[0], state, t );
fetch_vector4( &inst->SrcReg[1], state, u );
fetch_vector4( &inst->SrcReg[2], state, v );
sum[0] = t[0] * u[0] + v[0];
sum[1] = t[1] * u[1] + v[1];
sum[2] = t[2] * u[2] + v[2];
sum[3] = t[3] * u[3] + v[3];
store_vector4( &inst->DstReg, state, sum );
}
break;
case VP_OPCODE_ARL:
{
GLfloat t[4];
fetch_vector4( &inst->SrcReg[0], state, t );
state->AddressReg[0] = (GLint) floor(t[0]);
}
break;
case VP_OPCODE_DPH:
{
GLfloat t[4], u[4], dot[4];
fetch_vector4( &inst->SrcReg[0], state, t );
fetch_vector4( &inst->SrcReg[1], state, u );
dot[0] = t[0] * u[0] + t[1] * u[1] + t[2] * u[2] + u[3];
dot[1] = dot[2] = dot[3] = dot[0];
store_vector4( &inst->DstReg, state, dot );
}
break;
case VP_OPCODE_RCC:
{
GLfloat t[4], u;
fetch_vector1( &inst->SrcReg[0], state, t );
if (t[0] == 1.0F)
u = 1.0F;
else
u = 1.0F / t[0];
if (u > 0.0F) {
if (u > 1.884467e+019F) {
u = 1.884467e+019F; /* IEEE 32-bit binary value 0x5F800000 */
}
else if (u < 5.42101e-020F) {
u = 5.42101e-020F; /* IEEE 32-bit binary value 0x1F800000 */
}
}
else {
if (u < -1.884467e+019F) {
u = -1.884467e+019F; /* IEEE 32-bit binary value 0xDF800000 */
}
else if (u > -5.42101e-020F) {
u = -5.42101e-020F; /* IEEE 32-bit binary value 0x9F800000 */
}
}
t[0] = t[1] = t[2] = t[3] = u;
store_vector4( &inst->DstReg, state, t );
}
break;
case VP_OPCODE_SUB: /* GL_NV_vertex_program1_1 */
{
GLfloat t[4], u[4], sum[4];
fetch_vector4( &inst->SrcReg[0], state, t );
fetch_vector4( &inst->SrcReg[1], state, u );
sum[0] = t[0] - u[0];
sum[1] = t[1] - u[1];
sum[2] = t[2] - u[2];
sum[3] = t[3] - u[3];
store_vector4( &inst->DstReg, state, sum );
}
break;
case VP_OPCODE_ABS: /* GL_NV_vertex_program1_1 */
{
GLfloat t[4];
fetch_vector4( &inst->SrcReg[0], state, t );
if (t[0] < 0.0) t[0] = -t[0];
if (t[1] < 0.0) t[1] = -t[1];
if (t[2] < 0.0) t[2] = -t[2];
if (t[3] < 0.0) t[3] = -t[3];
store_vector4( &inst->DstReg, state, t );
}
break;
case VP_OPCODE_FLR: /* GL_ARB_vertex_program */
{
GLfloat t[4];
fetch_vector4( &inst->SrcReg[0], state, t );
t[0] = FLOORF(t[0]);
t[1] = FLOORF(t[1]);
t[2] = FLOORF(t[2]);
t[3] = FLOORF(t[3]);
store_vector4( &inst->DstReg, state, t );
}
break;
case VP_OPCODE_FRC: /* GL_ARB_vertex_program */
{
GLfloat t[4];
fetch_vector4( &inst->SrcReg[0], state, t );
t[0] = t[0] - FLOORF(t[0]);
t[1] = t[1] - FLOORF(t[1]);
t[2] = t[2] - FLOORF(t[2]);
t[3] = t[3] - FLOORF(t[3]);
store_vector4( &inst->DstReg, state, t );
}
break;
case VP_OPCODE_EX2: /* GL_ARB_vertex_program */
{
GLfloat t[4];
fetch_vector1( &inst->SrcReg[0], state, t );
t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(2.0, t[0]);
store_vector4( &inst->DstReg, state, t );
}
break;
case VP_OPCODE_LG2: /* GL_ARB_vertex_program */
{
GLfloat t[4];
fetch_vector1( &inst->SrcReg[0], state, t );
t[0] = t[1] = t[2] = t[3] = LOG2(t[0]);
store_vector4( &inst->DstReg, state, t );
}
break;
case VP_OPCODE_POW: /* GL_ARB_vertex_program */
{
GLfloat t[4], u[4];
fetch_vector1( &inst->SrcReg[0], state, t );
fetch_vector1( &inst->SrcReg[1], state, u );
t[0] = t[1] = t[2] = t[3] = (GLfloat)_mesa_pow(t[0], u[0]);
store_vector4( &inst->DstReg, state, t );
}
break;
case VP_OPCODE_XPD: /* GL_ARB_vertex_program */
{
GLfloat t[4], u[4], cross[4];
fetch_vector4( &inst->SrcReg[0], state, t );
fetch_vector4( &inst->SrcReg[1], state, u );
cross[0] = t[1] * u[2] - t[2] * u[1];
cross[1] = t[2] * u[0] - t[0] * u[2];
cross[2] = t[0] * u[1] - t[1] * u[0];
store_vector4( &inst->DstReg, state, cross );
}
break;
case VP_OPCODE_SWZ: /* GL_ARB_vertex_program */
{
const struct vp_src_register *source = &inst->SrcReg[0];
const GLfloat *src = get_register_pointer(source, state);
GLfloat result[4];
GLuint i;
/* do extended swizzling here */
for (i = 0; i < 3; i++) {
if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ZERO)
result[i] = 0.0;
else if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ONE)
result[i] = -1.0;
else
result[i] = -src[GET_SWZ(source->Swizzle, i)];
if (source->Negate)
result[i] = -result[i];
}
store_vector4( &inst->DstReg, state, result );
}
break;
case VP_OPCODE_PRINT:
if (inst->SrcReg[0].File) {
GLfloat t[4];
fetch_vector4( &inst->SrcReg[0], state, t );
_mesa_printf("%s%g, %g, %g, %g\n",
(char *) inst->Data, t[0], t[1], t[2], t[3]);
}
else {
_mesa_printf("%s\n", (char *) inst->Data);
}
break;
case VP_OPCODE_END:
ctx->_CurrentProgram = 0;
return;
default:
/* bad instruction opcode */
_mesa_problem(ctx, "Bad VP Opcode in _mesa_exec_vertex_program");
ctx->_CurrentProgram = 0;
return;
} /* switch */
} /* for */
ctx->_CurrentProgram = 0;
}
/**
Thoughts on vertex program optimization:
The obvious thing to do is to compile the vertex program into X86/SSE/3DNow!
assembly code. That will probably be a lot of work.
Another approach might be to replace the vp_instruction->Opcode field with
a pointer to a specialized C function which executes the instruction.
In particular we can write functions which skip swizzling, negating,
masking, relative addressing, etc. when they're not needed.
For example:
void simple_add( struct vp_instruction *inst )
{
GLfloat *sum = machine->Registers[inst->DstReg.Register];
GLfloat *a = machine->Registers[inst->SrcReg[0].Register];
GLfloat *b = machine->Registers[inst->SrcReg[1].Register];
sum[0] = a[0] + b[0];
sum[1] = a[1] + b[1];
sum[2] = a[2] + b[2];
sum[3] = a[3] + b[3];
}
*/
/*
KW:
A first step would be to 'vectorize' the programs in the same way as
the normal transformation code in the tnl module. Thus each opcode
takes zero or more input vectors (registers) and produces one or more
output vectors.
These operations would initially be coded in C, with machine-specific
assembly following, as is currently the case for matrix
transformations in the math/ directory. The preprocessing scheme for
selecting simpler operations Brian describes above would also work
here.
This should give reasonable performance without excessive effort.
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -