📄 t_vb_arbprogram.c

📁 winNT技术操作系统,国外开放的原代码和LIUX一样
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 * Mesa 3-D graphics library
 * Version:  6.3
 *
 * Copyright (C) 1999-2005  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \file t_arb_program.c
 * Compile vertex programs to an intermediate representation.
 * Execute vertex programs over a buffer of vertices.
 * \author Keith Whitwell, Brian Paul
 */

#include "glheader.h"
#include "context.h"
#include "imports.h"
#include "macros.h"
#include "mtypes.h"
#include "arbprogparse.h"
#include "light.h"
#include "program.h"
#include "math/m_matrix.h"
#include "math/m_translate.h"
#include "t_context.h"
#include "t_pipeline.h"
#include "t_vb_arbprogram.h"

#define DISASSEM 0

/*--------------------------------------------------------------------------- */

struct opcode_info {
   GLuint nr_args;
   const char *string;
   void (*print)( union instruction , const struct opcode_info * );
};

struct compilation {
   GLuint reg_active;
   union instruction *csr;
};


#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr))

#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])



/* Lower precision functions for the EXP, LOG and LIT opcodes.  The
 * LOG2() implementation is probably not accurate enough, and the
 * attempted optimization for Exp2 is definitely not accurate
 * enough - it discards all of t's fractional bits!
 */
static GLfloat RoughApproxLog2(GLfloat t)
{
   return LOG2(t);
}

static GLfloat RoughApproxExp2(GLfloat t)
{   
#if 0
   fi_type fi;
   fi.i = (GLint) t;
   fi.i = (fi.i << 23) + 0x3f800000;
   return fi.f;
#else
   return (GLfloat) _mesa_pow(2.0, t);
#endif
}

static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
{
   return RoughApproxExp2(y * RoughApproxLog2(x));
}


/* Higher precision functions for the EX2, LG2 and POW opcodes:
 */
static GLfloat ApproxLog2(GLfloat t)
{
   return (GLfloat) (log(t) * 1.442695F);
}

static GLfloat ApproxExp2(GLfloat t)
{   
   return (GLfloat) _mesa_pow(2.0, t);
}

static GLfloat ApproxPower(GLfloat x, GLfloat y)
{
   return (GLfloat) _mesa_pow(x, y);
}

static GLfloat rough_approx_log2_0_1(GLfloat x)
{
   return LOG2(x);
}




/**
 * Perform a reduced swizzle:
 */
static void do_RSW( struct arb_vp_machine *m, union instruction op ) 
{
   GLfloat *result = m->File[0][op.rsw.dst];
   const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0];
   GLuint swz = op.rsw.swz;
   GLuint neg = op.rsw.neg;

   result[0] = arg0[GET_RSW(swz, 0)];
   result[1] = arg0[GET_RSW(swz, 1)];
   result[2] = arg0[GET_RSW(swz, 2)];
   result[3] = arg0[GET_RSW(swz, 3)];
   
   if (neg) {
      if (neg & 0x1) result[0] = -result[0];
      if (neg & 0x2) result[1] = -result[1];
      if (neg & 0x4) result[2] = -result[2];
      if (neg & 0x8) result[3] = -result[3];
   }
}

/* Used to implement write masking.  To make things easier for the sse
 * generator I've gone back to a 1 argument version of this function
 * (dst.msk = arg), rather than the semantically cleaner (dst = SEL
 * arg0, arg1, msk)
 *
 * That means this is the only instruction which doesn't write a full
 * 4 dwords out.  This would make such a program harder to analyse,
 * but it looks like analysis is going to take place on a higher level
 * anyway.
 */
static void do_MSK( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *dst = m->File[0][op.msk.dst];
   const GLfloat *arg = m->File[op.msk.file][op.msk.idx];
 
   if (op.msk.mask & 0x1) dst[0] = arg[0];
   if (op.msk.mask & 0x2) dst[1] = arg[1];
   if (op.msk.mask & 0x4) dst[2] = arg[2];
   if (op.msk.mask & 0x8) dst[3] = arg[3];
}


static void do_PRT( struct arb_vp_machine *m, union instruction op )
{
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   
   _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr, 
		arg0[0], arg0[1], arg0[2], arg0[3]);
}


/**
 * The traditional ALU and texturing instructions.  All operate on
 * internal registers and ignore write masks and swizzling issues.
 */

static void do_ABS( struct arb_vp_machine *m, union instruction op ) 
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];

   result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];
   result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];
   result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2];
   result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3];
}

static void do_ADD( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = arg0[0] + arg1[0];
   result[1] = arg0[1] + arg1[1];
   result[2] = arg0[2] + arg1[2];
   result[3] = arg0[3] + arg1[3];
}


static void do_DP3( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = (arg0[0] * arg1[0] + 
		arg0[1] * arg1[1] + 
		arg0[2] * arg1[2]);

   PUFF(result);
}



static void do_DP4( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = (arg0[0] * arg1[0] + 
		arg0[1] * arg1[1] + 
		arg0[2] * arg1[2] + 
		arg0[3] * arg1[3]);

   PUFF(result);
}

static void do_DPH( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = (arg0[0] * arg1[0] + 
		arg0[1] * arg1[1] + 
		arg0[2] * arg1[2] + 
		1.0     * arg1[3]);
   
   PUFF(result);
}

static void do_DST( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = 1.0F;
   result[1] = arg0[1] * arg1[1];
   result[2] = arg0[2];
   result[3] = arg1[3];
}


/* Intended to be high precision:
 */
static void do_EX2( struct arb_vp_machine *m, union instruction op ) 
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];

   result[0] = (GLfloat)ApproxExp2(arg0[0]);
   PUFF(result);
}


/* Allowed to be lower precision:
 */
static void do_EXP( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   GLfloat tmp = arg0[0];
   GLfloat flr_tmp = FLOORF(tmp);
   GLfloat frac_tmp = tmp - flr_tmp;

   result[0] = LDEXPF(1.0, (int)flr_tmp);
   result[1] = frac_tmp;
   result[2] = LDEXPF(rough_approx_log2_0_1(frac_tmp), (int)flr_tmp);
   result[3] = 1.0F;
}

static void do_FLR( struct arb_vp_machine *m, union instruction op ) 
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];

   result[0] = FLOORF(arg0[0]);
   result[1] = FLOORF(arg0[1]);
   result[2] = FLOORF(arg0[2]);
   result[3] = FLOORF(arg0[3]);
}

static void do_FRC( struct arb_vp_machine *m, union instruction op ) 
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];

   result[0] = arg0[0] - FLOORF(arg0[0]);
   result[1] = arg0[1] - FLOORF(arg0[1]);
   result[2] = arg0[2] - FLOORF(arg0[2]);
   result[3] = arg0[3] - FLOORF(arg0[3]);
}

/* High precision log base 2:
 */
static void do_LG2( struct arb_vp_machine *m, union instruction op ) 
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];

   result[0] = ApproxLog2(arg0[0]);
   PUFF(result);
}



static void do_LIT( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   GLfloat tmp[4];

   tmp[0] = 1.0;
   tmp[1] = 0.0;
   tmp[2] = 0.0;
   tmp[3] = 1.0;

   if (arg0[0] > 0.0) {
      tmp[1] = arg0[0];

      if (arg0[1] > 0.0) {
	 tmp[2] = RoughApproxPower(arg0[1], arg0[3]);
      }
   }

   COPY_4V(result, tmp);
}


/* Intended to allow a lower precision than required for LG2 above.
 */
static void do_LOG( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   GLfloat tmp = FABSF(arg0[0]);
   int exponent;
   GLfloat mantissa = FREXPF(tmp, &exponent);

   result[0] = (GLfloat) (exponent - 1);
   result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
   result[2] = exponent + LOG2(mantissa);
   result[3] = 1.0;
}

static void do_MAX( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];
   result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];
   result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2];
   result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];
}


static void do_MIN( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];
   result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];
   result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2];
   result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3];
}

static void do_MOV( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];

   result[0] = arg0[0];
   result[1] = arg0[1];
   result[2] = arg0[2];
   result[3] = arg0[3];
}

static void do_MUL( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = arg0[0] * arg1[0];
   result[1] = arg0[1] * arg1[1];
   result[2] = arg0[2] * arg1[2];
   result[3] = arg0[3] * arg1[3];
}


/* Intended to be "high" precision
 */
static void do_POW( struct arb_vp_machine *m, union instruction op ) 
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = (GLfloat)ApproxPower(arg0[0], arg1[0]);
   PUFF(result);
}

static void do_REL( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   GLuint idx = (op.alu.idx0 + (GLint)m->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1);
   const GLfloat *arg0 = m->File[op.alu.file0][idx];

   result[0] = arg0[0];
   result[1] = arg0[1];
   result[2] = arg0[2];
   result[3] = arg0[3];
}

static void do_RCP( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];

   result[0] = 1.0F / arg0[0];  
   PUFF(result);
}

static void do_RSQ( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];

   result[0] = INV_SQRTF(FABSF(arg0[0]));
   PUFF(result);
}


static void do_SGE( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;
   result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;
   result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F;
   result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;
}


static void do_SLT( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;
   result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;
   result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F;
   result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;
}

static void do_SUB( struct arb_vp_machine *m, union instruction op ) 
{
   GLfloat *result = m->File[0][op.alu.dst];
   const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
   const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];

   result[0] = arg0[0] - arg1[0];
   result[1] = arg0[1] - arg1[1];
   result[2] = arg0[2] - arg1[2];
   result[3] = arg0[3] - arg1[3];
12 3 下一页
💿 文件大小 34543 K
👤 上传用户 ybsscauc
📂 所属分类操作系统开发
🏷️ 相关标签

#winNT #LIUX #操作系统 #代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -