t_vb_arbprogram_sse.c

来自「mesa-6.5-minigui源码」· C语言代码 · 共 1,269 行 · 第 1/3 页
1,269 行
/*
 * Mesa 3-D graphics library
 * Version:  6.3
 *
 * Copyright (C) 1999-2004  Brian Paul   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/**
 * \file t_vb_arbprogram_sse.c
 *
 * Translate simplified vertex_program representation to
 * x86/x87/SSE/SSE2 machine code using mesa's rtasm runtime assembler.
 *
 * This is very much a first attempt - build something that works.
 * There are probably better approaches for applying SSE to vertex
 * programs, and the whole thing is crying out for static analysis of
 * the programs to avoid redundant operations.
* * \author Keith Whitwell */#include "glheader.h"#include "context.h"#include "imports.h"#include "macros.h"#include "mtypes.h"#include "arbprogparse.h"#include "program.h"#include "program_instruction.h"#include "math/m_matrix.h"#include "math/m_translate.h"#include "t_context.h"#include "t_vb_arbprogram.h"#if defined(USE_SSE_ASM)#include "x86/rtasm/x86sse.h"#include "x86/common_x86_asm.h"#define X    0#define Y    1#define Z    2#define W    3/* Reg usage: * * EAX - temp * EBX - point to 'm->File[0]' * ECX - point to 'm->File[3]' * EDX - holds 'm' * EBP, * ESI, * EDI */#define DISASSEM 0#define FAIL								\do {									\   _mesa_printf("x86 translation failed in %s\n", __FUNCTION__);	\   return GL_FALSE;							\} while (0)struct compilation {   struct x86_function func;   struct tnl_compiled_program *p;      GLuint insn_counter;   struct {      GLuint file:2;      GLuint idx:7;      GLuint dirty:1;      GLuint last_used:10;   } xmm[8];   struct {      struct x86_reg base;   } file[4];   GLboolean have_sse2;   GLshort fpucntl;};static INLINE GLboolean eq( struct x86_reg a,			    struct x86_reg b ){   return (a.file == b.file &&	   a.idx == b.idx &&	   a.mod == b.mod &&	   a.disp == b.disp);}      static GLint get_offset( const void *a, const void *b ){   return (const char *)b - (const char *)a;}static struct x86_reg get_reg_ptr(GLuint file,				  GLuint idx ){   struct x86_reg reg;   switch (file) {   case FILE_REG:      reg = x86_make_reg(file_REG32, reg_BX);      assert(idx != REG_UNDEF);      break;   case FILE_STATE_PARAM:      reg = x86_make_reg(file_REG32, reg_CX);      break;   default:      assert(0);   }   return x86_make_disp(reg, 16 * idx);}			  static void spill( struct compilation *cp, GLuint idx ){   struct x86_reg oldval = get_reg_ptr(cp->xmm[idx].file,				       cp->xmm[idx].idx);   assert(cp->xmm[idx].dirty);   sse_movups(&cp->func, oldval, x86_make_reg(file_XMM, idx));   cp->xmm[idx].dirty = 0;}static struct x86_reg get_xmm_reg( struct 
compilation *cp ){   GLuint i;   GLuint oldest = 0;   for (i = 0; i < 8; i++)       if (cp->xmm[i].last_used < cp->xmm[oldest].last_used)	 oldest = i;   /* Need to write out the old value?    */   if (cp->xmm[oldest].dirty)       spill(cp, oldest);   assert(cp->xmm[oldest].last_used != cp->insn_counter);   cp->xmm[oldest].file = FILE_REG;   cp->xmm[oldest].idx = REG_UNDEF;   cp->xmm[oldest].last_used = cp->insn_counter;   return x86_make_reg(file_XMM, oldest);}static void invalidate_xmm( struct compilation *cp, 			    GLuint file, GLuint idx ){   GLuint i;   /* Invalidate any old copy of this register in XMM0-7.      */   for (i = 0; i < 8; i++) {      if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) {	 cp->xmm[i].file = FILE_REG;	 cp->xmm[i].idx = REG_UNDEF;	 cp->xmm[i].dirty = 0;	 break;      }   }}      /* Return an XMM reg to receive the results of an operation. */static struct x86_reg get_dst_xmm_reg( struct compilation *cp, 				       GLuint file, GLuint idx ){   struct x86_reg reg;   /* Invalidate any old copy of this register in XMM0-7.  Don't reuse    * as this may be one of the arguments.    */   invalidate_xmm( cp, file, idx );   reg = get_xmm_reg( cp );   cp->xmm[reg.idx].file = file;   cp->xmm[reg.idx].idx = idx;   cp->xmm[reg.idx].dirty = 1;   return reg;   }/* As above, but return a pointer.  Note - this pointer may alias * those returned by get_arg_ptr(). */static struct x86_reg get_dst_ptr( struct compilation *cp, 				   GLuint file, GLuint idx ){   /* Invalidate any old copy of this register in XMM0-7.  Don't reuse    * as this may be one of the arguments.    */   invalidate_xmm( cp, file, idx );   return get_reg_ptr(file, idx);}/* Return an XMM reg if the argument is resident, otherwise return a * base+offset pointer to the saved value. 
*/static struct x86_reg get_arg( struct compilation *cp, GLuint file, GLuint idx ){   GLuint i;   for (i = 0; i < 8; i++) {      if (cp->xmm[i].file == file &&	  cp->xmm[i].idx == idx) {	 cp->xmm[i].last_used = cp->insn_counter;	 return x86_make_reg(file_XMM, i);      }   }   return get_reg_ptr(file, idx);}/* As above, but always return a pointer: */static struct x86_reg get_arg_ptr( struct compilation *cp, GLuint file, GLuint idx ){   GLuint i;   /* If there is a modified version of this register in one of the    * XMM regs, write it out to memory.    */   for (i = 0; i < 8; i++) {      if (cp->xmm[i].file == file && 	  cp->xmm[i].idx == idx &&	  cp->xmm[i].dirty) 	 spill(cp, i);   }   return get_reg_ptr(file, idx);}/* Emulate pshufd insn in regular SSE, if necessary: */static void emit_pshufd( struct compilation *cp,			 struct x86_reg dst,			 struct x86_reg arg0,			 GLubyte shuf ){   if (cp->have_sse2) {      sse2_pshufd(&cp->func, dst, arg0, shuf);      cp->func.fn = 0;   }   else {      if (!eq(dst, arg0)) 	 sse_movups(&cp->func, dst, arg0);      sse_shufps(&cp->func, dst, dst, shuf);   }}static void set_fpu_round_neg_inf( struct compilation *cp ){   if (cp->fpucntl != RND_NEG_FPU) {      struct x86_reg regEDX = x86_make_reg(file_REG32, reg_DX);      struct arb_vp_machine *m = NULL;      cp->fpucntl = RND_NEG_FPU;      x87_fnclex(&cp->func);      x87_fldcw(&cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_rnd_neg)));   }}/* Perform a reduced swizzle.   
*/static GLboolean emit_RSW( struct compilation *cp, union instruction op ) {   struct x86_reg arg0 = get_arg(cp, op.rsw.file0, op.rsw.idx0);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.rsw.dst);   GLuint swz = op.rsw.swz;   GLuint neg = op.rsw.neg;   emit_pshufd(cp, dst, arg0, swz);      if (neg) {      struct x86_reg negs = get_arg(cp, FILE_REG, REG_SWZ);      struct x86_reg tmp = get_xmm_reg(cp);      /* Load 1,-1,0,0       * Use neg as arg to pshufd       * Multiply       */      emit_pshufd(cp, tmp, negs, 		  SHUF((neg & 1) ? 1 : 0,		       (neg & 2) ? 1 : 0,		       (neg & 4) ? 1 : 0,		       (neg & 8) ? 1 : 0));      sse_mulps(&cp->func, dst, tmp);   }   return GL_TRUE;}/* Helper for writemask: */static GLboolean emit_shuf_copy1( struct compilation *cp,				  struct x86_reg dst,				  struct x86_reg arg0,				  struct x86_reg arg1,				  GLubyte shuf ){   struct x86_reg tmp = get_xmm_reg(cp);   sse_movups(&cp->func, dst, arg1);   emit_pshufd(cp, dst, dst, shuf);   emit_pshufd(cp, tmp, arg0, shuf);   sse_movss(&cp->func, dst, tmp);   emit_pshufd(cp, dst, dst, shuf);   return GL_TRUE;}/* Helper for writemask: */static GLboolean emit_shuf_copy2( struct compilation *cp,				  struct x86_reg dst,				  struct x86_reg arg0,				  struct x86_reg arg1,				  GLubyte shuf ){   struct x86_reg tmp = get_xmm_reg(cp);   emit_pshufd(cp, dst, arg1, shuf);   emit_pshufd(cp, tmp, arg0, shuf);   sse_shufps(&cp->func, dst, tmp, SHUF(X, Y, Z, W));   emit_pshufd(cp, dst, dst, shuf);   return GL_TRUE;}static void emit_x87_ex2( struct compilation *cp ){   struct x86_reg st0 = x86_make_reg(file_x87, 0);   struct x86_reg st1 = x86_make_reg(file_x87, 1);   struct x86_reg st3 = x86_make_reg(file_x87, 3);   set_fpu_round_neg_inf( cp );   x87_fld(&cp->func, st0); /* a a */   x87_fprndint( &cp->func );	/* int(a) a */   x87_fld(&cp->func, st0); /* int(a) int(a) a */   x87_fstp(&cp->func, st3); /* int(a) a int(a)*/   x87_fsubp(&cp->func, st1); /* frac(a) int(a) */   
x87_f2xm1(&cp->func);    /* (2^frac(a))-1 int(a)*/   x87_fld1(&cp->func);    /* 1 (2^frac(a))-1 int(a)*/   x87_faddp(&cp->func, st1);	/* 2^frac(a) int(a) */   x87_fscale(&cp->func);	/* 2^a */}#if 0static GLboolean emit_MSK2( struct compilation *cp, union instruction op ){   struct x86_reg arg0 = get_arg(cp, op.msk.file, op.msk.arg);   struct x86_reg arg1 = get_arg(cp, FILE_REG, op.msk.dst); /* NOTE! */   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.msk.dst);      /* make full width bitmask in tmp     * dst = ~tmp    * tmp &= arg0    * dst &= arg1    * dst |= tmp    */   emit_pshufd(cp, tmp, get_arg(cp, FILE_REG, REG_NEGS), 	       SHUF((op.msk.mask & 1) ? 2 : 0,		    (op.msk.mask & 2) ? 2 : 0,		    (op.msk.mask & 4) ? 2 : 0,		    (op.msk.mask & 8) ? 2 : 0));   sse2_pnot(&cp->func, dst, tmp);   sse2_pand(&cp->func, arg0, tmp);   sse2_pand(&cp->func, arg1, dst);   sse2_por(&cp->func, tmp, dst);   return GL_TRUE;}#endif/* Used to implement write masking.  This and most of the other instructions * here would be easier to implement if there had been a translation * to a 2 argument format (dst/arg0, arg1) at the shader level before * attempting to translate to x86/sse code. */static GLboolean emit_MSK( struct compilation *cp, union instruction op ){   struct x86_reg arg = get_arg(cp, op.msk.file, op.msk.idx);   struct x86_reg dst0 = get_arg(cp, FILE_REG, op.msk.dst); /* NOTE! */   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.msk.dst);      /* Note that dst and dst0 refer to the same program variable, but    * will definitely be different XMM registers.  We're effectively    * treating this as a 2 argument SEL now, just one of which happens    * always to be the same register as the destination.    */   switch (op.msk.mask) {   case 0:
t_vb_arbprogram_sse.c - 源码说明

本页面展示了「mesa-6.5-minigui源码」中的 t_vb_arbprogram_sse.c 源码文件，采用 C语言编程语言编写，共 1,269 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与minigui相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?