📄 t_vertex_sse.c

📁 Mesa is an open-source implementation of the OpenGL specification - a system for rendering interactive 3D graphics.
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
 * Copyright 2003 Tungsten Graphics, inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Keith Whitwell <keithw@tungstengraphics.com>
 */

#include "glheader.h"
#include "context.h"
#include "colormac.h"
#include "t_context.h"
#include "t_vertex.h"
#include "simple_list.h"
#include "enums.h"

#if defined(USE_SSE_ASM)

#include "x86/rtasm/x86sse.h"
#include "x86/common_x86_asm.h"

/**
 * Number of bytes to allocate for generated SSE functions
 */
#define MAX_SSE_CODE_SIZE 1024

/* Lane selectors handed to SHUF(). */
#define X    0
#define Y    1
#define Z    2
#define W    3

/**
 * State carried around while generating one SSE vertex-emit function.
 */
struct x86_program {
   struct x86_function func;   /* buffer the instructions are emitted into */
   GLcontext *ctx;             /* context passed to GET_VERTEX_STATE() */

   GLboolean inputs_safe;      /* a 3-float source may be read as 4 floats */
   GLboolean outputs_safe;     /* a 3-float destination may be written as 4 */
   GLboolean have_sse2;        /* picks the SSE2 path in emit_pack_store_4ub */

   struct x86_reg identity;    /* supplies the lanes a short source lacks */
   struct x86_reg chan0;       /* multiplier applied before packing to bytes */
};


/**
 * Return the operand used to fill in components missing from a source.
 */
static struct x86_reg
get_identity( struct x86_program *p )
{
   return p->identity;
}


/**
 * Load four floats from arg0 into dest with a single unaligned move.
 */
static void
emit_load4f_4( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   sse_movups(&p->func, dest, arg0);
}


/**
 * Load three floats (a, b, c) from arg0 and take the fourth lane from
 * the identity operand, touching only 12 bytes of the source.
 */
static void
emit_load4f_3( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   struct x86_function *func = &p->func;
   const struct x86_reg third = x86_make_disp(arg0, 8);
   const struct x86_reg identity = get_identity(p);

   /* Lane contents of dest after each instruction:
    *
    *    movss   ->  c 0 0 0
    *    shufps  ->  c 0 0 1
    *    shufps  ->  0 0 c 1
    *    movlps  ->  a b c 1
    */
   sse_movss(func, dest, third);
   sse_shufps(func, dest, identity, SHUF(X,Y,Z,W));
   sse_shufps(func, dest, dest, SHUF(Y,Z,X,W));
   sse_movlps(func, dest, arg0);
}


/**
 * Load two floats from arg0; the upper two lanes come from identity.
 */
static void
emit_load4f_2( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   struct x86_function *func = &p->func;

   /* Start with a full copy of identity, then overwrite the low half. */
   sse_movups(func, dest, get_identity(p));
   sse_movlps(func, dest, arg0);
}


/**
 * Load one float from arg0; the upper lanes are swizzled in from identity.
 */
static void
emit_load4f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   struct x86_function *func = &p->func;

   /* Low word first, identity's upper half second. */
   sse_movss(func, dest, arg0);
   sse_shufps(func, dest, get_identity(p), SHUF(X,Y,Z,W));
}


/**
 * Load three floats (a, b, c) from arg0.  The fourth lane of dest is
 * unspecified: whatever follows the source in memory, or a copy of c.
 */
static void
emit_load3f_3( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   struct x86_function *func = &p->func;

   if (p->inputs_safe) {
      /* Fast path: one 16-byte read.  This over-reads the source by one
       * dword, which could SEGV if the input is a vertex array, hence
       * the inputs_safe guard.
       */
      sse_movups(func, dest, arg0);
      return;
   }

   /* Careful path, reading exactly 12 bytes:
    *
    *    movss   ->  c 0 0 0
    *    shufps  ->  c c c c
    *    movlps  ->  a b c c
    */
   sse_movss(func, dest, x86_make_disp(arg0, 8));
   sse_shufps(func, dest, dest, SHUF(X,X,X,X));
   sse_movlps(func, dest, arg0);
}


/**
 * Two source floats into a three-component destination: same code as
 * the four-component case.
 */
static void
emit_load3f_2( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   emit_load4f_2(p, dest, arg0);
}


/**
 * One source float into a three-component destination: same code as
 * the four-component case.
 */
static void
emit_load3f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   emit_load4f_1(p, dest, arg0);
}


/**
 * Load two floats from arg0 into the low half of dest.
 */
static void
emit_load2f_2( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   sse_movlps(&p->func, dest, arg0);
}


/**
 * One source float into a two-component destination: same code as the
 * four-component case.
 */
static void
emit_load2f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   emit_load4f_1(p, dest, arg0);
}


/**
 * Load a single float from arg0 into the low lane of dest.
 */
static void
emit_load1f_1( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   sse_movss(&p->func, dest, arg0);
}


/* Loader dispatch table, indexed [sz-1][src_sz-1] by emit_load().
 * Entries to the right of the diagonal repeat the diagonal entry.
 */
static void (*load[4][4])( struct x86_program *p,
                           struct x86_reg dest,
                           struct x86_reg arg0 ) = {
   /* sz == 1 */
   { emit_load1f_1, emit_load1f_1, emit_load1f_1, emit_load1f_1 },

   /* sz == 2 */
   { emit_load2f_1, emit_load2f_2, emit_load2f_2, emit_load2f_2 },

   /* sz == 3 */
   { emit_load3f_1, emit_load3f_2, emit_load3f_3, emit_load3f_3 },

   /* sz == 4 */
   { emit_load4f_1, emit_load4f_2, emit_load4f_3, emit_load4f_4 }
};


/**
 * Emit a load of src into dest using the loader selected by sz and
 * src_sz.  Both sizes index the table directly and must be in 1..4.
 */
static void
emit_load( struct x86_program *p,
           struct x86_reg dest,
           GLuint sz,
           struct x86_reg src,
           GLuint src_sz )
{
   void (*loader)( struct x86_program *,
                   struct x86_reg,
                   struct x86_reg ) = load[sz-1][src_sz-1];

   loader(p, dest, src);
}


/**
 * Store all four lanes of arg0 to dest with a single unaligned move.
 */
static void
emit_store4f( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   sse_movups(&p->func, dest, arg0);
}


/**
 * Store the low three lanes of arg0 to dest.  Unless outputs_safe is
 * set, arg0 is clobbered in the process.
 */
static void
emit_store3f( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   struct x86_function *func = &p->func;

   if (p->outputs_safe) {
      /* Write the extra dword anyway.  This may hurt writecombining and
       * may cause other problems.
       */
      sse_movups(func, dest, arg0);
      return;
   }

   /* Otherwise store two lanes, shuffle, then store the third. */
   sse_movlps(func, dest, arg0);
   sse_shufps(func, arg0, arg0, SHUF(Z,Z,Z,Z)); /* NOTE! destructive */
   sse_movss(func, x86_make_disp(dest, 8), arg0);
}


/**
 * Store the low two lanes of arg0 to dest.
 */
static void
emit_store2f( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   sse_movlps(&p->func, dest, arg0);
}


/**
 * Store the low lane of arg0 to dest.
 */
static void
emit_store1f( struct x86_program *p, struct x86_reg dest, struct x86_reg arg0 )
{
   sse_movss(&p->func, dest, arg0);
}


/* Store dispatch table, indexed [sz-1] by emit_store(). */
static void (*store[4])( struct x86_program *p,
                         struct x86_reg dest,
                         struct x86_reg arg0 ) = {
   emit_store1f,
   emit_store2f,
   emit_store3f,
   emit_store4f
};


/**
 * Emit a store of the low sz lanes of temp to dest.  sz indexes the
 * table directly and must be in 1..4.
 */
static void
emit_store( struct x86_program *p,
            struct x86_reg dest,
            GLuint sz,
            struct x86_reg temp )
{
   void (*storer)( struct x86_program *,
                   struct x86_reg,
                   struct x86_reg ) = store[sz-1];

   storer(p, dest, temp);
}


/**
 * Multiply temp by p->chan0, convert the four floats to integers, pack
 * them down to four bytes and store the resulting dword at dest.  temp
 * is clobbered.
 */
static void
emit_pack_store_4ub( struct x86_program *p,
                     struct x86_reg dest,
                     struct x86_reg temp )
{
   struct x86_function *func = &p->func;

   /* Scale by 255.0 */
   sse_mulps(func, temp, p->chan0);

   if (!p->have_sse2) {
      /* No SSE2: convert two floats at a time into a pair of MMX
       * registers and do the packing there.
       */
      struct x86_reg pair_lo = x86_make_reg(file_MMX, 0);
      struct x86_reg pair_hi = x86_make_reg(file_MMX, 1);

      sse_cvtps2pi(func, pair_lo, temp);
      sse_movhlps(func, temp, temp);
      sse_cvtps2pi(func, pair_hi, temp);
      mmx_packssdw(func, pair_lo, pair_hi);
      mmx_packuswb(func, pair_lo, pair_lo);
      mmx_movd(func, dest, pair_lo);
      return;
   }

   /* SSE2: convert and pack entirely within the XMM register. */
   sse2_cvtps2dq(func, temp, temp);
   sse2_packssdw(func, temp, temp);
   sse2_packuswb(func, temp, temp);
   sse_movss(func, dest, temp);
}


/**
 * Byte distance from a to b.
 */
static GLint
get_offset( const void *a, const void *b )
{
   const char *from = (const char *)a;
   const char *to = (const char *)b;

   return to - from;
}


/**
 * Emit a load of the attribute's current inputptr into srcREG,
 * addressed relative to vtxREG.
 *
 * Not much happens here.  Eventually this could be used to avoid
 * saving/reloading the source pointers each vertex (if some of them
 * can fit in registers).
 */
static void
get_src_ptr( struct x86_program *p,
             struct x86_reg srcREG,
             struct x86_reg vtxREG,
             struct tnl_clipspace_attr *a )
{
   struct tnl_clipspace *vtx = GET_VERTEX_STATE(p->ctx);
   GLint inputptr_offset = get_offset(vtx, &a->inputptr);

   /* Load current a[j].inputptr */
   x86_mov(&p->func, srcREG, x86_make_disp(vtxREG, inputptr_offset));
}


/**
 * Emit code that advances srcREG by the attribute's input stride and
 * writes the result back to its inputptr slot.  Nothing is emitted
 * when the stride is zero.
 */
static void
update_src_ptr( struct x86_program *p,
                struct x86_reg srcREG,
                struct x86_reg vtxREG,
                struct tnl_clipspace_attr *a )
{
   struct tnl_clipspace *vtx;
   struct x86_reg inputptr_slot;

   if (!a->inputstride)
      return;

   vtx = GET_VERTEX_STATE(p->ctx);
   inputptr_slot = x86_make_disp(vtxREG, get_offset(vtx, &a->inputptr));

   /* Add a[j].inputstride.  The value is hardcoded into the generated
    * code; it could just as easily be pulled from memory each time.
    */
   x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride));

   /* Save new value of a[j].inputptr */
   x86_mov(&p->func, inputptr_slot, srcREG);
}


/* Lots of hardcoding
 *
 * EAX -- pointer to current output vertex
 * ECX -- pointer to current attribute
 *
 */
static GLboolean build_vertex_emit( struct x86_program *p )
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -