⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 t_vb_arbprogram_sse.c

📁 mesa-6.5-minigui源码
💻 C
📖 第 1 页 / 共 3 页
字号:
   /* Check arg0[0]:    */   x87_fldz(&cp->func);		/* 0 a0 a1 a3 */   x87_fucomp(&cp->func, st1);	/* a0 a1 a3 */   x87_fnstsw(&cp->func, regEAX);   x86_sahf(&cp->func);   fixup1 = x86_jcc_forward(&cp->func, cc_AE);       x87_fstp(&cp->func, x86_make_disp(dst, 4));	/* a1 a3 */   /* Check arg0[1]:    */    x87_fldz(&cp->func);		/* 0 a1 a3 */   x87_fucomp(&cp->func, st1);	/* a1 a3 */   x87_fnstsw(&cp->func, regEAX);   x86_sahf(&cp->func);   fixup2 = x86_jcc_forward(&cp->func, cc_AE);    /* Compute pow(a1, a3)    */   x87_fyl2x(&cp->func);	/* a3*log2(a1) */   emit_x87_ex2( cp );		/* 2^(a3*log2(a1)) */   x87_fstp(&cp->func, x86_make_disp(dst, 8));      /* Land jumps:    */   x86_fixup_fwd_jump(&cp->func, fixup1);   x86_fixup_fwd_jump(&cp->func, fixup2);#else   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);    struct x86_reg ones = get_reg_ptr(FILE_REG, REG_LIT);   sse_movups(&cp->func, dst, ones);#endif      return GL_TRUE;}static GLboolean emit_MAX( struct compilation *cp, union instruction op ){   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg arg1 = get_arg(cp, op.alu.file1, op.alu.idx1);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   sse_movups(&cp->func, dst, arg0);   sse_maxps(&cp->func, dst, arg1);   return GL_TRUE;}static GLboolean emit_MIN( struct compilation *cp, union instruction op ){   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg arg1 = get_arg(cp, op.alu.file1, op.alu.idx1);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   sse_movups(&cp->func, dst, arg0);   sse_minps(&cp->func, dst, arg1);   return GL_TRUE;}static GLboolean emit_MOV( struct compilation *cp, union instruction op ){   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   sse_movups(&cp->func, dst, arg0);   return GL_TRUE;}static GLboolean emit_MUL( struct compilation *cp, union instruction op ){   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg arg1 = get_arg(cp, op.alu.file1, op.alu.idx1);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   sse_movups(&cp->func, dst, arg0);   sse_mulps(&cp->func, dst, arg1);   return GL_TRUE;}static GLboolean emit_POW( struct compilation *cp, union instruction op ) {   struct x86_reg arg0 = get_arg_ptr(cp, op.alu.file0, op.alu.idx0);    struct x86_reg arg1 = get_arg_ptr(cp, op.alu.file1, op.alu.idx1);    struct x86_reg dst = get_dst_ptr(cp, FILE_REG, op.alu.dst);   x87_fld(&cp->func, arg1);   	/* a1 */   x87_fld(&cp->func, arg0);	/* a0 a1 */   x87_fyl2x(&cp->func);	/* a1*log2(a0) */   emit_x87_ex2( cp );		/* 2^(a1*log2(a0)) */   x87_fst(&cp->func, x86_make_disp(dst, 0));       x87_fst(&cp->func, x86_make_disp(dst, 4));       x87_fst(&cp->func, x86_make_disp(dst, 8));       x87_fstp(&cp->func, x86_make_disp(dst, 12));           return GL_TRUE;}static GLboolean emit_REL( struct compilation *cp, union instruction op ){/*    GLuint idx = (op.alu.idx0 + (GLint)cp->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1); *//*    GLuint idx = 0; *//*    struct x86_reg arg0 = get_arg(cp, op.alu.file0, idx); *//*    struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst); *//*    dst[0] = arg0[0]; *//*    dst[1] = arg0[1]; *//*    dst[2] = arg0[2]; *//*    dst[3] = arg0[3]; */   FAIL;}static GLboolean emit_RCP( struct compilation *cp, union instruction op ){   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   if (cp->have_sse2) {      sse2_rcpss(&cp->func, dst, arg0);   }   else {      struct x86_reg ones = get_reg_ptr(FILE_REG, REG_ONES);      sse_movss(&cp->func, dst, ones);      sse_divss(&cp->func, dst, arg0);   }   sse_shufps(&cp->func, dst, dst, SHUF(X, X, X, X));   return GL_TRUE;}static GLboolean emit_RSQ( struct compilation *cp, union instruction op ){   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   /* TODO: Calculate absolute value    */#if 0   sse_movss(&cp->func, dst, arg0);   sse_mulss(&cp->func, dst, neg);   sse_maxss(&cp->func, dst, arg0);#endif   sse_rsqrtss(&cp->func, dst, arg0);   sse_shufps(&cp->func, dst, dst, SHUF(X, X, X, X));   return GL_TRUE;}static GLboolean emit_SGE( struct compilation *cp, union instruction op ){   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg arg1 = get_arg(cp, op.alu.file1, op.alu.idx1);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   struct x86_reg ones = get_reg_ptr(FILE_REG, REG_ONES);   sse_movups(&cp->func, dst, arg0);   sse_cmpps(&cp->func, dst, arg1, cc_NotLessThan);   sse_andps(&cp->func, dst, ones);   return GL_TRUE;}static GLboolean emit_SLT( struct compilation *cp, union instruction op ){   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg arg1 = get_arg(cp, op.alu.file1, op.alu.idx1);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   struct x86_reg ones = get_reg_ptr(FILE_REG, REG_ONES);      sse_movups(&cp->func, dst, arg0);   sse_cmpps(&cp->func, dst, arg1, cc_LessThan);   sse_andps(&cp->func, dst, ones);   return GL_TRUE;}static GLboolean emit_SUB( struct compilation *cp, union instruction op ) {   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg arg1 = get_arg(cp, op.alu.file1, op.alu.idx1);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   sse_movups(&cp->func, dst, arg0);   sse_subps(&cp->func, dst, arg1);   return GL_TRUE;}static GLboolean emit_XPD( struct compilation *cp, union instruction op ) {   struct x86_reg arg0 = get_arg(cp, op.alu.file0, op.alu.idx0);   struct x86_reg arg1 = get_arg(cp, op.alu.file1, op.alu.idx1);   struct x86_reg dst = get_dst_xmm_reg(cp, FILE_REG, op.alu.dst);   struct x86_reg tmp0 = get_xmm_reg(cp);   struct x86_reg tmp1 = get_xmm_reg(cp);   /* Could avoid tmp0, tmp1 if we overwrote arg0, arg1.  Need a way    * to invalidate registers.  This will come with better analysis    * (liveness analysis) of the incoming program.    */   emit_pshufd(cp, dst, arg0, SHUF(Y, Z, X, W));   emit_pshufd(cp, tmp1, arg1, SHUF(Z, X, Y, W));   sse_mulps(&cp->func, dst, tmp1);   emit_pshufd(cp, tmp0, arg0, SHUF(Z, X, Y, W));   emit_pshufd(cp, tmp1, arg1, SHUF(Y, Z, X, W));   sse_mulps(&cp->func, tmp0, tmp1);   sse_subps(&cp->func, dst, tmp0);/*    dst[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]; *//*    dst[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]; *//*    dst[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; *//*    dst[3] is undef */   return GL_TRUE;}static GLboolean emit_NOP( struct compilation *cp, union instruction op ) {   return GL_TRUE;}static GLboolean (* const emit_func[])(struct compilation *, union instruction) = {   emit_ABS,   emit_ADD,   emit_NOP, /* ARA */   emit_NOP, /* ARL */   emit_NOP, /* ARL_NV */   emit_NOP, /* ARR */   emit_NOP, /* BRA */   emit_NOP, /* CAL */   emit_NOP, /* CMP */   emit_NOP, /* COS */   emit_NOP, /* DDX */   emit_NOP, /* DDY */   emit_DP3,   emit_DP4,   emit_DPH,   emit_DST,   emit_NOP, /* END */   emit_EX2,   emit_EXP,   emit_FLR,   emit_FRC,   emit_NOP, /* KIL */   emit_NOP, /* KIL_NV */   emit_LG2,   emit_LIT,   emit_LOG,   emit_NOP, /* LRP */   emit_NOP, /* MAD */   emit_MAX,   emit_MIN,   emit_MOV,   emit_MUL,   emit_NOP, /* PK2H */   emit_NOP, /* PK2US */   emit_NOP, /* PK4B */   emit_NOP, /* PK4UB */   emit_POW,   emit_NOP, /* POPA */   emit_PRT,   emit_NOP, /* PUSHA */   emit_NOP, /* RCC */   emit_RCP,   emit_NOP, /* RET */   emit_NOP, /* RFL */   emit_RSQ,   emit_NOP, /* SCS */   emit_NOP, /* SEQ */   emit_NOP, /* SFL */   emit_SGE,   emit_NOP, /* SGT */   emit_NOP, /* SIN */   emit_NOP, /* SLE */   emit_SLT,   emit_NOP, /* SNE */   emit_NOP, /* SSG */   emit_NOP, /* STR */   emit_SUB,   emit_RSW, /* SWZ */   emit_NOP, /* TEX */   emit_NOP, /* TXB */   emit_NOP, /* TXD */   emit_NOP, /* TXL */   emit_NOP, /* TXP */   emit_NOP, /* TXP_NV */   emit_NOP, /* UP2H */   emit_NOP, /* UP2US */   emit_NOP, /* UP4B */   emit_NOP, /* UP4UB */   emit_NOP, /* X2D */   emit_XPD,   emit_RSW,   emit_MSK,   emit_REL,};static GLboolean build_vertex_program( struct compilation *cp ){   struct arb_vp_machine *m = NULL;   GLuint j;   struct x86_reg regEBX = x86_make_reg(file_REG32, reg_BX);   struct x86_reg regECX = x86_make_reg(file_REG32, reg_CX);   struct x86_reg regEDX = x86_make_reg(file_REG32, reg_DX);   x86_push(&cp->func, regEBX);   x86_mov(&cp->func, regEDX, x86_fn_arg(&cp->func, 1));      x86_mov(&cp->func, regEBX, x86_make_disp(regEDX, get_offset(m, m->File + FILE_REG)));   x86_mov(&cp->func, regECX, x86_make_disp(regEDX, get_offset(m, m->File + FILE_STATE_PARAM)));   for (j = 0; j < cp->p->nr_instructions; j++) {      union instruction inst = cp->p->instructions[j];	       cp->insn_counter = j+1;	/* avoid zero */            if (DISASSEM) {	 _mesa_printf("%p: ", cp->func.csr); 	 _tnl_disassem_vba_insn( inst );      }      cp->func.fn = NULL;      if (!emit_func[inst.alu.opcode]( cp, inst )) {	 return GL_FALSE;      }   }   /* TODO: only for outputs:    */   for (j = 0; j < 8; j++) {      if (cp->xmm[j].dirty) 	 spill(cp, j);   }         /* Exit mmx state?    */   if (cp->func.need_emms)      mmx_emms(&cp->func);   /* Restore FPU control word?    */   if (cp->fpucntl != RESTORE_FPU) {      x87_fnclex(&cp->func);      x87_fldcw(&cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_restore)));   }   x86_pop(&cp->func, regEBX);   x86_ret(&cp->func);   return GL_TRUE;}/** * Execute the given vertex program.   *  * TODO: Integrate the t_vertex.c code here, to build machine vertices * directly at this point. * * TODO: Eliminate the VB struct entirely and just use * struct arb_vertex_machine. */GLboolean_tnl_sse_codegen_vertex_program(struct tnl_compiled_program *p){   struct compilation cp;      /* sanity checks */   assert(emit_func[OPCODE_ABS] == emit_ABS);   assert(emit_func[OPCODE_MUL] == emit_MUL);   assert(emit_func[OPCODE_XPD] == emit_XPD);   _mesa_memset(&cp, 0, sizeof(cp));   cp.p = p;   cp.have_sse2 = 1;   if (p->compiled_func) {      _mesa_free((void *)p->compiled_func);      p->compiled_func = NULL;   }   x86_init_func(&cp.func);   cp.fpucntl = RESTORE_FPU;   /* Note ctx state is not referenced in building the function, so it    * depends only on the list of instructions:    */   if (!build_vertex_program(&cp)) {      x86_release_func( &cp.func );      return GL_FALSE;   }   p->compiled_func = (void (*)(struct arb_vp_machine *))x86_get_func( &cp.func );   return GL_TRUE;}#elseGLboolean_tnl_sse_codegen_vertex_program(struct tnl_compiled_program *p){   /* Dummy version for when USE_SSE_ASM not defined */   return GL_FALSE;}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -