📄 s_nvfragprog.c

📁 winNT技术操作系统,国外开放的原代码和LIUX一样
💻 C
📖 第 1 页 / 共 4 页
字号:
   case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
   case COND_GT: return (condCode == COND_GT);
   case COND_TR: return GL_TRUE;
   case COND_FL: return GL_FALSE;
   default:      return GL_TRUE;
   }
}


/**
 * Store 4 floats into a register.  Observe the instruction's saturate and
 * set-condition-code flags.
 *
 * \param inst     instruction whose DstReg, Saturate and UpdateCondRegister
 *                 fields control the store
 * \param machine  machine state holding the Outputs/Temporaries registers
 *                 and the condition codes
 * \param value    the four components to store
 *
 * For every component enabled in the write mask (after the optional
 * condition-code test) the value is written to the destination register and,
 * if the instruction has UpdateCondRegister set, the matching condition code
 * is regenerated from the stored value.
 *
 * Stores to a PROGRAM_WRITE_ONLY destination land in a local scratch
 * register, so the data is discarded while the condition codes are still
 * updated.
 */
static void
store_vector4( const struct fp_instruction *inst,
               struct fp_machine *machine,
               const GLfloat value[4] )
{
   const struct fp_dst_register *dest = &(inst->DstReg);
   const GLboolean clamp = inst->Saturate;
   const GLboolean updateCC = inst->UpdateCondRegister;
   GLfloat *dstReg;
   GLfloat dummyReg[4];
   GLfloat clampedValue[4];
   GLuint writeMask = dest->WriteMask;
   GLuint i;

   switch (dest->File) {
      case PROGRAM_OUTPUT:
         dstReg = machine->Outputs[dest->Index];
         break;
      case PROGRAM_TEMPORARY:
         dstReg = machine->Temporaries[dest->Index];
         break;
      case PROGRAM_WRITE_ONLY:
         /* Discard the data but keep going: returning here would make the
          * scratch register pointless and would skip the condition-code
          * update below.
          */
         dstReg = dummyReg;
         break;
      default:
         _mesa_problem(NULL, "bad register file in store_vector4(fp)");
         return;
   }

#if DEBUG_FRAG
   if (value[0] > 1.0e10 ||
       IS_INF_OR_NAN(value[0]) ||
       IS_INF_OR_NAN(value[1]) ||
       IS_INF_OR_NAN(value[2]) ||
       IS_INF_OR_NAN(value[3])  )
      printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
#endif

   if (clamp) {
      /* Saturate: clamp each component to [0, 1] before storing. */
      for (i = 0; i < 4; i++) {
         clampedValue[i] = CLAMP(value[i], 0.0F, 1.0F);
      }
      value = clampedValue;
   }

   if (dest->CondMask != COND_TR) {
      /* Conditional write: a component is written only if it is enabled in
       * the write mask AND the (swizzled) condition code passes the test.
       * All tests are evaluated before any condition code is modified below.
       */
      GLuint condWriteMask = 0;
      for (i = 0; i < 4; i++) {
         if (GET_BIT(writeMask, i) &&
             test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, i)],
                     dest->CondMask)) {
            condWriteMask |= (1 << i);
         }
      }
      writeMask = condWriteMask;
   }

   for (i = 0; i < 4; i++) {
      if (GET_BIT(writeMask, i)) {
         dstReg[i] = value[i];
         if (updateCC)
            machine->CondCodes[i] = generate_cc(value[i]);
      }
   }
}


/**
 * Initialize a new machine state instance from an existing one, adding
 * the partial derivatives onto the input registers.
 * Used to implement DDX and DDY instructions in non-trivial cases.
 *
 * \param ctx       rendering context (read for Const.MaxTextureCoordUnits)
 * \param machine   the current machine state; it is only read
 * \param program   the fragment program; its InputsRead bitmask selects
 *                  which input registers get a derivative added
 * \param span      span providing the per-attribute d/dx and d/dy steps
 * \param xOrY      'X' to step the inputs in x, 'Y' to step them in y
 * \param dMachine  receives the copy of \p machine with stepped inputs
 *
 * All modifications are applied to \p dMachine.  The source \p machine must
 * stay untouched: the caller keeps executing with it and compares register
 * values from both machines.
 */
static void
init_machine_deriv( GLcontext *ctx,
                    const struct fp_machine *machine,
                    const struct fragment_program *program,
                    const struct sw_span *span, char xOrY,
                    struct fp_machine *dMachine )
{
   GLuint u;

   ASSERT(xOrY == 'X' || xOrY == 'Y');

   /* copy existing machine */
   _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));

   if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
      /* Clear temporary registers (undefined for ARB_f_p) */
      _mesa_bzero( dMachine->Temporaries,
                   MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
   }

   /* Add derivatives to the inputs of the new machine */
   if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
      GLfloat *wpos = dMachine->Inputs[FRAG_ATTRIB_WPOS];
      if (xOrY == 'X') {
         wpos[0] += 1.0F;
         wpos[1] += 0.0F;
         wpos[2] += span->dzdx;
         wpos[3] += span->dwdx;
      }
      else {
         wpos[0] += 0.0F;
         wpos[1] += 1.0F;
         wpos[2] += span->dzdy;
         wpos[3] += span->dwdy;
      }
   }
   if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
      GLfloat *col0 = dMachine->Inputs[FRAG_ATTRIB_COL0];
      /* color steps are scaled by 1/CHAN_MAXF before being added */
      if (xOrY == 'X') {
         col0[0] += span->drdx * (1.0F / CHAN_MAXF);
         col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
         col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
         col0[3] += span->dadx * (1.0F / CHAN_MAXF);
      }
      else {
         col0[0] += span->drdy * (1.0F / CHAN_MAXF);
         col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
         col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
         col0[3] += span->dady * (1.0F / CHAN_MAXF);
      }
   }
   if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
      GLfloat *col1 = dMachine->Inputs[FRAG_ATTRIB_COL1];
      if (xOrY == 'X') {
         col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
         col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
         col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
         col1[3] += 0.0; /*XXX fix */
      }
      else {
         col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
         col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
         col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
         col1[3] += 0.0; /*XXX fix */
      }
   }
   if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
      GLfloat *fogc = dMachine->Inputs[FRAG_ATTRIB_FOGC];
      if (xOrY == 'X') {
         fogc[0] += span->dfogdx;
      }
      else {
         fogc[0] += span->dfogdy;
      }
   }
   for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
      if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
         GLfloat *tex = dMachine->Inputs[FRAG_ATTRIB_TEX0 + u];
         /* XXX perspective-correct interpolation */
         if (xOrY == 'X') {
            tex[0] += span->texStepX[u][0];
            tex[1] += span->texStepX[u][1];
            tex[2] += span->texStepX[u][2];
            tex[3] += span->texStepX[u][3];
         }
         else {
            tex[0] += span->texStepY[u][0];
            tex[1] += span->texStepY[u][1];
            tex[2] += span->texStepY[u][2];
            tex[3] += span->texStepY[u][3];
         }
      }
   }

   /* init condition codes */
   dMachine->CondCodes[0] = COND_EQ;
   dMachine->CondCodes[1] = COND_EQ;
   dMachine->CondCodes[2] = COND_EQ;
   dMachine->CondCodes[3] = COND_EQ;
}


/**
 * Execute the given fragment program.
 * NOTE: we do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 * \param ctx - rendering context
 * \param program - the fragment program to execute
 * \param machine - machine state (register file)
 * \param maxInst - max number of instructions to execute
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 */
static GLboolean
execute_program( GLcontext *ctx,
                 const struct fragment_program *program, GLuint maxInst,
                 struct fp_machine *machine, const struct sw_span *span,
                 GLuint column )
{
   GLuint pc;

#if DEBUG_FRAG
   printf("execute fragment program --------------------\n");
#endif

   for (pc = 0; pc < maxInst; pc++) {
      const struct fp_instruction *inst = program->Instructions + pc;

      if (ctx->FragmentProgram.CallbackEnabled &&
          ctx->FragmentProgram.Callback) {
         ctx->FragmentProgram.CurrentPosition = inst->StringPos;
         ctx->FragmentProgram.Callback(program->Base.Target,
                                       ctx->FragmentProgram.CallbackData);
      }

      switch (inst->Opcode) {
         case FP_OPCODE_ABS:
            {
               GLfloat a[4], result[4];
               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
               result[0] = FABSF(a[0]);
               result[1] = FABSF(a[1]);
               result[2] = FABSF(a[2]);
               result[3] = FABSF(a[3]);
               store_vector4( inst, machine, result );
            }
            break;
         case FP_OPCODE_ADD:
            {
               GLfloat a[4], b[4], result[4];
               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
               result[0] = a[0] + b[0];
               result[1] = a[1] + b[1];
               result[2] = a[2] + b[2];
               result[3] = a[3] + b[3];
               store_vector4( inst, machine, result );
            }
            break;
         case FP_OPCODE_CMP:
            {
               GLfloat a[4], b[4], c[4], result[4];
               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
               fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
               result[0] = a[0] < 0.0F ? b[0] : c[0];
               result[1] = a[1] < 0.0F ? b[1] : c[1];
               result[2] = a[2] < 0.0F ? b[2] : c[2];
               result[3] = a[3] < 0.0F ? b[3] : c[3];
               store_vector4( inst, machine, result );
            }
            break;
         case FP_OPCODE_COS:
            {
               GLfloat a[4], result[4];
               fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
               result[0] = result[1] = result[2] = result[3] = (GLfloat)_mesa_cos(a[0]);
               store_vector4( inst, machine, result );
            }
            break;
         case FP_OPCODE_DDX: /* Partial derivative with respect to X */
            {
               GLfloat a[4], aNext[4], result[4];
               struct fp_machine dMachine;
               if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
                                        column, result)) {
                  /* This is tricky.  Make a copy of the current machine state,
                   * increment the input registers by the dx or dy partial
                   * derivatives, then re-execute the program up to the
                   * preceeding instruction, then fetch the source register.
                   * Finally, find the difference in the register values for
                   * the original and derivative runs.
                   */
                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
                  init_machine_deriv(ctx, machine, program, span,
                                     'X', &dMachine);
                  execute_program(ctx, program, pc, &dMachine, span, column);
                  fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
                  result[0] = aNext[0] - a[0];
                  result[1] = aNext[1] - a[1];
                  result[2] = aNext[2] - a[2];
                  result[3] = aNext[3] - a[3];
               }
               store_vector4( inst, machine, result );
            }
            break;
         case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
            {
               GLfloat a[4], aNext[4], result[4];
               struct fp_machine dMachine;
               if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
                                        column, result)) {
                  init_machine_deriv(ctx, machine, program, span,
                                     'Y', &dMachine);
                  fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
                  execute_program(ctx, program, pc, &dMachine, span, column);
                  fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
                  result[0] = aNext[0] - a[0];
                  result[1] = aNext[1] - a[1];
                  result[2] = aNext[2] - a[2];
                  result[3] = aNext[3] - a[3];
               }
               store_vector4( inst, machine, result );
            }
            break;
         case FP_OPCODE_DP3:
            {
               GLfloat a[4], b[4], result[4];
               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
               result[0] = result[1] = result[2] = result[3] = 
                  a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
               store_vector4( inst, machine, result );
#if DEBUG_FRAG
               printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
                      result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
#endif
            }
            break;
         case FP_OPCODE_DP4:
            {
               GLfloat a[4], b[4], result[4];
               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
               result[0] = result[1] = result[2] = result[3] = 
                  a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
               store_vector4( inst, machine, result );
#if DEBUG_FRAG
               printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
                      result[0], a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
#endif
            }
            break;
         case FP_OPCODE_DPH:
            {
               GLfloat a[4], b[4], result[4];
               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
               result[0] = result[1] = result[2] = result[3] = 
                  a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
               store_vector4( inst, machine, result );
            }
            break;
         case FP_OPCODE_DST: /* Distance vector */
            {
               GLfloat a[4], b[4], result[4];
               fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
               fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
               result[0] = 1.0F;
               result[1] = a[1] * b[1];
               result[2] = a[2];
               result[3] = b[3];
               store_vector4( inst, machine, result );
💿 文件大小 34543 K
👤 上传用户 ybsscauc
📂 所属分类操作系统开发
🏷️ 相关标签

#winNT #LIUX #操作系统 #代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -