📄 texturing.cpp
字号:
int offset = pixel; CONTEXT_LOAD(offset, generated_vars.rt); CONTEXT_LOAD(u, generated_vars.lb); ADD(AL, 0, offset, offset, u); LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); SMULBB(AL, u, U, V); ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); build_and_immediate(pixel, pixel, mask, 32); if (adjust) { if (round) ADD(AL, 0, u, u, imm(1<<(adjust-1))); MOV(AL, 0, u, reg_imm(u, LSR, adjust)); } MUL(AL, 0, d, pixel, u); RSB(AL, 0, k, u, imm(1<<prec)); // LB -> (1-U) * V CONTEXT_LOAD(offset, generated_vars.lb); RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); SMULBB(AL, u, U, V); ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); build_and_immediate(pixel, pixel, mask, 32); if (adjust) { if (round) ADD(AL, 0, u, u, imm(1<<(adjust-1))); MOV(AL, 0, u, reg_imm(u, LSR, adjust)); } MLA(AL, 0, d, pixel, u, d); SUB(AL, 0, k, k, u); // LT -> (1-U)*(1-V) RSB(AL, 0, V, V, imm(1<<FRAC_BITS)); LDRH(AL, pixel, txPtr.reg); SMULBB(AL, u, U, V); ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); build_and_immediate(pixel, pixel, mask, 32); if (adjust) { if (round) ADD(AL, 0, u, u, imm(1<<(adjust-1))); MOV(AL, 0, u, reg_imm(u, LSR, adjust)); } MLA(AL, 0, d, pixel, u, d); // RT -> U*(1-V) CONTEXT_LOAD(offset, generated_vars.rt); LDRH(AL, pixel, txPtr.reg, reg_pre(offset)); SUB(AL, 0, u, k, u); ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift)); build_and_immediate(pixel, pixel, mask, 32); MLA(AL, 0, texel.reg, pixel, u, d);}void GGLAssembler::filter24( const fragment_parts_t& parts, pixel_t& texel, const texture_unit_t& tmu, int U, int V, pointer_t& txPtr, int FRAC_BITS){ // not supported yet (currently disabled) load(txPtr, texel, 0);}void GGLAssembler::filter32( const fragment_parts_t& parts, pixel_t& texel, const texture_unit_t& tmu, int U, int V, pointer_t& txPtr, int FRAC_BITS){ const int adjust = FRAC_BITS*2 - 8; const int round = 0; // ------------------------ // about ~38 cycles / pixel Scratch scratches(registerFile()); int pixel= scratches.obtain(); int dh = scratches.obtain(); int u = scratches.obtain(); int k = scratches.obtain(); int temp = scratches.obtain(); int dl = scratches.obtain(); int mask = scratches.obtain(); MOV(AL, 0, mask, imm(0xFF)); ORR(AL, 0, mask, mask, imm(0xFF0000)); // RB -> U * V int offset = pixel; CONTEXT_LOAD(offset, generated_vars.rt); CONTEXT_LOAD(u, generated_vars.lb); ADD(AL, 0, offset, offset, u); LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset)); SMULBB(AL, u, U, V); AND(AL, 0, temp, mask, pixel); if (adjust) { if (round) ADD(AL, 0, u, u, imm(1<<(adjust-1))); MOV(AL, 0, u, reg_imm(u, LSR, adjust)); } MUL(AL, 0, dh, temp, u); AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); MUL(AL, 0, dl, temp, u); RSB(AL, 0, k, u, imm(0x100)); // LB -> (1-U) * V CONTEXT_LOAD(offset, generated_vars.lb); RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset)); SMULBB(AL, u, U, V); AND(AL, 0, temp, mask, pixel); if (adjust) { if (round) ADD(AL, 0, u, u, imm(1<<(adjust-1))); MOV(AL, 0, u, reg_imm(u, LSR, adjust)); } MLA(AL, 0, dh, temp, u, dh); AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); MLA(AL, 0, dl, temp, u, dl); SUB(AL, 0, k, k, u); // LT -> (1-U)*(1-V) RSB(AL, 0, V, V, imm(1<<FRAC_BITS)); LDR(AL, pixel, txPtr.reg); SMULBB(AL, u, U, V); AND(AL, 0, temp, mask, pixel); if (adjust) { if (round) ADD(AL, 0, u, u, imm(1<<(adjust-1))); MOV(AL, 0, u, reg_imm(u, LSR, adjust)); } MLA(AL, 0, dh, temp, u, dh); AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); MLA(AL, 0, dl, temp, u, dl); // RT -> U*(1-V) CONTEXT_LOAD(offset, generated_vars.rt); LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset)); SUB(AL, 0, u, k, u); AND(AL, 0, temp, mask, pixel); MLA(AL, 0, dh, temp, u, dh); AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8)); MLA(AL, 0, dl, temp, u, dl); AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8)); AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8)); ORR(AL, 0, texel.reg, dh, dl);}void GGLAssembler::build_texture_environment( component_t& fragment, const fragment_parts_t& parts, int component, Scratch& regs){ const uint32_t component_mask = 1<<component; const bool multiTexture = mTextureMachine.activeUnits > 1; for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) { texture_unit_t& tmu = mTextureMachine.tmu[i]; if (tmu.mask & component_mask) { // replace or modulate with this texture if ((tmu.replaced & component_mask) == 0) { // not replaced by a later tmu... Scratch scratches(registerFile()); pixel_t texel(parts.texel[i]); if (multiTexture && tmu.swrap == GGL_NEEDS_WRAP_11 && tmu.twrap == GGL_NEEDS_WRAP_11) { texel.reg = scratches.obtain(); texel.flags |= CORRUPTIBLE; comment("fetch texel (multitexture 1:1)"); load(parts.coords[i].ptr, texel, WRITE_BACK); } component_t incoming(fragment); modify(fragment, regs); switch (tmu.env) { case GGL_REPLACE: extract(fragment, texel, component); break; case GGL_MODULATE: modulate(fragment, incoming, texel, component); break; case GGL_DECAL: decal(fragment, incoming, texel, component); break; case GGL_BLEND: blend(fragment, incoming, texel, component, i); break; } } } }}// ---------------------------------------------------------------------------void GGLAssembler::wrapping( int d, int coord, int size, int tx_wrap, int tx_linear){ // notes: // if tx_linear is set, we need 4 extra bits of precision on the result // SMULL/UMULL is 3 cycles Scratch scratches(registerFile()); int c = coord; if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) { // UMULL takes 4 cycles (interlocked), and we can get away with // 2 cycles using SMULWB, but we're loosing 16 bits of precision // out of 32 (this is not a problem because the iterator keeps // its full precision) // UMULL(AL, 0, size, d, c, size); // note: we can't use SMULTB because it's signed. MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear)); SMULWB(AL, d, d, size); } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) { if (tx_linear) { // 1 cycle MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear)); } else { // 4 cycles (common case) MOV(AL, 0, d, reg_imm(coord, ASR, 16)); BIC(AL, 0, d, d, reg_imm(d, ASR, 31)); CMP(AL, d, size); SUB(GE, 0, d, size, imm(1)); } }}// ---------------------------------------------------------------------------void GGLAssembler::modulate( component_t& dest, const component_t& incoming, const pixel_t& incomingTexel, int component){ Scratch locals(registerFile()); integer_t texel(locals.obtain(), 32, CORRUPTIBLE); extract(texel, incomingTexel, component); const int Nt = texel.size(); // Nt should always be less than 10 bits because it comes // from the TMU. int Ni = incoming.size(); // Ni could be big because it comes from previous MODULATEs if (Nt == 1) { // texel acts as a bit-mask // dest = incoming & ((texel << incoming.h)-texel) RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h)); AND(AL, 0, dest.reg, dest.reg, incoming.reg); dest.l = incoming.l; dest.h = incoming.h; dest.flags |= (incoming.flags & CLEAR_LO); } else if (Ni == 1) { MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h)); AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31)); dest.l = 0; dest.h = Nt; } else { int inReg = incoming.reg; int shift = incoming.l; if ((Nt + Ni) > 32) { // we will overflow, reduce the precision of Ni to 8 bits // (Note Nt cannot be more than 10 bits which happens with // 565 textures and GGL_LINEAR) shift += Ni-8; Ni = 8; } // modulate by the component with the lowest precision if (Nt >= Ni) { if (shift) { // XXX: we should be able to avoid this shift // when shift==16 && Nt<16 && Ni<16, in which // we could use SMULBT below. MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift)); inReg = dest.reg; shift = 0; } // operation: (Cf*Ct)/((1<<Ni)-1) // approximated with: Cf*(Ct + Ct>>(Ni-1))>>Ni // this operation doesn't change texel's size ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1)); if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg); else MUL(AL, 0, dest.reg, texel.reg, dest.reg); dest.l = Ni; dest.h = Nt + Ni; } else { if (shift && (shift != 16)) { // if shift==16, we can use 16-bits mul instructions later MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift)); inReg = dest.reg; shift = 0; } // operation: (Cf*Ct)/((1<<Nt)-1) // approximated with: Ct*(Cf + Cf>>(Nt-1))>>Nt // this operation doesn't change incoming's size Scratch scratches(registerFile()); int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg; if (t == inReg) t = scratches.obtain(); ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1)); if (Nt<16 && Ni<16) { if (shift==16) SMULBT(AL, dest.reg, t, inReg); else SMULBB(AL, dest.reg, t, inReg); } else MUL(AL, 0, dest.reg, t, inReg); dest.l = Nt; dest.h = Nt + Ni; } // low bits are not valid dest.flags |= CLEAR_LO; // no need to keep more than 8 bits/component if (dest.size() > 8) dest.l = dest.h-8; }}void GGLAssembler::decal( component_t& dest, const component_t& incoming, const pixel_t& incomingTexel, int component){ // RGBA: // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At // Av = Af Scratch locals(registerFile()); integer_t texel(locals.obtain(), 32, CORRUPTIBLE); integer_t factor(locals.obtain(), 32, CORRUPTIBLE); extract(texel, incomingTexel, component); extract(factor, incomingTexel, GGLFormat::ALPHA); // no need to keep more than 8-bits for decal int Ni = incoming.size(); int shift = incoming.l; if (Ni > 8) { shift += Ni-8; Ni = 8; } integer_t incomingNorm(incoming.reg, Ni, incoming.flags); if (shift) { MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift)); incomingNorm.reg = dest.reg; incomingNorm.flags |= CORRUPTIBLE; } ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1)); build_blendOneMinusFF(dest, factor, incomingNorm, texel);}void GGLAssembler::blend( component_t& dest, const component_t& incoming, const pixel_t& incomingTexel, int component, int tmu){ // RGBA: // Cv = (1 - Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct // Av = At*Af if (component == GGLFormat::ALPHA) { modulate(dest, incoming, incomingTexel, component); return; } Scratch locals(registerFile()); integer_t color(locals.obtain(), 8, CORRUPTIBLE); integer_t factor(locals.obtain(), 32, CORRUPTIBLE); LDRB(AL, color.reg, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component]))); extract(factor, incomingTexel, component); // no need to keep more than 8-bits for blend int Ni = incoming.size(); int shift = incoming.l; if (Ni > 8) { shift += Ni-8; Ni = 8; } integer_t incomingNorm(incoming.reg, Ni, incoming.flags); if (shift) { MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift)); incomingNorm.reg = dest.reg; incomingNorm.flags |= CORRUPTIBLE; } ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1)); build_blendOneMinusFF(dest, factor, incomingNorm, color);}// ----------------------------------------------------------------------------}; // namespace android
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -