📄 texturing.cpp

📁 Android 一些工具
💻 CPP
📖 第 1 页 / 共 3 页
字号:
    context_t const* c = mBuilderContext.c;    const needs_t& needs = mBuilderContext.needs;    int Rctx = mBuilderContext.Rctx;    // We don't have a way to spill registers automatically    // spill depth and AA regs, when we know we may have to.    // build the spill list...    uint32_t spill_list = 0;    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {        const texture_unit_t& tmu = mTextureMachine.tmu[i];        if (tmu.format_idx == 0)            continue;        if (tmu.linear) {            // we may run out of register if we have linear filtering            // at 1 or 4 bytes / pixel on any texture unit.            if (tmu.format.size == 1) {                // if depth and AA enabled, we'll run out of 1 register                if (parts.z.reg > 0 && parts.covPtr.reg > 0)                    spill_list |= 1<<parts.covPtr.reg;            }            if (tmu.format.size == 4) {                // if depth or AA enabled, we'll run out of 1 or 2 registers                if (parts.z.reg > 0)                    spill_list |= 1<<parts.z.reg;                if (parts.covPtr.reg > 0)                       spill_list |= 1<<parts.covPtr.reg;            }        }    }    Spill spill(registerFile(), *this, spill_list);    const bool multiTexture = mTextureMachine.activeUnits > 1;    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {        const texture_unit_t& tmu = mTextureMachine.tmu[i];        if (tmu.format_idx == 0)            continue;        pointer_t& txPtr = parts.coords[i].ptr;        pixel_t& texel = parts.texel[i];                    // repeat...        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&            (tmu.twrap == GGL_NEEDS_WRAP_11))        { // 1:1 textures            comment("fetch texel");            texel.setTo(regs.obtain(), &tmu.format);            load(txPtr, texel, WRITE_BACK);        } else {            Scratch scratches(registerFile());            reg_t& s = parts.coords[i].s;            reg_t& t = parts.coords[i].t;            if ((mOptLevel&1)==0) {                comment("reload s/t (multitexture or linear filtering)");                s.reg = scratches.obtain();                t.reg = scratches.obtain();                CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);                CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);            }            comment("compute repeat/clamp");            int u       = scratches.obtain();            int v       = scratches.obtain();            int width   = scratches.obtain();            int height  = scratches.obtain();            int U = 0;            int V = 0;            CONTEXT_LOAD(width,  generated_vars.texture[i].width);            CONTEXT_LOAD(height, generated_vars.texture[i].height);            int FRAC_BITS = 0;            if (tmu.linear) {                // linear interpolation                if (tmu.format.size == 1) {                    // for 8-bits textures, we can afford                    // 7 bits of fractional precision at no                    // additional cost (we can't do 8 bits                    // because filter8 uses signed 16 bits muls)                    FRAC_BITS = 7;                } else if (tmu.format.size == 2) {                    // filter16() is internally limited to 4 bits, so:                    // FRAC_BITS=2 generates less instructions,                    // FRAC_BITS=3,4,5 creates unpleasant artifacts,                    // FRAC_BITS=6+ looks good                    FRAC_BITS = 6;                } else if (tmu.format.size == 4) {                    // filter32() is internally limited to 8 bits, so:                    // FRAC_BITS=4 looks good                    // FRAC_BITS=5+ looks better, but generates 3 extra ipp                    FRAC_BITS = 6;                } else {                    // for all other cases we use 4 bits.                    FRAC_BITS = 4;                }            }            wrapping(u, s.reg, width,  tmu.swrap, FRAC_BITS);            wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS);            if (tmu.linear) {                comment("compute linear filtering offsets");                // pixel size scale                const int shift = 31 - gglClz(tmu.format.size);                U = scratches.obtain();                V = scratches.obtain();                // sample the texel center                SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));                SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));                // get the fractionnal part of U,V                AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1));                AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1));                // compute width-1 and height-1                SUB(AL, 0, width,  width,  imm(1));                SUB(AL, 0, height, height, imm(1));                // get the integer part of U,V and clamp/wrap                // and compute offset to the next texel                if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {                    // u has already been REPEATed                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));                    MOV(MI, 0, u, width);                                        CMP(AL, u, width);                    MOV(LT, 0, width, imm(1 << shift));                    if (shift)                        MOV(GE, 0, width, reg_imm(width, LSL, shift));                    RSB(GE, 0, width, width, imm(0));                } else {                    // u has not been CLAMPed yet                    // algorithm:                    // if ((u>>4) >= width)                    //      u = width<<4                    //      width = 0                    // else                    //      width = 1<<shift                    // u = u>>4; // get integer part                    // if (u<0)                    //      u = 0                    //      width = 0                    // generated_vars.rt = width                                        CMP(AL, width, reg_imm(u, ASR, FRAC_BITS));                    MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS));                    MOV(LE, 0, width, imm(0));                    MOV(GT, 0, width, imm(1 << shift));                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));                    MOV(MI, 0, u, imm(0));                    MOV(MI, 0, width, imm(0));                }                CONTEXT_STORE(width, generated_vars.rt);                const int stride = width;                CONTEXT_LOAD(stride, generated_vars.texture[i].stride);                if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {                    // v has already been REPEATed                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));                    MOV(MI, 0, v, height);                    CMP(AL, v, height);                    MOV(LT, 0, height, imm(1 << shift));                    if (shift)                        MOV(GE, 0, height, reg_imm(height, LSL, shift));                    RSB(GE, 0, height, height, imm(0));                    MUL(AL, 0, height, stride, height);                } else {                    // u has not been CLAMPed yet                    CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));                    MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));                    MOV(LE, 0, height, imm(0));                    if (shift) {                        MOV(GT, 0, height, reg_imm(stride, LSL, shift));                    } else {                        MOV(GT, 0, height, stride);                    }                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));                    MOV(MI, 0, v, imm(0));                    MOV(MI, 0, height, imm(0));                }                CONTEXT_STORE(height, generated_vars.lb);            }                scratches.recycle(width);            scratches.recycle(height);            // iterate texture coordinates...            comment("iterate s,t");            int dsdx = scratches.obtain();            int dtdx = scratches.obtain();            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);            ADD(AL, 0, s.reg, s.reg, dsdx);            ADD(AL, 0, t.reg, t.reg, dtdx);            if ((mOptLevel&1)==0) {                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);                scratches.recycle(s.reg);                scratches.recycle(t.reg);            }            scratches.recycle(dsdx);            scratches.recycle(dtdx);            // merge base & offset...            comment("merge base & offset");            texel.setTo(regs.obtain(), &tmu.format);            txPtr.setTo(texel.reg, tmu.bits);            int stride = scratches.obtain();            CONTEXT_LOAD(stride,    generated_vars.texture[i].stride);            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data);            SMLABB(AL, u, v, stride, u);    // u+v*stride             base_offset(txPtr, txPtr, u);            // load texel            if (!tmu.linear) {                comment("fetch texel");                load(txPtr, texel, 0);            } else {                // recycle registers we don't need anymore                scratches.recycle(u);                scratches.recycle(v);                scratches.recycle(stride);                comment("fetch texel, bilinear");                switch (tmu.format.size) {                case 1:  filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;                case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;                case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;                case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;                }            }                    }    }}void GGLAssembler::build_iterate_texture_coordinates(    const fragment_parts_t& parts){    const bool multiTexture = mTextureMachine.activeUnits > 1;    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {        const texture_unit_t& tmu = mTextureMachine.tmu[i];        if (tmu.format_idx == 0)            continue;        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&            (tmu.twrap == GGL_NEEDS_WRAP_11))        { // 1:1 textures            const pointer_t& txPtr = parts.coords[i].ptr;            ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3));        } else {            Scratch scratches(registerFile());            int s = parts.coords[i].s.reg;            int t = parts.coords[i].t.reg;            if ((mOptLevel&1)==0) {                s = scratches.obtain();                t = scratches.obtain();                CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);                CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);            }            int dsdx = scratches.obtain();            int dtdx = scratches.obtain();            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);            ADD(AL, 0, s, s, dsdx);            ADD(AL, 0, t, t, dtdx);            if ((mOptLevel&1)==0) {                CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);                CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);            }        }    }}void GGLAssembler::filter8(        const fragment_parts_t& parts,        pixel_t& texel, const texture_unit_t& tmu,        int U, int V, pointer_t& txPtr,        int FRAC_BITS){    if (tmu.format.components != GGL_ALPHA &&        tmu.format.components != GGL_LUMINANCE)    {        // this is a packed format, and we don't support        // linear filtering (it's probably RGB 332)        // Should not happen with OpenGL|ES        LDRB(AL, texel.reg, txPtr.reg);        return;    }    // ------------------------    // about ~22 cycles / pixel    Scratch scratches(registerFile());    int pixel= scratches.obtain();    int d    = scratches.obtain();    int u    = scratches.obtain();    int k    = scratches.obtain();    int rt   = scratches.obtain();    int lb   = scratches.obtain();    // RB -> U * V    CONTEXT_LOAD(rt, generated_vars.rt);    CONTEXT_LOAD(lb, generated_vars.lb);    int offset = pixel;    ADD(AL, 0, offset, lb, rt);    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset));    SMULBB(AL, u, U, V);    SMULBB(AL, d, pixel, u);    RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2)));        // LB -> (1-U) * V    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb));    SMULBB(AL, u, U, V);    SMLABB(AL, d, pixel, u, d);    SUB(AL, 0, k, k, u);        // LT -> (1-U)*(1-V)    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));    LDRB(AL, pixel, txPtr.reg);    SMULBB(AL, u, U, V);    SMLABB(AL, d, pixel, u, d);    // RT -> U*(1-V)    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt));    SUB(AL, 0, u, k, u);    SMLABB(AL, texel.reg, pixel, u, d);        for (int i=0 ; i<4 ; i++) {        if (!texel.format.c[i].h) continue;        texel.format.c[i].h = FRAC_BITS*2+8;        texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits in enough    }    texel.format.size = 4;    texel.format.bitsPerPixel = 32;    texel.flags |= CLEAR_LO;}void GGLAssembler::filter16(        const fragment_parts_t& parts,        pixel_t& texel, const texture_unit_t& tmu,        int U, int V, pointer_t& txPtr,        int FRAC_BITS){        // compute the mask    // XXX: it would be nice if the mask below could be computed    // automatically.    uint32_t mask = 0;    int shift = 0;    int prec = 0;    switch (tmu.format_idx) {        case GGL_PIXEL_FORMAT_RGB_565:            // source: 00000ggg.ggg00000 | rrrrr000.000bbbbb            // result: gggggggg.gggrrrrr | rrrrr0bb.bbbbbbbb            mask = 0x07E0F81F;            shift = 16;            prec = 5;            break;        case GGL_PIXEL_FORMAT_RGBA_4444:            // 0000,1111,0000,1111 | 0000,1111,0000,1111            mask = 0x0F0F0F0F;            shift = 12;            prec = 4;            break;        case GGL_PIXEL_FORMAT_LA_88:            // 0000,0000,1111,1111 | 0000,0000,1111,1111            // AALL -> 00AA | 00LL            mask = 0x00FF00FF;            shift = 8;            prec = 8;            break;        default:            // unsupported format, do something sensical...            LOGE("Unsupported 16-bits texture format (%d)", tmu.format_idx);            LDRH(AL, texel.reg, txPtr.reg);            return;    }    const int adjust = FRAC_BITS*2 - prec;    const int round  = 0;    // update the texel format    texel.format.size = 4;    texel.format.bitsPerPixel = 32;    texel.flags |= CLEAR_HI|CLEAR_LO;    for (int i=0 ; i<4 ; i++) {        if (!texel.format.c[i].h) continue;        const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift;        texel.format.c[i].h = tmu.format.c[i].h + offset + prec;        texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec);    }    // ------------------------    // about ~40 cycles / pixel    Scratch scratches(registerFile());    int pixel= scratches.obtain();    int d    = scratches.obtain();    int u    = scratches.obtain();    int k    = scratches.obtain();    // RB -> U * V
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -