⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 internal_bfin.s

📁 ffmpeg的完整源代码和作者自己写的文档。不但有在Linux的工程哦
💻 S
📖 第 1 页 / 共 2 页
字号:
        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask
        r2 = r2 << 2 (v);
        r2 = r2 & r5;
        r3 = r3 | r2;
        [p1++]=r3                                          || r1=[i1++]; // cy

        /* Y' = y*cy */
        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv

        /* R = Y+ crv*(Cr-128) */
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
        r2 = r2 >> 3 (v);
        r3 = r2 & r5;

        /* B = Y+ cbu*(Cb-128) */
        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu
        r2 = r2 << 7 (v);
        r2 = r2 & r5;
        r3 = r3 | r2;

        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r0=[i0++];     // 4Y
        r2 = r2 << 2 (v)                                   || r1.l=w[i2++];  // 2u
        r2 = r2 & r5;
        r3 = r3 | r2;
        [p1++]=r3                                          || r1.h=w[i3++]; // 2v
.L1555:                                                       r2=[i1++]; // oy

        l1 = 0;

        (r7:4) = [sp++];
        unlink;
        rts;
DEFUN_END(yuv2rgb555_line)

/*
 * yuv2rgb24_line -- convert one line of planar YUV to packed 24-bit RGB.
 *
 * Arguments (see the C prototype in the DEFUN below):
 *   r0               Y    luma input, read 4 bytes at a time through i0
 *   r1               U    chroma input, read 2 bytes at a time through i2
 *   r2               V    chroma input, read 2 bytes at a time through i3
 *   [fp+ARG_OUT]     out  destination, written one byte at a time
 *   [fp+ARG_W]       dW   width; the loop runs dW>>2 times
 *   [fp+ARG_COEFF]   coeffs  table of 32-bit coefficients/masks
 *
 * Each loop iteration consumes 4 Y, 2 U and 2 V samples and stores 12 output
 * bytes.  Two byte pointers are used for the output: p1 and p2 = p1 + 3.  In
 * each half of the loop body p1 and p2 each store three bytes and are then
 * advanced by a further 3, so the two pointers leap-frog over each other's
 * pixel.  Following the in-line comments, the bytes of a pixel are stored in
 * the order R, G, B.
 *
 * The coefficient table is walked with i1 configured as a circular buffer
 * (b1 = base, l1 = COEFF_LEN), so after the last entry i1 wraps back to the
 * first one.  Midway through the body i1 is post-modified by
 * m0 = COEFF_REL_CY_OFF instead of 1 so that the next load is "cy" again.
 * l1 is reset to 0 before returning so that i1 is left in linear mode.
 *
 * Several stores are issued in parallel with a shift of the same register
 * (e.g. "r2=r2>>16 || B[p1++]=r2").  NOTE(review): this relies on the store
 * seeing the register value from before the shift; confirm against the
 * Blackfin multi-issue rules.
 *
 * r7:4 are saved on entry and restored on exit.
 */
DEFUN(yuv2rgb24_line,MEM,
   (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)):
        link 0;
        [--sp] = (r7:4);
        p1 = [fp+ARG_OUT];
        r3 = [fp+ARG_W];
        p2 = p1;
        p2 += 3;               // second output pointer, one pixel (3 bytes) ahead

        i0 = r0;
        i2 = r1;
        i3 = r2;

        r0 = [fp+ARG_COEFF]; // coeff buffer
        i1 = r0;
        b1 = i1;               // circular-buffer base for the coefficient table
        l1 = COEFF_LEN;
        m0 = COEFF_REL_CY_OFF;
        p0 = r3;

        r0   = [i0++];         // 2Y
        r1.l = w[i2++];        // 2u
        r1.h = w[i3++];        // 2v
        p0 = p0>>2;            // loop count: 4 pixels per iteration

        lsetup (.L0888, .L1888) lc0 = p0;

        /*
           uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv
           r0 -- used to load 4ys
           r1 -- used to load 2us,2vs
           r4 -- y3,y2
           r5 -- y1,y0
           r6 -- u1,u0
           r7 -- v1,v0
        */
                                                              r2=[i1++]; // oy
.L0888:
        (r4,r5) = byteop16m (r1:0, r3:2)                   || r3=[i1++]; // oc
        (r7,r6) = byteop16m (r1:0, r3:2) (r);
        r5 = r5 << 2 (v);               // y1,y0
        r4 = r4 << 2 (v);               // y3,y2
        r6 = r6 << 2 (v) || r0=[i1++];  // u1,u0, r0=zero
        r7 = r7 << 2 (v) || r1=[i1++];  // v1,v0  r1=cy

        /* Y' = y*cy */
        a1 = r1.h*r5.h, a0 = r1.l*r5.l                     || r1=[i1++]; // crv

        /* R = Y+ crv*(Cr-128) */
        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
                a1 -= r1.h*r7.l,          a0 -= r1.l*r7.l  || r5=[i1++]; // rmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
        r2=r2>>16 || B[p1++]=r2;
                     B[p2++]=r2;

        /* B = Y+ cbu*(Cb-128) */
        r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l);
                a1 -= r1.h*r6.l,          a0 -= r1.l*r6.l  || r5=[i1++]; // bmask
        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu

        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.l,          a0 += r1.l*r6.l  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l);
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++m0]; // gmask, oy,cy,zero

        r2=r2>>16 || B[p1++]=r2;
                     B[p2++]=r2;

        r3=r3>>16 || B[p1++]=r3;
                     B[p2++]=r3                            || r1=[i1++]; // cy

        p1+=3;                 // skip the pixel that p2 just wrote
        p2+=3;                 // keep p2 one pixel ahead of p1

        /* Y' = y*cy */
        a1 = r1.h*r4.h, a0 = r1.l*r4.l                     || r1=[i1++]; // crv

        /* R = Y+ crv*(Cr-128) */
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
                a1 -= r1.h*r7.h,          a0 -= r1.l*r7.h  || r5=[i1++]; // rmask
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cbu
        r2=r2>>16 || B[p1++]=r2;
        B[p2++]=r2;

        /* B = Y+ cbu*(Cb-128) */
        r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h);
                a1 -= r1.h*r6.h,          a0 -= r1.l*r6.h  || r5=[i1++]; // bmask
        r3 = byteop3p(r3:2, r1:0)(LO)                      || r1=[i1++]; // cgu

        /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */
                a1 += r1.h*r6.h,          a0 += r1.l*r6.h  || r1=[i1++]; // cgv
        r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h);
        r2 = byteop3p(r3:2, r1:0)(LO)                      || r5=[i1++]; // gmask
        r2=r2>>16 || B[p1++]=r2 || r0 = [i0++];    // 4y
                     B[p2++]=r2 || r1.l = w[i2++]; // 2u
        r3=r3>>16 || B[p1++]=r3 || r1.h = w[i3++]; // 2v
                     B[p2++]=r3 || r2=[i1++];      // oy

        p1+=3;
.L1888: p2+=3;

        l1 = 0;                // leave i1 in linear (non-circular) mode

        (r7:4) = [sp++];
        unlink;
        rts;
DEFUN_END(yuv2rgb24_line)



/* Offsets from fp at which uyvytoyv12/yuyvtoyv12 read their stack-passed
 * arguments (the first three arguments arrive in r0, r1, r2). */
#define ARG_vdst        20
#define ARG_width       24
#define ARG_height      28
#define ARG_lumStride   32
#define ARG_chromStride 36
#define ARG_srcStride   40

/*
 * uyvytoyv12 -- split a packed UYVY image into separate Y, U and V planes.
 *
 * Arguments (see the C prototype in the DEFUN below):
 *   r0  src    packed source; i0 walks the top line of each line pair,
 *              i1 = src + srcStride walks the bottom line
 *   r1  ydst   luma output; p0 = top line, p1 = p0 + lumStride = bottom line
 *   r2  udst   U output, written through i2
 *   stack: vdst (written through i3), width, height, lumStride,
 *          chromStride, srcStride
 *
 * The outer hardware loop (lc1) runs height>>1 times and handles two source
 * lines per pass; the inner loop (lc0) runs width>>2 times and handles four
 * pixels of each of the two lines per pass: 8 source bytes are read per line,
 * 4 luma bytes are stored per line, and one 16-bit word each is stored to the
 * U and V planes.
 *
 * Luma is taken from the upper byte of every 16-bit half-word
 * ("rN >> 8 (v)" followed by bytepack).  Chroma comes from byteop1p applied
 * to the top/bottom register pairs -- presumably a byte-wise average of the
 * two lines; confirm against the Blackfin instruction reference.
 *
 * The loop is software-pipelined: the first 8 bytes of each line are loaded
 * before the inner loop starts and every inner pass loads the data for the
 * next one.  That is why m0 is srcStride - 8 ("pre read need to correct").
 *
 * NOTE(review): after an inner loop i0/i1 have advanced by 2*width + 8 and
 * p0/p1 by width; the end-of-row fix-ups then add srcStride - 8 and lumStride
 * respectively.  That lands on the next line pair only when
 * srcStride == 2*width and lumStride == width -- confirm that callers
 * guarantee this.  (i2/i3 are advanced by chromStride - width/2 and so do
 * honour chromStride.)
 *
 * r7:4 and p5:4 are saved on entry and restored on exit.
 */
DEFUN(uyvytoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                         long width, long height,
                         long lumStride, long chromStride, long srcStride)):
        link 0;
        [--sp] = (r7:4,p5:4);

        p0 = r1;       // Y top even

        i2 = r2; // *u
        r2 = [fp + ARG_vdst];
        i3 = r2; // *v

        r1 = [fp + ARG_srcStride];
        r2 = r0 + r1;
        r1 += -8;  // i0,i1 is pre read need to correct
        m0 = r1;

        i0 = r0;  // uyvy_T even
        i1 = r2;  // uyvy_B odd

        p2 = [fp + ARG_lumStride];
        p1 = p0 + p2;  // Y bot odd

        p5 = [fp + ARG_width];
        p4 = [fp + ARG_height];
        r0 = p5;
        p4 = p4 >> 1;  // outer loop count: height/2 line pairs
        p5 = p5 >> 2;  // inner loop count: width/4

        r2 = [fp + ARG_chromStride];
        r0 = r0 >> 1;
        r2 = r2 - r0;  // chromStride - width/2: end-of-row step for i2/i3
        m1 = r2;

        /*   I0,I1 - src input line pointers
         *   p0,p1 - luma output line pointers
         *   I2    - dstU
         *   I3    - dstV
         */

        lsetup (0f, 1f) lc1 = p4;   // H/2
0:        r0 = [i0++] || r2 = [i1++];
          r1 = [i0++] || r3 = [i1++];
          r4 = byteop1p(r1:0, r3:2);
          r5 = byteop1p(r1:0, r3:2) (r);
          lsetup (2f, 3f) lc0 = p5; // W/4
2:          r0 = r0 >> 8(v);
            r1 = r1 >> 8(v);
            r2 = r2 >> 8(v);
            r3 = r3 >> 8(v);
            r0 = bytepack(r0, r1);
            r2 = bytepack(r2, r3)         ||  [p0++] = r0;    // yyyy
            r6 = pack(r5.l, r4.l)         ||  [p1++] = r2;    // yyyy
            r7 = pack(r5.h, r4.h)         ||  r0 = [i0++] || r2 = [i1++];
            r6 = bytepack(r6, r7)         ||  r1 = [i0++] || r3 = [i1++];
            r4 = byteop1p(r1:0, r3:2)     ||  w[i2++] = r6.l; // uu
3:          r5 = byteop1p(r1:0, r3:2) (r) ||  w[i3++] = r6.h; // vv

          i0 += m0;
          i1 += m0;
          i2 += m1;
          i3 += m1;
          p0 = p0 + p2;
1:        p1 = p1 + p2;

        (r7:4,p5:4) = [sp++];
        unlink;
        rts;
DEFUN_END(uyvytoyv12)

/*
 * yuyvtoyv12 -- split a packed YUYV image into separate Y, U and V planes.
 *
 * Same interface, register assignment and loop structure as uyvytoyv12
 * (outer loop height>>1 line pairs, inner loop width>>2, software-pipelined
 * loads, m0 = srcStride - 8, m1 = chromStride - width/2).  The difference is
 * which byte of each 16-bit half-word is treated as luma: here bytepack is
 * applied to the unshifted registers to form the luma words, and the
 * registers are shifted right by 8 before byteop1p to obtain the chroma.
 *
 * NOTE(review): as in uyvytoyv12, the end-of-row pointer fix-ups only land
 * on the next line pair when srcStride == 2*width and lumStride == width --
 * confirm that callers guarantee this.
 *
 * r7:4 and p5:4 are saved on entry and restored on exit.
 */
DEFUN(yuyvtoyv12, mL3,  (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                         long width, long height,
                         long lumStride, long chromStride, long srcStride)):
        link 0;
        [--sp] = (r7:4,p5:4);

        p0 = r1;       // Y top even

        i2 = r2; // *u
        r2 = [fp + ARG_vdst];
        i3 = r2; // *v

        r1 = [fp + ARG_srcStride];
        r2 = r0 + r1;
        r1 += -8;  // i0,i1 is pre read need to correct
        m0 = r1;

        i0 = r0;  // yuyv_T even
        i1 = r2;  // yuyv_B odd

        p2 = [fp + ARG_lumStride];
        p1 = p0 + p2;  // Y bot odd

        p5 = [fp + ARG_width];
        p4 = [fp + ARG_height];
        r0 = p5;
        p4 = p4 >> 1;  // outer loop count: height/2 line pairs
        p5 = p5 >> 2;  // inner loop count: width/4

        r2 = [fp + ARG_chromStride];
        r0 = r0 >> 1;
        r2 = r2 - r0;  // chromStride - width/2: end-of-row step for i2/i3
        m1 = r2;

        /*   I0,I1 - src input line pointers
         *   p0,p1 - luma output line pointers
         *   I2    - dstU
         *   I3    - dstV
         */

        lsetup (0f, 1f) lc1 = p4;   // H/2
0:        r0 = [i0++] || r2 = [i1++];
          r1 = [i0++] || r3 = [i1++];
          r4 = bytepack(r0, r1);
          r5 = bytepack(r2, r3);
          lsetup (2f, 3f) lc0 = p5; // W/4
2:          r0 = r0 >> 8(v) || [p0++] = r4;  // yyyy-even
            r1 = r1 >> 8(v) || [p1++] = r5;  // yyyy-odd
            r2 = r2 >> 8(v);
            r3 = r3 >> 8(v);
            r4 = byteop1p(r1:0, r3:2);
            r5 = byteop1p(r1:0, r3:2) (r);
            r6 = pack(r5.l, r4.l);
            r7 = pack(r5.h, r4.h)         ||  r0 = [i0++] || r2 = [i1++];
            r6 = bytepack(r6, r7)         ||  r1 = [i0++] || r3 = [i1++];
            r4 = bytepack(r0, r1)         ||  w[i2++] = r6.l; // uu
3:          r5 = bytepack(r2, r3)         ||  w[i3++] = r6.h; // vv

          i0 += m0;
          i1 += m0;
          i2 += m1;
          i3 += m1;
          p0 = p0 + p2;
1:        p1 = p1 + p2;

        (r7:4,p5:4) = [sp++];
        unlink;
        rts;
DEFUN_END(yuyvtoyv12)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -