⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vp3dsp_mmx.c

📁 arm平台下的H264编码和解码源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
    pmullw_m2r(*(dequant_matrix + 12), r3);    psrlq_i2r(16, r4);    movq_r2m(r7, *(output_data + 8));    movq_r2r(r4, r5);    movq_r2r(r0, r7);    psrlq_i2r(16, r4);    psrlq_i2r(48, r7);    movq_r2r(r2, r6);    pand_r2r(r2, r5);    pand_r2r(r4, r6);    movq_r2m(r7, *(output_data + 40));    pxor_r2r(r6, r4);    psrlq_i2r(32, r1);    por_r2r(r5, r4);    movq_m2r(*M(3), r7);    pand_r2r(r2, r1);    movq_m2r(*(input_data + 24), r5);    psllq_i2r(16, r0);    pmullw_m2r(*(dequant_matrix + 24), r5);    pand_r2r(r0, r7);    movq_r2m(r1, *(output_data + 32));    por_r2r(r4, r7);    movq_r2r(r3, r4);    pand_r2r(r2, r3);    movq_m2r(*M(2), r1);    psllq_i2r(32, r3);    por_r2r(r3, r7);    movq_r2r(r5, r3);    psllq_i2r(48, r3);    pand_r2r(r0, r1);    movq_r2m(r7, *(output_data + 16));    por_r2r(r3, r6);    movq_m2r(*M(1), r7);    por_r2r(r1, r6);    movq_m2r(*(input_data + 28), r1);    pand_r2r(r4, r7);    pmullw_m2r(*(dequant_matrix + 28), r1);    por_r2r(r6, r7);    pand_m2r(*M(1), r0);    psrlq_i2r(32, r4);    movq_r2m(r7, *(output_data + 24));    movq_r2r(r4, r6);    movq_m2r(*M(3), r7);    pand_r2r(r2, r4);    movq_m2r(*M(1), r3);    pand_r2r(r1, r7);    pand_r2r(r5, r3);    por_r2r(r4, r0);    psllq_i2r(16, r3);    por_r2r(r0, r7);    movq_m2r(*M(2), r4);    por_r2r(r3, r7);    movq_m2r(*(input_data + 40), r0);    movq_r2r(r4, r3);    pmullw_m2r(*(dequant_matrix + 40), r0);    pand_r2r(r5, r4);    movq_r2m(r7, *(output_data + 4));    por_r2r(r4, r6);    movq_r2r(r3, r4);    psrlq_i2r(16, r6);    movq_r2r(r0, r7);    pand_r2r(r1, r4);    psllq_i2r(48, r7);    por_r2r(r4, r6);    movq_m2r(*(input_data + 44), r4);    por_r2r(r6, r7);    pmullw_m2r(*(dequant_matrix + 44), r4);    psrlq_i2r(16, r3);    movq_r2m(r7, *(output_data + 12));    pand_r2r(r1, r3);    psrlq_i2r(48, r5);    pand_r2r(r2, r1);    movq_m2r(*(input_data + 52), r6);    por_r2r(r3, r5);    pmullw_m2r(*(input_data + 52), r6);    psrlq_i2r(16, r0);    movq_r2r(r4, r7);    movq_r2r(r2, r3);    psllq_i2r(48, r7);    pand_r2r(r0, r3);    pxor_r2r(r3, r0);    psllq_i2r(32, r3);    por_r2r(r5, r7);    movq_r2r(r6, r5);    pand_m2r(*M(1), r6);    por_r2r(r3, r7);    psllq_i2r(32, r6);    por_r2r(r1, r0);    movq_r2m(r7, *(output_data + 20));    por_r2r(r6, r0);    movq_m2r(*(input_data + 60), r7);    movq_r2r(r5, r6);    pmullw_m2r(*(input_data + 60), r7);    psrlq_i2r(32, r5);    pand_r2r(r2, r6);    movq_r2r(r5, r1);    movq_r2m(r0, *(output_data + 28));    pand_r2r(r2, r1);    movq_m2r(*(input_data + 56), r0);    movq_r2r(r7, r3);    pmullw_m2r(*(dequant_matrix + 56), r0);    psllq_i2r(16, r3);    pand_m2r(*M(3), r7);    pxor_r2r(r1, r5);    por_r2r(r5, r6);    movq_r2r(r3, r5);    pand_m2r(*M(3), r5);    por_r2r(r1, r7);    movq_m2r(*(input_data + 48), r1);    pxor_r2r(r5, r3);    pmullw_m2r(*(dequant_matrix + 48), r1);    por_r2r(r3, r7);    por_r2r(r5, r6);    movq_r2r(r0, r5);    movq_r2m(r7, *(output_data + 60));    psrlq_i2r(16, r5);    pand_m2r(*M(2), r5);    movq_r2r(r0, r7);    por_r2r(r5, r6);    pand_r2r(r2, r0);    pxor_r2r(r0, r7);    psllq_i2r(32, r0);    movq_r2m(r6, *(output_data + 52));    psrlq_i2r(16, r4);    movq_m2r(*(input_data + 36), r5);    psllq_i2r(16, r7);    pmullw_m2r(*(dequant_matrix + 36), r5);    movq_r2r(r7, r6);    movq_m2r(*M(2), r3);    psllq_i2r(16, r6);    pand_m2r(*M(3), r7);    pand_r2r(r1, r3);    por_r2r(r0, r7);    movq_r2r(r1, r0);    pand_m2r(*M(3), r1);    por_r2r(r3, r6);    movq_r2r(r4, r3);    psrlq_i2r(32, r1);    pand_r2r(r2, r3);    por_r2r(r1, r7);    por_r2r(r3, r7);    movq_r2r(r4, r3);    pand_m2r(*M(1), r3);    movq_r2r(r5, r1);    movq_r2m(r7, *(output_data + 44));    psrlq_i2r(48, r5);    movq_m2r(*(input_data + 32), r7);    por_r2r(r3, r6);    pmullw_m2r(*(dequant_matrix + 32), r7);    por_r2r(r5, r6);    pand_m2r(*M(2), r4);    psllq_i2r(32, r0);    movq_r2m(r6, *(output_data + 36));    movq_r2r(r0, r6);    pand_m2r(*M(3), r0);    psllq_i2r(16, r6);    movq_m2r(*(input_data + 20), r5);    movq_r2r(r1, r3);    pmullw_m2r(*(dequant_matrix + 40), r5);    psrlq_i2r(16, r1);    pand_m2r(*M(1), r1);    por_r2r(r4, r0);    pand_r2r(r7, r2);    por_r2r(r1, r0);    por_r2r(r2, r0);    psllq_i2r(16, r3);    movq_r2r(r3, r4);    movq_r2r(r5, r2);    movq_r2m(r0, *(output_data + 56));    psrlq_i2r(48, r2);    pand_m2r(*M(2), r4);    por_r2r(r2, r6);    movq_m2r(*M(1), r2);    por_r2r(r4, r6);    pand_r2r(r7, r2);    psllq_i2r(32, r3);    por_m2r(*(output_data + 40), r3);    por_r2r(r2, r6);    movq_m2r(*M(3), r2);    psllq_i2r(16, r5);    movq_r2m(r6, *(output_data + 48));    pand_r2r(r5, r2);    movq_m2r(*M(2), r6);    pxor_r2r(r2, r5);    pand_r2r(r7, r6);    psrlq_i2r(32, r2);    pand_m2r(*M(3), r7);    por_r2r(r2, r3);    por_m2r(*(output_data + 32), r7);    por_r2r(r3, r6);    por_r2r(r5, r7);    movq_r2m(r6, *(output_data + 40));    movq_r2m(r7, *(output_data + 32));#undef M    /* at this point, function has completed dequantization + dezigzag +      * partial transposition; now do the idct itself */#define I(K) (output_data + K * 8)#define J(K) (output_data + ((K - 4) * 8) + 4)    RowIDCT();    Transpose();#undef I#undef J#define I(K) (output_data + (K * 8) + 32)#define J(K) (output_data + ((K - 4) * 8) + 36)    RowIDCT();    Transpose();#undef I#undef J#define I(K) (output_data + K * 8)#define J(K) (output_data + K * 8)    ColumnIDCT();#undef I#undef J#define I(K) (output_data + (K * 8) + 4)#define J(K) (output_data + (K * 8) + 4)    ColumnIDCT();#undef I#undef J}void vp3_idct_put_mmx(int16_t *input_data, int16_t *dequant_matrix,    int coeff_count, uint8_t *dest, int stride){    int16_t transformed_data[64];    int16_t *op;    int i, j;    uint8_t vector128[8] = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };    vp3_idct_mmx(input_data, dequant_matrix, transformed_data);    /* place in final output */    op = transformed_data;    movq_m2r(*vector128, mm0);    for (i = 0; i < 8; i++) {#if 1        for (j = 0; j < 8; j++) {            if (*op < -128)                *dest = 0;            else if (*op > 127)                *dest = 255;            else                *dest = (uint8_t)(*op + 128);            op++;            dest++;        }        dest += (stride - 8);#else/* prototype optimization */        pxor_r2r(mm1, mm1);        packsswb_m2r(*(op + 4), mm1);        movq_r2r(mm1, mm2);        psrlq_i2r(32, mm2);        packsswb_m2r(*(op + 0), mm1);        op += 8;        por_r2r(mm2, mm1);        paddb_r2r(mm0, mm1);        movq_r2m(mm1, *dest);        dest += stride;#endif    }    /* be a good MMX citizen */    emms();}void vp3_idct_add_mmx(int16_t *input_data, int16_t *dequant_matrix,    int coeff_count, uint8_t *dest, int stride){    int16_t transformed_data[64];    int16_t *op;    int i, j;    int16_t sample;    vp3_idct_mmx(input_data, dequant_matrix, transformed_data);    /* place in final output */    op = transformed_data;    for (i = 0; i < 8; i++) {        for (j = 0; j < 8; j++) {            sample = *dest + *op;            if (sample < 0)                *dest = 0;            else if (sample > 255)                *dest = 255;            else                *dest = (uint8_t)(sample & 0xFF);            op++;            dest++;        }        dest += (stride - 8);    }    /* be a good MMX citizen */    emms();}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -