📄 dsputil_mmx.c
字号:
static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[8 + 9];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\}\static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[9];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\}\static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## pixels16_mmx(dst, src, stride, 16);\}\\static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t temp[32];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\}\\static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\}\\static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t temp[32];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ OPNAME ## pixels16_l2_mmx(dst, src+1, half, stride, stride, 16);\}\\static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t temp[32];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\}\\static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\}\\static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, 
int stride){\ uint64_t temp[32];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\}\static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\}\static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\}\static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\}\static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, 
stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\}\static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\}\static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\}\static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[17*2];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\}\static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[17*2];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\}\static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[17*2];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\}#define 
/*
 * NOTE(review): this text appears to be a newline-collapsed copy of the
 * original file; the stray backslashes below look like former line
 * continuations. The code is kept exactly as found. The define keyword for
 * PUT_OP sits at the end of the previous line.
 *
 * PUT_OP(a, b, temp, size)
 *     Expands to one assembler text fragment: mov<size> <a>, <b>
 *     followed by a newline and a tab. temp is not used.
 *
 * AVG_3DNOW_OP(a, b, temp, size)
 *     Expands to three fragments: mov<size> <b>, <temp> ; then
 *     pavgusb <temp>, <a> ; then mov<size> <a>, <b>.
 *
 * AVG_MMX2_OP(a, b, temp, size)
 *     Same three-fragment sequence as AVG_3DNOW_OP, with pavgb in place of
 *     pavgusb.
 *
 * QPEL_BASE(...) / QPEL_OP(...)
 *     Macro instantiations for the prefixes put_, avg_ and put_no_rnd_,
 *     passing ff_pw_16 (put_, avg_) or ff_pw_15 (put_no_rnd_) and one of the
 *     store macros above. QPEL_OP is instantiated once with the 3dnow suffix
 *     and once with the mmx2 suffix. QPEL_BASE is defined outside this view.
 *
 * just_return()
 *     Compiled out by the surrounding if-0 block.
 *
 * SET_QPEL_FUNC(postfix1, postfix2)
 *     Assigns three members of c in one go: c->put_<postfix1>,
 *     c->put_no_rnd_<postfix1> and c->avg_<postfix1> receive
 *     put_<postfix2>, put_no_rnd_<postfix2> and avg_<postfix2>.
 *     Requires a variable named c in the calling scope.
 *
 * ff_libmpeg2mmx_idct_put / ff_libmpeg2mmx_idct_add
 *     Call ff_mmx_idct(block), then put_pixels_clamped_mmx or
 *     add_pixels_clamped_mmx with (block, dest, line_size).
 *
 * ff_libmpeg2mmx2_idct_put / ff_libmpeg2mmx2_idct_add
 *     Same as above but call ff_mmxext_idct(block) first.
 *
 * dsputil_init_mmx(c, avctx)
 *     Only its opening statements are on this line; the definition
 *     continues on the following lines.
 */
PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"#define AVG_3DNOW_OP(a,b,temp, size) \"mov" #size " " #b ", " #temp " \n\t"\"pavgusb " #temp ", " #a " \n\t"\"mov" #size " " #a ", " #b " \n\t"#define AVG_MMX2_OP(a,b,temp, size) \"mov" #size " " #b ", " #temp " \n\t"\"pavgb " #temp ", " #a " \n\t"\"mov" #size " " #a ", " #b " \n\t"QPEL_BASE(put_ , ff_pw_16, _ , PUT_OP, PUT_OP)QPEL_BASE(avg_ , ff_pw_16, _ , AVG_MMX2_OP, AVG_3DNOW_OP)QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, 3dnow)QPEL_OP(avg_ , ff_pw_16, _ , AVG_3DNOW_OP, 3dnow)QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, mmx2)QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2)QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)#if 0static void just_return() { return; }#endif#define SET_QPEL_FUNC(postfix1, postfix2) \ c->put_ ## postfix1 = put_ ## postfix2;\ c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ c->avg_ ## postfix1 = avg_ ## postfix2;/* external functions, from idct_mmx.c */void ff_mmx_idct(DCTELEM *block);void ff_mmxext_idct(DCTELEM *block);/* XXX: these wrapper functions should be removed as soon as all IDCTs are converted */static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block){ ff_mmx_idct (block); put_pixels_clamped_mmx(block, dest, line_size);}static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block){ ff_mmx_idct (block); add_pixels_clamped_mmx(block, dest, line_size);}static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block){ ff_mmxext_idct (block); put_pixels_clamped_mmx(block, dest, line_size);}static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block){ ff_mmxext_idct (block); add_pixels_clamped_mmx(block, dest, line_size);} void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx){ mm_flags = mm_support(); if (avctx->dsp_mask) { if (avctx->dsp_mask & FF_MM_FORCE) mm_flags |= (avctx->dsp_mask 
& 0xffff); else mm_flags &= ~(avctx->dsp_mask & 0xffff); }#if 0 fprintf(stderr, "libavcodec: CPU flags:"); if (mm_flags & MM_MMX) fprintf(stderr, " mmx"); if (mm_flags & MM_MMXEXT) fprintf(stderr, " mmxext"); if (mm_flags & MM_3DNOW) fprintf(stderr, " 3dnow"); if (mm_flags & MM_SSE) fprintf(stderr, " sse"); if (mm_flags & MM_SSE2) fprintf(stderr, " sse2"); fprintf(stderr, "\n");#endif if (mm_flags & MM_MMX) { const int dct_algo = avctx->dct_algo; const int idct_algo= avctx->idct_algo;#ifdef CONFIG_ENCODERS if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ if(mm_flags & MM_MMXEXT){ c->fdct = ff_fdct_mmx2; }else{ c->fdct = ff_fdct_mmx; } }#endif //CONFIG_ENCODERS if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ c->idct_put= ff_simple_idct_put_mmx; c->idct_add= ff_simple_idct_add_mmx; c->idct = ff_simple_idct_mmx; c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ if(mm_flags & MM_MMXEXT){ c->idct_put= ff_libmpeg2mmx2_idct_put; c->idct_add= ff_libmpeg2mmx2_idct_add; c->idct = ff_mmxext_idct; }else{ c->idct_put= ff_libmpeg2mmx_idct_put; c->idct_add= ff_libmpeg2mmx_idct_add; c->idct = ff_mmx_idct; } c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; } #ifdef CONFIG_ENCODERS c->get_pixels = get_pixels_mmx; c->diff_pixels = diff_pixels_mmx;#endif //CONFIG_ENCODERS c->put_pixels_clamped = put_pixels_clamped_mmx; c->add_pixels_clamped = add_pixels_clamped_mmx; c->clear_blocks = clear_blocks_mmx;#ifdef CONFIG_ENCODERS c->pix_sum = pix_sum16_mmx;#endif //CONFIG_ENCODERS c->put_pixels_tab[0][0] = put_pixels16_mmx; c->put_pixels_tab[0][1] = put_pixels16_x2_mmx; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx; c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx; c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx; c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx; c->avg_pixels_tab[0][0] = avg_pixels16_mmx; 
c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx; c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx; c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx; c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx; c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx; c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx; c->put_pixels_tab[1][0] = put_pixels8_mmx; c->put_pixels_tab[1][1] = put_pixels8_x2_mmx; c->put_pixels_tab[1][2] = put_pixels8_y2_mmx; c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx; c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx; c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx; c->avg_pixels_tab[1][0] = avg_pixels8_mmx; c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx; c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx; c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; c->add_bytes= add_bytes_mmx;#ifdef CONFIG_ENCODERS c->diff_bytes= diff_bytes_mmx; c->hadamard8_diff[0]= hadamard8_diff16_mmx; c->hadamard8_diff[1]= hadamard8_diff_mmx; c->pix_norm1 = pix_norm1_mmx; c->sse[0] = sse16_mmx;#endif //CONFIG_ENCODERS if (mm_flags & MM_MMXEXT) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;#ifdef CONFIG_ENCODERS 
c->hadamard8_diff[0]= hadamard8_diff16_mmx2; c->hadamard8_diff[1]= hadamard8_diff_mmx2;#endif //CONFIG_ENCODERS if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; }#if 1 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 2], qpel16_mc20_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 3], qpel16_mc30_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 4], qpel16_mc01_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 6], qpel16_mc21_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 8], qpel16_mc02_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][ 9], qpel16_mc12_mmx2) SET_QPEL_FUNC(qpel_pixels_tab[0][10], qpel16_mc22_mmx2) SET_QPEL_FUNC(qpel_p
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -