📄 jcdctmgr.c
字号:
{ /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; forward_DCT_method_ptr do_dct = fdct->do_dct; DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */ JDIMENSION bi; sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { /* Load data into workspace, applying unsigned->signed conversion */ { register DCTELEM *workspaceptr; register JSAMPROW elemptr; register int elemr; workspaceptr = workspace; for (elemr = 0; elemr < DCTSIZE; elemr++) { elemptr = sample_data[elemr] + start_col;#if DCTSIZE == 8 /* unroll the inner loop */ workspaceptr[0] = GETJSAMPLE(elemptr[0]) - CENTERJSAMPLE; workspaceptr[1] = GETJSAMPLE(elemptr[1]) - CENTERJSAMPLE; workspaceptr[2] = GETJSAMPLE(elemptr[2]) - CENTERJSAMPLE; workspaceptr[3] = GETJSAMPLE(elemptr[3]) - CENTERJSAMPLE; workspaceptr[4] = GETJSAMPLE(elemptr[4]) - CENTERJSAMPLE; workspaceptr[5] = GETJSAMPLE(elemptr[5]) - CENTERJSAMPLE; workspaceptr[6] = GETJSAMPLE(elemptr[6]) - CENTERJSAMPLE; workspaceptr[7] = GETJSAMPLE(elemptr[7]) - CENTERJSAMPLE; workspaceptr += 8; elemptr += 8;#else { register int elemc; for (elemc = DCTSIZE; elemc > 0; elemc--) { *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; } }#endif } } /* Perform the DCT */ (*do_dct) (workspace); jcquant_int( workspace, coef_blocks[bi], fdct->divisors[compptr->quant_tbl_no] ); }}#if DCTSIZE == 8 && defined(HAVE_MMX_ATT_MNEMONICS) METHODDEF(void) forward_DCT_x86float32 (j_compress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY sample_data, JBLOCKROW coef_blocks, JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)/* This version is used for integer DCT implementations. */{ /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; forward_DCT_method_ptr do_dct = fdct->do_dct; float32_quant_method_ptr do_quant = fdct->do_float32_quant; DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; DCTELEM workspace[DCTSIZE2] ATTR_ALIGN(SIMD_ALIGN); /* work area for FDCT subroutine */ JDIMENSION bi; sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { register DCTELEM *workspaceptr; register JSAMPROW elemptr; register int elemr; workspaceptr = workspace; elemr = 0; elemptr = sample_data[elemr] + start_col; pxor_r2r(mm7,mm7); movq_m2r( *(mmx_t*)&int16_centrejsample, mm6 ); while(elemr < DCTSIZE) { movq_m2r( *(mmx_t*)elemptr, mm0 ); elemptr = sample_data[elemr+1] + start_col; movq_m2r( *(mmx_t*)elemptr, mm1 ); movq_r2r( mm0, mm2 ); punpcklbw_r2r( mm7, mm0 ); movq_r2r( mm1, mm3 ); punpcklbw_r2r( mm7, mm1 ); psubw_r2r( mm6, mm0 ); psubw_r2r( mm6, mm1 ); elemr += 2; punpckhbw_r2r( mm7, mm2 ); punpckhbw_r2r( mm7, mm3 ); elemptr = sample_data[elemr] + start_col; psubw_r2r( mm6, mm2 ); psubw_r2r( mm6, mm3 ); movq_r2m( mm0, *(mmx_t*)(&workspaceptr[0]) ); movq_r2m( mm2, *(mmx_t*)(&workspaceptr[4]) ); movq_r2m( mm1, *(mmx_t*)(&workspaceptr[8]) ); movq_r2m( mm3, *(mmx_t*)(&workspaceptr[12]) ); workspaceptr += 16; } emms(); /* Perform the DCT */ (*do_dct)(workspace); (*do_quant)( workspace, coef_blocks[bi], fdct->float32_divisors[compptr->quant_tbl_no] ); }}METHODDEF(void) forward_DCT_mmx (j_compress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY sample_data, JBLOCKROW coef_blocks, JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)/* This version is used for integer DCT implementations. */{ /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; forward_DCT_method_ptr do_dct = fdct->do_dct; float32_quant_method_ptr do_quant = fdct->do_float32_quant; DCTELEM workspace[DCTSIZE2] ATTR_ALIGN(SIMD_ALIGN); /* work area for FDCT subroutine */ JDIMENSION bi; int i; sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { register DCTELEM *workspaceptr; register JSAMPROW elemptr; register int elemr; workspaceptr = workspace; elemr = 0; elemptr = sample_data[elemr] + start_col; pxor_r2r(mm7,mm7); movq_m2r( *(mmx_t*)&int16_centrejsample, mm6 ); while(elemr < DCTSIZE) { movq_m2r( *(mmx_t*)elemptr, mm0 ); elemptr = sample_data[elemr+1] + start_col; movq_m2r( *(mmx_t*)elemptr, mm1 ); movq_r2r( mm0, mm2 ); punpcklbw_r2r( mm7, mm0 ); movq_r2r( mm1, mm3 ); punpcklbw_r2r( mm7, mm1 ); psubw_r2r( mm6, mm0 ); psubw_r2r( mm6, mm1 ); elemr += 2; punpckhbw_r2r( mm7, mm2 ); punpckhbw_r2r( mm7, mm3 ); elemptr = sample_data[elemr] + start_col; psubw_r2r( mm6, mm2 ); psubw_r2r( mm6, mm3 ); movq_r2m( mm0, *(mmx_t*)(&workspaceptr[0]) ); movq_r2m( mm2, *(mmx_t*)(&workspaceptr[4]) ); movq_r2m( mm1, *(mmx_t*)(&workspaceptr[8]) ); movq_r2m( mm3, *(mmx_t*)(&workspaceptr[12]) ); workspaceptr += 16; } emms(); /* Perform the DCT */ (*do_dct)(workspace); jcquant_mmx( workspace, coef_blocks[bi], fdct->divisors[compptr->quant_tbl_no], fdct->int16_divisors[compptr->quant_tbl_no], fdct->shift ); }}#endif#ifdef DCT_FLOAT_SUPPORTEDMETHODDEF(void)forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY sample_data, JBLOCKROW coef_blocks, JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)/* This version is used for floating-point DCT implementations. */{ /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; float_DCT_method_ptr do_dct = fdct->do_float_dct; FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */ JDIMENSION bi; sample_data += start_row; /* fold in the vertical offset once */ for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { /* Load data into workspace, applying unsigned->signed conversion */ { register FAST_FLOAT *workspaceptr; register JSAMPROW elemptr; register int elemr; workspaceptr = workspace; for (elemr = 0; elemr < DCTSIZE; elemr++) { elemptr = sample_data[elemr] + start_col;#if DCTSIZE == 8 /* unroll the inner loop */ *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE);#else { register int elemc; for (elemc = DCTSIZE; elemc > 0; elemc--) { *workspaceptr++ = (FAST_FLOAT) (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); } }#endif } } /* Perform the DCT */ (*do_dct) (workspace); /* Quantize/descale the coefficients, and store into coef_blocks[] */ { register FAST_FLOAT temp; register int i; register JCOEFPTR output_ptr = coef_blocks[bi]; for (i = 0; i < DCTSIZE2; i++) { /* Apply the quantization and scaling factor */ temp = workspace[i] * divisors[i]; /* Round to nearest integer. * Since C does not specify the direction of rounding for negative * quotients, we have to force the dividend positive for portability. * The maximum coefficient size is +-16K (for 12-bit data), so this * code should work for either 16-bit or 32-bit ints. */ output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384); } } }}#endif /* DCT_FLOAT_SUPPORTED *//* * Initialize FDCT manager. */GLOBAL(void)jinit_forward_dct (j_compress_ptr cinfo){ my_fdct_ptr fdct; int i; fdct = (my_fdct_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_fdct_controller)); cinfo->fdct = (struct jpeg_forward_dct *) fdct; fdct->pub.start_pass = start_pass_fdctmgr; switch (cinfo->dct_method) {#ifdef DCT_ISLOW_SUPPORTED case JDCT_ISLOW: fdct->pub.forward_DCT = forward_DCT; fdct->do_dct = jpeg_fdct_islow; break;#endif#ifdef DCT_IFAST_SUPPORTED case JDCT_IFAST: {#if defined(HAVE_MMX_ATT_MNEMONICS) int cpu_flags = cpu_accel(); if( cpu_flags & ACCEL_X86_MMX ) { fdct->do_dct = jpeg_fdct_ifast_mmx; if( cpu_flags & ACCEL_X86_SSE ) { fdct->fast_quantiser = QUANT_FLOAT32; fdct->pub.forward_DCT = forward_DCT_x86float32; fdct->do_float32_quant = jcquant_sse; } else if( cpu_flags & ACCEL_X86_3DNOW ) { fdct->fast_quantiser = QUANT_FLOAT32; fdct->pub.forward_DCT = forward_DCT_x86float32; fdct->do_float32_quant = jcquant_3dnow; } else { fdct->fast_quantiser = QUANT_INT16; fdct->pub.forward_DCT = forward_DCT_mmx; fdct->do_float32_quant = NULL; } } else#endif { fdct->fast_quantiser = QUANT_INT; fdct->pub.forward_DCT = forward_DCT; fdct->do_dct = jpeg_fdct_ifast; fdct->do_float32_quant = NULL; } break; }#endif#ifdef DCT_FLOAT_SUPPORTED case JDCT_FLOAT: fdct->pub.forward_DCT = forward_DCT_float; fdct->do_float_dct = jpeg_fdct_float; break;#endif default: ERREXIT(cinfo, JERR_NOT_COMPILED); break; } /* Mark divisor tables unallocated */ for (i = 0; i < NUM_QUANT_TBLS; i++) { fdct->divisors[i] = NULL; fdct->int16_divisors[i] = NULL; fdct->float32_divisors[i] = NULL;#ifdef DCT_FLOAT_SUPPORTED fdct->float_divisors[i] = NULL;#endif }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -