📄 jdsample.c
字号:
/*
 * Forward declaration of h2v1_fancy_upsample_orig, followed by
 * h2v1_fancy_upsample, the entry point that picks an implementation at
 * run time.
 *
 * h2v1_fancy_upsample forwards its four arguments (cinfo, compptr,
 * input_data, output_data_ptr) unchanged to h2v1_fancy_upsample_mmx when
 * MMXAvailable is nonzero, and to h2v1_fancy_upsample_orig otherwise.
 * It performs no other work.
 *
 * NOTE(review): MMXAvailable and the body of h2v1_fancy_upsample_orig are
 * defined outside this view.  No prototype for h2v1_fancy_upsample_mmx is
 * visible ahead of the call to it -- confirm one exists earlier in the file.
 *
 * NOTE(review): the text below appears to have lost its line breaks.  The
 * #if / #else / #endif directives sit in the middle of a line and the first
 * "//" comment runs to the end of the physical line, so the original line
 * structure has to be restored before this can be compiled.  The definition
 * of h2v1_fancy_upsample_mmx that starts on this line continues on the
 * following lines and is left exactly as found.
 */
METHODDEF(void)h2v1_fancy_upsample_orig (j_decompress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr);METHODDEF(void)h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr){ if (MMXAvailable) h2v1_fancy_upsample_mmx(cinfo, compptr, input_data, output_data_ptr); else h2v1_fancy_upsample_orig(cinfo, compptr, input_data, output_data_ptr);}METHODDEF(void)h2v1_fancy_upsample_mmx (j_decompress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr){ JSAMPARRAY output_data = *output_data_ptr;#if defined(HAVE_MMX_INTEL_MNEMONICS) /* see in h2v2 for comments */ register JSAMPROW inptr, outptr;#else JSAMPROW inptr, outptr;#endif int inrow, hsize = compptr->downsampled_width; for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) { inptr = input_data[inrow]; outptr = output_data[inrow];#if defined(HAVE_MMX_INTEL_MNEMONICS) _asm { mov ecx, hsize ;// horizontal line size mov esi, inptr ;// input buffer pointer mov edi, outptr ;// output buffer pointer pxor mm6, mm6 ;// zero register movq mm7, [esi] ;// input register ;// Special 1st column case - process low 8 bytes of mm7 movq mm0, mm7 ;// move 1st quadword into mm7 movq mm1, mm7 ;// make a copy movq mm2, mm7 ;// make a copy punpcklbw mm0, mm6 ;// unpack lower values; inptr[0][1][2][3] movq mm3, mm0 ;// make a copy pmullw mm0, mul3w ;// multiply by 3 psllq mm1, 8 ;// shift 1 byte for previous values; inptr[-1][0][1][2] movq mm5, mm7 ;// copy original data pand mm5, mask2 ;// mask out all but lower byte for "previous" state paddb mm1, mm5 ;// add in byte to quadword psrlq mm2, 8 ;// shift right for "next" state; inptr[1][2][3][4] punpcklbw mm1, mm6 ;// unpack punpcklbw mm2, mm6 ;// unpack paddw mm1, mm0 ;// add in result from multiply to "previous" data paddw mm1, bias1w ;// add in bias paddw mm2, mm0 ;// add in result from multiply to "next" data paddw mm2, bias2w 
;// add in bias psrlw mm1, 2 ;// convert from word to byte psrlw mm2, 2 ;// convert from word to byte psllq mm2, 8 ;// prepare for interleave paddb mm2, mm1 ;// do interleave movq [edi], mm2 ;// write out results ;// process high 8 bytes of mm7 movq mm0, mm7 ;// copy input data movq mm1, mm7 ;// copy input data movq mm2, mm7 ;// copy input data movq mm3, mm7 ;// copy input data punpckhbw mm0, mm6 ;// unpack hi data pmullw mm0, mul3w ;// multiply by 3 psllq mm1, 8 ;// shift 1 byte for previous values; inptr[-1][0][1][2] psrlq mm2, 8 ;// shift right for "next" state; inptr[1][2][3][4] movq mm7, [esi+8] ;// get next quadword from input buffer movq mm5, mm7 ;// make copy psllq mm5, 56 ;// shift left to isolate LSB paddb mm2, mm5 ;// add in byte for "next" state punpckhbw mm1, mm6 ;// unpack punpckhbw mm2, mm6 ;// unpack paddw mm1, mm0 ;// add in result from multiply to "previous" data paddw mm1, bias1w ;// add in bias paddw mm2, mm0 ;// add in result from multiply to "next" data paddw mm2, bias2w ;// add in bias psrlw mm1, 2 ;// convert from word to byte psrlw mm2, 2 ;// convert from word to byte psllq mm2, 8 ;// prepare for interleave paddb mm2, mm1 ;// do interleave movq [edi+8], mm2 ;// write out results add edi, 16 ;// increment output buffer pointer add esi, 8 ;// increment input buffer pointer sub ecx, 8 ;// increment column counter cmp ecx, 8 ;// cmp with 8 jle last_col ;// if less that goto last column ;// Main Loop - process low 8 bytes of mm7 col_loop: movq mm0, mm7 ;// copy input data movq mm1, mm7 ;// copy input data movq mm2, mm7 ;// copy input data punpcklbw mm0, mm6 ;// unpack lo data pmullw mm0, mul3w ;// multiply by 3; i[0][1][2][3] psllq mm1, 8 ;// shift left to get previous byte movq mm5, mm3 ;// retrieve copy of "previous" state psrlq mm5, 56 ;// shift to get LSB paddb mm1, mm5 ;// add in byte psrlq mm2, 8 ;// shift rt for "next" state punpcklbw mm1, mm6 ;// unpack punpcklbw mm2, mm6 ;// unpack paddw mm1, mm0 ;// add in result from multiply to 
"previous" data paddw mm1, bias1w ;// add in bias paddw mm2, mm0 ;// add in result from multiply to "next" data paddw mm2, bias2w ;// add in bias psrlw mm1, 2 ;// convert from word to byte psrlw mm2, 2 ;// convert from word to byte psllq mm2, 8 ;// prepare for interleave paddb mm2, mm1 ;// do interleave movq [edi], mm2 ;// write out results ;// process high 8 bytes of mm7 movq mm0, mm7 ;// copy input data movq mm1, mm7 ;// copy input data movq mm2, mm7 ;// copy input data movq mm3, mm7 ;// copy input data punpckhbw mm0, mm6 ;// unpack hi data pmullw mm0, mul3w ;// multiply by 3; i[0][1][2][3] psllq mm1, 8 ;// shift left to get previous byte psrlq mm2, 8 ;// shift rt for "next" state movq mm7, [esi+8] ;// get next quadword from input buffer movq mm5, mm7 ;// make copy psllq mm5, 56 ;// shift left for LSB paddb mm2, mm5 ;// add in byte punpckhbw mm1, mm6 ;// unpack punpckhbw mm2, mm6 ;// unpack paddw mm1, mm0 ;// add in result from multiply to "previous" data paddw mm1, bias1w ;// add in bias paddw mm2, mm0 ;// add in result from multiply to "next" data paddw mm2, bias2w ;// add in bias psrlw mm1, 2 ;// convert from word to byte psrlw mm2, 2 ;// convert from word to byte psllq mm2, 8 ;// prepare for interleave paddb mm2, mm1 ;// do interleave movq [edi+8], mm2 ;// write out results add edi, 16 ;// increment output buffer pointer add esi, 8 ;// increment input buffer pointer sub ecx, 8 ;// increment column counter cmp ecx, 8 ;// cmp with 8 jg col_loop ;// if > 8 goto main loop last_col: ;// Special last column case - process low 8 bytes of mm7 movq mm0, mm7 ;// copy input data movq mm1, mm7 ;// copy input data movq mm2, mm7 ;// copy input data punpcklbw mm0, mm6 ;// unpack lo data pmullw mm0, mul3w ;// multiply by 3; i[0][1][2][3] psllq mm1, 8 ;// shift left to get previous byte movq mm5, mm3 ;// retrieve copy of "previous" state psrlq mm5, 56 ;// shift left for MSB paddb mm1, mm5 ;// add in byte psrlq mm2, 8 ;// shift rt for "next" state punpcklbw mm1, mm6 ;// unpack 
punpcklbw mm2, mm6 ;// unpack paddw mm1, mm0 ;// add in result from multiply to "previous" data paddw mm1, bias1w ;// add in bias paddw mm2, mm0 ;// add in result from multiply to "next" data paddw mm2, bias2w ;// add in bias psrlw mm1, 2 ;// convert from word to byte psrlw mm2, 2 ;// convert from word to byte psllq mm2, 8 ;// prepare for interleave paddb mm2, mm1 ;// do interleave movq [edi], mm2 ;// write out results ;// Special last column case - process hi 8 bytes of mm7 movq mm0, mm7 ;// copy input data movq mm1, mm7 ;// copy input data movq mm2, mm7 ;// copy input data punpckhbw mm0, mm6 ;// unpack hi data pmullw mm0, mul3w ;// multiply by 3; i[0][1][2][3] psllq mm1, 8 ;// shift left to get previous byte psrlq mm2, 8 ;// shift rt for "next" state pand mm7, mask1 ;// mask out all but MSB paddb mm2, mm7 ;// add in byte punpckhbw mm1, mm6 ;// unpack punpckhbw mm2, mm6 ;// unpack paddw mm1, mm0 ;// add in result from multiply to "previous" data paddw mm1, bias1w ;// add in bias paddw mm2, mm0 ;// add in result from multiply to "next" data paddw mm2, bias2w ;// add in bias psrlw mm1, 2 ;// convert from word to byte psrlw mm2, 2 ;// convert from word to byte psllq mm2, 8 ;// prepare for interleave paddb mm2, mm1 ;// do interleave movq [edi+8], mm2 ;// write out results emms }#endif#if defined(HAVE_MMX_ATT_MNEMONICS) __asm__ ( "movl %0, %%ecx \n\t" // horizontal line size "movl %1, %%esi \n\t" // input buffer pointer "movl %2, %%edi \n\t" // output buffer pointer "pxor %%mm6,%%mm6 \n\t" // zero register "movq (%%esi),%%mm7 \n\t" // input register // Special 1st column case - process low 8 bytes of mm7 "movq %%mm7,%%mm0 \n\t" // move 1st quadword into mm7 "movq %%mm7,%%mm1 \n\t" // make a copy "movq %%mm7,%%mm2 \n\t" // make a copy "punpcklbw %%mm6,%%mm0 \n\t" // unpack lower values; inptr[0][1][2][3] "movq %%mm0,%%mm3 \n\t" // make a copy "pmullw mul3w,%%mm0 \n\t" // multiply by 3 "psllq $8,%%mm1 \n\t" // shift 1 byte for previous values; inptr[-1][0][1][2] "movq 
%%mm7,%%mm5 \n\t" // copy original data "pand mask2,%%mm5 \n\t" // mask out all but lower byte for "previous" state "paddb %%mm5,%%mm1 \n\t" // add in byte to quadword "psrlq $8,%%mm2 \n\t" // shift right for "next" state; inptr[1][2][3][4] "punpcklbw %%mm6,%%mm1 \n\t" // unpack "punpcklbw %%mm6,%%mm2 \n\t" // unpack "paddw %%mm0,%%mm1 \n\t" // add in result from multiply to "previous" data "paddw bias1w,%%mm1 \n\t" // add in bias "paddw %%mm0,%%mm2 \n\t" // add in result from multiply to "next" data "paddw bias2w,%%mm2 \n\t" // add in bias "psrlw $2,%%mm1 \n\t" // convert from word to byte "psrlw $2,%%mm2 \n\t" // convert from word to byte "psllq $8,%%mm2 \n\t" // prepare for interleave "paddb %%mm1,%%mm2 \n\t" // do interleave "movq %%mm2,(%%edi) \n\t" // write out results // process high 8 bytes of mm7 "movq %%mm7,%%mm0 \n\t" // copy input data "movq %%mm7,%%mm1 \n\t" // copy input data "movq %%mm7,%%mm2 \n\t" // copy input data "movq %%mm7,%%mm3 \n\t" // copy input data "punpckhbw %%mm6,%%mm0 \n\t" // unpack hi data "pmullw mul3w,%%mm0 \n\t" // multiply by 3 "psllq $8,%%mm1 \n\t" // shift 1 byte for previous values; inptr[-1][0][1][2] "psrlq $8,%%mm2 \n\t" // shift right for "next" state; inptr[1][2][3][4] "movq 8(%%esi),%%mm7 \n\t" // get next quadword from input buffer "movq %%mm7,%%mm5 \n\t" // make copy "psllq $56,%%mm5 \n\t" // shift left to isolate LSB "paddb %%mm5,%%mm2 \n\t" // add in byte for "next" state "punpckhbw %%mm6,%%mm1 \n\t" // unpack "punpckhbw %%mm6,%%mm2 \n\t" // unpack "paddw %%mm0,%%mm1 \n\t" // add in result from multiply to "previous" data "paddw bias1w,%%mm1 \n\t" // add in bias "paddw %%mm0,%%mm2 \n\t" // add in result from multiply to "next" data "paddw bias2w,%%mm2 \n\t" // add in bias "psrlw $2,%%mm1 \n\t" // convert from word to byte "psrlw $2,%%mm2 \n\t" // convert from word to byte "psllq $8,%%mm2 \n\t" // prepare for interleave "paddb %%mm1,%%mm2 \n\t" // do interleave "movq %%mm2,8(%%edi) \n\t" // write out results "addl 
$16,%%edi \n\t" // increment output buffer pointer "addl $8,%%esi \n\t" // increment input buffer pointer "subl $8,%%ecx \n\t" // increment column counter "cmpl $8,%%ecx \n\t" // cmp with 8 "jle last_col \n\t" // if less that goto last column // Main Loop - process low 8 bytes of mm7 "col_loop_a: \n\t" "movq %%mm7,%%mm0 \n\t" // copy input data "movq %%mm7,%%mm1 \n\t" // copy input data "movq %%mm7,%%mm2 \n\t" // copy input data "punpcklbw %%mm6,%%mm0 \n\t" // unpack lo data "pmullw mul3w,%%mm0 \n\t" // multiply by 3; i[0][1][2][3] "psllq $8,%%mm1 \n\t" // shift left to get previous byte "movq %%mm3,%%mm5 \n\t" // retrieve copy of "previous" state "psrlq $56,%%mm5 \n\t" // shift to get LSB "paddb %%mm5,%%mm1 \n\t" // add in byte "psrlq $8,%%mm2 \n\t" // shift rt for "next" state "punpcklbw %%mm6,%%mm1 \n\t" // unpack "punpcklbw %%mm6,%%mm2 \n\t" // unpack "paddw %%mm0,%%mm1 \n\t" // add in result from multiply to "previous" data "paddw bias1w,%%mm1 \n\t" // add in bias "paddw %%mm0,%%mm2 \n\t" // add in result from multiply to "next" data "paddw bias2w,%%mm2 \n\t" // add in bias "psrlw $2,%%mm1 \n\t" // convert from word to byte "psrlw $2,%%mm2 \n\t" // convert from word to byte "psllq $8,%%mm2 \n\t" // prepare for interleave "paddb %%mm1,%%mm2 \n\t" // do interleave "movq %%mm2,(%%edi) \n\t" // write out results // process high 8 bytes of mm7 "movq %%mm7,%%mm0 \n\t" // copy input data "movq %%mm7,%%mm1 \n\t" // copy input data "movq %%mm7,%%mm2 \n\t" // copy input data "movq %%mm7,%%mm3 \n\t" // copy input data "punpckhbw %%mm6,%%mm0 \n\t" // unpack hi data "pmullw mul3w,%%mm0 \n\t" // multiply by 3; i[0][1][2][3] "psllq $8,%%mm1 \n\t" // shift left to get previous byte "psrlq $8,%%mm2 \n\t" // shift rt for "next" state "movq 8(%%esi),%%mm7 \n\t" // get next quadword from input buffer "movq %%mm7,%%mm5 \n\t" // make copy "psllq $56,%%mm5 \n\t" // shift left for LSB
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -