📄 jdsample.c
字号:
#if defined(HAVE_MMX_ATT_MNEMONICS) __asm__ ( /* This is what we are trying to accomplish here mm0 mm2 mm1 mm3 o1 = (9 * i0[0] + 3 * i1[0] + 3 * i0[-1] + i1[-1] + 8) >> 4 o3 = (9 * i0[1] + 3 * i1[1] + 3 * i0[0] + i1[0] + 8) >> 4 o5 = (9 * i0[2] + 3 * i1[2] + 3 * i0[1] + i1[1] + 8) >> 4 o7 = (9 * i0[3] + 3 * i1[3] + 3 * i0[2] + i1[2] + 8) >> 4 mm0 mm2 mm1 mm3 o2 = (9 * i0[0] + 3 * i1[0] + 3 * i0[1] + i1[1] + 7) >> 4 o4 = (9 * i0[1] + 3 * i1[1] + 3 * i0[2] + i1[2] + 7) >> 4 o6 = (9 * i0[2] + 3 * i1[2] + 3 * i0[3] + i1[3] + 7) >> 4 o8 = (9 * i0[3] + 3 * i1[3] + 3 * i0[4] + i1[4] + 7) >> 4 output_buf = [o1 o2 o3 o4 o5 o6 o7 o8] NOTE: for special first and last column cases o1 = (12 * i0[0] + 4 * i1[0] + 3 * 0 + 0 + 8) >> 4 */ // Part 1 of the output - process lo data for o1 o3 o5 o7 "movl %0, %%ecx \n\t"// columns to process "movl %1, %%edx \n\t"// input row1 "movl %2, %%esi \n\t"// input row2 "movl %3, %%edi \n\t"// output buffer "movl %4, %%eax \n\t" "movq (%%edx),%%mm0 \n\t"// get data from input row 0 "movq (%%esi),%%mm2 \n\t"// get data from input row 1 "movq %%mm0,%%mm4 \n\t"// save to process hi half of input0 "movq %%mm2,%%mm5 \n\t"// save to process hi half of input1 "punpcklbw noval,%%mm0 \n\t"// process inptr0 "movq %%mm0,%%mm1 \n\t"// copy inptr0 "psllq $16,%%mm1 \n\t"// shift for first column special case i0[-1] "pmullw mul9ws,%%mm0 \n\t"// multiply by special case constant "pmullw mul3w,%%mm1 \n\t"// multiply input1 by 3 "punpcklbw noval,%%mm2 \n\t"// process inptr1 "movq %%mm2,%%mm3 \n\t"// copy inptr0 "psllq $16,%%mm3 \n\t"// shift for first column special case i1[-1] "pmullw mul3ws,%%mm2 \n\t"// multiply by special case constant "paddw %%mm0,%%mm1 \n\t"// Add up results for "movq %%mm1,(%%eax) \n\t" "movq %%mm1,%%mm6 \n\t"// with the next results "paddw %%mm2,%%mm3 \n\t"// final o1 o3 o5 o7 "paddw %%mm3,%%mm6 \n\t"// output to be interleaved "paddw bias8w,%%mm6 \n\t"// Add even bias "psrlw $4,%%mm6 \n\t"// convert from word to byte (truncate) // Part 2 of the output - process lo data for o2 o4 o6 o8 "movq %%mm4,%%mm0 \n\t"// get data from input row 0 "movq %%mm5,%%mm2 \n\t"// get data from input row 1 "movq %%mm0,%%mm1 \n\t"// copy inptr0 for unpack "movq %%mm2,%%mm3 \n\t"// copy inptr1 for unpack "punpcklbw noval,%%mm0 \n\t"// process inptr1 "psrlq $8,%%mm1 \n\t"// shift right for i0[1][2][3][4] "punpcklbw noval,%%mm1 \n\t"// process inptr1 "pmullw mul9w,%%mm0 \n\t"// multiply by nearest point constant "pmullw mul3w,%%mm1 \n\t"// multiply by next nearest constant "punpcklbw noval,%%mm2 \n\t"// process inptr1 "psrlq $8,%%mm3 \n\t"// shift right for i1[1][2][3][4] "punpcklbw noval,%%mm3 \n\t"// process inptr1 "pmullw mul3w,%%mm2 \n\t"// multiply by next nearest constant "paddw %%mm1,%%mm0 \n\t"// Add up results for final o2 o4 o6 o8 "movq %%mm0,8(%%eax) \n\t" "paddw %%mm3,%%mm0 \n\t"// previous results for o1 o3 o5 o7 "paddw bias7w,%%mm0 \n\t"// Add odd bias "paddw %%mm2,%%mm0 \n\t"// output to be interleaved with the "psrlw $4,%%mm0 \n\t"// convert back to byte (with truncate) "psllq $8,%%mm0 \n\t"// prepare to interleave output results "paddw %%mm0,%%mm6 \n\t"// interleave results "movq %%mm6,(%%edi) \n\t"// write to output buffer "addl $8,%%edi \n\t"// increment output pointer "addl $16,%%eax \n\t" "subl $8,%%ecx \n\t" "cmpl $0,%%ecx \n\t" "jle last_column \n\t" // End of special case. Now for generic loop "col_loop: \n\t" // Part 2 of the output "movq %%mm4,%%mm0 \n\t"// get data from input row 0 "movq %%mm5,%%mm2 \n\t"// get data from input row 1 "movq %%mm0,%%mm1 \n\t"// copy inptr0 for unpack "movq %%mm2,%%mm3 \n\t"// copy inptr1 for unpack "movq %%mm0, input0 \n\t" "movq %%mm2, input1 \n\t" "punpckhbw noval,%%mm0 \n\t"// process inptr1[0] "psllq $8,%%mm1 \n\t"// shift for inptr0[-1] "punpckhbw noval,%%mm1 \n\t"// process inptr1[1] "pmullw mul9w,%%mm0 \n\t"// multiply by special case constant "pmullw mul3w,%%mm1 \n\t"// multiply inptr1 by 3 "punpckhbw noval,%%mm2 \n\t"// process inptr1[0] "psllq $8,%%mm3 \n\t"// shift for inptr1[-1] "punpckhbw noval,%%mm3 \n\t"// process inptr1 "pmullw mul3w,%%mm2 \n\t"// multiply by special case constant "paddw %%mm0,%%mm1 \n\t"// Add up results for "movq %%mm1,(%%eax) \n\t" "movq %%mm1,%%mm6 \n\t"// with the next results "paddw bias8w,%%mm6 \n\t"// Add even bias "paddw %%mm2,%%mm3 \n\t"// final o1 o3 o5 o7 "paddw %%mm3,%%mm6 \n\t"// output to be interleaved "psrlw $4,%%mm6 \n\t"// convert from word to byte (truncate) // process hi data for o2 o4 o6 o8 "movq %%mm4,%%mm1 \n\t"// get data from input row 0 "movq %%mm5,%%mm3 \n\t"// copy inptr1 for unpack "psrlq $8,%%mm1 \n\t"// shift right for i0[1][2][3][4] "movq 8(%%edx),%%mm4 \n\t"// need to add in a byte from the next column // load next inptr0 to mm4 for future use "movq %%mm4,%%mm7 \n\t" "psllq $56,%%mm7 \n\t"// shift for MSB "paddb %%mm7,%%mm1 \n\t"// add in MSB from next input0 column "punpckhbw noval,%%mm1 \n\t"// process inptr0 "pmullw mul3w,%%mm1 \n\t"// multiply by next nearest constant "psrlq $8,%%mm3 \n\t"// shift right for i1[1][2][3][4] "movq 8(%%esi),%%mm5 \n\t"// need to add in a byte from the next column // load next inptr1 to mm5 for future use "movq %%mm5,%%mm7 \n\t" "psllq $56,%%mm7 \n\t"// shift for MSB "paddb %%mm7,%%mm3 \n\t"// add in MSB from next input1 column "punpckhbw noval,%%mm3 \n\t"// process inptr1 "paddw %%mm1,%%mm0 \n\t"// Add odd bias "movq %%mm0,8(%%eax) \n\t" "paddw bias7w,%%mm3 \n\t"// Add up results for final o2 o4 o6 o8 "paddw %%mm3,%%mm0 \n\t"// output to be interleaved with the "paddw %%mm2,%%mm0 \n\t"// previous results for o1 o3 o5 o7 "psrlw $4,%%mm0 \n\t"// convert back to byte (with truncate) "psllq $8,%%mm0 \n\t"// prepare to interleave output results "paddw %%mm0,%%mm6 \n\t"// interleave results "movq %%mm6,(%%edi) \n\t"// write to output buffer "addl $8,%%edi \n\t" // Part 1 of the output - process lo data for o1 o3 o5 o7 "movq %%mm4,%%mm0 \n\t"// get data from input row 0 "movq %%mm5,%%mm2 \n\t"// get data from input row 1 "punpcklbw noval,%%mm0 \n\t"// process inptr0 "movq %%mm0,%%mm1 \n\t"// copy inptr0 "psllq $16,%%mm1 \n\t"// shift for first column special case i0[-1] "movq input0,%%mm7 \n\t" "psrlq $56,%%mm7 \n\t" "paddw %%mm7,%%mm1 \n\t" "pmullw mul9w,%%mm0 \n\t"// multiply by special case constant "pmullw mul3w,%%mm1 \n\t"// multiply input1 by 3 "punpcklbw noval,%%mm2 \n\t"// process intr1 "movq %%mm2,%%mm3 \n\t"// copy inptr0 "psllq $16,%%mm3 \n\t"// shift for first column special case i1[-1] "movq input1,%%mm7 \n\t" "psrlq $56,%%mm7 \n\t" "paddw %%mm7,%%mm3 \n\t" "pmullw mul3w,%%mm2 \n\t"// multiply by special case constant "paddw %%mm0,%%mm1 \n\t"// Add up results for "movq %%mm1,16(%%eax) \n\t" "movq %%mm1,%%mm6 \n\t"// with the next results "paddw bias8w,%%mm6 \n\t"// Add even bias "paddw %%mm2,%%mm3 \n\t"// final o1 o3 o5 o7 "paddw %%mm3,%%mm6 \n\t"// output to be interleaved "psrlw $4,%%mm6 \n\t"// convert from word to byte (truncate) // Process lo data for o2 o4 o6 o8 "movq %%mm4,%%mm1 \n\t"// copy inptr0 for unpack "movq %%mm5,%%mm3 \n\t"// copy inptr1 for unpack "psrlq $8,%%mm1 \n\t"// shift right for i0[1][2][3][4] "punpcklbw noval,%%mm1 \n\t"// process inptr1 "pmullw mul3w,%%mm1 \n\t"// multiply by next nearest constant "psrlq $8,%%mm3 \n\t"// shift right for i1[1][2][3][4] "punpcklbw noval,%%mm3 \n\t"// process inptr1 "paddw %%mm1,%%mm0 \n\t"// Add up results for final o2 o4 o6 o8 "movq %%mm0,24(%%eax) \n\t" "paddw %%mm3,%%mm0 \n\t"// previous results for o1 o3 o5 o7 "paddw bias7w,%%mm0 \n\t"// Add odd bias "paddw %%mm2,%%mm0 \n\t"// output to be interleaved with the "psrlw $4,%%mm0 \n\t"// convert back to byte (with truncate) "psllq $8,%%mm0 \n\t"// prepare to interleave output results "paddw %%mm0,%%mm6 \n\t"// interleave results "movq %%mm6,(%%edi) \n\t"// write to output buffer "addl $8,%%edi \n\t"// increment output pointer "addl $8,%%edx \n\t"// increment input0 pointer "addl $8,%%esi \n\t"// increment input1 pointer "addl $32,%%eax \n\t" "subl $8,%%ecx \n\t" "cmpl $0,%%ecx \n\t" "jg col_loop \n\t" "last_column: \n\t" // Special for last column - process hi data for o1 o3 o5 o7 "movq %%mm4,%%mm0 \n\t"// get data from input row 0 "movq %%mm0,%%mm1 \n\t"// copy inptr0 for unpack "movq %%mm5,%%mm3 \n\t"// copy inptr1 for unpack "punpckhbw noval,%%mm0 \n\t"// process inptr1[0] "psllq $8,%%mm1 \n\t"// shift for inptr0[-1] "punpckhbw noval,%%mm1 \n\t"// process inptr1[1] "pmullw mul9w,%%mm0 \n\t"// multiply by special case constant "pmullw mul3w,%%mm1 \n\t"// multiply inptr1 by 3 // punpckhbw mm2, noval ;// process inptr1[0] "psllq $8,%%mm3 \n\t"// shift for inptr1[-1] "punpckhbw noval,%%mm3 \n\t"// process inptr1 // pmullw mm2, mul3w ;// multiply by special case constant "paddw %%mm0,%%mm1 \n\t"// Add up results for "movq %%mm1,(%%eax) \n\t" "movq %%mm1,%%mm6 \n\t"// with the next results "paddw bias8w,%%mm6 \n\t"// Add even bias "paddw %%mm2,%%mm3 \n\t"// final o1 o3 o5 o7 "paddw %%mm3,%%mm6 \n\t"// output to be interleaved "psrlw $4,%%mm6 \n\t"// convert from word to byte (truncate) // Part 4 of the output - process hi data for o2 o4 o6 o8 // movq mm2, mm5 ;// get data from input row 1 "movq %%mm4,%%mm1 \n\t"// copy inptr0 for unpack "movq %%mm5,%%mm3 \n\t"// copy inptr1 for unpack "psrlq $8,%%mm1 \n\t"// shift right for i0[1][2][3][4] // load next inptr0 to mm4 for future use "pand mask1,%%mm4 \n\t" "paddb %%mm4,%%mm1 \n\t"// add in MSB from next input0 column "punpckhbw noval,%%mm1 \n\t"// process inptr0 "pmullw mul3w,%%mm1 \n\t"// multiply by next nearest constan "psrlq $8,%%mm3 \n\t"// shift right for i1[1][2][3][4] // load next inptr1 to mm5 for future use "pand mask1,%%mm5 \n\t" "paddb %%mm5,%%mm3 \n\t"// add in MSB from next input1 column "punpckhbw noval,%%mm3 \n\t"// process inptr1 "paddw %%mm1,%%mm0 \n\t"// Add odd bias "movq %%mm0,8(%%eax) \n\t" "paddw bias7w,%%mm3 \n\t"// Add up results for final o2 o4 o6 o8 "paddw %%mm3,%%mm0 \n\t"// output to be interleaved with the "paddw %%mm2,%%mm0 \n\t"// previous results for o1 o3 o5 o7 "psrlw $4,%%mm0 \n\t"// convert back to byte (with truncate) "psllq $8,%%mm0 \n\t"// prepare to interleave output results "paddw %%mm0,%%mm6 \n\t"// interleave results "movq %%mm6,(%%edi) \n\t"// write to output buffer // add edi, 8 ;// increment output pointer "addl $8,%%edx \n\t"// increment input0 pointer // add esi, 8 ;// increment input1 pointer /************* For v = 1 *****************/ "movl %0, %%ecx \n\t"// columns to process "movl %5, %%esi \n\t"// input row2 "movl %6, %%edi \n\t"// output buffer "movl %1, %%edx \n\t" "movl %4, %%eax \n\t" "movq (%%esi),%%mm2 \n\t"// get data from input row 1 "movq %%mm2,%%mm5 \n\t"// save to process hi half of input1 "punpcklbw noval,%%mm2 \n\t"// process inptr1 "movq %%mm2,%%mm3 \n\t"// copy inptr0 "psllq $16,%%mm3 \n\t"// shift for first column special case i1[-1] "pmullw mul3ws,%%mm2 \n\t"// multiply by special case constant "movq (%%eax),%%mm6 \n\t"// Add up results for "paddw %%mm2,%%mm3 \n\t"// final o1 o3 o5 o7 "paddw %%mm3,%%mm6 \n\t"// output to be interleaved "paddw bias8w,%%mm6 \n\t"// Add even bias "psrlw $4,%%mm6 \n\t"// convert from word to byte (truncate) // Part 2 of the output - process lo data for o2 o4 o6 o8 "movq %%mm5,%%mm2 \n\t"// get data from input row 1 "movq %%mm2,%%mm3 \n\t"// copy inptr1 for unpack "punpcklbw noval,%%mm2 \n\t"// process inptr1 "psrlq $8,%%mm3 \n\t"// shift right for i1[1][2][3][4] "punpcklbw noval,%%mm3 \n\t"// process inptr1 "pmullw mul3w,%%mm2 \n\t"// multiply by next nearest constant "movq 8(%%eax),%%mm0 \n\t"// Add up results for final o2 o4 o6 o8 "paddw %%mm3,%%mm0 \n\t"// previous results for o1 o3 o5 o7 "paddw bias7w,%%mm0 \n\t"// Add odd bias "paddw %%mm2,%%mm0 \n\t"// output to be interleaved with the "psrlw $4,%%mm0 \n\t"// convert back to byte (with truncate) "psllq $8,%%mm0 \n\t"// prepare to interleave output results "paddw %%mm0,%%mm6 \n\t"// interleave results "movq %%mm6,(%%edi)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -