📄 jdsample.c
字号:
paddw mm3, bias7w ;// Add up results for final o2 o4 o6 o8 paddw mm0, mm3 ;// output to be interleaved with the paddw mm0, mm2 ;// previous results for o1 o3 o5 o7 psrlw mm0, 4 ;// convert back to byte (with truncate) psllq mm0, 8 ;// prepare to interleave output results paddw mm6, mm0 ;// interleave results movq [edi], mm6 ;// write to output buffer add edi, 8 ;// Part 1 of the output - process lo data for o1 o3 o5 o7 movq mm0, mm4 ;// get data from input row 0 movq mm2, mm5 ;// get data from input row 1 punpcklbw mm0, noval ;// process inptr0 movq mm1, mm0 ;// copy inptr0 psllq mm1, 16 ;// shift for first column special case i0[-1] movq mm7, input0 psrlq mm7, 56 paddw mm1, mm7 pmullw mm0, mul9w ;// multiply by special case constant pmullw mm1, mul3w ;// multiply input1 by 3 punpcklbw mm2, noval ;// process intr1 movq mm3, mm2 ;// copy inptr0 psllq mm3, 16 ;// shift for first column special case i1[-1] movq mm7, input1 psrlq mm7, 56 paddw mm3, mm7 pmullw mm2, mul3w ;// multiply by special case constant paddw mm1, mm0 ;// Add up results for movq [eax+16], mm1 movq mm6, mm1 ;// with the next results paddw mm6, bias8w ;// Add even bias paddw mm3, mm2 ;// final o1 o3 o5 o7 paddw mm6, mm3 ;// output to be interleaved psrlw mm6, 4 ;// convert from word to byte (truncate) ;// Process lo data for o2 o4 o6 o8 movq mm1, mm4 ;// copy inptr0 for unpack movq mm3, mm5 ;// copy inptr1 for unpack psrlq mm1, 8 ;// shift right for i0[1][2][3][4] punpcklbw mm1, noval ;// process inptr1 pmullw mm1, mul3w ;// multiply by next nearest constant psrlq mm3, 8 ;// shift right for i1[1][2][3][4] punpcklbw mm3, noval ;// process inptr1 paddw mm0, mm1 ;// Add up results for final o2 o4 o6 o8 movq [eax+24], mm0 paddw mm0, mm3 ;// previous results for o1 o3 o5 o7 paddw mm0, bias7w ;// Add odd bias paddw mm0, mm2 ;// output to be interleaved with the psrlw mm0, 4 ;// convert back to byte (with truncate) psllq mm0, 8 ;// prepare to interleave output results paddw mm6, mm0 ;// interleave results movq [edi], mm6 ;// write to output buffer add edi, 8 ;// increment output pointer add edx, 8 ;// increment input0 pointer add esi, 8 ;// increment input1 pointer add eax, 32 sub ecx, 8 cmp ecx, 0 jg col_loop last_column: ;// Special for last column - process hi data for o1 o3 o5 o7 movq mm0, mm4 ;// get data from input row 0 movq mm1, mm0 ;// copy inptr0 for unpack movq mm3, mm5 ;// copy inptr1 for unpack punpckhbw mm0, noval ;// process inptr1[0] psllq mm1, 8 ;// shift for inptr0[-1] punpckhbw mm1, noval ;// process inptr1[1] pmullw mm0, mul9w ;// multiply by special case constant pmullw mm1, mul3w ;// multiply inptr1 by 3;// punpckhbw mm2, noval ;// process inptr1[0] psllq mm3, 8 ;// shift for inptr1[-1] punpckhbw mm3, noval ;// process inptr1;// pmullw mm2, mul3w ;// multiply by special case constant paddw mm1, mm0 ;// Add up results for movq [eax], mm1 movq mm6, mm1 ;// with the next results paddw mm6, bias8w ;// Add even bias paddw mm3, mm2 ;// final o1 o3 o5 o7 paddw mm6, mm3 ;// output to be interleaved psrlw mm6, 4 ;// convert from word to byte (truncate) ;// Part 4 of the output - process hi data for o2 o4 o6 o8;// movq mm2, mm5 ;// get data from input row 1 movq mm1, mm4 ;// copy inptr0 for unpack movq mm3, mm5 ;// copy inptr1 for unpack psrlq mm1, 8 ;// shift right for i0[1][2][3][4] ;// load next inptr0 to mm4 for future use pand mm4, mask1 paddb mm1, mm4 ;// add in MSB from next input0 column punpckhbw mm1, noval ;// process inptr0 pmullw mm1, mul3w ;// multiply by next nearest constan psrlq mm3, 8 ;// shift right for i1[1][2][3][4] ;// load next inptr1 to mm5 for future use pand mm5, mask1 paddb mm3, mm5 ;// add in MSB from next input1 column punpckhbw mm3, noval ;// process inptr1 paddw mm0, mm1 ;// Add odd bias movq [eax+8], mm0 paddw mm3, bias7w ;// Add up results for final o2 o4 o6 o8 paddw mm0, mm3 ;// output to be interleaved with the paddw mm0, mm2 ;// previous results for o1 o3 o5 o7 psrlw mm0, 4 ;// convert back to byte (with truncate) psllq mm0, 8 ;// prepare to interleave output results paddw mm6, mm0 ;// interleave results movq [edi], mm6 ;// write to output buffer;// add edi, 8 ;// increment output pointer add edx, 8 ;// increment input0 pointer;// add esi, 8 ;// increment input1 pointer/************* For v = 1 *****************/ mov ecx, dsamp ;// columns to process mov esi, inptr2 ;// input row2 mov edi, outptr2 ;// output buffer mov edx, inptr0 mov eax, save_val movq mm2, [esi] ;// get data from input row 1 movq mm5, mm2 ;// save to process hi half of input1 punpcklbw mm2, noval ;// process inptr1 movq mm3, mm2 ;// copy inptr0 psllq mm3, 16 ;// shift for first column special case i1[-1] pmullw mm2, mul3ws ;// multiply by special case constant movq mm6, [eax] ;// Add up results for paddw mm3, mm2 ;// final o1 o3 o5 o7 paddw mm6, mm3 ;// output to be interleaved paddw mm6, bias8w ;// Add even bias psrlw mm6, 4 ;// convert from word to byte (truncate) ;// Part 2 of the output - process lo data for o2 o4 o6 o8 movq mm2, mm5 ;// get data from input row 1 movq mm3, mm2 ;// copy inptr1 for unpack punpcklbw mm2, noval ;// process inptr1 psrlq mm3, 8 ;// shift right for i1[1][2][3][4] punpcklbw mm3, noval ;// process inptr1 pmullw mm2, mul3w ;// multiply by next nearest constant movq mm0, [eax+8] ;// Add up results for final o2 o4 o6 o8 paddw mm0, mm3 ;// previous results for o1 o3 o5 o7 paddw mm0, bias7w ;// Add odd bias paddw mm0, mm2 ;// output to be interleaved with the psrlw mm0, 4 ;// convert back to byte (with truncate) psllq mm0, 8 ;// prepare to interleave output results paddw mm6, mm0 ;// interleave results movq [edi], mm6 ;// write to output buffer add edi, 8 ;// increment output pointer add eax, 16 sub ecx, 8 cmp ecx, 0 jle last_column2 ;// End of special case. Now for generic loop col_loop2: ;// Part 2 of the output movq mm2, mm5 ;// get data from input row 1 movq mm3, mm2 ;// copy inptr1 for unpack movq mm1, mm2 punpckhbw mm2, noval ;// process inptr1[0] psllq mm3, 8 ;// shift for inptr1[-1] punpckhbw mm3, noval ;// process inptr1 pmullw mm2, mul3w ;// multiply by special case constant movq mm6, [eax] ;// with the next results paddw mm6, bias8w ;// Add even bias paddw mm3, mm2 ;// final o1 o3 o5 o7 paddw mm6, mm3 ;// output to be interleaved psrlw mm6, 4 ;// convert from word to byte (truncate) ;// process hi data for o2 o4 o6 o8 movq mm2, mm5 ;// get data from input row 1 movq mm3, mm2 ;// copy inptr1 for unpack punpckhbw mm2, noval ;// process inptr1 psrlq mm3, 8 ;// shift right for i1[1][2][3][4] movq mm5, [esi + 8] ;// need to add in a byte from the next column ;// load next inptr1 to mm5 for future use movq mm7, mm5 psllq mm7, 56 ;// shift for MSB paddb mm3, mm7 ;// add in MSB from next input1 column punpckhbw mm3, noval ;// process inptr1 pmullw mm2, mul3w ;// multiply by next nearest constant movq mm0, [eax+8] ;// Add odd bias paddw mm3, bias7w ;// Add up results for final o2 o4 o6 o8 paddw mm0, mm3 ;// output to be interleaved with the paddw mm0, mm2 ;// previous results for o1 o3 o5 o7 psrlw mm0, 4 ;// convert back to byte (with truncate) psllq mm0, 8 ;// prepare to interleave output results paddw mm6, mm0 ;// interleave results movq [edi], mm6 ;// write to output buffer add edi, 8 ;// Part 1 of the output - process lo data for o1 o3 o5 o7 movq mm2, mm5 ;// get data from input row 1 punpcklbw mm2, noval ;// process inptr1 movq mm3, mm2 ;// copy inptr0 psllq mm3, 16 ;// shift for first column special case i1[-1] movq mm7, mm1 psrlq mm7, 56 paddw mm3, mm7 pmullw mm2, mul3w ;// multiply by special case constant movq mm6, [eax+16] ;// Add up results for paddw mm6, bias8w ;// Add even bias paddw mm3, mm2 ;// final o1 o3 o5 o7 paddw mm6, mm3 ;// output to be interleaved psrlw mm6, 4 ;// convert from word to byte (truncate) ;// Process lo data for o2 o4 o6 o8 movq mm2, mm5 ;// get data from input row 1 movq mm3, mm2 ;// copy inptr1 for unpack punpcklbw mm2, noval ;// process inptr1 psrlq mm3, 8 ;// shift right for i1[1][2][3][4] punpcklbw mm3, noval ;// process inptr1 pmullw mm2, mul3w ;// multiply by next nearest constant movq mm0, [eax+24] ;// Add up results for final o2 o4 o6 o8 paddw mm0, mm3 ;// previous results for o1 o3 o5 o7 paddw mm0, bias7w ;// Add odd bias paddw mm0, mm2 ;// output to be interleaved with the psrlw mm0, 4 ;// convert back to byte (with truncate) psllq mm0, 8 ;// prepare to interleave output results paddw mm6, mm0 ;// interleave results movq [edi], mm6 ;// write to output buffer add edi, 8 ;// increment output pointer add edx, 8 ;// increment input0 pointer add esi, 8 ;// increment input1 pointer add eax, 32 movq mm4, [edx];// movq mm5, [esi] sub ecx, 8 cmp ecx, 0 jg col_loop2 last_column2: ;// Special for last column - process hi data for o1 o3 o5 o7 movq mm2, mm5 ;// get data from input row 1 movq mm3, mm2 ;// copy inptr1 for unpack punpckhbw mm2, noval ;// process inptr1[0] psllq mm3, 8 ;// shift for inptr1[-1] punpckhbw mm3, noval ;// process inptr1 pmullw mm2, mul3w ;// multiply by special case constant movq mm6, [eax] ;// with the next results paddw mm6, bias8w ;// Add even bias paddw mm3, mm2 ;// final o1 o3 o5 o7 paddw mm6, mm3 ;// output to be interleaved psrlw mm6, 4 ;// convert from word to byte (truncate) ;// Part 4 of the output - process hi data for o2 o4 o6 o8 movq mm2, mm5 ;// get data from input row 1 movq mm3, mm2 ;// copy inptr1 for unpack punpckhbw mm2, noval ;// process inptr1 psrlq mm3, 8 ;// shift right for i1[1][2][3][4] ;// load next inptr1 to mm5 for future use pand mm5, mask1 paddb mm3, mm5 ;// add in MSB from next input1 column punpckhbw mm3, noval ;// process inptr1 pmullw mm2, mul3w ;// multiply by next nearest constant movq mm0, [eax+8] ;// Add odd bias paddw mm3, bias7w ;// Add up results for final o2 o4 o6 o8 paddw mm0, mm3 ;// output to be interleaved with the paddw mm0, mm2 ;// previous results for o1 o3 o5 o7 psrlw mm0, 4 ;// convert back to byte (with truncate) psllq mm0, 8 ;// prepare to interleave output results paddw mm6, mm0 ;// interleave results movq [edi], mm6 ;// write to output buffer add edi, 8 ;// increment output pointer add edx, 8 ;// increment input0 pointer add esi, 8 ;// increment input1 pointer }#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -