📄 jdmerge.c
字号:
punpcklbw mm0, mm0 ; Cr3 Cr3 Cr2 Cr2 Cr1 Cr1 Cr0 Cr0 movq mm7, const128 punpcklwd mm0, mm0 ; Cr1 Cr1 Cr1 Cr1 Cr0 Cr0 Cr0 Cr0 movq mm4, mm0 punpcklbw mm0, mm6 ; Cr0 Cr0 Cr0 Cr0 psubsw mm0, mm7 ; Cr0 - 128:Cr0-128:Cr0-128:Cr0 -128 movd mm1, [ecx] ; Cb7 Cb6...... Cb1 Cb0 psllw mm0, 2 ; left shift by 2 bits punpcklbw mm1, mm1 ; Cb3 Cb3 Cb2 Cb2 Cb1 Cb1 Cb0 Cb0 paddsw mm0, const05 ; add (one_half/fix(x)) << 2 punpcklwd mm1, mm1 ; Cb1 Cb1 Cb1 Cb1 Cb0 Cb0 Cb0 Cb0 movq mm5, mm1 pmulhw mm0, const1 ; multiply by (fix(x) >> 1) punpcklbw mm1, mm6 ; Cb0 Cb0 Cb0 Cb0 punpckhbw mm4, mm6 ; Cr1 Cr1 Cr1 Cr1 psubsw mm1, mm7 ; Cb0 - 128:Cb0-128:Cb0-128:Cb0 -128 punpckhbw mm5, mm6 ; Cb1 Cb1 Cb1 Cb1 psllw mm1, 2 ; left shift by 2 bits paddsw mm1, const15 ; add (one_half/fix(x)) << 2 psubsw mm4, mm7 ; Cr1 - 128:Cr1-128:Cr1-128:Cr1 -128 psubsw mm5, mm7 ; Cb1 - 128:Cb1-128:Cb1-128:Cb1 -128 pmulhw mm1, const2 ; multiply by (fix(x) >> 1) psllw mm4, 2 ; left shift by 2 bits psllw mm5, 2 ; left shift by 2 bits paddsw mm4, const45 ; add (one_half/fix(x)) << 2 movd mm7, [esi] ; Y13 Y12 Y9 Y8 Y5 Y4 Y1 Y0 pmulhw mm4, const5 ; multiply by (fix(x) >> 1) movq mm6, mm7 punpcklbw mm7, mm7 ; Y5 Y5 Y4 Y4 Y1 Y1 Y0 Y0 paddsw mm5, const55 ; add (one_half/fix(x)) << 2 paddsw mm0, mm1 ; cred0 cbl0 cgr0 cred0 movq mm1, mm7 pmulhw mm5, const6 ; multiply by (fix(x) >> 1) movq mm2, mm0 ; cred0 cbl0 cgr0 cred0 punpcklwd mm7, mm6 ; Y5 Y4 Y1 Y1 Y1 Y0 Y0 Y0 pand mm2, davemask ; 0 cbl0 cgr0 0 psrlq mm1, 16 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1 psrlq mm2, 16 ; 0 0 cbl0 cgr0 punpcklbw mm7, empty ; Y1 Y0 Y0 Y0 paddsw mm4, mm5 ; cbl1 cgr1 cred1 cbl1 movq mm3, mm4 ; cbl1 cgr1 cred1 cbl1 pand mm3, davemask ; 0 cgr1 cred1 0 paddsw mm7, mm0 ; r1 b0 g0 r0 psllq mm3, 16 ; cgr1 cred1 0 0 movq mm6, mm1 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1 por mm2, mm3 ; cgr1 cred1 cbl0 cgr0 punpcklbw mm6, empty ; Y4 Y4 Y1 Y1 movd mm3, [eax] ; Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2 paddsw mm6, mm2 ; g4 r4 b1 g1 packuswb mm7, mm6 ; g4 r4 b1 g1 r1 b0 g0 r0 movq mm6, mm3 ; Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2 punpcklbw mm3, mm3 ; Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2 movq [edi], mm7 ; move to memory g4 r4 b1 g1 r1 b0 g0 r0 movq mm5, mm3 ; Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2 punpcklwd mm3, mm6 ; X X X X Y3 Y2 Y2 Y2 punpcklbw mm3, empty ; Y3 Y2 Y2 Y2 psrlq mm5, 16 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3 paddsw mm3, mm0 ; r3 b2 g2 r2 movq mm6, mm5 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3 movq mm0, mm1 ; 0 0 Y5 Y5 Y4 Y4 Y1 Y1 punpckldq mm6, mm6 ; X X X X Y6 Y6 Y3 Y3 punpcklbw mm6, empty ; Y6 Y6 Y3 Y3 psrlq mm1, 24 ; 0 0 0 0 0 Y5 Y5 Y4 paddsw mm6, mm2 ; g6 r6 b3 g3 packuswb mm3, mm6 ; g6 r6 b3 g3 r3 b2 g2 r2 movq mm2, mm5 ; 0 0 Y7 Y7 Y6 Y6 Y3 Y3 psrlq mm0, 32 ; 0 0 0 0 0 0 Y5 Y5 movq [edx], mm3 ; move to memory g6 r6 b3 g3 r3 b2 g2 r2 punpcklwd mm1, mm0 ; X X X X Y5 Y5 Y5 Y4 psrlq mm5, 24 ; 0 0 0 0 0 Y7 Y7 Y6 movd mm0, [ebx] ; Cr9 Cr8.....Cr3 Cr2 psrlq mm2, 32 ; 0 0 0 0 0 0 Y7 Y7 psrlq mm0, 16 punpcklbw mm1, empty ; Y5 Y5 Y5 Y4 punpcklwd mm5, mm2 ; X X X X Y7 Y7 Y7 Y6 paddsw mm1, mm4 ; b5 g5 r5 b4 punpcklbw mm5, empty ; Y7 Y7 Y7 Y6 pxor mm6, mm6 ; clear mm6 registr punpcklbw mm0, mm0 ; X X X X Cr3 Cr3 Cr2 Cr2 paddsw mm5, mm4 ; b7 g7 r7 b6 punpcklwd mm0, mm0 ; Cr3 Cr3 Cr3 Cr3 Cr2 Cr2 Cr2 Cr2 movq mm4, mm0 movd mm3, [ecx] ; Cb9 Cb8...... Cb3 Cb2 punpcklbw mm0, mm6 ; Cr2 Cr2 Cr2 Cr2 psrlq mm3, 16 psubsw mm0, const128 ; Cr2 - 128:Cr2-128:Cr2-128:Cr2 -128 punpcklbw mm3, mm3 ; X X X X Cb3 Cb3 Cb2 Cb2 psllw mm0, 2 ; left shift by 2 bits paddsw mm0, const05 ; add (one_half/fix(x)) << 2 punpcklwd mm3, mm3 ; Cb3 Cb3 Cb3 Cb3 Cb2 Cb2 Cb2 Cb2 movq mm7, mm3 pmulhw mm0, const1 ; multiply by (fix(x) >> 1) punpcklbw mm3, mm6 ; Cb2 Cb2 Cb2 Cb2 psubsw mm3, const128 ; Cb0 - 128:Cb0-128:Cb0-128:Cb0 -128 punpckhbw mm4, mm6 ; Cr3 Cr3 Cr3 Cr3 psllw mm3, 2 ; left shift by 2 bits paddsw mm3, const15 ; add (one_half/fix(x)) << 2 punpckhbw mm7, mm6 ; Cb3 Cb3 Cb3 Cb3 pmulhw mm3, const2 ; multiply by (fix(x) >> 1) psubsw mm7, const128 ; Cb3 - 128:Cb3-128:Cb3-128:Cb3 -128 paddsw mm0, mm3 ; cred2 cbl2 cgr2 cred2 psllw mm7, 2 ; left shift by 2 bits psubsw mm4, const128 ; Cr3 - 128:Cr3-128:Cr3-128:Cr3 -128 movd mm3, [esi+4] ; Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8 psllw mm4, 2 ; left shift by 2 bits paddsw mm7, const55 ; add (one_half/fix(x)) << 2 movq mm6, mm3 ; Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8 movq mm2, mm0 pand mm2, davemask punpcklbw mm3, mm3 ; Y13 Y13 Y12 Y12 Y9 Y9 Y8 Y8 psrlq mm2, 16 paddsw mm4, const45 ; add (one_half/fix(x)) << 2 punpcklwd mm3, mm6 ; X X X X Y9 Y8 Y8 Y8 pmulhw mm4, const5 ; multiply by (fix(x) >> 1) pmulhw mm7, const6 ; multiply by (fix(x) >> 1) punpcklbw mm3, empty ; Y9 Y8 Y8 Y8 paddsw mm4, mm7 ; cbl3 cgr3 cred3 cbl3 paddsw mm3, mm0 ; r9 b8 g8 r8 movq mm7, mm4 packuswb mm1, mm3 ; r9 b8 g8 r8 b5 g5 r5 b4 movd mm3, [eax+4] ; Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10 pand mm7, davemask psrlq mm6, 8 ; 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9 psllq mm7, 16 movq [edi+8], mm1 ; move to memory r9 b8 g8 r8 b5 g5 r5 b4 por mm2, mm7 movq mm7, mm3 ; Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10 punpcklbw mm3, mm3 ; X X X X Y11 Y11 Y10 Y10 pxor mm1, mm1 punpcklwd mm3, mm7 ; X X X X Y11 Y10 Y10 Y10 punpcklbw mm3, mm1 ; Y11 Y10 Y10 Y10 psrlq mm7, 8 ; 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11 paddsw mm3, mm0 ; r11 b10 g10 r10 movq mm0, mm7 ; 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11 packuswb mm5, mm3 ; r11 b10 g10 r10 b7 g7 r7 b6 punpcklbw mm7, mm7 ; X X X X Y14 Y14 Y11 Y11 movq [edx+8], mm5 ; move to memory r11 b10 g10 r10 b7 g7 r7 b6 movq mm3, mm6 ; 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9 punpcklbw mm6, mm6 ; X X X X Y12 Y12 Y9 Y9 punpcklbw mm7, mm1 ; Y14 Y14 Y11 Y11 punpcklbw mm6, mm1 ; Y12 Y12 Y9 Y9 paddsw mm7, mm2 ; g14 r14 b11 g11 paddsw mm6, mm2 ; g12 r12 b9 g9 psrlq mm3, 8 ; 0 0 Y21 Y20 Y17 Y16 Y13 Y12 movq mm1, mm3 ; 0 0 Y21 Y20 Y17 Y16 Y13 Y12 punpcklbw mm3, mm3 ; X X X X Y13 Y13 Y12 Y12 add esi, 8 psrlq mm3, 16 ; X X X X X X Y13 Y13 modified on 09/24 punpcklwd mm1, mm3 ; X X X X Y13 Y13 Y13 Y12 add eax, 8 psrlq mm0, 8 ; 0 0 Y23 Y22 Y19 Y18 Y15 Y14 punpcklbw mm1, empty ; Y13 Y13 Y13 Y12 movq mm5, mm0 ; 0 0 Y23 Y22 Y19 Y18 Y15 Y14 punpcklbw mm0, mm0 ; X X X X Y15 Y15 Y14 Y14 paddsw mm1, mm4 ; b13 g13 r13 b12 psrlq mm0, 16 ; X X X X X X Y15 Y15 add edi, 24 punpcklwd mm5, mm0 ; X X X X Y15 Y15 Y15 Y14 packuswb mm6, mm1 ; b13 g13 r13 b12 g12 r12 b9 g9 add edx, 24 punpcklbw mm5, empty ; Y15 Y15 Y15 Y14 add ebx, 4 paddsw mm5, mm4 ; b15 g15 r15 b14 movq [edi-8], mm6 ; move to memory b13 g13 r13 b12 g12 r12 b9 g9 packuswb mm7, mm5 ; b15 g15 r15 b14 g14 r14 b11 g11 add ecx, 4 movq [edx-8], mm7 ; move to memory b15 g15 r15 b14 g14 r14 b11 g11 dec cols_asm jnz do_next16 EMMS }#endif#ifdef HAVE_MMX_ATT_MNEMONICS fprintf(stderr, "Using accelerated MMX code for merge !\n"); __asm__ ( "pushl %%ebx \n\t" "movl %0, %%esi \n\t" "movl %1, %%eax \n\t" "movl %2, %%ebx \n\t" "movl %3, %%ecx \n\t" "movl %4, %%edi \n\t" "movl %5, %%edx \n\t" "do_next16: \n\t" "movd (%%ebx),%%mm0 \n\t" // Cr7 Cr6.....Cr1 Cr0 "pxor %%mm6,%%mm6 \n\t" "punpcklbw %%mm0,%%mm0 \n\t" // Cr3 Cr3 Cr2 Cr2 Cr1 Cr1 Cr0 Cr0 "movq const128,%%mm7 \n\t" "punpcklwd %%mm0,%%mm0 \n\t" // Cr1 Cr1 Cr1 Cr1 Cr0 Cr0 Cr0 Cr0 "movq %%mm0,%%mm4 \n\t" "punpcklbw %%mm6,%%mm0 \n\t" // Cr0 Cr0 Cr0 Cr0 "psubsw %%mm7,%%mm0 \n\t" // Cr0 - 128:Cr0-128:Cr0-128:Cr0 -128 "movd (%%ecx),%%mm1 \n\t" // Cb7 Cb6...... Cb1 Cb0 "psllw $2,%%mm0 \n\t" // left shift by 2 bits "punpcklbw %%mm1,%%mm1 \n\t" // Cb3 Cb3 Cb2 Cb2 Cb1 Cb1 Cb0 Cb0 "paddsw const05,%%mm0 \n\t" // add (one_half/fix(x)) << 2 "punpcklwd %%mm1,%%mm1 \n\t" // Cb1 Cb1 Cb1 Cb1 Cb0 Cb0 Cb0 Cb0 "movq %%mm1,%%mm5 \n\t" "pmulhw const1,%%mm0 \n\t" // multiply by (fix(x) >> 1) "punpcklbw %%mm6,%%mm1 \n\t" // Cb0 Cb0 Cb0 Cb0 "punpckhbw %%mm6,%%mm4 \n\t" // Cr1 Cr1 Cr1 Cr1 "psubsw %%mm7,%%mm1 \n\t" // Cb0 - 128:Cb0-128:Cb0-128:Cb0 -128 "punpckhbw %%mm6,%%mm5 \n\t" // Cb1 Cb1 Cb1 Cb1 "psllw $2,%%mm1 \n\t" // left shift by 2 bits "paddsw const15,%%mm1 \n\t" // add (one_half/fix(x)) << 2 "psubsw %%mm7,%%mm4 \n\t" // Cr1 - 128:Cr1-128:Cr1-128:Cr1 -128 "psubsw %%mm7,%%mm5 \n\t" // Cb1 - 128:Cb1-128:Cb1-128:Cb1 -128 "pmulhw const2,%%mm1 \n\t" // multiply by (fix(x) >> 1) "psllw $2,%%mm4 \n\t" // left shift by 2 bits "psllw $2,%%mm5 \n\t" // left shift by 2 bits "paddsw const45,%%mm4 \n\t" // add (one_half/fix(x)) << 2 "movd (%%esi),%%mm7 \n\t" // Y13 Y12 Y9 Y8 Y5 Y4 Y1 Y0 "pmulhw const5,%%mm4 \n\t" // multiply by (fix(x) >> 1) "movq %%mm7,%%mm6 \n\t" "punpcklbw %%mm7,%%mm7 \n\t" // Y5 Y5 Y4 Y4 Y1 Y1 Y0 Y0 "paddsw const55,%%mm5 \n\t" // add (one_half/fix(x)) << 2 "paddsw %%mm1,%%mm0 \n\t" // cred0 cbl0 cgr0 cred0 "movq %%mm7,%%mm1 \n\t" "pmulhw const6,%%mm5 \n\t" // multiply by (fix(x) >> 1) "movq %%mm0,%%mm2 \n\t" // cred0 cbl0 cgr0 cred0 "punpcklwd %%mm6,%%mm7 \n\t" // Y5 Y4 Y1 Y1 Y1 Y0 Y0 Y0 "pand davemask,%%mm2 \n\t" // 0 cbl0 cgr0 0 "psrlq $16,%%mm1 \n\t" // 0 0 Y5 Y5 Y4 Y4 Y1 Y1 "psrlq $16,%%mm2 \n\t" // 0 0 cbl0 cgr0 "punpcklbw empty,%%mm7 \n\t" // Y1 Y0 Y0 Y0 "paddsw %%mm5,%%mm4 \n\t" // cbl1 cgr1 cred1 cbl1 "movq %%mm4,%%mm3 \n\t" // cbl1 cgr1 cred1 cbl1 "pand davemask,%%mm3 \n\t" // 0 cgr1 cred1 0 "paddsw %%mm0,%%mm7 \n\t" // r1 b0 g0 r0 "psllq $16,%%mm3 \n\t" // cgr1 cred1 0 0 "movq %%mm1,%%mm6 \n\t" // 0 0 Y5 Y5 Y4 Y4 Y1 Y1 "por %%mm3,%%mm2 \n\t" // cgr1 cred1 cbl0 cgr0 "punpcklbw empty,%%mm6 \n\t" // Y4 Y4 Y1 Y1 "movd (%%eax),%%mm3 \n\t" // Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2 "paddsw %%mm2,%%mm6 \n\t" // g4 r4 b1 g1 "packuswb %%mm6,%%mm7 \n\t" // g4 r4 b1 g1 r1 b0 g0 r0 "movq %%mm3,%%mm6 \n\t" // Y15 Y14 Y11 Y10 Y7 Y6 Y3 Y2 "punpcklbw %%mm3,%%mm3 \n\t" // Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2 "movq %%mm7,(%%edi) \n\t" // move to memory g4 r4 b1 g1 r1 b0 g0 r0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -