📄 jdmerge.c
字号:
"movq %%mm3,%%mm5 \n\t" // Y7 Y7 Y6 Y6 Y3 Y3 Y2 Y2 "punpcklwd %%mm6,%%mm3 \n\t" // X X X X Y3 Y2 Y2 Y2 "punpcklbw empty,%%mm3 \n\t" // Y3 Y2 Y2 Y2 "psrlq $16,%%mm5 \n\t" // 0 0 Y7 Y7 Y6 Y6 Y3 Y3 "paddsw %%mm0,%%mm3 \n\t" // r3 b2 g2 r2 "movq %%mm5,%%mm6 \n\t" // 0 0 Y7 Y7 Y6 Y6 Y3 Y3 "movq %%mm1,%%mm0 \n\t" // 0 0 Y5 Y5 Y4 Y4 Y1 Y1 "punpckldq %%mm6,%%mm6 \n\t" // X X X X Y6 Y6 Y3 Y3 "punpcklbw empty,%%mm6 \n\t" // Y6 Y6 Y3 Y3 "psrlq $24,%%mm1 \n\t" // 0 0 0 0 0 Y5 Y5 Y4 "paddsw %%mm2,%%mm6 \n\t" // g6 r6 b3 g3 "packuswb %%mm6,%%mm3 \n\t" // g6 r6 b3 g3 r3 b2 g2 r2 "movq %%mm5,%%mm2 \n\t" // 0 0 Y7 Y7 Y6 Y6 Y3 Y3 "psrlq $32,%%mm0 \n\t" // 0 0 0 0 0 0 Y5 Y5 "movq %%mm3,(%%edx) \n\t" // move to memory g6 r6 b3 g3 r3 b2 g2 r2 "punpcklwd %%mm0,%%mm1 \n\t" // X X X X Y5 Y5 Y5 Y4 "psrlq $24,%%mm5 \n\t" // 0 0 0 0 0 Y7 Y7 Y6 "movd (%%ebx),%%mm0 \n\t" // Cr9 Cr8.....Cr3 Cr2 "psrlq $32,%%mm2 \n\t" // 0 0 0 0 0 0 Y7 Y7 "psrlq $16,%%mm0 \n\t" "punpcklbw empty,%%mm1 \n\t" // Y5 Y5 Y5 Y4 "punpcklwd %%mm2,%%mm5 \n\t" // X X X X Y7 Y7 Y7 Y6 "paddsw %%mm4,%%mm1 \n\t" // b5 g5 r5 b4 "punpcklbw empty,%%mm5 \n\t" // Y7 Y7 Y7 Y6 "pxor %%mm6,%%mm6 \n\t" // clear mm6 registr "punpcklbw %%mm0,%%mm0 \n\t" // X X X X Cr3 Cr3 Cr2 Cr2 "paddsw %%mm4,%%mm5 \n\t" // b7 g7 r7 b6 "punpcklwd %%mm0,%%mm0 \n\t" // Cr3 Cr3 Cr3 Cr3 Cr2 Cr2 Cr2 Cr2 "movq %%mm0,%%mm4 \n\t" "movd (%%ecx),%%mm3 \n\t" // Cb9 Cb8...... Cb3 Cb2 "punpcklbw %%mm6,%%mm0 \n\t" // Cr2 Cr2 Cr2 Cr2 "psrlq $16,%%mm3 \n\t" "psubsw const128,%%mm0 \n\t" // Cr2 - 128:Cr2-128:Cr2-128:Cr2 -128 "punpcklbw %%mm3,%%mm3 \n\t" // X X X X Cb3 Cb3 Cb2 Cb2 "psllw $2,%%mm0 \n\t" // left shift by 2 bits "paddsw const05,%%mm0 \n\t" // add (one_half/fix(x)) << 2 "punpcklwd %%mm3,%%mm3 \n\t" // Cb3 Cb3 Cb3 Cb3 Cb2 Cb2 Cb2 Cb2 "movq %%mm3,%%mm7 \n\t" "pmulhw const1,%%mm0 \n\t" // multiply by (fix(x) >> 1) "punpcklbw %%mm6,%%mm3 \n\t" // Cb2 Cb2 Cb2 Cb2 "psubsw const128,%%mm3 \n\t" // Cb0 - 128:Cb0-128:Cb0-128:Cb0 -128 "punpckhbw %%mm6,%%mm4 \n\t" // Cr3 Cr3 Cr3 Cr3 "psllw $2,%%mm3 \n\t" // left shift by 2 bits "paddsw const15,%%mm3 \n\t" // add (one_half/fix(x)) << 2 "punpckhbw %%mm6,%%mm7 \n\t" // Cb3 Cb3 Cb3 Cb3 "pmulhw const2,%%mm3 \n\t" // multiply by (fix(x) >> 1) "psubsw const128,%%mm7 \n\t" // Cb3 - 128:Cb3-128:Cb3-128:Cb3 -128 "paddsw %%mm3,%%mm0 \n\t" // cred2 cbl2 cgr2 cred2 "psllw $2,%%mm7 \n\t" // left shift by 2 bits "psubsw const128,%%mm4 \n\t" // Cr3 - 128:Cr3-128:Cr3-128:Cr3 -128 "movd 4(%%esi),%%mm3 \n\t" // Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8 "psllw $2,%%mm4 \n\t" // left shift by 2 bits "paddsw const55,%%mm7 \n\t" // add (one_half/fix(x)) << 2 "movq %%mm3,%%mm6 \n\t" // Y21 Y20 Y17 Y16 Y13 Y12 Y9 Y8 "movq %%mm0,%%mm2 \n\t" "pand davemask,%%mm2 \n\t" "punpcklbw %%mm3,%%mm3 \n\t" // Y13 Y13 Y12 Y12 Y9 Y9 Y8 Y8 "psrlq $16,%%mm2 \n\t" "paddsw const45,%%mm4 \n\t" // add (one_half/fix(x)) << 2 "punpcklwd %%mm6,%%mm3 \n\t" // X X X X Y9 Y8 Y8 Y8 "pmulhw const5,%%mm4 \n\t" // multiply by (fix(x) >> 1) "pmulhw const6,%%mm7 \n\t" // multiply by (fix(x) >> 1) "punpcklbw empty,%%mm3 \n\t" // Y9 Y8 Y8 Y8 "paddsw %%mm7,%%mm4 \n\t" // cbl3 cgr3 cred3 cbl3 "paddsw %%mm0,%%mm3 \n\t" // r9 b8 g8 r8 "movq %%mm4,%%mm7 \n\t" "packuswb %%mm3,%%mm1 \n\t" // r9 b8 g8 r8 b5 g5 r5 b4 "movd 4(%%eax),%%mm3 \n\t" // Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10 "pand davemask,%%mm7 \n\t" "psrlq $8,%%mm6 \n\t" // 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9 "psllq $16,%%mm7 \n\t" "movq %%mm1,8(%%edi) \n\t" // move to memory r9 b8 g8 r8 b5 g5 r5 b4 "por %%mm7,%%mm2 \n\t" "movq %%mm3,%%mm7 \n\t" // Y23 Y22 Y19 Y18 Y15 Y14 Y11 Y10 "punpcklbw %%mm3,%%mm3 \n\t" // X X X X Y11 Y11 Y10 Y10 "pxor %%mm1,%%mm1 \n\t" "punpcklwd %%mm7,%%mm3 \n\t" // X X X X Y11 Y10 Y10 Y10 "punpcklbw %%mm1,%%mm3 \n\t" // Y11 Y10 Y10 Y10 "psrlq $8,%%mm7 \n\t" // 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11 "paddsw %%mm0,%%mm3 \n\t" // r11 b10 g10 r10 "movq %%mm7,%%mm0 \n\t" // 0 Y23 Y22 Y19 Y18 Y15 Y14 Y11 "packuswb %%mm3,%%mm5 \n\t" // r11 b10 g10 r10 b7 g7 r7 b6 "punpcklbw %%mm7,%%mm7 \n\t" // X X X X Y14 Y14 Y11 Y11 "movq %%mm5,8(%%edx) \n\t" // move to memory r11 b10 g10 r10 b7 g7 r7 b6 "movq %%mm6,%%mm3 \n\t" // 0 Y21 Y20 Y17 Y16 Y13 Y12 Y9 "punpcklbw %%mm6,%%mm6 \n\t" // X X X X Y12 Y12 Y9 Y9 "punpcklbw %%mm1,%%mm7 \n\t" // Y14 Y14 Y11 Y11 "punpcklbw %%mm1,%%mm6 \n\t" // Y12 Y12 Y9 Y9 "paddsw %%mm2,%%mm7 \n\t" // g14 r14 b11 g11 "paddsw %%mm2,%%mm6 \n\t" // g12 r12 b9 g9 "psrlq $8,%%mm3 \n\t" // 0 0 Y21 Y20 Y17 Y16 Y13 Y12 "movq %%mm3,%%mm1 \n\t" // 0 0 Y21 Y20 Y17 Y16 Y13 Y12 "punpcklbw %%mm3,%%mm3 \n\t" // X X X X Y13 Y13 Y12 Y12 "addl $8,%%esi \n\t" "psrlq $16,%%mm3 \n\t" // X X X X X X Y13 Y13 modified on 09/24 "punpcklwd %%mm3,%%mm1 \n\t" // X X X X Y13 Y13 Y13 Y12 "addl $8,%%eax \n\t" "psrlq $8,%%mm0 \n\t" // 0 0 Y23 Y22 Y19 Y18 Y15 Y14 "punpcklbw empty,%%mm1 \n\t" // Y13 Y13 Y13 Y12 "movq %%mm0,%%mm5 \n\t" // 0 0 Y23 Y22 Y19 Y18 Y15 Y14 "punpcklbw %%mm0,%%mm0 \n\t" // X X X X Y15 Y15 Y14 Y14 "paddsw %%mm4,%%mm1 \n\t" // b13 g13 r13 b12 "psrlq $16,%%mm0 \n\t" // X X X X X X Y15 Y15 "addl $24,%%edi \n\t" "punpcklwd %%mm0,%%mm5 \n\t" // X X X X Y15 Y15 Y15 Y14 "packuswb %%mm1,%%mm6 \n\t" // b13 g13 r13 b12 g12 r12 b9 g9 "addl $24,%%edx \n\t" "punpcklbw empty,%%mm5 \n\t" // Y15 Y15 Y15 Y14 "addl $4,%%ebx \n\t" "paddsw %%mm4,%%mm5 \n\t" // b15 g15 r15 b14 "movq %%mm6,-8(%%edi) \n\t" // move to memory b13 g13 r13 b12 g12 r12 b9 g9 "packuswb %%mm5,%%mm7 \n\t" // b15 g15 r15 b14 g14 r14 b11 g11 "addl $4,%%ecx \n\t" "movq %%mm7,-8(%%edx) \n\t" // move to memory b15 g15 r15 b14 g14 r14 b11 g11 "decl %6 \n\t" "jnz do_next16 \n\t" "emms \n\t" "popl %%ebx \n\t" : //"=m"(&cols_asm) : "m"(inptr00), "m"(inptr01), "m"(inptr2), "m"(inptr1), "m"(outptr1), "m"(outptr0),"m"(cols_asm) /* was (&cols_asm) */ : "eax", "ecx", "edx", "edi", "esi", "st", "cc", "memory" );#if 0 "movl $inptr00, %%esi \n\t" "movl $inptr01, %%eax \n\t" "movl $inptr2, %%ebx \n\t" "movl $inptr1, %%ecx \n\t" "movl $outptr0, %%edi \n\t" "movl $outptr1, %%edx \n\t"#endif#endif inptr1 += (cols_asm_copy<<2); inptr2 += (cols_asm_copy<<2); inptr00 += (cols_asm_copy<<3); inptr01 += (cols_asm_copy<<3); outptr0 += cols_asm_copy*24; outptr1 += cols_asm_copy*24; //for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ /*cb = GETJSAMPLE(*inptr1++); cr = GETJSAMPLE(*inptr2++); cred = Crrtab[cr]; cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb];*/ /* Fetch 4 Y values and emit 4 pixels */ /*y = GETJSAMPLE(*inptr00++); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; outptr0 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr00++); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; outptr0 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; outptr1 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; outptr1 += RGB_PIXELSIZE; } */ for (col = diff >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ cb = GETJSAMPLE(*inptr1++); cr = GETJSAMPLE(*inptr2++); cred = Crrtab[cr]; cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ y = GETJSAMPLE(*inptr00++); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; outptr0 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr00++); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; outptr0 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; outptr1 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; outptr1 += RGB_PIXELSIZE; } /* If image width is odd, do the last output column separately */ //if (cinfo->output_width & 1) { if (diff & 1) { cb = GETJSAMPLE(*inptr1); cr = GETJSAMPLE(*inptr2); cred = Crrtab[cr]; cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; y = GETJSAMPLE(*inptr00); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; y = GETJSAMPLE(*inptr01); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; } }#elseMETHODDEF(void)h2v2_merged_upsample (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf){ my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample; register int y, cred, cgreen, cblue; int cb, cr; register JSAMPROW outptr0, outptr1; JSAMPROW inptr00, inptr01, inptr1, inptr2; JDIMENSION col; /* copy these pointers into registers if possible */ register JSAMPLE * range_limit = cinfo->sample_range_limit; int * Crrtab = upsample->Cr_r_tab; int * Cbbtab = upsample->Cb_b_tab; INT32 * Crgtab = upsample->Cr_g_tab; INT32 * Cbgtab = upsample->Cb_g_tab; SHIFT_TEMPS inptr00 = input_buf[0][in_row_group_ctr*2]; inptr01 = input_buf[0][in_row_group_ctr*2 + 1]; inptr1 = input_buf[1][in_row_group_ctr]; inptr2 = input_buf[2][in_row_group_ctr]; outptr0 = output_buf[0]; outptr1 = output_buf[1]; /* Loop for each group of output pixels */ for (col = cinfo->output_width >> 1; col > 0; col--) { /* Do the chroma part of the calculation */ cb = GETJSAMPLE(*inptr1++); cr = GETJSAMPLE(*inptr2++); cred = Crrtab[cr]; cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; /* Fetch 4 Y values and emit 4 pixels */ y = GETJSAMPLE(*inptr00++); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; outptr0 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr00++); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; outptr0 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; outptr1 += RGB_PIXELSIZE; y = GETJSAMPLE(*inptr01++); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; outptr1 += RGB_PIXELSIZE; } /* If image width is odd, do the last output column separately */ if (cinfo->output_width & 1) { cb = GETJSAMPLE(*inptr1); cr = GETJSAMPLE(*inptr2); cred = Crrtab[cr]; cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS); cblue = Cbbtab[cb]; y = GETJSAMPLE(*inptr00); outptr0[RGB_RED] = range_limit[y + cred]; outptr0[RGB_GREEN] = range_limit[y + cgreen]; outptr0[RGB_BLUE] = range_limit[y + cblue]; y = GETJSAMPLE(*inptr01); outptr1[RGB_RED] = range_limit[y + cred]; outptr1[RGB_GREEN] = range_limit[y + cgreen]; outptr1[RGB_BLUE] = range_limit[y + cblue]; }}#endif/* * Module initialization routine for merged upsampling/color conversion. * * NB: this is called under the conditions determined by use_merged_upsample() * in jdmaster.c. That routine MUST correspond to the actual capabilities * of this module; no safety checks are made here. */GLOBAL(void)jinit_merged_upsampler (j_decompress_ptr cinfo){ my_upsample_ptr upsample; upsample = (my_upsample_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_upsampler)); cinfo->upsample = (struct jpeg_upsampler *) upsample; upsample->pub.start_pass = start_pass_merged_upsample; upsample->pub.need_context_rows = FALSE; upsample->out_row_width = cinfo->output_width * cinfo->out_color_components; if (cinfo->max_v_samp_factor == 2) { upsample->pub.upsample = merged_2v_upsample; upsample->upmethod = h2v2_merged_upsample; /* Allocate a spare row buffer */ upsample->spare_row = (JSAMPROW) (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE, (size_t) (upsample->out_row_width * SIZEOF(JSAMPLE))); } else { upsample->pub.upsample = merged_1v_upsample; upsample->upmethod = h2v1_merged_upsample; /* No spare row needed */ upsample->spare_row = NULL; } build_ycc_rgb_table(cinfo);}#endif /* UPSAMPLE_MERGING_SUPPORTED */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -