📄 jdcolor.c
字号:
/* * jdcolor.c * * Copyright (C) 1991-1997, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * * This file contains output colorspace conversion routines. *///#include <asm/msr.h>#define JPEG_INTERNALS#include "jinclude.h"#include "jpeglib.h"/* Private subobject */typedef struct { struct jpeg_color_deconverter pub; /* public fields */ /* Private state for YCC->RGB conversion */ int * Cr_r_tab; /* => table for Cr to R conversion */ int * Cb_b_tab; /* => table for Cb to B conversion */ INT32 * Cr_g_tab; /* => table for Cr to G conversion */ INT32 * Cb_g_tab; /* => table for Cb to G conversion */} my_color_deconverter;typedef my_color_deconverter * my_cconvert_ptr;/**************** YCbCr -> RGB conversion: most common case **************//* * YCbCr is defined per CCIR 601-1, except that Cb and Cr are * normalized to the range 0..MAXJSAMPLE rather than -0.5 .. 0.5. * The conversion equations to be implemented are therefore * R = Y + 1.40200 * Cr * G = Y - 0.34414 * Cb - 0.71414 * Cr * B = Y + 1.77200 * Cb * where Cb and Cr represent the incoming values less CENTERJSAMPLE. * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.) * * To avoid floating-point arithmetic, we represent the fractional constants * as integers scaled up by 2^16 (about 4 digits precision); we have to divide * the products by 2^16, with appropriate rounding, to get the correct answer. * Notice that Y, being an integral input, does not contribute any fraction * so it need not participate in the rounding. * * For even more speed, we avoid doing any multiplications in the inner loop * by precalculating the constants times Cb and Cr for all possible values. * For 8-bit JSAMPLEs this is very reasonable (only 256 entries per table); * for 12-bit samples it is still acceptable. It's not very reasonable for * 16-bit samples, but if you want lossless storage you shouldn't be changing * colorspace anyway. * The Cr=>R and Cb=>B values can be rounded to integers in advance; the * values for the G calculation are left scaled up, since we must add them * together before rounding. */#define SCALEBITS 16 /* speediest right-shift on some machines */#define ONE_HALF ((INT32) 1 << (SCALEBITS-1))#define FIX(x) ((INT32) ((x) * (1L<<SCALEBITS) + 0.5))/* * Initialize tables for YCC->RGB colorspace conversion. */LOCAL(void)build_ycc_rgb_table (j_decompress_ptr cinfo){ my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert; int i; INT32 x; SHIFT_TEMPS; cconvert->Cr_r_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int)); cconvert->Cb_b_tab = (int *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(int)); cconvert->Cr_g_tab = (INT32 *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32)); cconvert->Cb_g_tab = (INT32 *) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, (MAXJSAMPLE+1) * SIZEOF(INT32)); /* not needed in MMX YUV->RGB */ for (i = 0, x = -CENTERJSAMPLE; i <= MAXJSAMPLE; i++, x++) { /* i is the actual input pixel value, in the range 0..MAXJSAMPLE */ /* The Cb or Cr value we are thinking of is x = i - CENTERJSAMPLE */ /* Cr=>R value is nearest int to 1.40200 * x */ cconvert->Cr_r_tab[i] = (int) RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS); /* Cb=>B value is nearest int to 1.77200 * x */ cconvert->Cb_b_tab[i] = (int) RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS); /* Cr=>G value is scaled-up -0.71414 * x */ cconvert->Cr_g_tab[i] = (- FIX(0.71414)) * x; /* Cb=>G value is scaled-up -0.34414 * x */ /* We also add in ONE_HALF so that need not do it in inner loop */ cconvert->Cb_g_tab[i] = (- FIX(0.34414)) * x + ONE_HALF; }} /* * Convert some rows of samples to the output colorspace. * * Note that we change from noninterleaved, one-plane-per-component format * to interleaved-pixel format. The output buffer is therefore three times * as wide as the input buffer. * A starting row offset is provided only for the input buffer. The caller * can easily adjust the passed output_buf value to accommodate any row * offset required on that side. */#if defined(HAVE_MMX_INTEL_MNEMONICS) || defined(HAVE_MMX_ATT_MNEMONICS)#if defined(__GNUC__)#define int64 unsigned long long#endif#if defined(HAVE_MMX_INTEL_MNEMONICS)static const int64 bpte0 = 0x0080008000800080; // 128static const int64 bpte1 = 0x7168e9f97168e9f9; // for cb (Cb/b, Cb/g, Cb/b, Cb/g)static const int64 bpte2 = 0xd21a59bad21a59ba; // for cr (Cr/g, Cr/r, Cr/g, Cr/r)#elsestatic const int64 te0 = 0x0200020002000200LL; // -128 << 2static const int64 te1 = 0xe9fa7168e9fa7168LL; // for cbstatic const int64 te2 = 0x59bad24d59bad24dLL; // for cr#endif//static const int64 te2 = 0x59ba524b59ba524b; // for cr/* How to calculate the constants (see constants from above for YCbCr->RGB): trunc(-0.34414*16384) << 16 + trunc(1.772 * 16348) || mind that negative numbers are in 2-complement form (2^32+x+1) *//* * R = Y + 1.40200 * Cr * G = Y - 0.34414 * Cb - 0.71414 * Cr * B = Y + 1.77200 * Cb */METHODDEF(void)ycc_rgb_convert_mmx (j_decompress_ptr cinfo, JSAMPIMAGE input_buf, JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows){ INT32 y, cb, cr; int temp; JSAMPROW outptr; JSAMPROW inptr0, inptr1, inptr2; JDIMENSION col; JDIMENSION num_cols = cinfo->output_width; while (--num_rows >= 0) { inptr0 = input_buf[0][input_row]; inptr1 = input_buf[1][input_row]; inptr2 = input_buf[2][input_row]; input_row++; outptr = *output_buf++; num_cols/=4; for (col = 0; col < num_cols; col++) {#if defined(HAVE_MMX_INTEL_MNEMONICS)// implemented by Brian Potetz <bpotetz@cs.cmu.edu> - thanks for that ! /gz :-) // #error "jdcolor's MMX routines haven't been converted to INTEL assembler yet - contact JPEGlib/MMX" _asm { // :"=m"(outptr[0]) // :"m"(inptr0),"m"(inptr1),"m"(inptr2) //y cb cr // :"eax", "ebx", "ecx", "st" mov eax, inptr0 // mov %1, %%eax mov ebx, inptr1 // mov %2, %%ebx mov ecx, inptr2 // mov %3, %%ecx mov edx, outptr // ------------- keep output pointer in register. movd mm0, [eax] // movd (%%eax),%%mm0 mm0: 0 0 0 0 y3 y2 y1 y0 - 8 bit movd mm1, [ebx] // movd (%%ebx),%%mm1 mm1: 0 0 0 0 cb3 cb2 cb1 cb0 movd mm2, [ecx] // movd (%%ecx),%%mm2 mm2: 0 0 0 0 cr3 cr2 cr1 cr0 pxor mm7, mm7 // pxor %%mm7,%%mm7 mm7 = 0 punpcklbw mm0, mm7 // punpcklbw %%mm7,%%mm0 mm0: y3 y2 y1 y0 - expand to 16 bit punpcklbw mm1, mm7 // punpcklbw %%mm7,%%mm1 mm1: cb3 cb2 cb1 cb0 punpcklbw mm2, mm7 // punpcklbw %%mm7,%%mm2 mm2: cr3 cr2 cr1 cr0 psubw mm1, bpte0 // psubw te0,%%mm1 minus 128 for cb and cr psubw mm2, bpte0 // psubw te0,%%mm2 psllw mm1, 2 // psllw $2,%%mm1 shift left 2 bits for Cr and Cb to fit the mult constants psllw mm2, 2 // psllw $2,%%mm2 //------------------------------------- // prepare for RGB 1 & 0 movq mm3, mm1 // movq %%mm1,%%mm3 mm3_16: cb3 cb2 cb1 cb0 movq mm4, mm2 // movq %%mm2,%%mm4 mm4_16: cr3 cr2 cr1 cr0 punpcklwd mm3, mm3 // punpcklwd %%mm3,%%mm3 expand to 32 bit: mm3: cb1 cb1 cb0 cb0 punpcklwd mm4, mm4 // punpcklwd %%mm4,%%mm4 mm4: cr1 cr1 cr0 cr0 // Y Y Y Y // CD*b CB*g 0 0 // 0 CR*g CR*r 0 //------------------ // B G R // Multiply in the constants: pmulhw mm3, bpte1 // pmulhw te1,%%mm3 mm3: cb1/b cb1/g cb0/b cb0/g pmulhw mm4, bpte2 // pmulhw te2,%%mm4 mm4: cr1/g cb1/r cr0/g cr0/r movq mm5, mm0 // movq %%mm0,%%mm5 mm5: y3 y2 y1 y0 punpcklwd mm5, mm5 // punpcklwd %%mm5,%%mm5 expand to 32 bit: y1 y1 y0 y0 movq mm6, mm5 // movq %%mm5,%%mm6 mm6: y1 y1 y0 y0 punpcklwd mm5, mm5 // punpcklwd %%mm5,%%mm5 mm5: y0 y0 y0 y0 punpckhwd mm6, mm6 // punpckhwd %%mm6,%%mm6 mm6: y1 y1 y1 y1 // RGB 0 movq mm7, mm3 // movq %%mm3,%%mm7 mm7: cb1/g cb1/b cb0/g cb0/b psllq mm7, 32 // psllq $32,%%mm7 shift left 32 bits: mm7: cb0/b cb0/g 0 0 paddw mm5, mm7 // paddw %%mm7,%%mm5 add: mm7: y+cb movq mm7, mm4 // movq %%mm4,%%mm7 mm7 = cr1 cr1 cr0 cr0 psllq mm7, 32 // psllq $32,%%mm7 shift left 32 bits: mm7: cr0/g cr0/r 0 0 psrlq mm7, 16 // psrlq $16,%%mm7 mm7 = 0 cr0/g cr0/r 0 paddw mm5, mm7 // paddw %%mm7,%%mm5 y+cb+cr->mm5= r g b ? // RGB 1 psrlq mm4, 32 // psrlq $32,%%mm4 mm4: 0 0 cr1/g cr1/r paddw mm6, mm4 // paddw %%mm4,%%mm6 y+cr psrlq mm3, 32 // psrlq $32,%%mm3 mm3: 0 0 cb1/b cb1/g psllq mm3, 16 // psllq $16,%%mm4 mm4: 0 cr1/b cr1/g 0 paddw mm6, mm3 // paddw %%mm3,%%mm6 y+cr+cb->mm6 = ? r g b packuswb mm5, mm6 // packuswb %%mm6,%%mm5 mm5 = ? r1 g1 b1 r0 g0 b0 ? psrlq mm5, 8 // psrlq $8,%%mm5 mm5: 0 ? r1 g1 b1 r0 g0 b0 movq [edx], mm5 // movq %%mm5,%0 store mm5 // prepare for RGB 2 & 3 punpckhwd mm0, mm0 // punpckhwd %%mm0,%%mm0 mm0 = y3 y3 y2 y2 punpckhwd mm1, mm1 // punpckhwd %%mm1,%%mm1 mm1 = cb3 cb3 cb2 cb2 punpckhwd mm2, mm2 // punpckhwd %%mm2,%%mm2 mm2 = cr3 cr3 cr2 cr2 pmulhw mm1, bpte1 // pmulhw te1,%%mm1 mm1 = cb * ? pmulhw mm2, bpte2 // pmulhw te2,%%mm2 mm2 = cr * ? movq mm3, mm0 // movq %%mm0,%%mm3 mm3 = y3 y3 y2 y2 punpcklwd mm3, mm3 // punpcklwd %%mm3,%%mm3 mm3 = y2 y2 y2 y2 punpckhwd mm0, mm0 // punpckhwd %%mm0,%%mm0 mm0 = y3 y3 y3 y3 // RGB 2 movq mm4, mm1 // movq %%mm1,%%mm4 mm4 = cb3 cb3 cb2 cb2 movq mm5, mm2 // movq %%mm2,%%mm5 mm5 = cr3 cr3 cr2 cr2 psllq mm4, 32 // psllq $32,%%mm4 mm4 = cb2/b cb2/g 0 0 psllq mm5, 32 // psllq $32,%%mm5 mm5 = cr2/g cr2/r 0 0 psrlq mm5, 16 // psrlq $16,%%mm4 mm5 = 0 cr2/g cr2/g 0 paddw mm3, mm4 // paddw %%mm4,%%mm3 y+cb paddw mm3, mm5 // paddw %%mm5,%%mm3 mm3 = y+cb+cr // RGB 3 psrlq mm2, 32 // psrlq $32,%%mm2 mm2 = 0 0 cr3/g cr3/r psrlq mm1, 32 // psrlq $32,%%mm1 mm1 = 0 0 cb3/b cb3/g psllq mm1, 16 // psllq $16,%%mm2 mm1 = 0 cb3/b cb3/g 0 paddw mm0, mm2 // paddw %%mm2,%%mm0 y+cr paddw mm0, mm1 // paddw %%mm1,%%mm0 y+cb+cr packuswb mm3, mm0 // packuswb %%mm0,%%mm3 pack in a quadword psrlq mm3, 8 // psrlq $8,%%mm3 shift to the right corner movq [edx+6], mm3 // movq %%mm3,6%0 save two more RGB pixels }#endif#endif#if defined(HAVE_MMX_ATT_MNEMONICS) __asm__( "pushl %%ebx\n" "mov %1, %%eax\n" "mov %2, %%ebx\n" "mov %3, %%ecx\n" "movd (%%eax),%%mm0\n" // mm0: 0 0 0 0 y3 y2 y1 y0 - 8 bit "movd (%%ebx),%%mm1\n" // mm1: 0 0 0 0 cb3 cb2 cb1 cb0 "movd (%%ecx),%%mm2\n" // mm2: 0 0 0 0 cr3 cr2 cr1 cr0 "pxor %%mm7,%%mm7\n" // mm7 = 0 "punpcklbw %%mm7,%%mm0\n" // mm0: y3 y2 y1 y0 - expand to 16 bit "punpcklbw %%mm7,%%mm1\n" // mm1: cb3 cb2 cb1 cb0 "punpcklbw %%mm7,%%mm2\n" // mm2: cr3 cr2 cr1 cr0 "psubw %4,%%mm1\n" // minus 128 for cb and cr "psubw %4,%%mm2\n" "psllw $2,%%mm1\n" // shift left 2 bits for Cr and Cb to fit the mult constants "psllw $2,%%mm2\n" // prepare for RGB 1 & 0 "movq %%mm1,%%mm3\n" // mm3_16: cb3 cb2 cb1 cb0 "movq %%mm2,%%mm4\n" // mm4_16: cr3 cr2 cr1 cr0 "punpcklwd %%mm3,%%mm3\n"// expand to 32 bit: mm3: cb1 cb1 cb0 cb0 "punpcklwd %%mm4,%%mm4\n"// mm4: cr1 cr1 cr0 cr0 // Y Y Y Y // 0 CB*g CB*b 0 // CR*r CR*g 0 0 //------------------ // R G B "pmulhw %5,%%mm3\n"// multiplicate in the constants: mm3: cb1/green cb1/blue cb0/green cb0/blue "pmulhw %6,%%mm4\n"// mm4: cr1/red cb1/green cr0/red cr0/green "movq %%mm0,%%mm5\n" // mm5: y3 y2 y1 y0 "punpcklwd %%mm5,%%mm5\n" // expand to 32 bit: y1 y1 y0 y0 "movq %%mm5,%%mm6\n" // mm6: y1 y1 y0 y0 "punpcklwd %%mm5,%%mm5\n" // mm5: y0 y0 y0 y0 "punpckhwd %%mm6,%%mm6\n" // mm6: y1 y1 y1 y1 // RGB 0 "movq %%mm3,%%mm7\n" // mm7: cb1/g cb1/b cb0/g cb0/b "psllq $32,%%mm7\n" // shift left 32 bits: mm7: cb0/g cb0/b 0 0 "psrlq $16,%%mm7\n" // mm7 = 0 cb0/g cb0/b 0 "paddw %%mm7,%%mm5\n" // add: mm7: y+cb "movq %%mm4,%%mm7\n" // mm7 = cr1 cr1 cr0 cr0 "psllq $32,%%mm7\n" // shift left 32 bits: mm7: cr0/r cr0/g 0 0 "paddw %%mm7,%%mm5\n" // y+cb+cr r g b ? // RGB 1 "psrlq $32,%%mm4\n" // mm4: 0 0 cr1 cr1 "psllq $16,%%mm4\n" // mm4: 0 cr1 cr1 0 "paddw %%mm4,%%mm6\n" //y+cr "psrlq $32,%%mm3\n" // mm3: 0 0 cb1 cb1 "paddw %%mm3,%%mm6\n" //y+cr+cb: mm6 = r g b "packuswb %%mm6,%%mm5\n" //mm5 = ? r1 g1 b1 r0 g0 b0 ? "psrlq $8,%%mm5\n" // mm5: 0 ? r1 g1 b1 r0 g0 b0 "movq %%mm5,%0\n" // store mm5 // prepare for RGB 2 & 3 "punpckhwd %%mm0,%%mm0\n" //mm0 = y3 y3 y2 y2 "punpckhwd %%mm1,%%mm1\n" //mm1 = cb3 cb3 cb2 cb2 "punpckhwd %%mm2,%%mm2\n" //mm2 = cr3 cr3 cr2 cr2 "pmulhw %5,%%mm1\n" //mm1 = cb * ? "pmulhw %6,%%mm2\n" //mm2 = cr * ? "movq %%mm0,%%mm3\n" //mm3 = y3 y3 y2 y2 "punpcklwd %%mm3,%%mm3\n" //mm3 = y2 y2 y2 y2 "punpckhwd %%mm0,%%mm0\n" //mm0 = y3 y3 y3 y3 // RGB 2 "movq %%mm1,%%mm4\n" //mm4 = cb3 cb3 cb2 cb2 "movq %%mm2,%%mm5\n" //mm5 = cr3 cr3 cr2 cr2 "psllq $32,%%mm4\n" //mm4 = cb2 cb2 0 0 "psllq $32,%%mm5\n" //mm5 = cr2 cr2 0 0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -