📄 video_out_rgb.c
字号:
static void__dummy_convert_yv12(uint8_t* yuv[], rgb_planar_t* rgb, uint32_t pitches[], uint32_t width, uint32_t height){ uint8_t* y_data = yuv[0]; uint8_t* u_data = yuv[1]; uint8_t* v_data = yuv[2]; uint8_t* r_buffer = rgb->r; uint8_t* g_buffer = rgb->g; uint8_t* b_buffer = rgb->b; uint32_t line_size = width >> 1; /* 2 pixels at once */ uint32_t samples = line_size * (height >> 1); while(samples--) { int y, u, v; int m1, m2, m3, m4; int r, g, b; u = *(u_data++) - 128; v = *(v_data++) - 128; m1 = v * dw_factors[0]; m2 = v * dw_factors[1]; m3 = u * dw_factors[2]; m4 = u * dw_factors[3]; y = *(y_data) << 16; r = (int) (y + m1) >> 16; g = (int) (y - m2 - m3) >> 16; b = (int) (y + m4) >> 16; *(r_buffer) = range(r); *(g_buffer) = range(g); *(b_buffer) = range(b); y = *(y_data + 1) << 16; r = (int) (y + m1) >> 16; g = (int) (y - m2 - m3) >> 16; b = (int) (y + m4) >> 16; *(r_buffer + 1) = range(r); *(g_buffer + 1) = range(g); *(b_buffer + 1) = range(b); y = *(y_data + pitches[0]) << 16; r = (int) (y + m1) >> 16; g = (int) (y - m2 - m3) >> 16; b = (int) (y + m4) >> 16; *(r_buffer + width) = range(r); *(g_buffer + width) = range(g); *(b_buffer + width) = range(b); y = *(y_data + pitches[0] + 1) << 16; r = (int) (y + m1) >> 16; g = (int) (y - m2 - m3) >> 16; b = (int) (y + m4) >> 16; *(r_buffer + width + 1) = range(r); *(g_buffer + width + 1) = range(g); *(b_buffer + width + 1) = range(b); y_data += 2; r_buffer += 2; g_buffer += 2; b_buffer += 2; if(!(--line_size)) { line_size = width >> 1; y_data += pitches[0]; r_buffer += width; g_buffer += width; b_buffer += width; } }}static const rgbout_converter_t convert_methods[] ={/* <name> <accel> <convert yuy2> <convert yv12> */ { NULL, 0, __dummy_convert_yuy2, __dummy_convert_yv12},#ifdef COMPILE_ASSEMBLY { "MMX", MM_MMX, __mmx_convert_yuy2, __mmx_convert_yv12}, { "SSE", MM_SSE, __sse_convert_yuy2, __sse_convert_yv12}, {"3DNow!", MM_3DNOW, __3dnow_convert_yuy2, __3dnow_convert_yv12}#endif/* currently 3DNow is the best function, therefore it's preferred on AMD cpus */};static void__pack_argb(rgb_planar_t* data, void* dest, uint32_t pixels, uint32_t accel){ static const uint32_t alpha[] = { 0xffffffff, 0xffffffff }; uint8_t* buffer = (uint8_t*) dest; uint8_t* r_data = data->r; uint8_t* g_data = data->g; uint8_t* b_data = data->b; uint32_t rest = pixels;#ifdef COMPILE_ASSEMBLY if((accel & MM_MMXEXT) == MM_MMXEXT) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "por (%0), %%mm3\n\t" :: "r" (alpha) : "memory"); while(n--) { __asm__ __volatile__( "prefetchw 320(%3)\n\t" "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [ff r3 ff r2 ff r1 ff r0] */ "punpcklbw %%mm1, %%mm2\n\t" /* mm2 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "movq %%mm2, %%mm4\n\t" /* mm2 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "punpcklwd %%mm0, %%mm4\n\t" /* mm4 = [ff r1 g1 b1 ff r0 g0 b0] */ "movntq %%mm4, (%3)\n\t" "punpckhwd %%mm0, %%mm2\n\t" /* mm2 = [ff r3 g3 b3 ff r2 g2 b2] */ "movntq %%mm2, 8(%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $16, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("sfence; emms\n\t"); } else if((accel & MM_MMX) == MM_MMX) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "por (%0), %%mm3\n\t" :: "r" (alpha) : "memory"); while(n--) { __asm__ __volatile__( "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [ff r3 ff r2 ff r1 ff r0] */ "punpcklbw %%mm1, %%mm2\n\t" /* mm2 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "movq %%mm2, %%mm4\n\t" /* mm2 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "punpcklwd %%mm0, %%mm4\n\t" /* mm4 = [ff r1 g1 b1 ff r0 g0 b0] */ "movq %%mm4, (%3)\n\t" "punpckhwd %%mm0, %%mm2\n\t" /* mm2 = [ff r3 g3 b3 ff r2 g2 b2] */ "movq %%mm2, 8(%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $16, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("emms\n\t"); }#endif while(rest--) { *(buffer) = *(b_data++); *(buffer + 1) = *(g_data++); *(buffer + 2) = *(r_data++); *(buffer + 3) = 0xff; buffer += 4; }}static void__pack_argb1555(rgb_planar_t* data, void* dest, uint32_t pixels, uint32_t accel){ static const uint16_t alpha[] = { 0x8000, 0x8000, 0x8000, 0x8000 }; uint16_t* buffer = (uint16_t*) dest; uint8_t* r_data = data->r; uint8_t* g_data = data->g; uint8_t* b_data = data->b; int32_t rest = pixels;#ifdef COMPILE_ASSEMBLY if((accel & MM_MMXEXT) == MM_MMXEXT) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" "movq (%0), %%mm4\n\t" :: "r" (alpha) : "memory"); while(n--) { __asm__ __volatile__( "prefetchw 320(%3)\n\t" "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [0 r3 0 r2 0 r1 0 r0] */ "punpcklbw %%mm3, %%mm1\n\t" /* mm1 = [0 g3 0 g2 0 g1 0 g0] */ "punpcklbw %%mm3, %%mm2\n\t" /* mm2 = [0 b3 0 b2 0 b1 0 b0] */ "psrlw $3, %%mm0\n\t" /* word = [00000000 000rrrrr] */ "psrlw $3, %%mm1\n\t" /* word = [00000000 000ggggg] */ "psrlw $3, %%mm2\n\t" /* word = [00000000 000bbbbb] */ "psllw $10, %%mm0\n\t" /* word = [0rrrrr00 00000000] */ "psllw $5, %%mm1\n\t" /* word = [000000gg ggg00000] */ "por %%mm2, %%mm1\n\t" /* word = [000000gg gggbbbbb] */ "por %%mm1, %%mm0\n\t" /* word = [0rrrrrgg gggbbbbb] */ "por %%mm4, %%mm0\n\t" /* word = [1rrrrrgg gggbbbbb] */ "movntq %%mm0, (%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $8, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("sfence; emms\n\t"); } else if((accel & MM_MMX) == MM_MMX) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" "movq (%0), %%mm4\n\t" :: "r" (alpha) : "memory"); while(n--) { __asm__ __volatile__( "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [0 r3 0 r2 0 r1 0 r0] */ "punpcklbw %%mm3, %%mm1\n\t" /* mm1 = [0 g3 0 g2 0 g1 0 g0] */ "punpcklbw %%mm3, %%mm2\n\t" /* mm2 = [0 b3 0 b2 0 b1 0 b0] */ "psrlw $3, %%mm0\n\t" /* word = [00000000 000rrrrr] */ "psrlw $3, %%mm1\n\t" /* word = [00000000 000ggggg] */ "psrlw $3, %%mm2\n\t" /* word = [00000000 000bbbbb] */ "psllw $10, %%mm0\n\t" /* word = [0rrrrr00 00000000] */ "psllw $5, %%mm1\n\t" /* word = [000000gg ggg00000] */ "por %%mm2, %%mm1\n\t" /* word = [000000gg gggbbbbb] */ "por %%mm1, %%mm0\n\t" /* word = [0rrrrrgg gggbbbbb] */ "por %%mm4, %%mm0\n\t" /* word = [arrrrrgg gggbbbbb] */ "movq %%mm0, (%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $8, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("emms\n\t"); }#endif while(rest--) { uint16_t r5, g5, b5; r5 = *(r_data++) >> 3; g5 = *(g_data++) >> 3; b5 = *(b_data++) >> 3; *(buffer++) = (alpha[0] | (r5 << 10) | (g5 << 5) | b5); }}static void__pack_rgb32(rgb_planar_t* data, void* dest, uint32_t pixels, uint32_t accel){ uint8_t* buffer = (uint8_t*) dest; uint8_t* r_data = data->r; uint8_t* g_data = data->g; uint8_t* b_data = data->b; uint32_t rest = pixels;#ifdef COMPILE_ASSEMBLY if((accel & MM_MMXEXT) == MM_MMXEXT) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" ::: "memory"); while(n--) { __asm__ __volatile__( "prefetchw 320(%0)\n\t" "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [0 r3 0 r2 0 r1 0 r0] */ "punpcklbw %%mm1, %%mm2\n\t" /* mm2 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "movq %%mm2, %%mm4\n\t" /* mm2 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "punpcklwd %%mm0, %%mm4\n\t" /* mm4 = [0 r1 g1 b1 0 r0 g0 b0] */ "movntq %%mm4, (%3)\n\t" "punpckhwd %%mm0, %%mm2\n\t" /* mm2 = [0 r3 g3 b3 0 r2 g2 b2] */ "movntq %%mm2, 8(%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $16, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("sfence; emms\n\t"); } else if((accel & MM_MMX) == MM_MMX) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" ::: "memory"); while(n--) { __asm__ __volatile__( "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [0 r3 0 r2 0 r1 0 r0] */ "punpcklbw %%mm1, %%mm2\n\t" /* mm2 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "movq %%mm2, %%mm4\n\t" /* mm2 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "punpcklwd %%mm0, %%mm4\n\t" /* mm4 = [0 r1 g1 b1 0 r0 g0 b0] */ "movq %%mm4, (%3)\n\t" "punpckhwd %%mm0, %%mm2\n\t" /* mm2 = [0 r3 g3 b3 0 r2 g2 b2] */ "movq %%mm2, 8(%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $16, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("emms\n\t"); }#endif while(rest--) { *(buffer) = *(b_data++); *(buffer + 1) = *(g_data++); *(buffer + 2) = *(r_data++); buffer += 4; }}static void__pack_rgb24(rgb_planar_t* data, void* dest, uint32_t pixels, uint32_t accel){ uint8_t* buffer = (uint8_t*) dest; uint8_t* r_data = data->r; uint8_t* g_data = data->g; uint8_t* b_data = data->b; uint32_t rest = pixels;/* MMXEXT doesn't speed up here */#ifdef COMPILE_ASSEMBLY if((accel & MM_MMX) == MM_MMX) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -