📄 video_out_rgb.c
字号:
::: "memory"); while(n--) { __asm__ __volatile__( "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm1, %%mm2\n\t" /* mm1 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm3 = [0 r3 0 r2 0 r1 0 r0] */ "movq %%mm2, %%mm4\n\t" /* mm4 = [g3 b3 g2 b2 g1 b1 g0 b0] */ "punpcklwd %%mm0, %%mm4\n\t" /* mm4 = [0 r1 g1 b1 0 r0 g0 b0] */ "movd %%mm4, (%3)\n\t" "punpckhdq %%mm4, %%mm4\n\t" /* mm4 = [0 r1 g1 b1 0 r1 g1 b1] */ "movd %%mm4, 3(%3)\n\t" "punpckhwd %%mm0, %%mm2\n\t" /* mm2 = [0 r3 g3 b3 0 r2 g2 b2] */ "movd %%mm2, 6(%3)\n\t" "punpckhdq %%mm2, %%mm2\n\t" /* mm2 = [0 r3 g3 b3 0 r3 g3 b3] */ "movd %%mm2, 9(%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $12, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("emms\n\t"); }#endif while(rest--) { *(buffer) = *(b_data++); *(buffer + 1) = *(g_data++); *(buffer + 2) = *(r_data++); buffer += 3; }}static void__pack_rgb16(rgb_planar_t* data, void* dest, uint32_t pixels, uint32_t accel){ uint16_t* buffer = (uint16_t*) dest; uint8_t* r_data = data->r; uint8_t* g_data = data->g; uint8_t* b_data = data->b; uint32_t rest = pixels;#ifdef COMPILE_ASSEMBLY if((accel & MM_MMXEXT) == MM_MMXEXT) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" ::: "memory"); while(n--) { __asm__ __volatile__( "prefetchw 320(%3)\n\t" "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [0 r3 0 r2 0 r1 0 r0] */ "punpcklbw %%mm3, %%mm1\n\t" /* mm1 = [0 g3 0 g2 0 g1 0 g0] */ "punpcklbw %%mm3, %%mm2\n\t" /* mm2 = [0 b3 0 b2 0 b1 0 b0] */ "psrlw $3, %%mm0\n\t" /* word = [00000000 000rrrrr] */ "psrlw $2, %%mm1\n\t" /* word = [00000000 00gggggg] */ "psrlw $3, %%mm2\n\t" /* word = [00000000 000bbbbb] */ "psllw $11, %%mm0\n\t" /* word = [rrrrr000 00000000] */ "psllw $5, %%mm1\n\t" /* word = [00000ggg ggg00000] */ "por %%mm2, %%mm1\n\t" /* word = [00000ggg gggbbbbb] */ "por %%mm1, %%mm0\n\t" /* word = [rrrrrggg gggbbbbb] */ "movntq %%mm0, (%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $8, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("sfence; emms\n\t"); } else if((accel & MM_MMX) == MM_MMX) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" ::: "memory"); while(n--) { __asm__ __volatile__( "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [0 r3 0 r2 0 r1 0 r0] */ "punpcklbw %%mm3, %%mm1\n\t" /* mm1 = [0 g3 0 g2 0 g1 0 g0] */ "punpcklbw %%mm3, %%mm2\n\t" /* mm2 = [0 b3 0 b2 0 b1 0 b0] */ "psrlw $3, %%mm0\n\t" /* word = [00000000 000rrrrr] */ "psrlw $2, %%mm1\n\t" /* word = [00000000 00gggggg] */ "psrlw $3, %%mm2\n\t" /* word = [00000000 000bbbbb] */ "psllw $11, %%mm0\n\t" /* word = [rrrrr000 00000000] */ "psllw $5, %%mm1\n\t" /* word = [00000ggg ggg00000] */ "por %%mm2, %%mm1\n\t" /* word = [00000ggg gggbbbbb] */ "por %%mm1, %%mm0\n\t" /* word = [rrrrrggg gggbbbbb] */ "movq %%mm0, (%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $8, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("emms\n\t"); }#endif while(rest--) { uint16_t r16, g16, b16; r16 = *(r_data++) >> 3; g16 = *(g_data++) >> 2; b16 = *(b_data++) >> 3; *(buffer++) = ((r16 << 11) | (g16 << 5) | b16); }}static void__pack_bgra(rgb_planar_t* data, void* dest, uint32_t pixels, uint32_t accel){ static const uint32_t alpha[] = { 0xffffffff, 0xffffffff }; uint8_t* buffer = (uint8_t*) dest; uint8_t* r_data = data->r; uint8_t* g_data = data->g; uint8_t* b_data = data->b; uint32_t rest = pixels;#ifdef COMPILE_ASSEMBLY if((accel & MM_MMXEXT) == MM_MMXEXT) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ while(n--) { __asm__ __volatile__( "prefetchw 320(%3)\n\t" "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "por (%4), %%mm3\n\t" /* mm3 = [ff ff ff ff ff ff ff ff] */ "punpcklbw %%mm0, %%mm3\n\t" /* mm3 = [r3 ff r2 ff r1 ff r0 ff] */ "punpcklbw %%mm2, %%mm1\n\t" /* mm1 = [b3 g3 b2 g2 b1 g1 b0 g0] */ "movq %%mm3, %%mm4\n\t" /* mm4 = [r3 ff r2 ff r1 ff r0 ff] */ "punpcklwd %%mm1, %%mm4\n\t" /* mm4 = [b1 g1 r1 ff b1 g1 r1 ff] */ "movntq %%mm4, (%3)\n\t" "punpckhwd %%mm1, %%mm3\n\t" /* mm3 = [b3 g3 r3 ff b2 g2 r2 ff] */ "movntq %%mm0, 8(%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $16, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "r" (alpha), "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("sfence; emms\n\t"); } else if((accel & MM_MMX) == MM_MMX) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ while(n--) { __asm__ __volatile__( "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "por (%4), %%mm3\n\t" /* mm3 = [ff ff ff ff ff ff ff ff] */ "punpcklbw %%mm0, %%mm3\n\t" /* mm3 = [r3 ff r2 ff r1 ff r0 ff] */ "punpcklbw %%mm2, %%mm1\n\t" /* mm1 = [b3 g3 b2 g2 b1 g1 b0 g0] */ "movq %%mm3, %%mm4\n\t" /* mm4 = [r3 ff r2 ff r1 ff r0 ff] */ "punpcklwd %%mm1, %%mm4\n\t" /* mm4 = [b1 g1 r1 ff b1 g1 r1 ff] */ "movq %%mm4, (%3)\n\t" "punpckhwd %%mm1, %%mm3\n\t" /* mm3 = [b3 g3 r3 ff b2 g2 r2 ff] */ "movq %%mm0, 8(%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $16, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "r" (alpha), "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("emms\n\t"); }#endif while(rest--) { *(buffer) = 0xff; *(buffer + 1) = *(r_data++); *(buffer + 2) = *(g_data++); *(buffer + 3) = *(b_data++); buffer += 4; }}static void__pack_bgra5551(rgb_planar_t* data, void* dest, uint32_t pixels, uint32_t accel){ static const uint16_t alpha[] = { 1, 1, 1, 1 }; uint16_t* buffer = (uint16_t*) dest; uint8_t* r_data = data->r; uint8_t* g_data = data->g; uint8_t* b_data = data->b; uint32_t rest = pixels;#ifdef COMPILE_ASSEMBLY if((accel & MM_MMXEXT) == MM_MMXEXT) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" ::: "memory"); while(n--) { __asm__ __volatile__( "prefetchw 320(%3)\n\t" "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [0 r3 0 r2 0 r1 0 r0] */ "punpcklbw %%mm3, %%mm1\n\t" /* mm1 = [0 g3 0 g2 0 g1 0 g0] */ "punpcklbw %%mm3, %%mm2\n\t" /* mm2 = [0 b3 0 b2 0 b1 0 b0] */ "psrlw $2, %%mm0\n\t" /* word = [00000000 00rrrrrr] */ "psrlw $3, %%mm1\n\t" /* word = [00000000 000ggggg] */ "psrlw $3, %%mm2\n\t" /* word = [00000000 000bbbbb] */ "psllw $11, %%mm2\n\t" /* word = [bbbbb000 00000000] */ "psllw $6, %%mm1\n\t" /* word = [00000ggg gg000000] */ "por (%4), %%mm0\n\t" /* word = [00000000 00rrrrra] */ "por %%mm2, %%mm1\n\t" /* word = [bbbbbggg gg000000] */ "por %%mm1, %%mm0\n\t" /* word = [bbbbbggg ggrrrrra] */ "movntq %%mm0, (%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $8, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "r" (alpha), "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("sfence; emms\n\t"); } else if((accel & MM_MMX) == MM_MMX) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" ::: "memory"); while(n--) { __asm__ __volatile__( "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm3, %%mm0\n\t" /* mm0 = [0 r3 0 r2 0 r1 0 r0] */ "punpcklbw %%mm3, %%mm1\n\t" /* mm1 = [0 g3 0 g2 0 g1 0 g0] */ "punpcklbw %%mm3, %%mm2\n\t" /* mm2 = [0 b3 0 b2 0 b1 0 b0] */ "psrlw $2, %%mm0\n\t" /* word = [00000000 00rrrrrr] */ "psrlw $3, %%mm1\n\t" /* word = [00000000 000ggggg] */ "psrlw $3, %%mm2\n\t" /* word = [00000000 000bbbbb] */ "psllw $11, %%mm2\n\t" /* word = [bbbbb000 00000000] */ "psllw $6, %%mm1\n\t" /* word = [00000ggg gg000000] */ "por (%4), %%mm0\n\t" /* word = [00000000 00rrrrra] */ "por %%mm2, %%mm1\n\t" /* word = [bbbbbggg gg000000] */ "por %%mm1, %%mm0\n\t" /* word = [bbbbbggg ggrrrrra] */ "movq %%mm0, (%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $8, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "r" (alpha), "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("emms\n\t"); }#endif while(rest--) { uint16_t r5, g5, b5; r5 = *(r_data++) >> 2; g5 = *(g_data++) >> 3; b5 = *(b_data++) >> 3; *(buffer++) = ((b5 << 11) | (g5 << 6) | r5 | 1); }}static void__pack_bgr32(rgb_planar_t* data, void* dest, uint32_t pixels, uint32_t accel){ uint8_t* buffer = (uint8_t*) dest; uint8_t* r_data = data->r; uint8_t* g_data = data->g; uint8_t* b_data = data->b; uint32_t rest = pixels;#ifdef COMPILE_ASSEMBLY if((accel & MM_MMXEXT) == MM_MMXEXT) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" ::: "memory"); while(n--) { __asm__ __volatile__( "prefetchw 320(%0)\n\t" "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm1, %%mm0\n\t" /* mm0 = [g3 r3 g2 r2 g1 r1 g0 r0] */ "punpcklbw %%mm3, %%mm2\n\t" /* mm2 = [b3 0 b2 0 b1 0 b0 0] */ "movq %%mm0, %%mm4\n\t" /* mm4 = [g3 r3 g2 r2 g1 r1 g0 r0] */ "punpcklwd %%mm2, %%mm4\n\t" /* mm4 = [0 b1 g1 r1 0 b0 g0 r0] */ "movntq %%mm4, (%3)\n\t" "punpckhwd %%mm2, %%mm0\n\t" /* mm0 = [0 b3 g3 r3 0 b2 g2 r2] */ "movntq %%mm0, 8(%3)\n\t" "addl $4, %0\n\t" "addl $4, %1\n\t" "addl $4, %2\n\t" "addl $16, %3\n\t" : "=&r" (r_data), "=&r" (g_data), "=&r" (b_data), "=&r" (buffer) : "0" (r_data), "1" (g_data), "2" (b_data), "3" (buffer) : "memory"); } __asm__ __volatile__("sfence; emms\n\t"); } else if((accel & MM_MMX) == MM_MMX) { uint32_t n = pixels >> 2; /* (width * height) / 4 */ rest = pixels & 3; /* pixels - (n * 4) */ __asm__ __volatile__( "pxor %%mm3, %%mm3\n\t" ::: "memory"); while(n--) { __asm__ __volatile__( "movd (%0), %%mm0\n\t" /* mm0 = [0 0 0 0 r3 r2 r1 r0] */ "movd (%1), %%mm1\n\t" /* mm1 = [0 0 0 0 g3 g2 g1 g0] */ "movd (%2), %%mm2\n\t" /* mm2 = [0 0 0 0 b3 b2 b1 b0] */ "punpcklbw %%mm1, %%mm0\n\t" /* mm0 = [g3 r3 g2 r2 g1 r1 g0 r0] */ "punpcklbw %%mm3, %%mm2\n\t" /* mm2 = [b3 0 b2 0 b1 0 b0 0] */ "movq %%mm0, %%mm4\n\t" /* mm4 = [g3 r3 g2 r2 g1 r1 g0 r0] */ "punpcklwd %%mm2, %%mm4\n\t" /* mm4 = [0 b1 g1 r1 0 b0 g0 r0] */ "movq %%mm4, (%3)\n\t"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -