📄 i420_rgb16.c
字号:
p_pic_start = p_pic; for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; ) { SSE2_CALL ( SSE2_INIT_16_ALIGNED SSE2_YUV_MUL SSE2_YUV_ADD SSE2_UNPACK_15_ALIGNED ); p_y += 16; p_u += 8; p_v += 8; p_buffer += 16; } /* Here we do some unaligned reads and duplicate conversions, but * at least we have all the pixels */ if( i_rewind ) { p_y -= i_rewind; p_u -= i_rewind >> 1; p_v -= i_rewind >> 1; p_buffer -= i_rewind; SSE2_CALL ( SSE2_INIT_16_UNALIGNED SSE2_YUV_MUL SSE2_YUV_ADD SSE2_UNPACK_15_UNALIGNED ); p_y += 16; p_u += 8; p_v += 8; } SCALE_WIDTH; SCALE_HEIGHT( 420, 2 ); p_y += i_source_margin; if( i_y % 2 ) { p_u += i_source_margin_c; p_v += i_source_margin_c; } p_buffer = b_hscale ? p_buffer_start : p_pic; } } else { /* use slower SSE2 unaligned fetch and store */ for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ ) { p_pic_start = p_pic; p_buffer = b_hscale ? p_buffer_start : p_pic; for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; ) { SSE2_CALL ( SSE2_INIT_16_UNALIGNED SSE2_YUV_MUL SSE2_YUV_ADD SSE2_UNPACK_15_UNALIGNED ); p_y += 16; p_u += 8; p_v += 8; p_buffer += 16; } /* Here we do some unaligned reads and duplicate conversions, but * at least we have all the pixels */ if( i_rewind ) { p_y -= i_rewind; p_u -= i_rewind >> 1; p_v -= i_rewind >> 1; p_buffer -= i_rewind; SSE2_CALL ( SSE2_INIT_16_UNALIGNED SSE2_YUV_MUL SSE2_YUV_ADD SSE2_UNPACK_15_UNALIGNED ); p_y += 16; p_u += 8; p_v += 8; } SCALE_WIDTH; SCALE_HEIGHT( 420, 2 ); p_y += i_source_margin; if( i_y % 2 ) { p_u += i_source_margin_c; p_v += i_source_margin_c; } p_buffer = b_hscale ? p_buffer_start : p_pic; } } /* make sure all SSE2 stores are visible thereafter */ SSE2_END;#else // defined (MODULE_NAME_IS_i420_rgb_mmx) if( p_filter->fmt_in.video.i_width & 7 ) { i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 ); } else { i_rewind = 0; } for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ ) { p_pic_start = p_pic; p_buffer = b_hscale ? p_buffer_start : p_pic; for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; ) { MMX_CALL ( MMX_INIT_16 MMX_YUV_MUL MMX_YUV_ADD MMX_UNPACK_15 ); p_y += 8; p_u += 4; p_v += 4; p_buffer += 8; } /* Here we do some unaligned reads and duplicate conversions, but * at least we have all the pixels */ if( i_rewind ) { p_y -= i_rewind; p_u -= i_rewind >> 1; p_v -= i_rewind >> 1; p_buffer -= i_rewind; MMX_CALL ( MMX_INIT_16 MMX_YUV_MUL MMX_YUV_ADD MMX_UNPACK_15 ); p_y += 8; p_u += 4; p_v += 4; p_buffer += 8; } SCALE_WIDTH; SCALE_HEIGHT( 420, 2 ); p_y += i_source_margin; if( i_y % 2 ) { p_u += i_source_margin_c; p_v += i_source_margin_c; } } /* re-enable FPU registers */ MMX_END;#endif}void I420_R5G6B5( filter_t *p_filter, picture_t *p_src, picture_t *p_dest ){ /* We got this one from the old arguments */ uint16_t *p_pic = (uint16_t*)p_dest->p->p_pixels; uint8_t *p_y = p_src->Y_PIXELS; uint8_t *p_u = p_src->U_PIXELS; uint8_t *p_v = p_src->V_PIXELS; bool b_hscale; /* horizontal scaling type */ unsigned int i_vscale; /* vertical scaling type */ unsigned int i_x, i_y; /* horizontal and vertical indexes */ int i_right_margin; int i_rewind; int i_scale_count; /* scale modulo counter */ int i_chroma_width = p_filter->fmt_in.video.i_width / 2; /* chroma width */ uint16_t * p_pic_start; /* beginning of the current line for copy */ /* Conversion buffer pointer */ uint16_t * p_buffer_start = (uint16_t*)p_filter->p_sys->p_buffer; uint16_t * p_buffer; /* Offset array pointer */ int * p_offset_start = p_filter->p_sys->p_offset; int * p_offset; const int i_source_margin = p_src->p[0].i_pitch - p_src->p[0].i_visible_pitch; const int i_source_margin_c = p_src->p[1].i_pitch - p_src->p[1].i_visible_pitch; i_right_margin = p_dest->p->i_pitch - p_dest->p->i_visible_pitch; /* Rule: when a picture of size (x1,y1) with aspect ratio r1 is rendered * on a picture of size (x2,y2) with aspect ratio r2, if x1 grows to x1' * then y1 grows to y1' = x1' * y2/x2 * r2/r1 */ SetOffset( p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height, p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height, &b_hscale, &i_vscale, p_offset_start ); /* * Perform conversion */ i_scale_count = ( i_vscale == 1 ) ? p_filter->fmt_out.video.i_height : p_filter->fmt_in.video.i_height;#if defined (MODULE_NAME_IS_i420_rgb_sse2) if( p_filter->fmt_in.video.i_width & 15 ) { i_rewind = 16 - ( p_filter->fmt_in.video.i_width & 15 ); } else { i_rewind = 0; } /* ** SSE2 128 bits fetch/store instructions are faster ** if memory access is 16 bytes aligned */ p_buffer = b_hscale ? p_buffer_start : p_pic; if( 0 == (15 & (p_src->p[Y_PLANE].i_pitch| p_dest->p->i_pitch| ((intptr_t)p_y)| ((intptr_t)p_buffer))) ) { /* use faster SSE2 aligned fetch and store */ for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ ) { p_pic_start = p_pic; for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; ) { SSE2_CALL ( SSE2_INIT_16_ALIGNED SSE2_YUV_MUL SSE2_YUV_ADD SSE2_UNPACK_16_ALIGNED ); p_y += 16; p_u += 8; p_v += 8; p_buffer += 16; } /* Here we do some unaligned reads and duplicate conversions, but * at least we have all the pixels */ if( i_rewind ) { p_y -= i_rewind; p_u -= i_rewind >> 1; p_v -= i_rewind >> 1; p_buffer -= i_rewind; SSE2_CALL ( SSE2_INIT_16_UNALIGNED SSE2_YUV_MUL SSE2_YUV_ADD SSE2_UNPACK_16_UNALIGNED ); p_y += 16; p_u += 8; p_v += 8; } SCALE_WIDTH; SCALE_HEIGHT( 420, 2 ); p_y += i_source_margin; if( i_y % 2 ) { p_u += i_source_margin_c; p_v += i_source_margin_c; } p_buffer = b_hscale ? p_buffer_start : p_pic; } } else { /* use slower SSE2 unaligned fetch and store */ for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ ) { p_pic_start = p_pic; p_buffer = b_hscale ? p_buffer_start : p_pic; for ( i_x = p_filter->fmt_in.video.i_width/16; i_x--; ) { SSE2_CALL( SSE2_INIT_16_UNALIGNED SSE2_YUV_MUL SSE2_YUV_ADD SSE2_UNPACK_16_UNALIGNED ); p_y += 16; p_u += 8; p_v += 8; p_buffer += 16; } /* Here we do some unaligned reads and duplicate conversions, but * at least we have all the pixels */ if( i_rewind ) { p_y -= i_rewind; p_u -= i_rewind >> 1; p_v -= i_rewind >> 1; p_buffer -= i_rewind; SSE2_CALL( SSE2_INIT_16_UNALIGNED SSE2_YUV_MUL SSE2_YUV_ADD SSE2_UNPACK_16_UNALIGNED ); p_y += 16; p_u += 8; p_v += 8; } SCALE_WIDTH; SCALE_HEIGHT( 420, 2 ); p_y += i_source_margin; if( i_y % 2 ) { p_u += i_source_margin_c; p_v += i_source_margin_c; } p_buffer = b_hscale ? p_buffer_start : p_pic; } } /* make sure all SSE2 stores are visible thereafter */ SSE2_END;#else // defined (MODULE_NAME_IS_i420_rgb_mmx) if( p_filter->fmt_in.video.i_width & 7 ) { i_rewind = 8 - ( p_filter->fmt_in.video.i_width & 7 ); } else { i_rewind = 0; } for( i_y = 0; i_y < p_filter->fmt_in.video.i_height; i_y++ ) { p_pic_start = p_pic; p_buffer = b_hscale ? p_buffer_start : p_pic; for ( i_x = p_filter->fmt_in.video.i_width / 8; i_x--; ) { MMX_CALL ( MMX_INIT_16 MMX_YUV_MUL MMX_YUV_ADD MMX_UNPACK_16 ); p_y += 8; p_u += 4; p_v += 4; p_buffer += 8; } /* Here we do some unaligned reads and duplicate conversions, but * at least we have all the pixels */ if( i_rewind ) { p_y -= i_rewind; p_u -= i_rewind >> 1; p_v -= i_rewind >> 1; p_buffer -= i_rewind; MMX_CALL ( MMX_INIT_16 MMX_YUV_MUL MMX_YUV_ADD MMX_UNPACK_16 ); p_y += 8;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -