📄 overlay.c
字号:
{ (16 << 12) / 16, 2, 0, 0 }, { (16 << 12) / 12, 2, 0, 1 }, // mode 4, 1, 0 (as used by YUV12) is impossible { (16 << 12) / 8, 4, 1, 1 }, { (16 << 12) / 6, 4, 1, 2 }, { (16 << 12) / 4, 4, 2, 2 }, { (16 << 12) / 3, 4, 2, 3 }, { (16 << 12) / 2, 4, 3, 3 }, { (16 << 12) / 1, 4, 4, 4 }};static hscale_factor scale_YUV12[] = { { (16 << 12) / 16, 2, 0, 0 }, { (16 << 12) / 12, 4, 1, 0 }, { (16 << 12) / 12, 2, 0, 1 }, { (16 << 12) / 8, 4, 1, 1 }, { (16 << 12) / 6, 4, 1, 2 }, { (16 << 12) / 4, 4, 2, 2 }, { (16 << 12) / 3, 4, 2, 3 }, { (16 << 12) / 2, 4, 3, 3 }, { (int)((16 << 12) / 1.5), 4, 3, 4 }, { (int)((16 << 12) / 1.0), 4, 4, 4 }, { (int)((16 << 12) / 0.75), 4, 4, 5 }, { (int)((16 << 12) / 0.5), 4, 5, 5 }};#define min3( a, b, c ) (min( (a), min( (b), (c) )))static hscale_factor scale_YUV9[] = { { min3( (16 << 12) / 12, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 2, 0, 0 }, { min3( (16 << 12) / 8, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 4, 1, 0 }, { min3( (16 << 12) / 10, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 2, 0, 1 }, { min3( (16 << 12) / 6, (3 << 12) * 1, (2 << 12) * 4 * 1 ), 4, 1, 1 }, { min3( (16 << 12) / 5, (3 << 12) * 1, (2 << 12) * 4 * 2 ), 4, 1, 2 }, { min3( (16 << 12) / 3, (3 << 12) * 2, (2 << 12) * 4 * 2 ), 4, 2, 2 }, { min3( (int)((16 << 12) / 2.5), (3 << 12) * 1, (2 << 12) * 4 * 4 ), 4, 2, 3 }, // probably, it should be (3 << 12) * 2 { min3( (int)((16 << 12) / 1.5), (3 << 12) * 4, (2 << 12) * 4 * 4 ), 4, 3, 3 }, { min3( (int)((16 << 12) / 0.75), (3 << 12) * 8, (2 << 12) * 4 * 8 ), 4, 4, 4 }, { min3( (int)((16 << 12) / 0.625), (3 << 12) * 8, (2 << 12) * 4 * 16 ), 4, 4, 5 }, { min3( (int)((16 << 12) / 0.375), (3 << 12) * 16, (2 << 12) * 4 * 16 ), 4, 5, 5 }};// parameters of an overlay colour spacetypedef struct { uint8 bpp_shift; // log2( bytes per pixel (main plain) ) uint8 bpuv_shift; // log2( bytes per pixel (uv-plane) ); // if there is one plane only: bpp=bpuv uint8 num_planes; // number of planes uint8 h_uv_sub_sample_shift; // log2( horizontal pixels per uv pair ) uint8 v_uv_sub_sample_shift; // log2( vertical pixels per uv pair ) hscale_factor *factors; // scaling/filter table uint8 num_factors;} space_params;static space_params space_params_table[16] = { { 0, 0, 0, 0, 0, NULL, 0 }, // reserved { 0, 0, 0, 0, 0, NULL, 0 }, // reserved { 0, 0, 0, 0, 0, NULL, 0 }, // reserved { 1, 1, 1, 0, 0, scale_RGB16, count_of( scale_RGB16 ) }, // RGB15 { 1, 1, 1, 0, 0, scale_RGB16, count_of( scale_RGB16 ) }, // RGB16 { 0, 0, 0, 0, 0, NULL, 0 }, // reserved { 2, 2, 1, 0, 0, scale_RGB32, count_of( scale_RGB32 ) }, // RGB32 { 0, 0, 0, 0, 0, NULL, 0 }, // reserved { 0, 0, 0, 0, 0, NULL, 0 }, // reserved { 0, 0, 3, 2, 2, scale_YUV9, count_of( scale_YUV9 ) }, // YUV9 { 0, 0, 3, 1, 1, scale_YUV12, count_of( scale_YUV12 ) }, // YUV12, three-plane { 1, 1, 1, 1, 0, scale_YUV, count_of( scale_YUV ) }, // VYUY422 { 1, 1, 1, 1, 0, scale_YUV, count_of( scale_YUV ) }, // YVYU422 { 0, 1, 2, 1, 1, scale_YUV12, count_of( scale_YUV12 ) }, // YUV12, two-plane { 0, 1, 2, 1, 1, NULL, 0 }, // ??? { 0, 0, 0, 0, 0, NULL, 0 } // reserved};// get appropriate scaling/filter parametersstatic hscale_factor *getHScaleFactor( space_params *params, uint32 src_left, uint32 src_right, uint32 *h_inc ){ uint words_per_p1_line, words_per_p23_line, max_words_per_line; bool p1_4tap_allowed, p23_4tap_allowed; uint i; uint num_factors; hscale_factor *factors; SHOW_FLOW0( 3, "" ); // check whether fifo is large enough to feed vertical 4-tap-filter words_per_p1_line = ceilShiftDiv( (src_right - 1) << params->bpp_shift, 4 ) - ((src_left << params->bpp_shift) >> 4) + 1; words_per_p23_line = ceilShiftDiv( (src_right - 1) << params->bpuv_shift, 4 ) - ((src_left << params->bpuv_shift) >> 4) + 1; // overlay buffer for one line; this value is probably // higher on newer Radeons (or smaller on older Radeons?) max_words_per_line = 96; switch( params->num_planes ) { case 3: p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2; p23_4tap_allowed = words_per_p23_line < max_words_per_line / 4; break; case 2: p1_4tap_allowed = words_per_p1_line < max_words_per_line / 2; p23_4tap_allowed = words_per_p23_line < max_words_per_line / 2; break; case 1: default: p1_4tap_allowed = p23_4tap_allowed = words_per_p1_line < max_words_per_line; break; } SHOW_FLOW( 3, "p1_4tap_allowed=%d, p23_4t_allowed=%d", (int)p1_4tap_allowed, (int)p23_4tap_allowed ); // search for proper scaling/filter entry factors = params->factors; num_factors = params->num_factors; if( factors == NULL || num_factors == 0 ) return NULL; for( i = 0; i < num_factors; ++i, ++factors ) { if( *h_inc <= factors->max_scale && (factors->p1_step_by > 0 || p1_4tap_allowed) && (factors->p23_step_by > 0 || p23_4tap_allowed)) break; } if( i == num_factors ) { // overlay is asked to be scaled down more than allowed, // so use least scaling factor supported --factors; *h_inc = factors->max_scale; } SHOW_FLOW( 3, "group_size=%d, p1_step_by=%d, p23_step_by=%d", factors->group_size, factors->p1_step_by, factors->p23_step_by ); return factors;} #define I2FF( a, shift ) ((uint32)((a) * (1 << (shift))))// show overlay on screenstatic status_t Radeon_ShowOverlay( accelerator_info *ai, int crtc_idx ){ virtual_card *vc = ai->vc; shared_info *si = ai->si; vuint8 *regs = ai->regs; overlay_info *overlay = &si->pending_overlay; overlay_buffer_node *node = overlay->on; crtc_info *crtc = &si->crtc[crtc_idx]; uint32 ecp_div; uint32 v_inc, h_inc; uint32 src_v_inc, src_h_inc; uint32 src_left, src_top, src_right, src_bottom; int32 dest_left, dest_top, dest_right, dest_bottom; uint32 offset; uint32 tmp; uint32 p1_h_accum_init, p23_h_accum_init, p1_v_accum_init, p23_v_accum_init; uint32 p1_active_lines, p23_active_lines; hscale_factor *factors; space_params *params; uint32 p1_h_inc, p23_h_inc; uint32 p1_x_start, p1_x_end; uint32 p23_x_start, p23_x_end; /*uint32 buffer[20*2]; uint idx = 0;*/ SHOW_FLOW0( 0, "" ); Radeon_SetColourKey( ai, &overlay->ow ); // overlay unit can only handle up to 175 MHz; if pixel clock is higher, // only every second pixel is handled // (this devider is gets written into PLL by InitOverlay, // so we don't need to do it ourself) if( crtc->mode.timing.pixel_clock < 175000 ) ecp_div = 0; else ecp_div = 1; // scaling is independant of clipping, get this first { uint32 src_width, src_height; src_width = overlay->ov.width; src_height = overlay->ov.height; // this is for graphics card v_inc = (src_height << 20) / overlay->ow.height; h_inc = (src_width << (12 + ecp_div)) / overlay->ow.width; // this is for us src_v_inc = (src_height << 16) / overlay->ow.height; src_h_inc = (src_width << 16) / overlay->ow.width; } // calculate unclipped position/size // TBD: I assume that overlay_window.offset_xyz is only a hint where // no overlay is visible; another interpretation were to zoom // the overlay so it fits into remaining space src_left = (overlay->ov.h_start << 16) + overlay->ow.offset_left * src_h_inc; src_top = (overlay->ov.v_start << 16) + overlay->ow.offset_top * src_v_inc; src_right = ((overlay->ov.h_start + overlay->ov.width) << 16) - overlay->ow.offset_right * src_h_inc; src_bottom = ((overlay->ov.v_start + overlay->ov.height) << 16) - overlay->ow.offset_top * src_v_inc; dest_left = overlay->ow.h_start + overlay->ow.offset_left; dest_top = overlay->ow.v_start + overlay->ow.offset_top; dest_right = overlay->ow.h_start + overlay->ow.width - overlay->ow.offset_right; dest_bottom = overlay->ow.v_start + overlay->ow.height - overlay->ow.offset_bottom; SHOW_FLOW( 3, "ow: h=%d, v=%d, width=%d, height=%d", overlay->ow.h_start, overlay->ow.v_start, overlay->ow.width, overlay->ow.height ); SHOW_FLOW( 3, "offset_left=%d, offset_right=%d, offset_top=%d, offset_bottom=%d", overlay->ow.offset_left, overlay->ow.offset_right, overlay->ow.offset_top, overlay->ow.offset_bottom ); // apply virtual screen dest_left -= vc->mode.h_display_start + crtc->rel_x; dest_top -= vc->mode.v_display_start + crtc->rel_y; dest_right -= vc->mode.h_display_start + crtc->rel_x; dest_bottom -= vc->mode.v_display_start + crtc->rel_y; // clip to visible area if( dest_left < 0 ) { src_left += -dest_left * src_h_inc; dest_left = 0; } if( dest_top < 0 ) { src_top += -dest_top * src_v_inc; dest_top = 0; } SHOW_FLOW( 3, "mode: w=%d, h=%d", crtc->mode.timing.h_display, crtc->mode.timing.v_display ); if( dest_right > crtc->mode.timing.h_display ) dest_right = crtc->mode.timing.h_display; if( dest_bottom > crtc->mode.timing.v_display ) dest_bottom = crtc->mode.timing.v_display; SHOW_FLOW( 3, "src=(%d, %d, %d, %d)", src_left, src_top, src_right, src_bottom ); SHOW_FLOW( 3, "dest=(%d, %d, %d, %d)", dest_left, dest_top, dest_right, dest_bottom ); // especially with multi-screen modes the overlay may not be on screen at all if( dest_left >= dest_right || dest_top >= dest_bottom || src_left >= src_right || src_top >= src_bottom ) { Radeon_TempHideOverlay( ai ); goto done; } // let's calculate all those nice register values SHOW_FLOW( 3, "ati_space=%d", node->ati_space ); params = &space_params_table[node->ati_space]; // choose proper scaler { factors = getHScaleFactor( params, src_left >> 16, src_right >> 16, &h_inc ); if( factors == NULL ) return B_ERROR; p1_h_inc = factors->p1_step_by > 0 ? h_inc >> (factors->p1_step_by - 1) : h_inc; p23_h_inc = (factors->p23_step_by > 0 ? h_inc >> (factors->p23_step_by - 1) : h_inc) >> params->h_uv_sub_sample_shift; SHOW_FLOW( 3, "p1_h_inc=%x, p23_h_inc=%x", p1_h_inc, p23_h_inc ); } // get register value for start/end position of overlay image (pixel-precise only) { uint32 p1_step_size, p23_step_size; uint32 p1_left, p1_right, p1_width; uint32 p23_left, p23_right, p23_width; p1_left = src_left >> 16; p1_right = src_right >> 16; p1_width = p1_right - p1_left; p1_step_size = factors->p1_step_by > 0 ? (1 << (factors->p1_step_by - 1)) : 1; p1_x_start = p1_left % (16 >> params->bpp_shift); p1_x_end = ((p1_x_start + p1_width - 1) / p1_step_size) * p1_step_size; SHOW_FLOW( 3, "p1_x_start=%d, p1_x_end=%d", p1_x_start, p1_x_end ); p23_left = (src_left >> 16) >> params->h_uv_sub_sample_shift; p23_right = (src_right >> 16) >> params->h_uv_sub_sample_shift; p23_width = p23_right - p23_left; p23_step_size = factors->p23_step_by > 0 ? (1 << (factors->p23_step_by - 1)) : 1; // if resolution of Y and U/V differs but YUV are stored in one // plane then UV alignment depends on Y data, therefore the hack // (you are welcome to replace this with some cleaner code ;) p23_x_start = p23_left % ((16 >> params->bpuv_shift) / (node->ati_space == 11 || node->ati_space == 12 ? 2 : 1)); p23_x_end = (int)((p23_x_start + p23_width - 1) / p23_step_size) * p23_step_size; SHOW_FLOW( 3, "p23_x_start=%d, p23_x_end=%d", p23_x_start, p23_x_end ); // get memory location of first word to be read by scaler // (save relative offset for fast update) si->active_overlay.rel_offset = (src_top >> 16) * node->buffer.bytes_per_row + ((p1_left << params->bpp_shift) & ~0xf); offset = node->mem_offset + si->active_overlay.rel_offset; SHOW_FLOW( 3, "rel_offset=%x", si->active_overlay.rel_offset ); } // get active lines for scaler // (we could add additional blank lines for DVD letter box mode, // but this is not supported by API; additionally, this only makes // sense if want to put subtitles onto the black border, which is // supported neither) { uint16 int_top, int_bottom; int_top = src_top >> 16; int_bottom = (src_bottom >> 16); p1_active_lines = int_bottom - int_top - 1; p23_active_lines = ceilShiftDiv( int_bottom - 1, params->v_uv_sub_sample_shift ) - (int_top >> params->v_uv_sub_sample_shift); SHOW_FLOW( 3, "p1_active_lines=%d, p23_active_lines=%d", p1_active_lines, p23_active_lines ); } // if picture is stretched for flat panel, we need to scale all // vertical values accordingly // TBD: there is no description at all concerning this, so v_accum_init may // need to be initialized based on original value { if( (crtc->active_displays & (dd_lvds | dd_dvi)) != 0 ) { uint64 v_ratio; // convert 32.32 format to 16.16 format; else we // cannot multiply two fixed point values without // overflow v_ratio = si->flatpanels[crtc->flatpanel_port].v_ratio >> (FIX_SHIFT - 16); v_inc = (v_inc * v_ratio) >> 16; } SHOW_FLOW( 3, "v_inc=%x", v_inc );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -