📄 r128_accel.c
字号:
x11perf -seg1            2960000.0/sec 2990000.0/sec
x11perf -copyplane100       4400.0/sec    6700.0/sec
x11perf -putimagexy100       138.0/sec     191.0/sec
*/

/*
 * Program the drawing engine for a scanline CPU-to-screen colour expansion.
 *
 *   pScrn     - screen whose R128 private record and MMIO mapping are used
 *   fg, bg    - written to the source foreground / background colour
 *               registers; bg == -1 selects the MONO_FG_LA source datatype
 *               instead of MONO_FG_BG
 *   rop       - index into R128_ROP[]; the entry's .rop field is merged into
 *               the master control word
 *   planemask - written to the write-mask register
 *
 * Four FIFO entries are reserved for the four register writes below.
 */
static void R128SetupForScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
                                                           int fg, int bg,
                                                           int rop,
                                                           unsigned int
                                                           planemask)
{
    R128InfoPtr    info     = R128PTR(pScrn);
    /* Not referenced directly; presumably consumed by the OUTREG macro. */
    unsigned char *R128MMIO = info->MMIO;
    int            src_datatype;
    /* Only little-endian builds set the LSB-to-MSB byte order flag. */
#if X_BYTE_ORDER == X_LITTLE_ENDIAN
    const int      host_bit_order = R128_GMC_BYTE_LSB_TO_MSB;
#else /* X_BYTE_ORDER == X_BIG_ENDIAN */
    const int      host_bit_order = 0;
#endif

    if (bg == -1) {
        src_datatype = R128_GMC_SRC_DATATYPE_MONO_FG_LA;
    } else {
        src_datatype = R128_GMC_SRC_DATATYPE_MONO_FG_BG;
    }

    R128WaitForFifo(pScrn, 4);

    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
                                     | R128_GMC_DST_CLIPPING
                                     | R128_GMC_BRUSH_NONE
                                     | src_datatype
                                     | R128_ROP[rop].rop
                                     | host_bit_order
                                     | R128_DP_SRC_SOURCE_HOST_DATA));
    OUTREG(R128_DP_WRITE_MASK,   planemask);
    OUTREG(R128_DP_SRC_FRGD_CLR, fg);
    OUTREG(R128_DP_SRC_BKGD_CLR, bg);
}

/* Subsequent XAA indirect CPU-to-screen color expansion. This is only called once for each rectangle. 
*/

/*
 * Per-rectangle setup for scanline colour expansion.
 *
 *   x, y, w, h - destination rectangle
 *   skipleft   - offset added to x to form the left clip edge
 *
 * Records the scanline count and the number of 32-bit words per scanline
 * ((w + 31) >> 5) in the driver record, selects the scratch buffer, and
 * programs the scissor and destination registers.  The destination width
 * is rounded up to a multiple of 32 and the scissor rectangle trims it back
 * to [x + skipleft, x + w).
 */
static void R128SubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
                                                             int x, int y,
                                                             int w, int h,
                                                             int skipleft)
{
    R128InfoPtr    info       = R128PTR(pScrn);
    /* Not referenced directly; presumably consumed by OUTREG/ADDRREG. */
    unsigned char *R128MMIO   = info->MMIO;
    int            clip_left  = x + skipleft;
    int            clip_right = x + w;
    int            top_left;
    int            bottom_right;
    int            dst_origin;
    int            dst_extent;

    info->scanline_h     = h;
    info->scanline_words = (w + 31) >> 5;

#if 0
    /* Seems as though the Rage128's doesn't like blitting directly
     * as we must be overwriting something too quickly, therefore we
     * render to the buffer first and then blit */
    if ((info->scanline_words * h) <= 9) {
        /* Turn on direct for less than 9 dword colour expansion */
        info->scratch_buffer[0] =
            (unsigned char *)(ADDRREG(R128_HOST_DATA_LAST)
                              - (info->scanline_words - 1));
        info->scanline_direct   = 1;
    } else
#endif
    {
        /* Use indirect for anything else */
        info->scratch_buffer[0] = info->scratch_save;
        info->scanline_direct   = 0;
    }

    /* At 24bpp the clip edges are scaled by three. */
    if (pScrn->bitsPerPixel == 24) {
        clip_left  *= 3;
        clip_right *= 3;
    }

    top_left     = (y << 16) | (clip_left & 0xffff);
    bottom_right = ((y + h - 1) << 16) | ((clip_right - 1) & 0xffff);
    dst_origin   = (y << 16) | (x & 0xffff);
    /* Have to pad the width here and use clipping engine */
    dst_extent   = (h << 16) | ((w + 31) & ~31);

    /* Four register writes, plus the data words when the direct path is on. */
    R128WaitForFifo(pScrn, 4 + (info->scanline_direct ?
                                (info->scanline_words * h) : 0) );

    OUTREG(R128_SC_TOP_LEFT,      top_left);
    OUTREG(R128_SC_BOTTOM_RIGHT,  bottom_right);
    OUTREG(R128_DST_Y_X,          dst_origin);
    OUTREG(R128_DST_HEIGHT_WIDTH, dst_extent);
}

/* Subsequent XAA indirect CPU-to-screen color expansion. This is called once for each scanline. 
*/

/*
 * Copy one scanline of scratch_buffer[bufno] to the host data registers.
 *
 * Returns immediately when scanline_direct is set.  Otherwise the remaining
 * scanline count is decremented and the scanline's words are written in
 * chunks: full chunks of eight go to consecutive registers starting at
 * HOST_DATA0; the final chunk (eight words or fewer) is positioned so that
 * its last word lands on HOST_DATA7, or on HOST_DATA_LAST when this was the
 * final scanline of the rectangle.
 */
static void R128SubsequentColorExpandScanline(ScrnInfoPtr pScrn, int bufno)
{
    R128InfoPtr      info      = R128PTR(pScrn);
    /* Not referenced directly; presumably consumed by the ADDRREG macro. */
    unsigned char   *R128MMIO  = info->MMIO;
    CARD32          *src       = (pointer)info->scratch_buffer[bufno];
    volatile CARD32 *dst;
    int              remaining = info->scanline_words;
    int              n;

    if (info->scanline_direct) {
        return;
    }

    --info->scanline_h;

    while (remaining) {
        write_mem_barrier();

        if (remaining > 8) {
            /* Full chunk of eight words. */
            R128WaitForFifo(pScrn, 8);
            /* Unrolling doesn't improve performance */
            dst = ADDRREG(R128_HOST_DATA0);
            for (n = 0; n < 8; n++) {
                *dst++ = *src++;
            }
            remaining -= 8;
            continue;
        }

        /* Final chunk of this scanline. */
        if (info->scanline_h == 0) {
            /* Last scanline - finish write to DATA_LAST */
            R128WaitForFifo(pScrn, remaining);
            dst = ADDRREG(R128_HOST_DATA_LAST) - (remaining - 1);
        } else {
            R128WaitForFifo(pScrn, remaining);
            dst = ADDRREG(R128_HOST_DATA7) - (remaining - 1);
        }
        /* Unrolling doesn't improve performance */
        while (remaining) {
            *dst++ = *src++;
            --remaining;
        }
    }
}

/* Setup for XAA indirect image write.

   1024x768@76Hz 8bpp
                              Without        With
   x11perf -putimage10      37500.0/sec   39300.0/sec
   x11perf -putimage100      2150.0/sec    1170.0/sec
   x11perf -putimage500       108.0/sec      49.8/sec
 */

/*
 * Program the drawing engine for a scanline image write.
 *
 *   rop         - index into R128_ROP[]; the entry's .rop field is used
 *   planemask   - written to the write-mask register
 *   trans_color - source colour key; -1 skips the colour-compare setup
 *   bpp         - remembered in info->scanline_bpp
 *   depth       - not used by this function
 */
static void R128SetupForScanlineImageWrite(ScrnInfoPtr pScrn,
                                           int rop,
                                           unsigned int planemask,
                                           int trans_color,
                                           int bpp,
                                           int depth)
{
    R128InfoPtr    info     = R128PTR(pScrn);
    /* Not referenced directly; presumably consumed by the OUTREG macro. */
    unsigned char *R128MMIO = info->MMIO;

    info->scanline_bpp = bpp;

    R128WaitForFifo(pScrn, 2);
    OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl
                                     | R128_GMC_DST_CLIPPING
                                     | R128_GMC_BRUSH_1X8_COLOR
                                     | R128_GMC_SRC_DATATYPE_COLOR
                                     | R128_ROP[rop].rop
                                     | R128_GMC_BYTE_LSB_TO_MSB
                                     | R128_DP_SRC_SOURCE_HOST_DATA));
    OUTREG(R128_DP_WRITE_MASK, planemask);

    if (trans_color == -1) {
        return;
    }

    /* Set up for transparency */
    R128WaitForFifo(pScrn, 3);
    OUTREG(R128_CLR_CMP_CLR_SRC, trans_color);
    OUTREG(R128_CLR_CMP_MASK,    R128_CLR_CMP_MSK);
    OUTREG(R128_CLR_CMP_CNTL,    (R128_SRC_CMP_NEQ_COLOR
                                  | R128_CLR_CMP_SRC_SOURCE));
}

/* Subsequent XAA indirect image write. This is only called once for each rectangle. 
*/

/*
 * Per-rectangle setup for a scanline image write.
 *
 *   x, y, w, h - destination rectangle
 *   skipleft   - offset added to x to form the left clip edge
 *
 * Records the scanline count and the number of 32-bit words per scanline
 * ((w * scanline_bpp + 31) >> 5), selects the scratch buffer, and programs
 * the scissor and destination registers.  The destination width is rounded
 * up with a mask chosen from the screen's bits per pixel, and the scissor
 * rectangle trims it back to [x + skipleft, x + w).
 */
static void R128SubsequentScanlineImageWriteRect(ScrnInfoPtr pScrn,
                                                 int x, int y,
                                                 int w, int h,
                                                 int skipleft)
{
    R128InfoPtr    info       = R128PTR(pScrn);
    /* Not referenced directly; presumably consumed by OUTREG/ADDRREG. */
    unsigned char *R128MMIO   = info->MMIO;
    int            clip_left  = x + skipleft;
    int            clip_right = x + w;
    int            width_mask;
    int            top_left;
    int            bottom_right;
    int            dst_origin;
    int            dst_extent;

    /* (w + mask) & ~mask rounds w up to a multiple of mask + 1 pixels. */
    switch (pScrn->bitsPerPixel) {
    case 8:
        width_mask = 3;
        break;
    case 16:
        width_mask = 1;
        break;
    default:
        width_mask = 0;                        /* 32bpp */
        break;
    }

    info->scanline_h     = h;
    info->scanline_words = (w * info->scanline_bpp + 31) >> 5;

#if 0
    /* Seeing as the CPUToScreen doesn't like this, I've done this
     * here too, as it uses pretty much the same path. */
    if ((info->scanline_words * h) <= 9) {
        /* Turn on direct for less than 9 dword colour expansion */
        info->scratch_buffer[0] =
            (unsigned char *)(ADDRREG(R128_HOST_DATA_LAST)
                              - (info->scanline_words - 1));
        info->scanline_direct   = 1;
    } else
#endif
    {
        /* Use indirect for anything else */
        info->scratch_buffer[0] = info->scratch_save;
        info->scanline_direct   = 0;
    }

    /* At 24bpp the clip edges are scaled by three. */
    if (pScrn->bitsPerPixel == 24) {
        clip_left  *= 3;
        clip_right *= 3;
    }

    top_left     = (y << 16) | (clip_left & 0xffff);
    bottom_right = ((y + h - 1) << 16) | ((clip_right - 1) & 0xffff);
    dst_origin   = (y << 16) | (x & 0xffff);
    /* Have to pad the width here and use clipping engine */
    dst_extent   = (h << 16) | ((w + width_mask) & ~width_mask);

    /* Four register writes, plus the data words when the direct path is on. */
    R128WaitForFifo(pScrn, 4 + (info->scanline_direct ?
                                (info->scanline_words * h) : 0) );

    OUTREG(R128_SC_TOP_LEFT,      top_left);
    OUTREG(R128_SC_BOTTOM_RIGHT,  bottom_right);
    OUTREG(R128_DST_Y_X,          dst_origin);
    OUTREG(R128_DST_HEIGHT_WIDTH, dst_extent);
}

/* Subsequent XAA indirect image write. This is called once for each scanline. 
*/static void R128SubsequentImageWriteScanline(ScrnInfoPtr pScrn, int bufno){ R128InfoPtr info = R128PTR(pScrn); unsigned char *R128MMIO = info->MMIO; CARD32 *p = (pointer)info->scratch_buffer[bufno]; int i; int left = info->scanline_words; volatile CARD32 *d; if (info->scanline_direct) return; --info->scanline_h; while (left) { write_mem_barrier(); if (left <= 8) { /* Last scanline - finish write to DATA_LAST */ if (info->scanline_h == 0) { R128WaitForFifo(pScrn, left); /* Unrolling doesn't improve performance */ for (d = ADDRREG(R128_HOST_DATA_LAST) - (left - 1); left; --left) *d++ = *p++; return; } else { R128WaitForFifo(pScrn, left); /* Unrolling doesn't improve performance */ for (d = ADDRREG(R128_HOST_DATA7) - (left - 1); left; --left) *d++ = *p++; } } else { R128WaitForFifo(pScrn, 8); /* Unrolling doesn't improve performance */ for (d = ADDRREG(R128_HOST_DATA0), i = 0; i < 8; i++) *d++ = *p++; left -= 8; } }}/* Initialize the acceleration hardware. */void R128EngineInit(ScrnInfoPtr pScrn){ R128InfoPtr info = R128PTR(pScrn); unsigned char *R128MMIO = info->MMIO; R128TRACE(("EngineInit (%d/%d)\n", info->CurrentLayout.pixel_code, info->CurrentLayout.bitsPerPixel)); OUTREG(R128_SCALE_3D_CNTL, 0); R128EngineReset(pScrn); switch (info->CurrentLayout.pixel_code) { case 8: info->datatype = 2; break; case 15: info->datatype = 3; break; case 16: info->datatype = 4; break; case 24: info->datatype = 5; break; case 32: info->datatype = 6; break; default: R128TRACE(("Unknown depth/bpp = %d/%d (code = %d)\n", info->CurrentLayout.depth, info->CurrentLayout.bitsPerPixel, info->CurrentLayout.pixel_code)); } info->pitch = (info->CurrentLayout.displayWidth / 8) * (info->CurrentLayout.pixel_bytes == 3 ? 
3 : 1); R128TRACE(("Pitch for acceleration = %d\n", info->pitch)); R128WaitForFifo(pScrn, 2); OUTREG(R128_DEFAULT_OFFSET, pScrn->fbOffset); OUTREG(R128_DEFAULT_PITCH, info->pitch); R128WaitForFifo(pScrn, 4); OUTREG(R128_AUX_SC_CNTL, 0); OUTREG(R128_DEFAULT_SC_BOTTOM_RIGHT, (R128_DEFAULT_SC_RIGHT_MAX | R128_DEFAULT_SC_BOTTOM_MAX)); OUTREG(R128_SC_TOP_LEFT, 0); OUTREG(R128_SC_BOTTOM_RIGHT, (R128_DEFAULT_SC_RIGHT_MAX | R128_DEFAULT_SC_BOTTOM_MAX)); info->dp_gui_master_cntl = ((info->datatype << R128_GMC_DST_DATATYPE_SHIFT) | R128_GMC_CLR_CMP_CNTL_DIS | R128_GMC_AUX_CLIP_DIS); R128WaitForFifo(pScrn, 1); OUTREG(R128_DP_GUI_MASTER_CNTL, (info->dp_gui_master_cntl | R128_GMC_BRUSH_SOLID_COLOR | R128_GMC_SRC_DATATYPE_COLOR)); R128WaitForFifo(pScrn, 8); OUTREG(R128_DST_BRES_ERR, 0); OUTREG(R128_DST_BRES_INC, 0); OUTREG(R128_DST_BRES_DEC, 0); OUTREG(R128_DP_BRUSH_FRGD_CLR, 0xffffffff); OUTREG(R128_DP_BRUSH_BKGD_CLR, 0x00000000); OUTREG(R128_DP_SRC_FRGD_CLR, 0xffffffff); OUTREG(R128_DP_SRC_BKGD_CLR, 0x00000000); OUTREG(R128_DP_WRITE_MASK, 0xffffffff); R128WaitForFifo(pScrn, 1);#if X_BYTE_ORDER == X_BIG_ENDIAN /* FIXME: this is a kludge for texture uploads in the 3D driver. Look at * how the radeon driver handles HOST_DATA_SWAP if you want to implement * CCE ImageWrite acceleration or anything needing this bit */#ifdef XF86DRI if (info->directRenderingEnabled) OUTREGP(R128_DP_DATATYPE, 0, ~R128_HOST_BIG_ENDIAN_EN); else#endif OUTREGP(R128_DP_DATATYPE, R128_HOST_BIG_ENDIAN_EN, ~R128_HOST_BIG_ENDIAN_EN);#else /* X_LITTLE_ENDIAN */ OUTREGP(R128_DP_DATATYPE, 0, ~R128_HOST_BIG_ENDIAN_EN);#endif#ifdef XF86DRI info->sc_left = 0x00000000; info->sc_right = R128_DEFAULT_SC_RIGHT_MAX; info->sc_top = 0x00000000; info->sc_bottom = R128_DEFAULT_SC_BOTTOM_MAX; info->re_top_left = 0x00000000; info->re_width_height = ((0x7ff << R128_RE_WIDTH_SHIFT) | (0x7ff << R128_RE_HEIGHT_SHIFT)); info->aux_sc_cntl = 0x00000000;#endif R128WaitForIdle(pScrn);}#ifdef XF86DRI/* Setup for XAA SolidFill. 
*/

/*
 * Queue the solid-fill setup through the ring macros.
 *
 *   color     - written to the brush foreground colour register
 *   rop       - index into R128_ROP[]; the entry's .pattern field is merged
 *               into the master control word
 *   planemask - written to the write-mask register
 *
 * Eight ring entries are reserved for the four register writes below
 * (presumably two entries per OUT_RING_REG - confirm against the macro).
 */
static void R128CCESetupForSolidFill(ScrnInfoPtr pScrn,
                                     int color, int rop,
                                     unsigned int planemask)
{
    R128InfoPtr info = R128PTR(pScrn);
    RING_LOCALS;

    R128CCE_REFRESH( pScrn, info );

    BEGIN_RING( 8 );

    OUT_RING_REG( R128_DP_GUI_MASTER_CNTL,
                  (info->dp_gui_master_cntl
                   | R128_GMC_BRUSH_SOLID_COLOR
                   | R128_GMC_SRC_DATATYPE_COLOR
                   | R128_ROP[rop].pattern) );

    OUT_RING_REG( R128_DP_BRUSH_FRGD_CLR, color );
    OUT_RING_REG( R128_DP_WRITE_MASK,     planemask );
    /* Fill left-to-right, top-to-bottom. */
    OUT_RING_REG( R128_DP_CNTL,
                  (R128_DST_X_LEFT_TO_RIGHT
                   | R128_DST_Y_TOP_TO_BOTTOM));

    ADVANCE_RING();
}

/* Subsequent XAA SolidFillRect. Tests: xtest CH06/fllrctngl, xterm*/
static void R128CCESubsequentSolidFillRect(ScrnInfoPtr pScrn,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -