📄 gapi.cpp
字号:
Description : Blt from sys mem to vid mem with x2 upscale via Slave Port.
Note that the blt is unrotated because the gapi surf is rotated
by the same amount as the primary.
This is for evaluating what the performance will be if the GAPI Legacy surface
is alloc'd from system memory.
************************************************************************************/
void MBX::GapiScaleBltSP(PGAPI_BLT pBlt)
{
#if !GAPI_USE_STRIPE_BUFFER
	/* Geometry of the pixel stream pushed through the slave port. These are
	 * fixed values, not taken from pBlt: 320x240 pixels, two 16-bit pixels
	 * per source dword.
	 * NOTE(review): source lines are walked contiguously (sSrc.dwStride is not
	 * used for stepping), so this assumes a tightly packed source - confirm. */
	const DWORD dwSrcLines          = 240;
	const DWORD dwSrcDwordsPerLine  = (320>>1);
	/* Each source dword becomes four slave port dwords (two pixels, each doubled). */
	const DWORD dwWritesPerSrcDword = 4;

	ULONG ulScaleFactor;
	ULONG ulScaleFactorX;
	ULONG ulScaleFactorY;

	DWORD dwLine;
	DWORD dwLineRepeat;
	DWORD dwPixel;
	DWORD dwPixData;
	DWORD dwPixel1;
	DWORD dwPixel2;
	DWORD dwSPCount;			/* FIFO slots we believe are still available */
	DWORD *pdwSrcLine;			/* start of the source line being sent */
	DWORD *pdwSrcData;			/* read cursor within that line */

	/* We have 10bits to represent the scale-factor */
	ulScaleFactor  = ((pBlt->sSrc.dwWidth<<16)/pBlt->sDst.dwWidth) & ~0xFFE007FF;
	ulScaleFactorX = ulScaleFactor >> (16-MBX2D_STRETCH_FPSHIFT); /* = 0x10 for x2 upscale */
	ulScaleFactor  = ((pBlt->sSrc.dwHeight<<16)/pBlt->sDst.dwHeight) & ~0xFFE007FF;
	ulScaleFactorY = ulScaleFactor >> (16-MBX2D_STRETCH_FPSHIFT);

	/* ACQUIRE SLAVE PORT */
	PVRSRVAcquireSlavePort(m_sDevData.psDevInfoKM, PVRSRV_SLAVEPORT_2D, IMG_TRUE);
	SlavePortInitWrites();

	/* Set Dest Surf */
	/* update DST surface attributes */
	SlavePortWrite( MBX2D_DST_CTRL_BH | EGPEFormatToDestHW[gpe16Bpp]
				| ((pBlt->sDst.dwStride<<MBX2D_DST_STRIDE_SHIFT) & MBX2D_DST_STRIDE_MASK));
	SlavePortWrite( ((pBlt->sDst.dwPhysAddress >> MBX2D_DST_ADDR_ALIGNSHIFT)
				<< MBX2D_DST_ADDR_SHIFT) & MBX2D_DST_ADDR_MASK);

	/* Scaling */
	SlavePortWrite( MBX2D_STRETCH_BH
				| (ulScaleFactorX<<MBX2D_X_STRETCH_SHIFT)
				| (ulScaleFactorY<<MBX2D_Y_STRETCH_SHIFT) );

	/* Set Src format and stride */
	SlavePortWrite(MBX2D_SRC_CTRL_BH
				| /* MBX2D_SRC_FBMEM */ MBX2D_SRC_SYSTEMMEM
				| EGPEFormatToSrcHW[gpe16Bpp]
				| ((pBlt->sSrc.dwStride<<MBX2D_SRC_STRIDE_SHIFT)
					& MBX2D_SRC_STRIDE_MASK) );
	SlavePortWrite( 0 ); /* Dummy src address ? */

	/* Send Blit Command */
	SlavePortWrite( MBX2D_BLIT_BH | (0xCCCC & MBX2D_ROP4_MASK) ); /* SrcCopy */
	SlavePortWrite( 0 ); /* Dummy fill colour */
	SlavePortWrite( ((SHORT)0 & MBX2D_DST_YSTART_MASK) |
				(((SHORT)0 << MBX2D_DST_XSTART_SHIFT) & MBX2D_DST_XSTART_MASK) );
	SlavePortWrite( ((SHORT)pBlt->sDst.dwHeight & MBX2D_DST_YEND_MASK) |
				(((SHORT)pBlt->sDst.dwWidth << MBX2D_DST_XEND_SHIFT) & MBX2D_DST_XEND_MASK) );

	/* Push the queued commands out before the pixel data is written directly. */
	SlavePortFlushWrites();

	pdwSrcLine = (DWORD*) pBlt->sSrc.pvBuffer;
	dwSPCount  = 0;

	/* Now send 320x240 pixels, ie 320x120 dwords */
	for (dwLine=0; dwLine<dwSrcLines; dwLine++) /* Line loop */
	{
		/* We have to send the line twice for a x2 upscale */
		for (dwLineRepeat=0; dwLineRepeat<2; dwLineRepeat++)
		{
			pdwSrcData = pdwSrcLine; /* (re)start at the beginning of the line */

			for (dwPixel=0; dwPixel<dwSrcDwordsPerLine; dwPixel++)
			{
				dwPixData = *pdwSrcData++;			/* Read two pixels */

				dwPixel1  = dwPixData & 0x0000FFFF;	/* extract low pixel */
				dwPixel1 |= dwPixel1 << 16;			/* pack it twice into one dword */
				dwPixel2  = dwPixData >> 16;		/* extract high pixel */
				dwPixel2 |= dwPixel2 << 16;			/* pack it twice into one dword */

				/* Don't overrun the slave port.
				 * dwSPCount is unsigned, so the budget is topped up as soon as it
				 * cannot cover the next four writes. Waiting for it to hit exactly
				 * zero would wrap it whenever SP_CHUNK_SIZE is not a multiple of 4.
				 * NOTE(review): assumes AQUIRE_2D_FIFOSPACE(n) guarantees at least
				 * n free slots on return and that SP_CHUNK_SIZE >= 4 - confirm. */
				if (dwSPCount < dwWritesPerSrcDword)
				{
					AQUIRE_2D_FIFOSPACE(SP_CHUNK_SIZE);
					dwSPCount = SP_CHUNK_SIZE;
				}

				SLAVEPORT_WRITE2DDW(dwPixel1); // Write
				SLAVEPORT_WRITE2DDW(dwPixel1); // Replicate
				SLAVEPORT_WRITE2DDW(dwPixel2); // Write
				SLAVEPORT_WRITE2DDW(dwPixel2); // Replicate

				/* Saturating decrement: never let the unsigned budget wrap. */
				dwSPCount = (dwSPCount > dwWritesPerSrcDword)
								? (dwSPCount - dwWritesPerSrcDword) : 0;
			} /* pixel loop */
		} /* line repeat loop */

		pdwSrcLine += dwSrcDwordsPerLine; /* Next source line */
	} /* line loop */

	/* The fence is one more direct slave port write, so make sure there is
	 * still room for it when the pixel data used up the whole budget. */
	if (dwSPCount == 0)
	{
		AQUIRE_2D_FIFOSPACE(SP_CHUNK_SIZE);
	}
	SLAVEPORT_WRITE2DDW( MBX2D_FENCE_BH );

	/* Now do OpComplete blt. */
	/* Dest surf */
	SlavePortWrite(MBX2D_DST_CTRL_BH | MBX2D_DST_8888ARGB);
	SlavePortWrite(((gdwGapiOpCompletePhysAddr >> MBX2D_DST_ADDR_ALIGNSHIFT)
				<< MBX2D_DST_ADDR_SHIFT) & MBX2D_DST_ADDR_MASK );

	/* 1 pixel colourfill blit using gdwOpCompleteTag for colour. */
	SlavePortWrite( MBX2D_BLIT_BH | MBX2D_USE_FILL | MBX2D_ROP3_PATCOPY );
	SlavePortWrite( ++gdwOpCompleteTag );
	SlavePortWrite( 0 );
	SlavePortWrite( (1<<MBX2D_DST_XEND_SHIFT) | (1<<MBX2D_DST_YEND_SHIFT) );

	/* Insert a fence to ensure that everything is written through
	 * and write all to slaveport */
	SlavePortFencedWrites();

	/* RELEASE SLAVE PORT */
	PVRSRVReleaseSlavePort( m_sDevData.psDevInfoKM, PVRSRV_SLAVEPORT_2D);
#endif /* #if !GAPI_USE_STRIPE_BUFFER */
} /* GapiScaleBltSP */
/***********************************************************************************
 Function Name	: GapiScaleBltCPU
 Inputs			: pBlt describes the blt. sSrc.pvBuffer / sSrc.dwStride and
				  sDst.pvBuffer / sDst.dwStride are read; strides are applied
				  as byte offsets.
 Outputs		: The buffer at pBlt->sDst.pvBuffer receives the decimated image.
 Returns		:
 Description	: Blt from vid mem to sys mem x2 downscale via CPU.
				  Reads a fixed 640x480 image of 16-bit pixels and writes 320x240
				  by keeping every other pixel of every other line (plain
				  decimation, no filtering). The dimensions are hard-coded and
				  are not taken from pBlt.
************************************************************************************/
void MBX::GapiScaleBltCPU(PGAPI_BLT pBlt)
{
	const DWORD dwDstLines     = 240;		/* 480 source lines, every other one is read */
	const DWORD dwPairsPerLine = (640>>2);	/* 640 pixels, 2 pixels per dword, 2 dwords read per loop */

	DWORD dwLine;
	DWORD dwReadCount;
	DWORD dwFirstPair;
	DWORD dwSecondPair;

	/* Lines are tracked as byte pointers so that the strides can be added with
	 * ordinary pointer arithmetic. Round-tripping the address through a DWORD
	 * would truncate it wherever pointers are wider than 32 bits. */
	const unsigned char *pbySrcLine = (const unsigned char*) pBlt->sSrc.pvBuffer;
	unsigned char       *pbyDstLine = (unsigned char*) pBlt->sDst.pvBuffer;

	// Now read 640x480 pixels and write 320x240 pixels.
	for (dwLine=0; dwLine<dwDstLines; dwLine++) // read alternate lines
	{
		const DWORD *pdwSrcData = (const DWORD*) pbySrcLine;
		DWORD       *pdwDstData = (DWORD*) pbyDstLine;

		// Read one line from the source bitmap, 640 pixels = 320 dwords.
		// Two source dwords (4 pixels) produce one dest dword (2 pixels).
		for (dwReadCount=0; dwReadCount<dwPairsPerLine; dwReadCount++)
		{
			dwFirstPair  = *pdwSrcData++;	// First pair of source pixels
			dwSecondPair = *pdwSrcData++;	// Second pair of source pixels

			// Skip alternate pixels (X decimation): keep the low pixel of each
			// pair and combine them into two dest pixels.
			*pdwDstData++ = (dwFirstPair & 0x0000ffff) | (dwSecondPair << 16);
		}

		// Start of next line. The source advances by two strides so that
		// alternate source lines are skipped (Y decimation).
		pbySrcLine += pBlt->sSrc.dwStride;
		pbySrcLine += pBlt->sSrc.dwStride;
		pbyDstLine += pBlt->sDst.dwStride;
	}
}//GapiScaleBltCPU
/***********************************************************************************
 Function Name	: GapiScaleBltStripeBuf
 Inputs			: pBlt describes the blt. The source is read through
				  sSrc.pvBuffer (sSrc.dwStride bytes per line, sSrc.dwHeight
				  lines); the destination is addressed through
				  sDst.dwPhysAddress / sDst.dwStride.
 Outputs		:
 Returns		:
 Description	: Blt from sys mem to vid mem with x2 upscale via Striping buffer.
				  The source is memcpy'd into the stripe buffer in chunks of whole
				  lines and each chunk is stretch-blitted from there. The two
				  halves of the stripe buffer are used alternately so that the
				  memcpy of one chunk can overlap the blt of the previous one.
				  A final, shorter chunk is sent when the source height is not a
				  multiple of the chunk height. Nothing is done if any dimension
				  is zero or if one source line does not fit in half the stripe
				  buffer.
************************************************************************************/
void MBX::GapiScaleBltStripeBuf(PGAPI_BLT pBlt)
{
#if GAPI_USE_STRIPE_BUFFER
	/* Use double buffering, ie half the stripe buffer for each blt. */
	const ULONG ulStripeSize      = m_ulStripeSize * (NUM_OF_STRIPED_BUFFERS/2);
	unsigned char *pbyStripeLinBase = m_pbyStripeLinBase;
	const ULONG ulStripePhyBase   = m_ulStripePhyBase;

	const ULONG ulSrcWidthBytes   = pBlt->sSrc.dwStride; /* memcpy size of one line is based on the stride */
	const ULONG ulSrcHeightPixels = pBlt->sSrc.dwHeight;

	ULONG ulBltCounter = 0;
	ULONG ulScaleFactor;
	ULONG ulScaleFactorX;
	ULONG ulScaleFactorY;
	ULONG ulChunkSizeBytes;		/* Size of a full chunk, also the offset of the second half-buffer */
	ULONG ulSrcLines;			/* Max number of lines to blt in one go via the striping buffer. */
	ULONG ulLinesLeft;			/* Source lines not yet sent */
	ULONG ulSrcOffsetBytes    = 0;
	ULONG ulDstOffsetBytes    = 0;
	ULONG ulStripeOffsetBytes = 0;

	/* Reject degenerate blts up front: every one of these values is used as a
	 * divisor below. */
	if (ulSrcWidthBytes == 0 || ulSrcHeightPixels == 0
	 || pBlt->sDst.dwWidth == 0 || pBlt->sDst.dwHeight == 0)
	{
		return;
	}

	/* Copy the source into video memory (stripe buffer) then use that as the source. */
	/* The number of source lines we can handle in one go is dependent
	 * on the width of each line and the size of the stripe buffer. */
	ulSrcLines = ulStripeSize / ulSrcWidthBytes;
	if (ulSrcLines == 0)
	{
		/* Not even one line fits in half the stripe buffer. */
		return;
	}
	if (ulSrcLines > ulSrcHeightPixels)
	{
		/* The whole source fits in one chunk; never copy past its end. */
		ulSrcLines = ulSrcHeightPixels;
	}
	ulChunkSizeBytes = ulSrcLines * ulSrcWidthBytes; /* <= ulStripeSize */

	/* We have 10bits to represent the scale-factor */
	ulScaleFactor  = ((pBlt->sSrc.dwWidth<<16)/pBlt->sDst.dwWidth) & ~0xFFE007FF;
	ulScaleFactorX = ulScaleFactor >> (16-MBX2D_STRETCH_FPSHIFT); /* = 0x10 for x2 upscale */
	ulScaleFactor  = ((pBlt->sSrc.dwHeight<<16)/pBlt->sDst.dwHeight) & ~0xFFE007FF;
	ulScaleFactorY = ulScaleFactor >> (16-MBX2D_STRETCH_FPSHIFT);

	/* ACQUIRE SLAVE PORT */
	PVRSRVAcquireSlavePort(m_sDevData.psDevInfoKM, PVRSRV_SLAVEPORT_2D, IMG_TRUE);
	SlavePortInitWrites();

	ulLinesLeft = ulSrcHeightPixels;
	while (ulLinesLeft > 0)
	{
		/* The last chunk may be shorter than the others. */
		const ULONG ulLinesThisBlt    = (ulLinesLeft < ulSrcLines) ? ulLinesLeft : ulSrcLines;
		const ULONG ulBytesThisBlt    = ulLinesThisBlt * ulSrcWidthBytes;
		const ULONG ulDstLinesThisBlt = ulLinesThisBlt<<1; // x2 upscale

		/* Copy source buffer into stripe buffer linear base */
		memcpy ( pbyStripeLinBase + ulStripeOffsetBytes,				/* dst */
				 (char*)pBlt->sSrc.pvBuffer + ulSrcOffsetBytes,			/* src */
				 ulBytesThisBlt );

		/* Set Dest Surf */
		/* update DST surface attributes */
		SlavePortWrite( MBX2D_DST_CTRL_BH | EGPEFormatToDestHW[gpe16Bpp]
					| ((pBlt->sDst.dwStride<<MBX2D_DST_STRIDE_SHIFT) & MBX2D_DST_STRIDE_MASK));
		SlavePortWrite( (( (pBlt->sDst.dwPhysAddress+ulDstOffsetBytes) >> MBX2D_DST_ADDR_ALIGNSHIFT)
					<< MBX2D_DST_ADDR_SHIFT) & MBX2D_DST_ADDR_MASK);

		/* Scaling */
		SlavePortWrite( MBX2D_STRETCH_BH
					| (ulScaleFactorX<<MBX2D_X_STRETCH_SHIFT)
					| (ulScaleFactorY<<MBX2D_Y_STRETCH_SHIFT) );

		/* Set Src format and stride */
		SlavePortWrite(MBX2D_SRC_CTRL_BH
					| MBX2D_SRC_FBMEM
					| EGPEFormatToSrcHW[gpe16Bpp]
					| ((pBlt->sSrc.dwStride<<MBX2D_SRC_STRIDE_SHIFT)
						& MBX2D_SRC_STRIDE_MASK) );

		/* Src address */
		SlavePortWrite((( (ulStripePhyBase + ulStripeOffsetBytes)
						>> MBX2D_SRC_ADDR_ALIGNSHIFT)
						<< MBX2D_SRC_ADDR_SHIFT)
						& MBX2D_SRC_ADDR_MASK );

		/* Specify the Src starting pixel coordinate */
		SlavePortWrite(MBX2D_SRC_OFF_BH
					| ((0<<MBX2D_SRCOFF_XSTART_SHIFT) & MBX2D_SRCOFF_XSTART_MASK)
					| ((0<<MBX2D_SRCOFF_YSTART_SHIFT) & MBX2D_SRCOFF_YSTART_MASK) );

		/* Send Blit Command */
		SlavePortWrite( MBX2D_BLIT_BH | (0xCCCC & MBX2D_ROP4_MASK) ); /* SrcCopy */
		SlavePortWrite( 0 ); // Dummy fill colour
		SlavePortWrite( ((SHORT)0 & MBX2D_DST_YSTART_MASK) |
					(((SHORT)0 << MBX2D_DST_XSTART_SHIFT) & MBX2D_DST_XSTART_MASK) );
		SlavePortWrite( ((SHORT)ulDstLinesThisBlt & MBX2D_DST_YEND_MASK) |
					(((SHORT)pBlt->sDst.dwWidth << MBX2D_DST_XEND_SHIFT) & MBX2D_DST_XEND_MASK) );

		/* Now do OpComplete blt. */
		/* Dest surf */
		SlavePortWrite(MBX2D_DST_CTRL_BH | MBX2D_DST_8888ARGB);
		SlavePortWrite(((gdwGapiOpCompletePhysAddr >> MBX2D_DST_ADDR_ALIGNSHIFT)
					<< MBX2D_DST_ADDR_SHIFT) & MBX2D_DST_ADDR_MASK );

		/* 1 pixel colourfill blit using gdwOpCompleteTag for colour. */
		SlavePortWrite( MBX2D_BLIT_BH | MBX2D_USE_FILL | MBX2D_ROP3_PATCOPY );
		SlavePortWrite( ++gdwOpCompleteTag );
		SlavePortWrite( 0 );
		SlavePortWrite( (1<<MBX2D_DST_XEND_SHIFT) | (1<<MBX2D_DST_YEND_SHIFT) );

		/* Insert a fence to ensure that everything is written through
		 * and write all to slaveport */
		SlavePortFencedWrites();

		ulBltCounter++;

		/* We are toggling between two halves of the stripe buffer
		 * so that by using double buffering, the memcpy and the blt can be concurrent. */
		ulStripeOffsetBytes = (ulBltCounter&1) ? ulChunkSizeBytes : 0;

		ulSrcOffsetBytes += ulBytesThisBlt;
		ulDstOffsetBytes += pBlt->sDst.dwStride * ulDstLinesThisBlt;
		ulLinesLeft      -= ulLinesThisBlt;

		/* Wait for next stripe buffer (from two blts ago) to become free. */
		do
		{
			DWORD dwGapiOpCompleteLinAddr = *gpdwGapiOpCompleteLinAddr;

			if (dwGapiOpCompleteLinAddr == gdwOpCompleteTag) /* current buffer free (catches wrap-around case) */
			{
				break; /* ready */
			}
			if (dwGapiOpCompleteLinAddr == (gdwOpCompleteTag-1) ) /* next buffer free */
			{
				break; /* ready */
			}
			Sleep(0); /* Reduce number of vid mem reads. */
		} while(1); /* always */
	}

	/* RELEASE SLAVE PORT */
	PVRSRVReleaseSlavePort( m_sDevData.psDevInfoKM, PVRSRV_SLAVEPORT_2D);
#endif /* GAPI_USE_STRIPE_BUFFER */
} /* GapiScaleBltStripeBuf */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -