📄 pixman-mmx.c
字号:
vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0); *q = ULLONG(vdest); } twidth--; p++; q++; } maskLine += maskStride; dstLine += dstStride; } _mm_empty ();}voidfbCompositeIn_nx8x8mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint8_t *dstLine, *dst; uint8_t *maskLine, *mask; int dstStride, maskStride; uint16_t w; uint32_t src; uint8_t sa; __m64 vsrc, vsrca; fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); fbComposeGetSolid(pSrc, src, pDst->bits.format); sa = src >> 24; if (sa == 0) return; vsrc = load8888(src); vsrca = expand_alpha(vsrc); while (height--) { dst = dstLine; dstLine += dstStride; mask = maskLine; maskLine += maskStride; w = width; if ((((unsigned long)pDst & 3) == 0) && (((unsigned long)pSrc & 3) == 0)) { while (w >= 4) { uint32_t m; __m64 vmask; __m64 vdest; m = 0; vmask = load8888 (*(uint32_t *)mask); vdest = load8888 (*(uint32_t *)dst); *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest)); dst += 4; mask += 4; w -= 4; } } while (w--) { uint16_t tmp; uint8_t a; uint32_t m, d; uint32_t r; a = *mask++; d = *dst; m = FbInU (sa, 0, a, tmp); r = FbInU (m, 0, d, tmp); *dst++ = r; } } _mm_empty();}voidfbCompositeIn_8x8mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint8_t *dstLine, *dst; uint8_t *srcLine, *src; int srcStride, dstStride; uint16_t w; fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); while (height--) { dst = dstLine; dstLine += dstStride; src = srcLine; srcLine += srcStride; w = width; if ((((unsigned long)pDst & 3) == 0) && (((unsigned long)pSrc & 3) == 0)) { while (w >= 4) { uint32_t *s = (uint32_t *)src; uint32_t *d = (uint32_t *)dst; *d = store8888 (in (load8888 (*s), load8888 (*d))); w -= 4; dst += 4; src += 4; } } while (w--) { uint8_t s, d; uint16_t tmp; s = *src; d = *dst; *dst = FbInU (s, 0, d, tmp); src++; dst++; } } _mm_empty ();}voidfbCompositeSrcAdd_8888x8x8mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint8_t *dstLine, *dst; uint8_t *maskLine, *mask; int dstStride, maskStride; uint16_t w; uint32_t src; uint8_t sa; __m64 vsrc, vsrca; fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); fbComposeGetSolid(pSrc, src, pDst->bits.format); sa = src >> 24; if (sa == 0) return; vsrc = load8888(src); vsrca = expand_alpha(vsrc); while (height--) { dst = dstLine; dstLine += dstStride; mask = maskLine; maskLine += maskStride; w = width; if ((((unsigned long)pMask & 3) == 0) && (((unsigned long)pDst & 3) == 0)) { while (w >= 4) { __m64 vmask = load8888 (*(uint32_t *)mask); __m64 vdest = load8888 (*(uint32_t *)dst); *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest)); w -= 4; dst += 4; mask += 4; } } while (w--) { uint16_t tmp; uint16_t a; uint32_t m, d; uint32_t r; a = *mask++; d = *dst; m = FbInU (sa, 0, a, tmp); r = FbAdd (m, d, 0, tmp); *dst++ = r; } } _mm_empty();}voidfbCompositeSrcAdd_8000x8000mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint8_t *dstLine, *dst; uint8_t *srcLine, *src; int dstStride, srcStride; uint16_t w; uint8_t s, d; uint16_t t; CHECKPOINT(); fbComposeGetStart (pSrc, xSrc, ySrc, uint8_t, srcStride, srcLine, 1); fbComposeGetStart (pDst, xDst, yDst, uint8_t, dstStride, dstLine, 1); while (height--) { dst = dstLine; dstLine += dstStride; src = srcLine; srcLine += srcStride; w = width; while (w && (unsigned long)dst & 7) { s = *src; d = *dst; t = d + s; s = t | (0 - (t >> 8)); *dst = s; dst++; src++; w--; } while (w >= 8) { *(__m64*)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst); dst += 8; src += 8; w -= 8; } while (w) { s = *src; d = *dst; t = d + s; s = t | (0 - (t >> 8)); *dst = s; dst++; src++; w--; } } _mm_empty();}voidfbCompositeSrcAdd_8888x8888mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ __m64 dst64; uint32_t *dstLine, *dst; uint32_t *srcLine, *src; int dstStride, srcStride; uint16_t w; CHECKPOINT(); fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); while (height--) { dst = dstLine; dstLine += dstStride; src = srcLine; srcLine += srcStride; w = width; while (w && (unsigned long)dst & 7) { *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src), _mm_cvtsi32_si64(*dst))); dst++; src++; w--; } while (w >= 2) { dst64 = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst); *(ullong*)dst = ULLONG(dst64); dst += 2; src += 2; w -= 2; } if (w) { *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src), _mm_cvtsi32_si64(*dst))); } } _mm_empty();}pixman_bool_tpixman_blt_mmx (uint32_t *src_bits, uint32_t *dst_bits, int src_stride, int dst_stride, int src_bpp, int dst_bpp, int src_x, int src_y, int dst_x, int dst_y, int width, int height){ uint8_t * src_bytes; uint8_t * dst_bytes; int byte_width; if (src_bpp != dst_bpp) return FALSE; if (src_bpp == 16) { src_stride = src_stride * (int) sizeof (uint32_t) / 2; dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); byte_width = 2 * width; src_stride *= 2; dst_stride *= 2; } else if (src_bpp == 32) { src_stride = src_stride * (int) sizeof (uint32_t) / 4; dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); byte_width = 4 * width; src_stride *= 4; dst_stride *= 4; } else { return FALSE; } while (height--) { int w; uint8_t *s = src_bytes; uint8_t *d = dst_bytes; src_bytes += src_stride; dst_bytes += dst_stride; w = byte_width; while (w >= 2 && ((unsigned long)d & 3)) { *(uint16_t *)d = *(uint16_t *)s; w -= 2; s += 2; d += 2; } while (w >= 4 && ((unsigned long)d & 7)) { *(uint32_t *)d = *(uint32_t *)s; w -= 4; s += 4; d += 4; } while (w >= 64) {#ifdef __GNUC__ __asm__ ( "movq (%1), %%mm0\n" "movq 8(%1), %%mm1\n" "movq 16(%1), %%mm2\n" "movq 24(%1), %%mm3\n" "movq 32(%1), %%mm4\n" "movq 40(%1), %%mm5\n" "movq 48(%1), %%mm6\n" "movq 56(%1), %%mm7\n" "movq %%mm0, (%0)\n" "movq %%mm1, 8(%0)\n" "movq %%mm2, 16(%0)\n" "movq %%mm3, 24(%0)\n" "movq %%mm4, 32(%0)\n" "movq %%mm5, 40(%0)\n" "movq %%mm6, 48(%0)\n" "movq %%mm7, 56(%0)\n" : : "r" (d), "r" (s) : "memory", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");#else __m64 v0 = *(__m64 *)(s + 0); __m64 v1 = *(__m64 *)(s + 8); __m64 v2 = *(__m64 *)(s + 16); __m64 v3 = *(__m64 *)(s + 24); __m64 v4 = *(__m64 *)(s + 32); __m64 v5 = *(__m64 *)(s + 40); __m64 v6 = *(__m64 *)(s + 48); __m64 v7 = *(__m64 *)(s + 56); *(__m64 *)(d + 0) = v0; *(__m64 *)(d + 8) = v1; *(__m64 *)(d + 16) = v2; *(__m64 *)(d + 24) = v3; *(__m64 *)(d + 32) = v4; *(__m64 *)(d + 40) = v5; *(__m64 *)(d + 48) = v6; *(__m64 *)(d + 56) = v7;#endif w -= 64; s += 64; d += 64; } while (w >= 4) { *(uint32_t *)d = *(uint32_t *)s; w -= 4; s += 4; d += 4; } if (w >= 2) { *(uint16_t *)d = *(uint16_t *)s; w -= 2; s += 2; d += 2; } } _mm_empty(); return TRUE;}voidfbCompositeCopyAreammx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ pixman_blt_mmx (pSrc->bits.bits, pDst->bits.bits, pSrc->bits.rowstride, pDst->bits.rowstride, PIXMAN_FORMAT_BPP (pSrc->bits.format), PIXMAN_FORMAT_BPP (pDst->bits.format), xSrc, ySrc, xDst, yDst, width, height);}voidfbCompositeOver_x888x8x8888mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint32_t *src, *srcLine; uint32_t *dst, *dstLine; uint8_t *mask, *maskLine; int srcStride, maskStride, dstStride; uint16_t w; fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); while (height--) { src = srcLine; srcLine += srcStride; dst = dstLine; dstLine += dstStride; mask = maskLine; maskLine += maskStride; w = width; while (w--) { ullong m = *mask; if (m) { __m64 s = load8888 (*src | 0xff000000); if (m == 0xff) *dst = store8888 (s); else { __m64 sa = expand_alpha (s); __m64 vm = expand_alpha_rev (M64(m)); __m64 vdest = in_over(s, sa, vm, load8888 (*dst)); *dst = store8888 (vdest); } } mask++; dst++; src++; } } _mm_empty();}#endif /* USE_MMX */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -