📄 pixman-mmx.c
字号:
{ __m64 vdest = load8888(*q); vdest = in_over(vsrc, vsrca, load8888(m), vdest); *q = store8888(vdest); } twidth--; p++; q++; } dstLine += dstStride; maskLine += maskStride; } _mm_empty();}voidfbCompositeSrc_8888x8x8888mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint32_t *dstLine, *dst; uint32_t *srcLine, *src; uint32_t mask; __m64 vmask; int dstStride, srcStride; uint16_t w; __m64 srca; CHECKPOINT(); fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); fbComposeGetSolid (pMask, mask, pDst->bits.format); mask = mask | mask >> 8 | mask >> 16 | mask >> 24; vmask = load8888 (mask); srca = MC(4x00ff); while (height--) { dst = dstLine; dstLine += dstStride; src = srcLine; srcLine += srcStride; w = width; while (w && (unsigned long)dst & 7) { __m64 s = load8888 (*src); __m64 d = load8888 (*dst); *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); w--; dst++; src++; } while (w >= 2) { __m64 vs = *(__m64 *)src; __m64 vd = *(__m64 *)dst; __m64 vsrc0 = expand8888 (vs, 0); __m64 vsrc1 = expand8888 (vs, 1); *(__m64 *)dst = pack8888 ( in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)), in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1))); w -= 2; dst += 2; src += 2; } while (w) { __m64 s = load8888 (*src); __m64 d = load8888 (*dst); *dst = store8888 (in_over (s, expand_alpha (s), vmask, d)); w--; dst++; src++; } } _mm_empty();}voidfbCompositeSrc_x888xnx8888mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint32_t *dstLine, *dst; uint32_t *srcLine, *src; uint32_t mask; __m64 vmask; int dstStride, srcStride; uint16_t w; __m64 srca; CHECKPOINT(); fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); fbComposeGetSolid (pMask, mask, pDst->bits.format); mask = mask | mask >> 8 | mask >> 16 | mask >> 24; vmask = load8888 (mask); srca = MC(4x00ff); while (height--) { dst = dstLine; dstLine += dstStride; src = srcLine; srcLine += srcStride; w = width; while (w && (unsigned long)dst & 7) { __m64 s = load8888 (*src | 0xff000000); __m64 d = load8888 (*dst); *dst = store8888 (in_over (s, srca, vmask, d)); w--; dst++; src++; } while (w >= 16) { __m64 vd0 = *(__m64 *)(dst + 0); __m64 vd1 = *(__m64 *)(dst + 2); __m64 vd2 = *(__m64 *)(dst + 4); __m64 vd3 = *(__m64 *)(dst + 6); __m64 vd4 = *(__m64 *)(dst + 8); __m64 vd5 = *(__m64 *)(dst + 10); __m64 vd6 = *(__m64 *)(dst + 12); __m64 vd7 = *(__m64 *)(dst + 14); __m64 vs0 = *(__m64 *)(src + 0); __m64 vs1 = *(__m64 *)(src + 2); __m64 vs2 = *(__m64 *)(src + 4); __m64 vs3 = *(__m64 *)(src + 6); __m64 vs4 = *(__m64 *)(src + 8); __m64 vs5 = *(__m64 *)(src + 10); __m64 vs6 = *(__m64 *)(src + 12); __m64 vs7 = *(__m64 *)(src + 14); vd0 = pack8888 ( in_over (expand8888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)), in_over (expand8888 (vs0, 1), srca, vmask, expand8888 (vd0, 1))); vd1 = pack8888 ( in_over (expand8888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)), in_over (expand8888 (vs1, 1), srca, vmask, expand8888 (vd1, 1))); vd2 = pack8888 ( in_over (expand8888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)), in_over (expand8888 (vs2, 1), srca, vmask, expand8888 (vd2, 1))); vd3 = pack8888 ( in_over (expand8888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)), in_over (expand8888 (vs3, 1), srca, vmask, expand8888 (vd3, 1))); vd4 = pack8888 ( in_over (expand8888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)), in_over (expand8888 (vs4, 1), srca, vmask, expand8888 (vd4, 1))); vd5 = pack8888 ( in_over (expand8888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)), in_over (expand8888 (vs5, 1), srca, vmask, expand8888 (vd5, 1))); vd6 = pack8888 ( in_over (expand8888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)), in_over (expand8888 (vs6, 1), srca, vmask, expand8888 (vd6, 1))); vd7 = pack8888 ( in_over (expand8888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)), in_over (expand8888 (vs7, 1), srca, vmask, expand8888 (vd7, 1))); *(__m64 *)(dst + 0) = vd0; *(__m64 *)(dst + 2) = vd1; *(__m64 *)(dst + 4) = vd2; *(__m64 *)(dst + 6) = vd3; *(__m64 *)(dst + 8) = vd4; *(__m64 *)(dst + 10) = vd5; *(__m64 *)(dst + 12) = vd6; *(__m64 *)(dst + 14) = vd7; w -= 16; dst += 16; src += 16; } while (w) { __m64 s = load8888 (*src | 0xff000000); __m64 d = load8888 (*dst); *dst = store8888 (in_over (s, srca, vmask, d)); w--; dst++; src++; } } _mm_empty();}voidfbCompositeSrc_8888x8888mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint32_t *dstLine, *dst; uint32_t *srcLine, *src; uint32_t s; int dstStride, srcStride; uint8_t a; uint16_t w; CHECKPOINT(); fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1); while (height--) { dst = dstLine; dstLine += dstStride; src = srcLine; srcLine += srcStride; w = width; while (w--) { s = *src++; a = s >> 24; if (a == 0xff) *dst = s; else if (a) { __m64 ms, sa; ms = load8888(s); sa = expand_alpha(ms); *dst = store8888(over(ms, sa, load8888(*dst))); } dst++; } } _mm_empty();}voidfbCompositeSrc_8888x0565mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint16_t *dstLine, *dst; uint32_t *srcLine, *src; int dstStride, srcStride; uint16_t w; CHECKPOINT(); fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);#if 0 /* FIXME */ assert (pSrc->pDrawable == pMask->pDrawable);#endif while (height--) { dst = dstLine; dstLine += dstStride; src = srcLine; srcLine += srcStride; w = width; CHECKPOINT(); while (w && (unsigned long)dst & 7) { __m64 vsrc = load8888 (*src); ullong d = *dst; __m64 vdest = expand565 (M64(d), 0); vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0); *dst = ULLONG(vdest); w--; dst++; src++; } CHECKPOINT(); while (w >= 4) { __m64 vsrc0, vsrc1, vsrc2, vsrc3; __m64 vdest; vsrc0 = load8888(*(src + 0)); vsrc1 = load8888(*(src + 1)); vsrc2 = load8888(*(src + 2)); vsrc3 = load8888(*(src + 3)); vdest = *(__m64 *)dst; vdest = pack565(over(vsrc0, expand_alpha(vsrc0), expand565(vdest, 0)), vdest, 0); vdest = pack565(over(vsrc1, expand_alpha(vsrc1), expand565(vdest, 1)), vdest, 1); vdest = pack565(over(vsrc2, expand_alpha(vsrc2), expand565(vdest, 2)), vdest, 2); vdest = pack565(over(vsrc3, expand_alpha(vsrc3), expand565(vdest, 3)), vdest, 3); *(__m64 *)dst = vdest; w -= 4; dst += 4; src += 4; } CHECKPOINT(); while (w) { __m64 vsrc = load8888 (*src); ullong d = *dst; __m64 vdest = expand565 (M64(d), 0); vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0); *dst = ULLONG(vdest); w--; dst++; src++; } } _mm_empty();}voidfbCompositeSolidMask_nx8x8888mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint32_t src, srca; uint32_t *dstLine, *dst; uint8_t *maskLine, *mask; int dstStride, maskStride; uint16_t w; __m64 vsrc, vsrca; ullong srcsrc; CHECKPOINT(); fbComposeGetSolid(pSrc, src, pDst->bits.format); srca = src >> 24; if (srca == 0) return; srcsrc = (ullong)src << 32 | src; fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); while (height--) { dst = dstLine; dstLine += dstStride; mask = maskLine; maskLine += maskStride; w = width; CHECKPOINT(); while (w && (unsigned long)dst & 7) { ullong m = *mask; if (m) { __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), load8888(*dst)); *dst = store8888(vdest); } w--; mask++; dst++; } CHECKPOINT(); while (w >= 2) { ullong m0, m1; m0 = *mask; m1 = *(mask + 1); if (srca == 0xff && (m0 & m1) == 0xff) { *(ullong *)dst = srcsrc; } else if (m0 | m1) { __m64 vdest; __m64 dest0, dest1; vdest = *(__m64 *)dst; dest0 = in_over(vsrc, vsrca, expand_alpha_rev (M64(m0)), expand8888(vdest, 0)); dest1 = in_over(vsrc, vsrca, expand_alpha_rev (M64(m1)), expand8888(vdest, 1)); *(__m64 *)dst = pack8888(dest0, dest1); } mask += 2; dst += 2; w -= 2; } CHECKPOINT(); while (w) { ullong m = *mask; if (m) { __m64 vdest = load8888(*dst); vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), vdest); *dst = store8888(vdest); } w--; mask++; dst++; } } _mm_empty();}pixman_bool_tpixman_fill_mmx (uint32_t *bits, int stride, int bpp, int x, int y, int width, int height, uint32_t xor){ ullong fill; __m64 vfill; uint32_t byte_width; uint8_t *byte_line;#ifdef __GNUC__ __m64 v1, v2, v3, v4, v5, v6, v7;#endif if (bpp == 16 && (xor >> 16 != (xor & 0xffff))) return FALSE; if (bpp != 16 && bpp != 32) return FALSE; if (bpp == 16) { stride = stride * (int) sizeof (uint32_t) / 2; byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); byte_width = 2 * width; stride *= 2; } else { stride = stride * (int) sizeof (uint32_t) / 4; byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); byte_width = 4 * width; stride *= 4; } fill = ((ullong)xor << 32) | xor; vfill = M64(fill);#ifdef __GNUC__ __asm__ ( "movq %7, %0\n" "movq %7, %1\n" "movq %7, %2\n" "movq %7, %3\n" "movq %7, %4\n" "movq %7, %5\n" "movq %7, %6\n" : "=y" (v1), "=y" (v2), "=y" (v3), "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7) : "y" (vfill));#endif while (height--) { int w; uint8_t *d = byte_line; byte_line += stride; w = byte_width; while (w >= 2 && ((unsigned long)d & 3)) { *(uint16_t *)d = xor; w -= 2; d += 2; } while (w >= 4 && ((unsigned long)d & 7)) { *(uint32_t *)d = xor; w -= 4; d += 4; } while (w >= 64) {#ifdef __GNUC__ __asm__ ( "movq %1, (%0)\n" "movq %2, 8(%0)\n" "movq %3, 16(%0)\n" "movq %4, 24(%0)\n" "movq %5, 32(%0)\n" "movq %6, 40(%0)\n" "movq %7, 48(%0)\n" "movq %8, 56(%0)\n" : : "r" (d), "y" (vfill), "y" (v1), "y" (v2), "y" (v3), "y" (v4), "y" (v5), "y" (v6), "y" (v7) : "memory");#else *(__m64*) (d + 0) = vfill; *(__m64*) (d + 8) = vfill; *(__m64*) (d + 16) = vfill;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -