📄 pixman-mmx.c
字号:
*(__m64*) (d + 24) = vfill; *(__m64*) (d + 32) = vfill; *(__m64*) (d + 40) = vfill; *(__m64*) (d + 48) = vfill; *(__m64*) (d + 56) = vfill;#endif w -= 64; d += 64; } while (w >= 4) { *(uint32_t *)d = xor; w -= 4; d += 4; } if (w >= 2) { *(uint16_t *)d = xor; w -= 2; d += 2; } } _mm_empty(); return TRUE;}voidfbCompositeSolidMaskSrc_nx8x8888mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint32_t src, srca; uint32_t *dstLine, *dst; uint8_t *maskLine, *mask; int dstStride, maskStride; uint16_t w; __m64 vsrc, vsrca; ullong srcsrc; CHECKPOINT(); fbComposeGetSolid(pSrc, src, pDst->bits.format); srca = src >> 24; if (srca == 0) { pixman_fill_mmx (pDst->bits.bits, pDst->bits.rowstride, PIXMAN_FORMAT_BPP (pDst->bits.format), xDst, yDst, width, height, 0); return; } srcsrc = (ullong)src << 32 | src; fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); while (height--) { dst = dstLine; dstLine += dstStride; mask = maskLine; maskLine += maskStride; w = width; CHECKPOINT(); while (w && (unsigned long)dst & 7) { ullong m = *mask; if (m) { __m64 vdest = in(vsrc, expand_alpha_rev (M64(m))); *dst = store8888(vdest); } else { *dst = 0; } w--; mask++; dst++; } CHECKPOINT(); while (w >= 2) { ullong m0, m1; m0 = *mask; m1 = *(mask + 1); if (srca == 0xff && (m0 & m1) == 0xff) { *(ullong *)dst = srcsrc; } else if (m0 | m1) { __m64 vdest; __m64 dest0, dest1; vdest = *(__m64 *)dst; dest0 = in(vsrc, expand_alpha_rev (M64(m0))); dest1 = in(vsrc, expand_alpha_rev (M64(m1))); *(__m64 *)dst = pack8888(dest0, dest1); } else { *(ullong *)dst = 0; } mask += 2; dst += 2; w -= 2; } CHECKPOINT(); while (w) { ullong m = *mask; if (m) { __m64 vdest = load8888(*dst); vdest = in(vsrc, expand_alpha_rev (M64(m))); *dst = store8888(vdest); } else { *dst = 0; } w--; mask++; dst++; } } _mm_empty();}voidfbCompositeSolidMask_nx8x0565mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint32_t src, srca; uint16_t *dstLine, *dst; uint8_t *maskLine, *mask; int dstStride, maskStride; uint16_t w; __m64 vsrc, vsrca, tmp; ullong srcsrcsrcsrc, src16; CHECKPOINT(); fbComposeGetSolid(pSrc, src, pDst->bits.format); srca = src >> 24; if (srca == 0) return; fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, uint8_t, maskStride, maskLine, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); tmp = pack565(vsrc, _mm_setzero_si64(), 0); src16 = ULLONG(tmp); srcsrcsrcsrc = (ullong)src16 << 48 | (ullong)src16 << 32 | (ullong)src16 << 16 | (ullong)src16; while (height--) { dst = dstLine; dstLine += dstStride; mask = maskLine; maskLine += maskStride; w = width; CHECKPOINT(); while (w && (unsigned long)dst & 7) { ullong m = *mask; if (m) { ullong d = *dst; __m64 vd = M64(d); __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64 (m)), expand565(vd, 0)); vd = pack565(vdest, _mm_setzero_si64(), 0); *dst = ULLONG(vd); } w--; mask++; dst++; } CHECKPOINT(); while (w >= 4) { ullong m0, m1, m2, m3; m0 = *mask; m1 = *(mask + 1); m2 = *(mask + 2); m3 = *(mask + 3); if (srca == 0xff && (m0 & m1 & m2 & m3) == 0xff) { *(ullong *)dst = srcsrcsrcsrc; } else if (m0 | m1 | m2 | m3) { __m64 vdest; __m64 vm0, vm1, vm2, vm3; vdest = *(__m64 *)dst; vm0 = M64(m0); vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0); vm1 = M64(m1); vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), expand565(vdest, 1)), vdest, 1); vm2 = M64(m2); vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2); vm3 = M64(m3); vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3); *(__m64 *)dst = vdest; } w -= 4; mask += 4; dst += 4; } CHECKPOINT(); while (w) { ullong m = *mask; if (m) { ullong d = *dst; __m64 vd = M64(d); __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), expand565(vd, 0)); vd = pack565(vdest, _mm_setzero_si64(), 0); *dst = ULLONG(vd); } w--; mask++; dst++; } } _mm_empty();}voidfbCompositeSrc_8888RevNPx0565mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint16_t *dstLine, *dst; uint32_t *srcLine, *src; int dstStride, srcStride; uint16_t w; CHECKPOINT(); fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);#if 0 /* FIXME */ assert (pSrc->pDrawable == pMask->pDrawable);#endif while (height--) { dst = dstLine; dstLine += dstStride; src = srcLine; srcLine += srcStride; w = width; CHECKPOINT(); while (w && (unsigned long)dst & 7) { __m64 vsrc = load8888 (*src); ullong d = *dst; __m64 vdest = expand565 (M64(d), 0); vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0); *dst = ULLONG(vdest); w--; dst++; src++; } CHECKPOINT(); while (w >= 4) { uint32_t s0, s1, s2, s3; unsigned char a0, a1, a2, a3; s0 = *src; s1 = *(src + 1); s2 = *(src + 2); s3 = *(src + 3); a0 = (s0 >> 24); a1 = (s1 >> 24); a2 = (s2 >> 24); a3 = (s3 >> 24); if ((a0 & a1 & a2 & a3) == 0xFF) { __m64 vdest; vdest = pack565(invert_colors(load8888(s0)), _mm_setzero_si64(), 0); vdest = pack565(invert_colors(load8888(s1)), vdest, 1); vdest = pack565(invert_colors(load8888(s2)), vdest, 2); vdest = pack565(invert_colors(load8888(s3)), vdest, 3); *(__m64 *)dst = vdest; } else if (a0 | a1 | a2 | a3) { __m64 vdest = *(__m64 *)dst; vdest = pack565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0); vdest = pack565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1); vdest = pack565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2); vdest = pack565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3); *(__m64 *)dst = vdest; } w -= 4; dst += 4; src += 4; } CHECKPOINT(); while (w) { __m64 vsrc = load8888 (*src); ullong d = *dst; __m64 vdest = expand565 (M64(d), 0); vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0); *dst = ULLONG(vdest); w--; dst++; src++; } } _mm_empty();}/* "8888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */voidfbCompositeSrc_8888RevNPx8888mmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint32_t *dstLine, *dst; uint32_t *srcLine, *src; int dstStride, srcStride; uint16_t w; CHECKPOINT(); fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1); fbComposeGetStart (pSrc, xSrc, ySrc, uint32_t, srcStride, srcLine, 1);#if 0 /* FIXME */ assert (pSrc->pDrawable == pMask->pDrawable);#endif while (height--) { dst = dstLine; dstLine += dstStride; src = srcLine; srcLine += srcStride; w = width; while (w && (unsigned long)dst & 7) { __m64 s = load8888 (*src); __m64 d = load8888 (*dst); *dst = store8888 (over_rev_non_pre (s, d)); w--; dst++; src++; } while (w >= 2) { ullong s0, s1; unsigned char a0, a1; __m64 d0, d1; s0 = *src; s1 = *(src + 1); a0 = (s0 >> 24); a1 = (s1 >> 24); if ((a0 & a1) == 0xFF) { d0 = invert_colors(load8888(s0)); d1 = invert_colors(load8888(s1)); *(__m64 *)dst = pack8888 (d0, d1); } else if (a0 | a1) { __m64 vdest = *(__m64 *)dst; d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0)); d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1)); *(__m64 *)dst = pack8888 (d0, d1); } w -= 2; dst += 2; src += 2; } while (w) { __m64 s = load8888 (*src); __m64 d = load8888 (*dst); *dst = store8888 (over_rev_non_pre (s, d)); w--; dst++; src++; } } _mm_empty();}voidfbCompositeSolidMask_nx8888x0565Cmmx (pixman_op_t op, pixman_image_t * pSrc, pixman_image_t * pMask, pixman_image_t * pDst, int16_t xSrc, int16_t ySrc, int16_t xMask, int16_t yMask, int16_t xDst, int16_t yDst, uint16_t width, uint16_t height){ uint32_t src, srca; uint16_t *dstLine; uint32_t *maskLine; int dstStride, maskStride; __m64 vsrc, vsrca; CHECKPOINT(); fbComposeGetSolid(pSrc, src, pDst->bits.format); srca = src >> 24; if (srca == 0) return; fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1); fbComposeGetStart (pMask, xMask, yMask, uint32_t, maskStride, maskLine, 1); vsrc = load8888 (src); vsrca = expand_alpha (vsrc); while (height--) { int twidth = width; uint32_t *p = (uint32_t *)maskLine; uint16_t *q = (uint16_t *)dstLine; while (twidth && ((unsigned long)q & 7)) { uint32_t m = *(uint32_t *)p; if (m) { ullong d = *q; __m64 vdest = expand565 (M64(d), 0); vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0); *q = ULLONG(vdest); } twidth--; p++; q++; } while (twidth >= 4) { uint32_t m0, m1, m2, m3; m0 = *p; m1 = *(p + 1); m2 = *(p + 2); m3 = *(p + 3); if ((m0 | m1 | m2 | m3)) { __m64 vdest = *(__m64 *)q; vdest = pack565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0); vdest = pack565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1); vdest = pack565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2); vdest = pack565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3); *(__m64 *)q = vdest; } twidth -= 4; p += 4; q += 4; } while (twidth) { uint32_t m; m = *(uint32_t *)p; if (m) { ullong d = *q; __m64 vdest = expand565(M64(d), 0);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -