📄 qdrawhelper_x86.cpp
字号:
/****************************************************************************
**
** Copyright (C) 1992-2006 Trolltech ASA. All rights reserved.
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** This file may be used under the terms of the GNU General Public
** License version 2.0 as published by the Free Software Foundation
** and appearing in the file LICENSE.GPL included in the packaging of
** this file.  Please review the following information to ensure GNU
** General Public Licensing requirements will be met:
** http://www.trolltech.com/products/qt/opensource.html
**
** If you are unsure which license is appropriate for your use, please
** review the following information:
** http://www.trolltech.com/products/qt/licensing.html or contact the
** sales department at sales@trolltech.com.
**
** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
**
****************************************************************************/

#include <private/qdrawhelper_p.h>
#include <private/qpaintengine_raster_p.h>

#if (defined(QT_HAVE_SSE) && (!defined(__APPLE__) || defined(__i386__))) || defined(QT_HAVE_IWMMXT)

// C_FF, C_80 and C_00 each declare a function-local constant
// (mmx_0x00ff, mmx_0x0080, mmx_0x0000).  The helper macros further down
// (negate, byte_mul, load, store, ...) refer to those locals by name, so a
// function must expand the matching C_xx macro before using a helper.
#if defined(Q_CC_GNU) && !defined(Q_CC_INTEL)
#  include <mmintrin.h>
#  if !defined(__IWMMXT__)
#    include <xmmintrin.h>
#  endif
// It seems that gcc (3.1 <= x < 3.4) segfaults when casting the ULL immediate values to __m64.
// A workaround was proposed here: http://gcc.gnu.org/ml/gcc-prs/2002-07/msg00329.html
#  if !defined(Q_CC_INTEL) && __GNUC__ == 3 && __GNUC_MINOR__ >= 1 && __GNUC_MINOR__ < 4
#    define C_FF volatile unsigned long long mmx_0x00ff_ull = 0x00ff00ff00ff00ffULL; \
                 const m64 mmx_0x00ff = (__m64)mmx_0x00ff_ull
#    define C_80 volatile unsigned long long mmx_0x0080_ull = 0x0080008000800080ULL; \
                 const m64 mmx_0x0080 = (__m64)mmx_0x0080_ull
#  else
#    define C_FF const m64 mmx_0x00ff = (__m64)0x00ff00ff00ff00ffULL
#    define C_80 const m64 mmx_0x0080 = (__m64)0x0080008000800080ULL
#  endif
#  define C_00 const m64 mmx_0x0000 = _mm_setzero_si64()
#elif defined(Q_CC_INTEL) || defined(Q_OS_WIN)
#  include <mmintrin.h>
#  include <xmmintrin.h>
#  define C_FF const m64 mmx_0x00ff = _mm_set1_pi16(0xff)
#  define C_80 const m64 mmx_0x0080 = _mm_set1_pi16(0x80)
#  define C_00 const m64 mmx_0x0000 = _mm_setzero_si64()
#  if defined(Q_OS_WIN)
#    pragma warning(disable: 4799)  // No EMMS at end of function
#  endif
#endif

typedef __m64 m64;

// Fallback for configurations where no included header provides _MM_SHUFFLE.
#ifndef _MM_SHUFFLE
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
    (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
#endif

// Broadcasts 16-bit lane 3 of x into all four lanes.  For a pixel unpacked
// by load(), lane 3 holds the most significant byte of the 32-bit value,
// which the functions below treat as the alpha channel.
static inline m64 alpha(m64 x)
{
    return _mm_shuffle_pi16 (x, _MM_SHUFFLE(3, 3, 3, 3));
}

// XORs every lane with 0x00ff; for lane values in [0, 255] this is 255 - v.
static inline m64 _negate(const m64 &x, const m64 &mmx_0x00ff)
{
    return _mm_xor_si64(x, mmx_0x00ff);
}

#define negate(x) _negate(x, mmx_0x00ff)

// Per-lane unsigned 16-bit addition with saturation.
static inline m64 add(const m64 &a, const m64 &b)
{
    return _mm_adds_pu16 (a, b);
}

// Per-lane t = a * b + 0x80, result = (t + (t >> 8)) >> 8, i.e. a rounded
// a * b / 255 for lane values in [0, 255].
static inline m64 _byte_mul(const m64 &a, const m64 &b, const m64 &mmx_0x0080)
{
    m64 res = _mm_mullo_pi16(a, b);
    res = _mm_adds_pu16(res, mmx_0x0080);
    res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
    return _mm_srli_pi16(res, 8);
}

#define byte_mul(a, b) _byte_mul(a, b, mmx_0x0080)

// Per-lane (x * a + y * b) >> 8, using a saturating add for the sum.
static inline m64 interpolate_pixel_256(const m64 &x, const m64 &a,
                                        const m64 &y, const m64 &b)
{
    m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
    return _mm_srli_pi16(res, 8);
}

// Per-lane x * a + y * b followed by the same rounded divide-by-255 sequence
// as _byte_mul().
static inline m64 _interpolate_pixel_255(const m64 &x, const m64 &a,
                                         const m64 &y, const m64 &b,
                                         const m64 &mmx_0x0080)
{
    m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
    res = _mm_adds_pu16(res, mmx_0x0080);
    res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
    return _mm_srli_pi16(res, 8);
}

#define interpolate_pixel_255(x, a, y, b) _interpolate_pixel_255(x, a, y, b, mmx_0x0080)

// Multiplies every lane of x (lane 3 included) by the lane-3 value of x.
static inline m64 _premul(m64 x, const m64 &mmx_0x0080)
{
    m64 a = alpha(x);
    return _byte_mul(x, a, mmx_0x0080);
}

#define premul(x) _premul(x, mmx_0x0080)

// Widens the four bytes of x into four 16-bit lanes by interleaving them
// with zero bytes.
static inline m64 _load(uint x, const m64 &mmx_0x0000)
{
    return _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
}

#define load(x) _load(x, mmx_0x0000)

// Widens x like _load() and then broadcasts lane 0 (the least significant
// byte of x) into all four lanes.  Used to splat a 0..255 alpha value.
static inline m64 _load_alpha(uint x, const m64 &mmx_0x0000)
{
    m64 t = _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
    return _mm_shuffle_pi16 (t, _MM_SHUFFLE(0, 0, 0, 0));
}

#define load_alpha(x) _load_alpha(x, mmx_0x0000)

// Packs the four 16-bit lanes of x into bytes with unsigned saturation and
// returns them as one 32-bit value.
static inline uint _store(const m64 &x, const m64 &mmx_0x0000)
{
    return _mm_cvtsi64_si32(_mm_packs_pu16(x, mmx_0x0000));
}

#define store(x) _store(x, mmx_0x0000)

// Called at the end of each compositing function.  It issues _mm_empty()
// except when __IWMMXT__ is defined, where it is a no-op.
#if defined(__IWMMXT__)
static inline void end_mmx() {}
#else
static inline void end_mmx()
{
    _mm_empty();
}
#endif

// Conventions shared by the compositing functions below:
//   dest        - pixels that are read and overwritten in place
//   src         - a single colour (solid variants) or a per-pixel array
//   length      - number of pixels to process
//   const_alpha - extra opacity; 255 is compared against to pick a fast path
// In the formulas, "ca" is const_alpha, "cia" its negation, "sa"/"da" the
// source/destination alpha and "sia"/"dia" their negations.
// NOTE(review): the formulas presumably assume premultiplied pixels in
// [0, 255] per channel -- confirm against the callers.

/*
  result = 0
  d = d * cia
*/
static void QT_FASTCALL comp_func_solid_Clear(uint *dest, int length, uint, uint const_alpha)
{
    if (!length)
        return;

    if (const_alpha == 255) {
        QT_MEMFILL_UINT(dest, length, 0);
    } else {
        C_FF; C_80; C_00;
        m64 ia = negate(load_alpha(const_alpha));
        for (int i = 0; i < length; ++i) {
            dest[i] = store(byte_mul(load(dest[i]), ia));
        }
    }
    end_mmx();
}

// Per-pixel variant of Clear; the source array is not read.
static void QT_FASTCALL comp_func_Clear(uint *dest, const uint *, int length, uint const_alpha)
{
    if (const_alpha == 255) {
        QT_MEMFILL_UINT(dest, length, 0);
    } else {
        C_FF; C_80; C_00;
        m64 ia = negate(load_alpha(const_alpha));
        for (int i = 0; i < length; ++i)
            dest[i] = store(byte_mul(load(dest[i]), ia));
    }
    end_mmx();
}

/*
  result = s
  dest = s * ca + d * cia
*/
static void QT_FASTCALL comp_func_solid_Source(uint *dest, int length, uint src, uint const_alpha)
{
    if (const_alpha == 255) {
        QT_MEMFILL_UINT(dest, length, src);
    } else {
        C_FF; C_80; C_00;
        const m64 a = load_alpha(const_alpha);
        const m64 ia = negate(a);
        // s * ca does not depend on the destination, so compute it once.
        const m64 s = byte_mul(load(src), a);
        for (int i = 0; i < length; ++i) {
            dest[i] = store(add(s, byte_mul(load(dest[i]), ia)));
        }
        end_mmx();
    }
}

static void QT_FASTCALL comp_func_Source(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (const_alpha == 255) {
        ::memcpy(dest, src, length * sizeof(uint));
    } else {
        C_FF; C_80; C_00;
        const m64 a = load_alpha(const_alpha);
        const m64 ia = negate(a);
        for (int i = 0; i < length; ++i)
            dest[i] = store(interpolate_pixel_255(load(src[i]), a, load(dest[i]), ia));
    }
    end_mmx();
}

/*
  result = s + d * sia
  dest = (s + d * sia) * ca + d * cia
       = s * ca + d * (sia * ca + cia)
       = s * ca + d * (1 - sa*ca)
*/
static void QT_FASTCALL comp_func_solid_SourceOver(uint *dest, int length, uint src, uint const_alpha)
{
    // Both values are at most 255 here, so the AND equals 255 only when the
    // colour is fully opaque and const_alpha is 255: a plain fill suffices.
    if ((const_alpha & qAlpha(src)) == 255) {
        QT_MEMFILL_UINT(dest, length, src);
    } else {
        C_FF; C_80; C_00;
        m64 s = load(src);
        if (const_alpha != 255) {
            m64 ca = load_alpha(const_alpha);
            s = byte_mul(s, ca);
        }
        // Negated alpha of the (possibly scaled) source, shared by all pixels.
        m64 a = negate(alpha(s));
        for (int i = 0; i < length; ++i)
            dest[i] = store(add(s, byte_mul(load(dest[i]), a)));
        end_mmx();
    }
}

static void QT_FASTCALL comp_func_SourceOver(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha == 255) {
        for (int i = 0; i < length; ++i) {
            m64 s = load(src[i]);
            m64 ia = negate(alpha(s));
            dest[i] = store(add(s, byte_mul(load(dest[i]), ia)));
        }
    } else {
        m64 ca = load_alpha(const_alpha);
        for (int i = 0; i < length; ++i) {
            // Scale the source first so that its alpha lane becomes sa * ca.
            m64 s = byte_mul(load(src[i]), ca);
            m64 ia = negate(alpha(s));
            dest[i] = store(add(s, byte_mul(load(dest[i]), ia)));
        }
    }
    end_mmx();
}

/*
  result = d + s * dia
  dest = (d + s * dia) * ca + d * cia
       = d + s * dia * ca
*/
static void QT_FASTCALL comp_func_solid_DestinationOver(uint *dest, int length, uint src, uint const_alpha)
{
    C_FF; C_80; C_00;
    m64 s = load(src);
    if (const_alpha != 255)
        s = byte_mul(s, load_alpha(const_alpha));

    for (int i = 0; i < length; ++i) {
        m64 d = load(dest[i]);
        m64 dia = negate(alpha(d));
        dest[i] = store(add(d, byte_mul(s, dia)));
    }
    end_mmx();
}

static void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha == 255) {
        for (int i = 0; i < length; ++i) {
            m64 d = load(dest[i]);
            m64 ia = negate(alpha(d));
            dest[i] = store(add(d, byte_mul(load(src[i]), ia)));
        }
    } else {
        m64 ca = load_alpha(const_alpha);
        for (int i = 0; i < length; ++i) {
            m64 d = load(dest[i]);
            m64 dia = negate(alpha(d));
            // Fold const_alpha into the weight: dia * ca.
            dia = byte_mul(dia, ca);
            dest[i] = store(add(d, byte_mul(load(src[i]), dia)));
        }
    }
    end_mmx();
}

/*
  result = s * da
  dest = s * da * ca + d * cia
*/
static void QT_FASTCALL comp_func_solid_SourceIn(uint *dest, int length, uint src, uint const_alpha)
{
    C_80; C_00;
    if (const_alpha == 255) {
        m64 s = load(src);
        for (int i = 0; i < length; ++i) {
            m64 da = alpha(load(dest[i]));
            dest[i] = store(byte_mul(s, da));
        }
    } else {
        // negate() is only needed on this path, so C_FF is declared here.
        C_FF;
        m64 s = load(src);
        m64 ca = load_alpha(const_alpha);
        s = byte_mul(s, ca);
        m64 cia = negate(ca);
        for (int i = 0; i < length; ++i) {
            m64 d = load(dest[i]);
            dest[i] = store(interpolate_pixel_255(s, alpha(d), d, cia));
        }
    }
    end_mmx();
}

static void QT_FASTCALL comp_func_SourceIn(uint *dest, const uint *src, int length, uint const_alpha)
{
    C_FF; C_80; C_00;
    if (const_alpha == 255) {
        for (int i = 0; i < length; ++i) {
            m64 a = alpha(load(dest[i]));
            dest[i] = store(byte_mul(load(src[i]), a));
        }
    } else {
        m64 ca = load_alpha(const_alpha);
        m64 cia = negate(ca);
        for (int i = 0; i < length; ++i) {
            m64 d = load(dest[i]);
            m64 da = byte_mul(alpha(d), ca);
            dest[i] = store(interpolate_pixel_255(load(src[i]), da, d, cia));
        }
    }
    // NOTE(review): the visible text ends here.  The rest of this function
    // and the #endif closing the outer #if are not shown and are not
    // reconstructed.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -