mpv_sparc32.s

来自「支持SSL v2/v3, TLS, PKCS #5, PKCS #7, PKCS」· S 代码 · 共 1,817 行 · 第 1/5 页

S
1,817
字号
/*
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is an asm version of SPARC/VIS multiply and add function
 *
 * The Initial Developer of the Original Code is Sun Microsystems Inc.
 * Portions created by Sun Microsystems Inc. are
 * Copyright (C) 1999-2000 Sun Microsystems Inc.  All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU General Public License Version 2 or later (the
 * "GPL"), in which case the provisions of the GPL are applicable
 * instead of those above.	If you wish to allow use of your
 * version of this file only under the terms of the GPL and not to
 * allow others to use your version of this file under the MPL,
 * indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by
 * the GPL.  If you do not delete the provisions above, a recipient
 * may use your version of this file under either the MPL or the
 * GPL.
 *  $Id: mpv_sparc32.S,v 1.1 2000/09/29 23:38:02 nelsonb%netscape.com Exp $
 */

	.section	".text",#alloc,#execinstr
	.file	"mul_add.c"

/*
 * mask_cnst -- 8-byte, 8-byte-aligned data object with the sign bit
 * (0x80000000) set in each of its two 32-bit words.  This is the
 * assembled form of the C array from the interleaved mul_add.c listing:
 *     static t_u64 mask_cnst[] = { 0x8000000080000000ull };
 * mul_add loads it with a single ldd and uses it as the first operand
 * of fxnor against pairs of words read from y.
 */
	.section	".data",#alloc,#write
	.align	8
mask_cnst:
	.word	0x80000000,0x80000000	! same bit pattern as -2147483648 in each word
	.type	mask_cnst,#object
	.size	mask_cnst,8

/*
 * Floating-point constant pool for mul_add.  Each entry is one IEEE-754
 * double stored as two 32-bit words, most significant word first.
 *
 *   +0x00  0x4330000000000000 =  2^52
 *          mul_add overlays the 32-bit argument a on the low word of a
 *          copy of this value and subtracts the original, which yields
 *          (double)a.
 *   +0x08  0x41dfffffffc00000 =  2147483647.0
 *          the value of ca in the C listing, (1u << 31) - 1.
 *   +0x10  0xc3e0000000000000 = -2^63
 *          NOTE(review): presumably the (1ULL << 63) bias used by the
 *          *_E macros; its use is not visible in this part of the file.
 */
	.section	".text",#alloc,#execinstr
	.align	8
!
! CONSTANT POOL
!
.L_const_seg_900000106:
	.word	0x43300000,0x00000000	! +0x00: 2^52 (was 1127219200,0)
	.word	0x41dfffff,0xffc00000	! +0x08: 2^31 - 1 (was 1105199103,-4194304)
	.word	0xc3e00000,0x00000000	! +0x10: -2^63 (was -1008730112,0)
	.align	4
!
! SUBROUTINE mul_add
!
!
OFFSET    SOURCE LINE	LABEL	INSTRUCTION                       	.global mul_add                       mul_add:/* 000000	     */		sethi	%hi(0x2c00),%g1/* 0x0004	     */		sethi	%hi(mask_cnst),%g2/* 0x0008	     */		xor	%g1,-584,%g1/* 0x000c	     */		add	%g2,%lo(mask_cnst),%g2/* 0x0010	     */		save	%sp,%g1,%sp                       .L900000190:/* 0x0014	     */		call	.+8/* 0x0018	     */		sethi	/*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000190-.)),%g5! FILE mul_add.c!    1		      !/* Copyright (C) 1999, Sun Microsystems, Inc. */!    3		      !#include "vis_proto.h"!    5		      !/***************************************************************/!    7		      !typedef  int                t_s32;!    8		      !typedef  unsigned int       t_u32;!    9		      !#if defined(__sparcv9)!   10		      !typedef  long               t_s64;!   11		      !typedef  unsigned long      t_u64;!   12		      !#else!   13		      !typedef  long long          t_s64;!   14		      !typedef  unsigned long long t_u64;!   15		      !#endif!   16		      !typedef  double             t_d64;!   18		      !/***************************************************************/!   20		      !typedef union {!   21		      !  t_d64 d64;!   22		      !  struct {!   23		      !    t_s32 i0;!   24		      !    t_s32 i1;!   25		      !  } i32s;!   26		      !} d64_2_i32;!   28		      !/***************************************************************/!   30		      !#define BUFF_SIZE  256!   32		      !#define A_BITS  21!   33		      !#define A_MASK  ((1 << A_BITS) - 1)!   35		      !/***************************************************************/!   37		      !static t_u64 mask_cnst[] = {!   38		      !  0x8000000080000000ull!   39		      !};!   41		      !/***************************************************************/!   43		      !#define DEF_VARS(N)                     \!   44		      !  t_d64 *py = (t_d64*)y;                \!   45		      !  t_d64 mask = *((t_d64*)mask_cnst);    \!   46		      !  
t_d64 ca = (1u << 31) - 1;            \!   47		      !  t_d64 da = (t_d64)a;                  \!   48		      !  t_s64 buff[N], s;                     \!   49		      !  d64_2_i32 dy!   51		      !/***************************************************************/!   53		      !#define MUL_U32_S64_1(i)                                        \!   54		      !  dy.f32s.i0 = vis_fxnors(vis_read_hi(mask), ((t_f32*)y)[0]);   \!   55		      !  buff[0] = (ca - (t_d64)dy.i32s.i0) * da!   57		      !#define MUL_U32_S64_1_D()                                       \!   58		      !  dy.f32s.i0 = vis_fxnors(vis_read_hi(mask), ((t_f32*)y)[0]);   \!   59		      !  d0 = ca - (t_d64)dy.i32s.i0;                                  \!   60		      !  buff[0] = (t_s64)(d0 * da);                                   \!   61		      !  buff[1] = (t_s64)(d0 * db);                                   \!   63		      !#define MUL_U32_S64_1_E(i)                                      \!   64		      !  dy.f32s.i0 = vis_fxnors(vis_read_hi(mask), ((t_f32*)y)[0]);   \!   65		      !  d0 = ca - (t_d64)dy.i32s.i0;                                  \!   66		      !  buff[0] = (t_s64)(d0 * da);                                   \!   67		      !  buff[1] = (t_s64)(d0 * db - (1ULL << 63))!   69		      !/************ ***************************************************/!   71		      !#define MUL_U32_S64_2(i)                                \!   72		      !  dy.d64 = vis_fxnor(mask, py[i]);                      \!   73		      !  buff[2*(i)  ] = (ca - (t_d64)dy.i32s.i0) * da;        \!   74		      !  buff[2*(i)+1] = (ca - (t_d64)dy.i32s.i1) * da!   76		      !#define MUL_U32_S64_2_D(i)              \!   77		      !  dy.d64 = vis_fxnor(mask, py[i]);      \!   78		      !  d0 = ca - (t_d64)dy.i32s.i0;          \!   79		      !  d1 = ca - (t_d64)dy.i32s.i1;          \!   80		      !  buff[4*(i)  ] = (t_s64)(d0 * da);     \!   81		      !  buff[4*(i)+1] = (t_s64)(d0 * db);     \!   82		      !  
buff[4*(i)+2] = (t_s64)(d1 * da);     \!   83		      !  buff[4*(i)+3] = (t_s64)(d1 * db)!   85		      !#define MUL_U32_S64_2_E(i)                              \!   86		      !  dy.d64 = vis_fxnor(mask, py[i]);                      \!   87		      !  d0 = ca - (t_d64)dy.i32s.i0;                          \!   88		      !  d1 = ca - (t_d64)dy.i32s.i1;                          \!   89		      !  buff[4*(i)  ] = (t_s64)(d0 * da);                     \!   90		      !  buff[4*(i)+1] = (t_s64)(d0 * db - (1ULL << 63));      \!   91		      !  buff[4*(i)+2] = (t_s64)(d1 * da);                     \!   92		      !  buff[4*(i)+3] = (t_s64)(d1 * db - (1ULL << 63))!   94		      !/***************************************************************/!   96		      !#define ADD_S64_U32(i)          \!   97		      !  s = buff[i] + x[i] + c;       \!   98		      !  z[i] = s;                     \!   99		      !  c = (s >> 32)!  101		      !#define ADD_S64_U32_D(i)                        \!  102		      !  s = buff[2*(i)] + buff[2*(i)+1] + x[i] + c;   \!  103		      !  z[i] = s;                                     \!  104		      !  c = (s >> 32)!  106		      !#define ADD_S64_U32_E(i)                                        \!  107		      !  s =  buff[2*(i)] + buff[2*(i)+1] + x[i] + uc + (1ULL << 63);  \!  108		      !  z[i] = s;                                                     \!  109		      !  uc = ((t_u64)s >> 32)!  111		      !/***************************************************************/!  113		      !#define MUL_U32_S64_8(i)        \!  114		      !  MUL_U32_S64_2(i);             \!  115		      !  MUL_U32_S64_2(i+1);           \!  116		      !  MUL_U32_S64_2(i+2);           \!  117		      !  MUL_U32_S64_2(i+3)!  119		      !#define MUL_U32_S64_D_8(i)      \!  120		      !  MUL_U32_S64_2_D(i);           \!  121		      !  MUL_U32_S64_2_D(i+1);         \!  122		      !  MUL_U32_S64_2_D(i+2);         \!  123		      !  MUL_U32_S64_2_D(i+3)!  125		      !#define MUL_U32_S64_E_8(i)      \!  
126		      !  MUL_U32_S64_2_E(i);           \!  127		      !  MUL_U32_S64_2_E(i+1);         \!  128		      !  MUL_U32_S64_2_E(i+2);         \!  129		      !  MUL_U32_S64_2_E(i+3)!  131		      !/***************************************************************/!  133		      !#define ADD_S64_U32_8(i)        \!  134		      !  ADD_S64_U32(i);               \!  135		      !  ADD_S64_U32(i+1);             \!  136		      !  ADD_S64_U32(i+2);             \!  137		      !  ADD_S64_U32(i+3);             \!  138		      !  ADD_S64_U32(i+4);             \!  139		      !  ADD_S64_U32(i+5);             \!  140		      !  ADD_S64_U32(i+6);             \!  141		      !  ADD_S64_U32(i+7)!  143		      !#define ADD_S64_U32_D_8(i)      \!  144		      !  ADD_S64_U32_D(i);             \!  145		      !  ADD_S64_U32_D(i+1);           \!  146		      !  ADD_S64_U32_D(i+2);           \!  147		      !  ADD_S64_U32_D(i+3);           \!  148		      !  ADD_S64_U32_D(i+4);           \!  149		      !  ADD_S64_U32_D(i+5);           \!  150		      !  ADD_S64_U32_D(i+6);           \!  151		      !  ADD_S64_U32_D(i+7)!  153		      !#define ADD_S64_U32_E_8(i)      \!  154		      !  ADD_S64_U32_E(i);             \!  155		      !  ADD_S64_U32_E(i+1);           \!  156		      !  ADD_S64_U32_E(i+2);           \!  157		      !  ADD_S64_U32_E(i+3);           \!  158		      !  ADD_S64_U32_E(i+4);           \!  159		      !  ADD_S64_U32_E(i+5);           \!  160		      !  ADD_S64_U32_E(i+6);           \!  161		      !  ADD_S64_U32_E(i+7)!  163		      !/***************************************************************/!  165		      !t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)!  166		      !{!  167		      !  if (a < (1 << A_BITS)) {!  169		      !    if (n == 8) {!  170		      !      DEF_VARS(8);!  171		      !      t_s32 c = 0;!  173		      !      
MUL_U32_S64_8(0);/* 0x001c	 173 */		sethi	%hi(.L_const_seg_900000106),%g3/* 0x0020	 166 */		add	%g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000190-.)),%g5/* 0x0024	     */		or	%g0,%i4,%o0/* 0x0028	     */		st	%o0,[%fp+84]/* 0x002c	     */		add	%g5,%o7,%o1/* 0x0030	     */		or	%g0,%i3,%o2/* 0x0034	     */		ld	[%o1+%g2],%g2/* 0x0038	 173 */		add	%g3,%lo(.L_const_seg_900000106),%g3/* 0x003c	 167 */		sethi	%hi(0x200000),%g4/* 0x0040	 173 */		ld	[%o1+%g3],%o1/* 0x0044	 166 */		or	%g0,%i0,%i4/* 0x0048	     */		or	%g0,%i2,%o7/* 0x004c	     */		ldd	[%g2],%f30/* 0x0050	     */		or	%g0,%i1,%o4/* 0x0054	 167 */		cmp	%o0,%g4/* 0x0058	 166 */		or	%g0,%o2,%o3/* 0x005c	 167 */		bcc,pn	%icc,.L77000062/* 0x0060	     */		or	%g0,%i4,%o5/* 0x0064	 169 */		cmp	%o2,8/* 0x0068	     */		bne,pn	%icc,.L77000051/* 0x006c	     */		ld	[%fp+84],%f7/* 0x0070	     */		ldd	[%o7],%f4/* 0x0074	 170 */		ldd	[%o1],%f8/* 0x0078	     */		fxnor	%f30,%f4,%f4/* 0x007c	 173 */		ldd	[%o1+8],%f14/* 0x0080	     */		ldd	[%o7+8],%f10/* 0x0084	     */		fitod	%f4,%f12/* 0x0088	     */		ldd	[%o7+16],%f16/* 0x008c	     */		fitod	%f5,%f4/* 0x0090	     */		ldd	[%o7+24],%f18/* 0x0094	     */		fxnor	%f30,%f10,%f10!  174		      !      
ADD_S64_U32_8(0);/* 0x0098	 174 */		ld	[%i1],%g2/* 0x009c	     */		ld	[%i1+4],%g3/* 0x00a0	     */		fxnor	%f30,%f16,%f16/* 0x00a4	 173 */		fsubd	%f14,%f4,%f4/* 0x00a8	 174 */		ld	[%i1+8],%g4/* 0x00ac	     */		ld	[%i1+16],%o0/* 0x00b0	 173 */		fitod	%f16,%f20/* 0x00b4	 174 */		ld	[%i1+12],%g5/* 0x00b8	     */		ld	[%i1+20],%o1/* 0x00bc	     */		ld	[%i1+24],%o2/* 0x00c0	 170 */		fmovs	%f8,%f6/* 0x00c4	 174 */		ld	[%i1+28],%o3/* 0x00c8	 170 */		fsubd	%f6,%f8,%f6/* 0x00cc	 173 */		fsubd	%f14,%f12,%f8/* 0x00d0	     */		fitod	%f10,%f12/* 0x00d4	     */		fmuld	%f4,%f6,%f4/* 0x00d8	     */		fitod	%f11,%f10/* 0x00dc	     */		fmuld	%f8,%f6,%f8/* 0x00e0	     */		fsubd	%f14,%f12,%f12/* 0x00e4	     */		fdtox	%f4,%f4/* 0x00e8	     */		std	%f4,[%sp+360]/* 0x00ec	     */		fdtox	%f8,%f8/* 0x00f0	     */		std	%f8,[%sp+368]/* 0x00f4	     */		fmuld	%f12,%f6,%f12/* 0x00f8	     */		fsubd	%f14,%f10,%f10/* 0x00fc	     */		fsubd	%f14,%f20,%f4/* 0x0100	     */		fitod	%f17,%f8/* 0x0104	 174 */		ldx	[%sp+368],%o4/* 0x0108	     */		fxnor	%f30,%f18,%f16/* 0x010c	 173 */		fmuld	%f10,%f6,%f10/* 0x0110	     */		fdtox	%f12,%f12/* 0x0114	     */		std	%f12,[%sp+352]/* 0x0118	     */		fmuld	%f4,%f6,%f4/* 0x011c	     */		fitod	%f16,%f18/* 0x0120	 174 */		add	%o4,%g2,%g2/* 0x0124	     */		st	%g2,[%i4]/* 0x0128	     */		ldx	[%sp+360],%o4/* 0x012c	 173 */		fsubd	%f14,%f8,%f8/* 0x0130	 174 */		srax	%g2,32,%o5/* 0x0134	 173 */		fdtox	%f10,%f10/* 0x0138	     */		std	%f10,[%sp+344]/* 0x013c	     */		fdtox	%f4,%f4/* 0x0140	     */		std	%f4,[%sp+336]/* 0x0144	 174 */		add	%o4,%g3,%o4/* 0x0148	 173 */		fitod	%f17,%f12/* 0x014c	 174 */		ldx	[%sp+352],%g2/* 0x0150	     */		add	%o4,%o5,%g3/* 0x0154	 173 */		fmuld	%f8,%f6,%f8/* 0x0158	     */		fsubd	%f14,%f18,%f10/* 0x015c	 174 */		st	%g3,[%i4+4]/* 0x0160	     */		srax	%g3,32,%g3/* 0x0164	     */		add	%g2,%g4,%g4/* 0x0168	     */		ldx	[%sp+344],%g2/* 0x016c	 173 */		fsubd	%f14,%f12,%f4/* 0x0170	 174 */		add	%g4,%g3,%g3/* 0x0174	     */		ldx	[%sp+336],%g4/* 0x0178	 
173 */		fmuld	%f10,%f6,%f10/* 0x017c	     */		fdtox	%f8,%f8/* 0x0180	     */		std	%f8,[%sp+328]/* 0x0184	 174 */		add	%g4,%o0,%g4/* 0x0188	     */		add	%g2,%g5,%g2/* 0x018c	     */		st	%g3,[%i4+8]/* 0x0190	 173 */		fmuld	%f4,%f6,%f4/* 0x0194	 174 */		srax	%g3,32,%o0/* 0x0198	     */		ldx	[%sp+328],%g5/* 0x019c	 173 */		fdtox	%f10,%f6/* 0x01a0	     */		std	%f6,[%sp+320]/* 0x01a4	 174 */		add	%g2,%o0,%g2/* 0x01a8	     */		srax	%g2,32,%g3/* 0x01ac	     */		st	%g2,[%i4+12]/* 0x01b0	     */		add	%g5,%o1,%o1/* 0x01b4	 173 */		fdtox	%f4,%f4/* 0x01b8	     */		std	%f4,[%sp+312]/* 0x01bc	 174 */		add	%g4,%g3,%g3/* 0x01c0	     */		srax	%g3,32,%g4/* 0x01c4	     */		st	%g3,[%i4+16]/* 0x01c8	     */		ldx	[%sp+320],%o0/* 0x01cc	     */		add	%o1,%g4,%g4/* 0x01d0	     */		ldx	[%sp+312],%g5/* 0x01d4	     */		srax	%g4,32,%g2/* 0x01d8	     */		add	%o0,%o2,%o2/* 0x01dc	     */		st	%g4,[%i4+20]/* 0x01e0	     */		add	%o2,%g2,%g2/* 0x01e4	     */		add	%g5,%o3,%g5/* 0x01e8	     */		st	%g2,[%i4+24]/* 0x01ec	     */		srax	%g2,32,%g3/* 0x01f0	     */		add	%g5,%g3,%g2/* 0x01f4	     */		st	%g2,[%i4+28]!  176		      !      return c;/* 0x01f8	 176 */		srax	%g2,32,%i0/* 0x01fc	     */		ret	! Result =  %i0/* 0x0200	     */		restore	%g0,%g0,%g0                       .L77000051:!  178		      !    } else if (n == 16) {/* 0x0204	 178 */		cmp	%o2,16/* 0x0208	     */		bne,pn	%icc,.L77000112/* 0x020c	     */		ld	[%fp+84],%f7/* 0x0210	     */		ldd	[%o7],%f4!  179		      !      DEF_VARS(16);/* 0x0214	 179 */		ldd	[%o1],%f8!  180		      !      t_s32 c = 0;!  182		      !      MUL_U32_S64_8(0);/* 0x0218	 182 */		ldd	[%o1+8],%f14/* 0x021c	     */		fxnor	%f30,%f4,%f4/* 0x0220	     */		ldd	[%o7+8],%f10/* 0x0224	     */		ldd	[%o7+16],%f16/* 0x0228	     */		fitod	%f4,%f12/* 0x022c	     */		ldd	[%o7+24],%f18/* 0x0230	     */		fitod	%f5,%f4/* 0x0234	     */		ldd	[%o7+32],%f20/* 0x0238	     */		fxnor	%f30,%f10,%f10/* 0x023c	     */		ldd	[%o7+40],%f22/* 0x0240	     */		fxnor	%f30,%f16,%f16

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?