mpv_sparc32.s
来自「支持SSL v2/v3, TLS, PKCS #5, PKCS #7, PKCS」· S 代码 · 共 1,817 行 · 第 1/5 页
S
1,817 行
/*
 * The contents of this file are subject to the Mozilla Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is an asm version of SPARC/VIS multiply and add function
 *
 * The Initial Developer of the Original Code is Sun Microsystems Inc.
 * Portions created by Sun Microsystems Inc. are
 * Copyright (C) 1999-2000 Sun Microsystems Inc. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU General Public License Version 2 or later (the
 * "GPL"), in which case the provisions of the GPL are applicable
 * instead of those above. If you wish to allow use of your
 * version of this file only under the terms of the GPL and not to
 * allow others to use your version of this file under the MPL,
 * indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by
 * the GPL. If you do not delete the provisions above, a recipient
 * may use your version of this file under either the MPL or the
 * GPL.
 * $Id: mpv_sparc32.S,v 1.1 2000/09/29 23:38:02 nelsonb%netscape.com Exp $
 */

	.section	".text",#alloc,#execinstr
	.file	"mul_add.c"

! ---------------------------------------------------------------------
! mask_cnst: 8-byte object holding the bit pattern 0x8000000080000000
! (two words of 0x80000000), i.e. the C initializer
!     static t_u64 mask_cnst[] = { 0x8000000080000000ull };
! mul_add loads it with one ldd and uses it as the first operand of
! fxnor against pairs of y[] words.  The .align 8 is required because
! the object is read with a doubleword load.
! ---------------------------------------------------------------------
	.section	".data",#alloc,#write
	.align	8
mask_cnst:
	.word	-2147483648			! 0x80000000 (high word)
	.word	-2147483648			! 0x80000000 (low word)
	.type	mask_cnst,#object
	.size	mask_cnst,8

	.section	".text",#alloc,#execinstr
/* 000000	   0 */		.align	8
!
! CONSTANT POOL
!
! Three IEEE-754 doubles, each emitted as high word, low word
! (SPARC is big-endian, so the high word comes first in memory):
!   +0x00  0x43300000:00000000  =  2^52
!          mul_add copies the high word of this value over a register
!          pair whose low word holds the integer a, then subtracts
!          2^52 again, which yields a as a double.
!   +0x08  0x41DFFFFF:FFC00000  =  2147483647.0  (2^31 - 1)
!          the value `ca` of DEF_VARS; minuend of the fsubd that
!          follows each fitod of an fxnor result.
!   +0x10  0xC3E00000:00000000  = -2^63
!          NOTE(review): presumably the -(1ULL << 63) bias of the
!          MUL_U32_S64_*_E macros; its use lies outside this part of
!          the listing - confirm.
!
.L_const_seg_900000106:
/* 000000	   0 */		.word	1127219200,0
/* 0x0008	     */		.word	1105199103,-4194304
/* 0x0010	     */		.word	-1008730112,0
/* 0x0018	   0 */		.align	4
!
! SUBROUTINE mul_add
!
!
OFFSET SOURCE LINE LABEL INSTRUCTION .global mul_add mul_add:/* 000000 */ sethi %hi(0x2c00),%g1/* 0x0004 */ sethi %hi(mask_cnst),%g2/* 0x0008 */ xor %g1,-584,%g1/* 0x000c */ add %g2,%lo(mask_cnst),%g2/* 0x0010 */ save %sp,%g1,%sp .L900000190:/* 0x0014 */ call .+8/* 0x0018 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000190-.)),%g5! FILE mul_add.c! 1 !/* Copyright (C) 1999, Sun Microsystems, Inc. */! 3 !#include "vis_proto.h"! 5 !/***************************************************************/! 7 !typedef int t_s32;! 8 !typedef unsigned int t_u32;! 9 !#if defined(__sparcv9)! 10 !typedef long t_s64;! 11 !typedef unsigned long t_u64;! 12 !#else! 13 !typedef long long t_s64;! 14 !typedef unsigned long long t_u64;! 15 !#endif! 16 !typedef double t_d64;! 18 !/***************************************************************/! 20 !typedef union {! 21 ! t_d64 d64;! 22 ! struct {! 23 ! t_s32 i0;! 24 ! t_s32 i1;! 25 ! } i32s;! 26 !} d64_2_i32;! 28 !/***************************************************************/! 30 !#define BUFF_SIZE 256! 32 !#define A_BITS 21! 33 !#define A_MASK ((1 << A_BITS) - 1)! 35 !/***************************************************************/! 37 !static t_u64 mask_cnst[] = {! 38 ! 0x8000000080000000ull! 39 !};! 41 !/***************************************************************/! 43 !#define DEF_VARS(N) \! 44 ! t_d64 *py = (t_d64*)y; \! 45 ! t_d64 mask = *((t_d64*)mask_cnst); \! 46 ! t_d64 ca = (1u << 31) - 1; \! 47 ! t_d64 da = (t_d64)a; \! 48 ! t_s64 buff[N], s; \! 49 ! d64_2_i32 dy! 51 !/***************************************************************/! 53 !#define MUL_U32_S64_1(i) \! 54 ! dy.f32s.i0 = vis_fxnors(vis_read_hi(mask), ((t_f32*)y)[0]); \! 55 ! buff[0] = (ca - (t_d64)dy.i32s.i0) * da! 57 !#define MUL_U32_S64_1_D() \! 58 ! dy.f32s.i0 = vis_fxnors(vis_read_hi(mask), ((t_f32*)y)[0]); \! 59 ! d0 = ca - (t_d64)dy.i32s.i0; \! 60 ! buff[0] = (t_s64)(d0 * da); \! 61 ! buff[1] = (t_s64)(d0 * db); \! 63 !#define MUL_U32_S64_1_E(i) \! 64 ! 
dy.f32s.i0 = vis_fxnors(vis_read_hi(mask), ((t_f32*)y)[0]); \! 65 ! d0 = ca - (t_d64)dy.i32s.i0; \! 66 ! buff[0] = (t_s64)(d0 * da); \! 67 ! buff[1] = (t_s64)(d0 * db - (1ULL << 63))! 69 !/************ ***************************************************/! 71 !#define MUL_U32_S64_2(i) \! 72 ! dy.d64 = vis_fxnor(mask, py[i]); \! 73 ! buff[2*(i) ] = (ca - (t_d64)dy.i32s.i0) * da; \! 74 ! buff[2*(i)+1] = (ca - (t_d64)dy.i32s.i1) * da! 76 !#define MUL_U32_S64_2_D(i) \! 77 ! dy.d64 = vis_fxnor(mask, py[i]); \! 78 ! d0 = ca - (t_d64)dy.i32s.i0; \! 79 ! d1 = ca - (t_d64)dy.i32s.i1; \! 80 ! buff[4*(i) ] = (t_s64)(d0 * da); \! 81 ! buff[4*(i)+1] = (t_s64)(d0 * db); \! 82 ! buff[4*(i)+2] = (t_s64)(d1 * da); \! 83 ! buff[4*(i)+3] = (t_s64)(d1 * db)! 85 !#define MUL_U32_S64_2_E(i) \! 86 ! dy.d64 = vis_fxnor(mask, py[i]); \! 87 ! d0 = ca - (t_d64)dy.i32s.i0; \! 88 ! d1 = ca - (t_d64)dy.i32s.i1; \! 89 ! buff[4*(i) ] = (t_s64)(d0 * da); \! 90 ! buff[4*(i)+1] = (t_s64)(d0 * db - (1ULL << 63)); \! 91 ! buff[4*(i)+2] = (t_s64)(d1 * da); \! 92 ! buff[4*(i)+3] = (t_s64)(d1 * db - (1ULL << 63))! 94 !/***************************************************************/! 96 !#define ADD_S64_U32(i) \! 97 ! s = buff[i] + x[i] + c; \! 98 ! z[i] = s; \! 99 ! c = (s >> 32)! 101 !#define ADD_S64_U32_D(i) \! 102 ! s = buff[2*(i)] + buff[2*(i)+1] + x[i] + c; \! 103 ! z[i] = s; \! 104 ! c = (s >> 32)! 106 !#define ADD_S64_U32_E(i) \! 107 ! s = buff[2*(i)] + buff[2*(i)+1] + x[i] + uc + (1ULL << 63); \! 108 ! z[i] = s; \! 109 ! uc = ((t_u64)s >> 32)! 111 !/***************************************************************/! 113 !#define MUL_U32_S64_8(i) \! 114 ! MUL_U32_S64_2(i); \! 115 ! MUL_U32_S64_2(i+1); \! 116 ! MUL_U32_S64_2(i+2); \! 117 ! MUL_U32_S64_2(i+3)! 119 !#define MUL_U32_S64_D_8(i) \! 120 ! MUL_U32_S64_2_D(i); \! 121 ! MUL_U32_S64_2_D(i+1); \! 122 ! MUL_U32_S64_2_D(i+2); \! 123 ! MUL_U32_S64_2_D(i+3)! 125 !#define MUL_U32_S64_E_8(i) \! 126 ! MUL_U32_S64_2_E(i); \! 127 ! 
MUL_U32_S64_2_E(i+1); \! 128 ! MUL_U32_S64_2_E(i+2); \! 129 ! MUL_U32_S64_2_E(i+3)! 131 !/***************************************************************/! 133 !#define ADD_S64_U32_8(i) \! 134 ! ADD_S64_U32(i); \! 135 ! ADD_S64_U32(i+1); \! 136 ! ADD_S64_U32(i+2); \! 137 ! ADD_S64_U32(i+3); \! 138 ! ADD_S64_U32(i+4); \! 139 ! ADD_S64_U32(i+5); \! 140 ! ADD_S64_U32(i+6); \! 141 ! ADD_S64_U32(i+7)! 143 !#define ADD_S64_U32_D_8(i) \! 144 ! ADD_S64_U32_D(i); \! 145 ! ADD_S64_U32_D(i+1); \! 146 ! ADD_S64_U32_D(i+2); \! 147 ! ADD_S64_U32_D(i+3); \! 148 ! ADD_S64_U32_D(i+4); \! 149 ! ADD_S64_U32_D(i+5); \! 150 ! ADD_S64_U32_D(i+6); \! 151 ! ADD_S64_U32_D(i+7)! 153 !#define ADD_S64_U32_E_8(i) \! 154 ! ADD_S64_U32_E(i); \! 155 ! ADD_S64_U32_E(i+1); \! 156 ! ADD_S64_U32_E(i+2); \! 157 ! ADD_S64_U32_E(i+3); \! 158 ! ADD_S64_U32_E(i+4); \! 159 ! ADD_S64_U32_E(i+5); \! 160 ! ADD_S64_U32_E(i+6); \! 161 ! ADD_S64_U32_E(i+7)! 163 !/***************************************************************/! 165 !t_u32 mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)! 166 !{! 167 ! if (a < (1 << A_BITS)) {! 169 ! if (n == 8) {! 170 ! DEF_VARS(8);! 171 ! t_s32 c = 0;! 173 ! 
MUL_U32_S64_8(0);/* 0x001c 173 */ sethi %hi(.L_const_seg_900000106),%g3/* 0x0020 166 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000190-.)),%g5/* 0x0024 */ or %g0,%i4,%o0/* 0x0028 */ st %o0,[%fp+84]/* 0x002c */ add %g5,%o7,%o1/* 0x0030 */ or %g0,%i3,%o2/* 0x0034 */ ld [%o1+%g2],%g2/* 0x0038 173 */ add %g3,%lo(.L_const_seg_900000106),%g3/* 0x003c 167 */ sethi %hi(0x200000),%g4/* 0x0040 173 */ ld [%o1+%g3],%o1/* 0x0044 166 */ or %g0,%i0,%i4/* 0x0048 */ or %g0,%i2,%o7/* 0x004c */ ldd [%g2],%f30/* 0x0050 */ or %g0,%i1,%o4/* 0x0054 167 */ cmp %o0,%g4/* 0x0058 166 */ or %g0,%o2,%o3/* 0x005c 167 */ bcc,pn %icc,.L77000062/* 0x0060 */ or %g0,%i4,%o5/* 0x0064 169 */ cmp %o2,8/* 0x0068 */ bne,pn %icc,.L77000051/* 0x006c */ ld [%fp+84],%f7/* 0x0070 */ ldd [%o7],%f4/* 0x0074 170 */ ldd [%o1],%f8/* 0x0078 */ fxnor %f30,%f4,%f4/* 0x007c 173 */ ldd [%o1+8],%f14/* 0x0080 */ ldd [%o7+8],%f10/* 0x0084 */ fitod %f4,%f12/* 0x0088 */ ldd [%o7+16],%f16/* 0x008c */ fitod %f5,%f4/* 0x0090 */ ldd [%o7+24],%f18/* 0x0094 */ fxnor %f30,%f10,%f10! 174 ! 
ADD_S64_U32_8(0);/* 0x0098 174 */ ld [%i1],%g2/* 0x009c */ ld [%i1+4],%g3/* 0x00a0 */ fxnor %f30,%f16,%f16/* 0x00a4 173 */ fsubd %f14,%f4,%f4/* 0x00a8 174 */ ld [%i1+8],%g4/* 0x00ac */ ld [%i1+16],%o0/* 0x00b0 173 */ fitod %f16,%f20/* 0x00b4 174 */ ld [%i1+12],%g5/* 0x00b8 */ ld [%i1+20],%o1/* 0x00bc */ ld [%i1+24],%o2/* 0x00c0 170 */ fmovs %f8,%f6/* 0x00c4 174 */ ld [%i1+28],%o3/* 0x00c8 170 */ fsubd %f6,%f8,%f6/* 0x00cc 173 */ fsubd %f14,%f12,%f8/* 0x00d0 */ fitod %f10,%f12/* 0x00d4 */ fmuld %f4,%f6,%f4/* 0x00d8 */ fitod %f11,%f10/* 0x00dc */ fmuld %f8,%f6,%f8/* 0x00e0 */ fsubd %f14,%f12,%f12/* 0x00e4 */ fdtox %f4,%f4/* 0x00e8 */ std %f4,[%sp+360]/* 0x00ec */ fdtox %f8,%f8/* 0x00f0 */ std %f8,[%sp+368]/* 0x00f4 */ fmuld %f12,%f6,%f12/* 0x00f8 */ fsubd %f14,%f10,%f10/* 0x00fc */ fsubd %f14,%f20,%f4/* 0x0100 */ fitod %f17,%f8/* 0x0104 174 */ ldx [%sp+368],%o4/* 0x0108 */ fxnor %f30,%f18,%f16/* 0x010c 173 */ fmuld %f10,%f6,%f10/* 0x0110 */ fdtox %f12,%f12/* 0x0114 */ std %f12,[%sp+352]/* 0x0118 */ fmuld %f4,%f6,%f4/* 0x011c */ fitod %f16,%f18/* 0x0120 174 */ add %o4,%g2,%g2/* 0x0124 */ st %g2,[%i4]/* 0x0128 */ ldx [%sp+360],%o4/* 0x012c 173 */ fsubd %f14,%f8,%f8/* 0x0130 174 */ srax %g2,32,%o5/* 0x0134 173 */ fdtox %f10,%f10/* 0x0138 */ std %f10,[%sp+344]/* 0x013c */ fdtox %f4,%f4/* 0x0140 */ std %f4,[%sp+336]/* 0x0144 174 */ add %o4,%g3,%o4/* 0x0148 173 */ fitod %f17,%f12/* 0x014c 174 */ ldx [%sp+352],%g2/* 0x0150 */ add %o4,%o5,%g3/* 0x0154 173 */ fmuld %f8,%f6,%f8/* 0x0158 */ fsubd %f14,%f18,%f10/* 0x015c 174 */ st %g3,[%i4+4]/* 0x0160 */ srax %g3,32,%g3/* 0x0164 */ add %g2,%g4,%g4/* 0x0168 */ ldx [%sp+344],%g2/* 0x016c 173 */ fsubd %f14,%f12,%f4/* 0x0170 174 */ add %g4,%g3,%g3/* 0x0174 */ ldx [%sp+336],%g4/* 0x0178 173 */ fmuld %f10,%f6,%f10/* 0x017c */ fdtox %f8,%f8/* 0x0180 */ std %f8,[%sp+328]/* 0x0184 174 */ add %g4,%o0,%g4/* 0x0188 */ add %g2,%g5,%g2/* 0x018c */ st %g3,[%i4+8]/* 0x0190 173 */ fmuld %f4,%f6,%f4/* 0x0194 174 */ srax %g3,32,%o0/* 0x0198 */ ldx 
[%sp+328],%g5/* 0x019c 173 */ fdtox %f10,%f6/* 0x01a0 */ std %f6,[%sp+320]/* 0x01a4 174 */ add %g2,%o0,%g2/* 0x01a8 */ srax %g2,32,%g3/* 0x01ac */ st %g2,[%i4+12]/* 0x01b0 */ add %g5,%o1,%o1/* 0x01b4 173 */ fdtox %f4,%f4/* 0x01b8 */ std %f4,[%sp+312]/* 0x01bc 174 */ add %g4,%g3,%g3/* 0x01c0 */ srax %g3,32,%g4/* 0x01c4 */ st %g3,[%i4+16]/* 0x01c8 */ ldx [%sp+320],%o0/* 0x01cc */ add %o1,%g4,%g4/* 0x01d0 */ ldx [%sp+312],%g5/* 0x01d4 */ srax %g4,32,%g2/* 0x01d8 */ add %o0,%o2,%o2/* 0x01dc */ st %g4,[%i4+20]/* 0x01e0 */ add %o2,%g2,%g2/* 0x01e4 */ add %g5,%o3,%g5/* 0x01e8 */ st %g2,[%i4+24]/* 0x01ec */ srax %g2,32,%g3/* 0x01f0 */ add %g5,%g3,%g2/* 0x01f4 */ st %g2,[%i4+28]! 176 ! return c;/* 0x01f8 176 */ srax %g2,32,%i0/* 0x01fc */ ret ! Result = %i0/* 0x0200 */ restore %g0,%g0,%g0 .L77000051:! 178 ! } else if (n == 16) {/* 0x0204 178 */ cmp %o2,16/* 0x0208 */ bne,pn %icc,.L77000112/* 0x020c */ ld [%fp+84],%f7/* 0x0210 */ ldd [%o7],%f4! 179 ! DEF_VARS(16);/* 0x0214 179 */ ldd [%o1],%f8! 180 ! t_s32 c = 0;! 182 ! MUL_U32_S64_8(0);/* 0x0218 182 */ ldd [%o1+8],%f14/* 0x021c */ fxnor %f30,%f4,%f4/* 0x0220 */ ldd [%o7+8],%f10/* 0x0224 */ ldd [%o7+16],%f16/* 0x0228 */ fitod %f4,%f12/* 0x022c */ ldd [%o7+24],%f18/* 0x0230 */ fitod %f5,%f4/* 0x0234 */ ldd [%o7+32],%f20/* 0x0238 */ fxnor %f30,%f10,%f10/* 0x023c */ ldd [%o7+40],%f22/* 0x0240 */ fxnor %f30,%f16,%f16
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?