bn_mul.h
/**
 * \file bn_mul.h
 */
/*
 * Multiply source vector [s] with b, add result
 * to destination vector [d] and set carry c.
 *
 * Currently supports:
 *
 *   . IA-32 (386+)        . AMD64 / EM64T
 *   . IA-32 (SSE2)        . Motorola 68000
 *   . PowerPC, 32-bit     . MicroBlaze
 *   . PowerPC, 64-bit     . TriCore
 *   . SPARC v8            . ARM v3+
 *   . Alpha               . MIPS32
 *   . C, longlong         . C, generic
 */
#ifndef XYSSL_BN_MUL_H
#define XYSSL_BN_MUL_H

#include "config.h"

#if defined(XYSSL_HAVE_ASM)

#if defined(__GNUC__)
#if defined(__i386__)

#define MULADDC_INIT                        \
    asm( "movl %%ebx, %0 " : "=m" (t));     \
    asm( "movl %0, %%esi " :: "m" (s));     \
    asm( "movl %0, %%edi " :: "m" (d));     \
    asm( "movl %0, %%ecx " :: "m" (c));     \
    asm( "movl %0, %%ebx " :: "m" (b));

#define MULADDC_CORE                        \
    asm( "lodsl " );                        \
    asm( "mull %ebx " );                    \
    asm( "addl %ecx, %eax " );              \
    asm( "adcl $0, %edx " );                \
    asm( "addl (%edi), %eax " );            \
    asm( "adcl $0, %edx " );                \
    asm( "movl %edx, %ecx " );              \
    asm( "stosl " );

#define MULADDC_STOP                        \
    asm( "movl %0, %%ebx " :: "m" (t));     \
    asm( "movl %%ecx, %0 " : "=m" (c));     \
    asm( "movl %%edi, %0 " : "=m" (d));     \
    asm( "movl %%esi, %0 " : "=m" (s) ::    \
         "eax", "ecx", "edx", "esi", "edi" );

#if defined(XYSSL_HAVE_SSE2)

#define MULADDC_HUIT                        \
    asm( "movd %ecx, %mm1 " );              \
    asm( "movd %ebx, %mm0 " );              \
    asm( "movd (%edi), %mm3 " );            \
    asm( "paddq %mm3, %mm1 " );             \
    asm( "movd (%esi), %mm2 " );            \
    asm( "pmuludq %mm0, %mm2 " );           \
    asm( "movd 4(%esi), %mm4 " );           \
    asm( "pmuludq %mm0, %mm4 " );           \
    asm( "movd 8(%esi), %mm6 " );           \
    asm( "pmuludq %mm0, %mm6 " );           \
    asm( "movd 12(%esi), %mm7 " );          \
    asm( "pmuludq %mm0, %mm7 " );           \
    asm( "paddq %mm2, %mm1 " );             \
    asm( "movd 4(%edi), %mm3 " );           \
    asm( "paddq %mm4, %mm3 " );             \
    asm( "movd 8(%edi), %mm5 " );           \
    asm( "paddq %mm6, %mm5 " );             \
    asm( "movd 12(%edi), %mm4 " );          \
    asm( "paddq %mm4, %mm7 " );             \
    asm( "movd %mm1, (%edi) " );            \
    asm( "movd 16(%esi), %mm2 " );          \
    asm( "pmuludq %mm0, %mm2 " );           \
    asm( "psrlq $32, %mm1 " );              \
    asm( "movd 20(%esi), %mm4 " );          \
    asm( "pmuludq %mm0, %mm4 " );           \
    asm( "paddq %mm3, %mm1 " );             \
    asm( "movd 24(%esi), %mm6 " );          \
    asm( "pmuludq %mm0, %mm6 " );           \
    asm( "movd %mm1, 4(%edi) " );           \
    asm( "psrlq $32, %mm1 " );              \
    asm( "movd 28(%esi), %mm3 " );          \
    asm( "pmuludq %mm0, %mm3 " );           \
    asm( "paddq %mm5, %mm1 " );             \
    asm( "movd 16(%edi), %mm5 " );          \
    asm( "paddq %mm5, %mm2 " );             \
    asm( "movd %mm1, 8(%edi) " );           \
    asm( "psrlq $32, %mm1 " );              \
    asm( "paddq %mm7, %mm1 " );             \
    asm( "movd 20(%edi), %mm5 " );          \
    asm( "paddq %mm5, %mm4 " );             \
    asm( "movd %mm1, 12(%edi) " );          \
    asm( "psrlq $32, %mm1 " );              \
    asm( "paddq %mm2, %mm1 " );             \
    asm( "movd 24(%edi), %mm5 " );          \
    asm( "paddq %mm5, %mm6 " );             \
    asm( "movd %mm1, 16(%edi) " );          \
    asm( "psrlq $32, %mm1 " );              \
    asm( "paddq %mm4, %mm1 " );             \
    asm( "movd 28(%edi), %mm5 " );          \
    asm( "paddq %mm5, %mm3 " );             \
    asm( "movd %mm1, 20(%edi) " );          \
    asm( "psrlq $32, %mm1 " );              \
    asm( "paddq %mm6, %mm1 " );             \
    asm( "movd %mm1, 24(%edi) " );          \
    asm( "psrlq $32, %mm1 " );              \
    asm( "paddq %mm3, %mm1 " );             \
    asm( "movd %mm1, 28(%edi) " );          \
    asm( "addl $32, %edi " );               \
    asm( "addl $32, %esi " );               \
    asm( "psrlq $32, %mm1 " );              \
    asm( "movd %mm1, %ecx " );

#endif /* SSE2 */
#endif /* i386 */

#if defined(__amd64__) || defined(__x86_64__)

#define MULADDC_INIT                        \
    asm( "movq %0, %%rsi " :: "m" (s));     \
    asm( "movq %0, %%rdi " :: "m" (d));     \
    asm( "movq %0, %%rcx " :: "m" (c));     \
    asm( "movq %0, %%rbx " :: "m" (b));     \
    asm( "xorq %r8, %r8 " );

#define MULADDC_CORE                        \
    asm( "movq (%rsi), %rax " );            \
    asm( "mulq %rbx " );                    \
    asm( "addq $8, %rsi " );                \
    asm( "addq %rcx, %rax " );              \
    asm( "movq %r8, %rcx " );               \
    asm( "adcq $0, %rdx " );                \
    asm( "nop " );                          \
    asm( "addq %rax, (%rdi) " );            \
    asm( "adcq %rdx, %rcx " );              \
    asm( "addq $8, %rdi " );

#define MULADDC_STOP                        \
    asm( "movq %%rcx, %0 " : "=m" (c));     \
    asm( "movq %%rdi, %0 " : "=m" (d));     \
    asm( "movq %%rsi, %0 " : "=m" (s) ::    \
         "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8" );

#endif /* AMD64 */

#if defined(__mc68020__) || defined(__mcpu32__)

#define MULADDC_INIT                        \
    asm( "movl %0, %%a2 " :: "m" (s));      \
    asm( "movl %0, %%a3 " :: "m" (d));      \
    asm( "movl %0, %%d3 " :: "m" (c));      \
    asm( "movl %0, %%d2 " :: "m" (b));      \
    asm( "moveq #0, %d0 " );

#define MULADDC_CORE                        \
    asm( "movel %a2@+, %d1 " );             \
    asm( "mulul %d2, %d4:%d1 " );           \
    asm( "addl %d3, %d1 " );                \
    asm( "addxl %d0, %d4 " );               \
    asm( "moveq #0, %d3 " );                \
    asm( "addl %d1, %a3@+ " );              \
    asm( "addxl %d4, %d3 " );

#define MULADDC_STOP                        \
    asm( "movl %%d3, %0 " : "=m" (c));      \
    asm( "movl %%a3, %0 " : "=m" (d));      \
    asm( "movl %%a2, %0 " : "=m" (s) ::     \
         "d0", "d1", "d2", "d3", "d4", "a2", "a3" );

#define MULADDC_HUIT                        \
    asm( "movel %a2@+, %d1 " );             \
    asm( "mulul %d2, %d4:%d1 " );           \
    asm( "addxl %d3, %d1 " );               \
    asm( "addxl %d0, %d4 " );               \
    asm( "addl %d1, %a3@+ " );              \
    asm( "movel %a2@+, %d1 " );             \
    asm( "mulul %d2, %d3:%d1 " );           \
    asm( "addxl %d4, %d1 " );               \
    asm( "addxl %d0, %d3 " );               \
    asm( "addl %d1, %a3@+ " );              \
    asm( "movel %a2@+, %d1 " );             \
    asm( "mulul %d2, %d4:%d1 " );           \
    asm( "addxl %d3, %d1 " );               \
    asm( "addxl %d0, %d4 " );               \
    asm( "addl %d1, %a3@+ " );              \
    asm( "movel %a2@+, %d1 " );             \
    asm( "mulul %d2, %d3:%d1 " );           \
    asm( "addxl %d4, %d1 " );               \
    asm( "addxl %d0, %d3 " );               \
    asm( "addl %d1, %a3@+ " );              \
    asm( "movel %a2@+, %d1 " );             \
    asm( "mulul %d2, %d4:%d1 " );           \
    asm( "addxl %d3, %d1 " );               \
    asm( "addxl %d0, %d4 " );               \
    asm( "addl %d1, %a3@+ " );              \
    asm( "movel %a2@+, %d1 " );             \
    asm( "mulul %d2, %d3:%d1 " );           \
    asm( "addxl %d4, %d1 " );               \
    asm( "addxl %d0, %d3 " );               \
    asm( "addl %d1, %a3@+ " );              \
    asm( "movel %a2@+, %d1 " );             \
    asm( "mulul %d2, %d4:%d1 " );           \
    asm( "addxl %d3, %d1 " );               \
    asm( "addxl %d0, %d4 " );               \
    asm( "addl %d1, %a3@+ " );              \
    asm( "movel %a2@+, %d1 " );             \
    asm( "mulul %d2, %d3:%d1 " );           \
    asm( "addxl %d4, %d1 " );               \
    asm( "addxl %d0, %d3 " );               \
    asm( "addl %d1, %a3@+ " );              \
    asm( "addxl %d0, %d3 " );

#endif /* MC68000 */

#if defined(__powerpc__) || defined(__ppc__)
#if defined(__powerpc64__) || defined(__ppc64__)

#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_INIT                        \
    asm( "ld r3, %0 " :: "m" (s));          \
    asm( "ld r4, %0 " :: "m" (d));          \
    asm( "ld r5, %0 " :: "m" (c));          \
    asm( "ld r6, %0 " :: "m" (b));          \
    asm( "addi r3, r3, -8 " );              \
    asm( "addi r4, r4, -8 " );              \
    asm( "addic r5, r5, 0 " );

#define MULADDC_CORE                        \
    asm( "ldu r7, 8(r3) " );                \
    asm( "mulld r8, r7, r6 " );             \
    asm( "mulhdu r9, r7, r6 " );            \
    asm( "adde r8, r8, r5 " );              \
    asm( "ld r7, 8(r4) " );                 \
    asm( "addze r5, r9 " );                 \
    asm( "addc r8, r8, r7 " );              \
    asm( "stdu r8, 8(r4) " );

#define MULADDC_STOP                        \
    asm( "addze r5, r5 " );                 \
    asm( "addi r4, r4, 8 " );               \
    asm( "addi r3, r3, 8 " );               \
    asm( "std r5, %0 " : "=m" (c));         \
    asm( "std r4, %0 " : "=m" (d));         \
    asm( "std r3, %0 " : "=m" (s) ::        \
         "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

#define MULADDC_INIT                        \
    asm( "ld %%r3, %0 " :: "m" (s));        \
    asm( "ld %%r4, %0 " :: "m" (d));        \
    asm( "ld %%r5, %0 " :: "m" (c));        \
    asm( "ld %%r6, %0 " :: "m" (b));        \
    asm( "addi %r3, %r3, -8 " );            \
    asm( "addi %r4, %r4, -8 " );            \
    asm( "addic %r5, %r5, 0 " );

#define MULADDC_CORE                        \
    asm( "ldu %r7, 8(%r3) " );              \
    asm( "mulld %r8, %r7, %r6 " );          \
    asm( "mulhdu %r9, %r7, %r6 " );         \
    asm( "adde %r8, %r8, %r5 " );           \
    asm( "ld %r7, 8(%r4) " );               \
    asm( "addze %r5, %r9 " );               \
    asm( "addc %r8, %r8, %r7 " );           \
    asm( "stdu %r8, 8(%r4) " );

#define MULADDC_STOP                        \
    asm( "addze %r5, %r5 " );               \
    asm( "addi %r4, %r4, 8 " );             \
    asm( "addi %r3, %r3, 8 " );             \
    asm( "std %%r5, %0 " : "=m" (c));       \
    asm( "std %%r4, %0 " : "=m" (d));       \
    asm( "std %%r3, %0 " : "=m" (s) ::      \
         "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#endif

#else /* PPC32 */

#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_INIT                        \
    asm( "lwz r3, %0 " :: "m" (s));         \
    asm( "lwz r4, %0 " :: "m" (d));         \
    asm( "lwz r5, %0 " :: "m" (c));         \
    asm( "lwz r6, %0 " :: "m" (b));         \
    asm( "addi r3, r3, -4 " );              \
    asm( "addi r4, r4, -4 " );              \
    asm( "addic r5, r5, 0 " );

#define MULADDC_CORE                        \
    asm( "lwzu r7, 4(r3) " );               \
    asm( "mullw r8, r7, r6 " );             \
    asm( "mulhwu r9, r7, r6 " );            \
    asm( "adde r8, r8, r5 " );              \
    asm( "lwz r7, 4(r4) " );                \
    asm( "addze r5, r9 " );                 \
    asm( "addc r8, r8, r7 " );              \
    asm( "stwu r8, 4(r4) " );

#define MULADDC_STOP                        \
    asm( "addze r5, r5 " );                 \
    asm( "addi r4, r4, 4 " );               \
    asm( "addi r3, r3, 4 " );               \
    asm( "stw r5, %0 " : "=m" (c));         \
    asm( "stw r4, %0 " : "=m" (d));         \
    asm( "stw r3, %0 " : "=m" (s) ::        \
         "r3", "r4", "r5", "r6", "r7", "r8", "r9" );

#else

#define MULADDC_INIT                        \
    asm( "lwz %%r3, %0 " :: "m" (s));       \
    asm( "lwz %%r4, %0 " :: "m" (d));       \
    asm( "lwz %%r5, %0 " :: "m" (c));       \
    asm( "lwz %%r6, %0 " :: "m" (b));       \
    asm( "addi %r3, %r3, -4 " );            \
    asm( "addi %r4, %r4, -4 " );            \
    asm( "addic %r5, %r5, 0 " );

#define MULADDC_CORE                        \
    asm( "lwzu %r7, 4(%r3) " );             \
    asm( "mullw %r8, %r7, %r6 " );          \
    asm( "mulhwu %r9, %r7, %r6 " );         \
    asm( "adde %r8, %r8, %r5 " );           \
    asm( "lwz %r7, 4(%r4) " );              \
    asm( "addze %r5, %r9 " );               \
    asm( "addc %r8, %r8, %r7 " );           \
    asm( "stwu %r8, 4(%r4) " );

#define MULADDC_STOP                        \
    asm( "addze %r5, %r5 " );               \
    asm( "addi %r4, %r4, 4 " );             \
    asm( "addi %r3, %r3, 4 " );             \
    asm( "stw %%r5, %0 " : "=m" (c));       \
    asm( "stw %%r4, %0 " : "=m" (d));       \
    asm( "stw %%r3, %0 " : "=m" (s) ::      \
         "r3", "r4", "r5", "r6", "r7", "r8", "r9" );