📄 xor.h
字号:
/* * include/asm-i386/xor.h * * Optimized RAID-5 checksumming functions for MMX and SSE. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * You should have received a copy of the GNU General Public License * (for example /usr/src/linux/COPYING); if not, write to the Free * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * High-speed RAID5 checksumming functions utilizing MMX instructions. * Copyright (C) 1998 Ingo Molnar. */#define FPU_SAVE \ do { \ if (!(current->flags & PF_USEDFPU)) \ __asm__ __volatile__ (" clts;\n"); \ __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0])); \ } while (0)#define FPU_RESTORE \ do { \ __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0])); \ if (!(current->flags & PF_USEDFPU)) \ stts(); \ } while (0)#define LD(x,y) " movq 8*("#x")(%1), %%mm"#y" ;\n"#define ST(x,y) " movq %%mm"#y", 8*("#x")(%1) ;\n"#define XO1(x,y) " pxor 8*("#x")(%2), %%mm"#y" ;\n"#define XO2(x,y) " pxor 8*("#x")(%3), %%mm"#y" ;\n"#define XO3(x,y) " pxor 8*("#x")(%4), %%mm"#y" ;\n"#define XO4(x,y) " pxor 8*("#x")(%5), %%mm"#y" ;\n"static voidxor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2){ unsigned long lines = bytes >> 7; char fpu_save[108]; FPU_SAVE; __asm__ __volatile__ (#undef BLOCK#define BLOCK(i) \ LD(i,0) \ LD(i+1,1) \ LD(i+2,2) \ LD(i+3,3) \ XO1(i,0) \ ST(i,0) \ XO1(i+1,1) \ ST(i+1,1) \ XO1(i+2,2) \ ST(i+2,2) \ XO1(i+3,3) \ ST(i+3,3) " .align 32 ;\n" " 1: ;\n" BLOCK(0) BLOCK(4) BLOCK(8) BLOCK(12) " addl $128, %1 ;\n" " addl $128, %2 ;\n" " decl %0 ;\n" " jnz 1b ;\n" : : "r" (lines), "r" (p1), "r" (p2) : "memory"); FPU_RESTORE;}static voidxor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3){ unsigned long lines = bytes >> 7; char fpu_save[108]; FPU_SAVE; __asm__ __volatile__ (#undef BLOCK#define BLOCK(i) \ LD(i,0) \ LD(i+1,1) \ LD(i+2,2) \ LD(i+3,3) \ XO1(i,0) \ XO1(i+1,1) \ XO1(i+2,2) \ XO1(i+3,3) \ XO2(i,0) \ ST(i,0) \ XO2(i+1,1) \ ST(i+1,1) \ XO2(i+2,2) \ ST(i+2,2) \ XO2(i+3,3) \ ST(i+3,3) " .align 32 ;\n" " 1: ;\n" BLOCK(0) BLOCK(4) BLOCK(8) BLOCK(12) " addl $128, %1 ;\n" " addl $128, %2 ;\n" " addl $128, %3 ;\n" " decl %0 ;\n" " jnz 1b ;\n" : : "r" (lines), "r" (p1), "r" (p2), "r" (p3) : "memory"); FPU_RESTORE;}static voidxor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4){ unsigned long lines = bytes >> 7; char fpu_save[108]; FPU_SAVE; __asm__ __volatile__ (#undef BLOCK#define BLOCK(i) \ LD(i,0) \ LD(i+1,1) \ LD(i+2,2) \ LD(i+3,3) \ XO1(i,0) \ XO1(i+1,1) \ XO1(i+2,2) \ XO1(i+3,3) \ XO2(i,0) \ XO2(i+1,1) \ XO2(i+2,2) \ XO2(i+3,3) \ XO3(i,0) \ ST(i,0) \ XO3(i+1,1) \ ST(i+1,1) \ XO3(i+2,2) \ ST(i+2,2) \ XO3(i+3,3) \ ST(i+3,3) " .align 32 ;\n" " 1: ;\n" BLOCK(0) BLOCK(4) BLOCK(8) BLOCK(12) " addl $128, %1 ;\n" " addl $128, %2 ;\n" " addl $128, %3 ;\n" " addl $128, %4 ;\n" " decl %0 ;\n" " jnz 1b ;\n" : : "r" (lines), "r" (p1), "r" (p2), "r" (p3), "r" (p4) : "memory"); FPU_RESTORE;}static voidxor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5){ unsigned long lines = bytes >> 7; char fpu_save[108]; FPU_SAVE; __asm__ __volatile__ (#undef BLOCK#define BLOCK(i) \ LD(i,0) \ LD(i+1,1) \ LD(i+2,2) \ LD(i+3,3) \ XO1(i,0) \ XO1(i+1,1) \ XO1(i+2,2) \ XO1(i+3,3) \ XO2(i,0) \ XO2(i+1,1) \ XO2(i+2,2) \ XO2(i+3,3) \ XO3(i,0) \ XO3(i+1,1) \ XO3(i+2,2) \ XO3(i+3,3) \ XO4(i,0) \ ST(i,0) \ XO4(i+1,1) \ ST(i+1,1) \ XO4(i+2,2) \ ST(i+2,2) \ XO4(i+3,3) \ ST(i+3,3) " .align 32 ;\n" " 1: ;\n" BLOCK(0) BLOCK(4) BLOCK(8) BLOCK(12) " addl $128, %1 ;\n" " addl $128, %2 ;\n" " addl $128, %3 ;\n" " addl $128, %4 ;\n" " addl $128, %5 ;\n" " decl %0 ;\n" " jnz 1b ;\n" : : "g" (lines), "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5) : "memory"); FPU_RESTORE;}#undef LD#undef XO1#undef XO2#undef XO3#undef XO4#undef ST#undef BLOCKstatic voidxor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2){ unsigned long lines = bytes >> 6; char fpu_save[108]; FPU_SAVE; __asm__ __volatile__ ( " .align 32 ;\n" " 1: ;\n" " movq (%1), %%mm0 ;\n" " movq 8(%1), %%mm1 ;\n" " pxor (%2), %%mm0 ;\n" " movq 16(%1), %%mm2 ;\n" " movq %%mm0, (%1) ;\n" " pxor 8(%2), %%mm1 ;\n" " movq 24(%1), %%mm3 ;\n" " movq %%mm1, 8(%1) ;\n" " pxor 16(%2), %%mm2 ;\n" " movq 32(%1), %%mm4 ;\n" " movq %%mm2, 16(%1) ;\n" " pxor 24(%2), %%mm3 ;\n" " movq 40(%1), %%mm5 ;\n" " movq %%mm3, 24(%1) ;\n" " pxor 32(%2), %%mm4 ;\n" " movq 48(%1), %%mm6 ;\n" " movq %%mm4, 32(%1) ;\n" " pxor 40(%2), %%mm5 ;\n" " movq 56(%1), %%mm7 ;\n" " movq %%mm5, 40(%1) ;\n" " pxor 48(%2), %%mm6 ;\n" " pxor 56(%2), %%mm7 ;\n" " movq %%mm6, 48(%1) ;\n" " movq %%mm7, 56(%1) ;\n" " addl $64, %1 ;\n" " addl $64, %2 ;\n" " decl %0 ;\n" " jnz 1b ;\n" : : "r" (lines), "r" (p1), "r" (p2) : "memory"); FPU_RESTORE;}static voidxor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3){ unsigned long lines = bytes >> 6; char fpu_save[108]; FPU_SAVE; __asm__ __volatile__ ( " .align 32,0x90 ;\n" " 1: ;\n" " movq (%1), %%mm0 ;\n" " movq 8(%1), %%mm1 ;\n" " pxor (%2), %%mm0 ;\n" " movq 16(%1), %%mm2 ;\n" " pxor 8(%2), %%mm1 ;\n" " pxor (%3), %%mm0 ;\n" " pxor 16(%2), %%mm2 ;\n" " movq %%mm0, (%1) ;\n" " pxor 8(%3), %%mm1 ;\n" " pxor 16(%3), %%mm2 ;\n" " movq 24(%1), %%mm3 ;\n" " movq %%mm1, 8(%1) ;\n" " movq 32(%1), %%mm4 ;\n" " movq 40(%1), %%mm5 ;\n" " pxor 24(%2), %%mm3 ;\n" " movq %%mm2, 16(%1) ;\n" " pxor 32(%2), %%mm4 ;\n" " pxor 24(%3), %%mm3 ;\n" " pxor 40(%2), %%mm5 ;\n" " movq %%mm3, 24(%1) ;\n" " pxor 32(%3), %%mm4 ;\n" " pxor 40(%3), %%mm5 ;\n" " movq 48(%1), %%mm6 ;\n" " movq %%mm4, 32(%1) ;\n" " movq 56(%1), %%mm7 ;\n" " pxor 48(%2), %%mm6 ;\n" " movq %%mm5, 40(%1) ;\n" " pxor 56(%2), %%mm7 ;\n" " pxor 48(%3), %%mm6 ;\n" " pxor 56(%3), %%mm7 ;\n" " movq %%mm6, 48(%1) ;\n" " movq %%mm7, 56(%1) ;\n" " addl $64, %1 ;\n" " addl $64, %2 ;\n" " addl $64, %3 ;\n" " decl %0 ;\n" " jnz 1b ;\n" : : "r" (lines), "r" (p1), "r" (p2), "r" (p3) : "memory" ); FPU_RESTORE;}static voidxor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4){ unsigned long lines = bytes >> 6; char fpu_save[108]; FPU_SAVE; __asm__ __volatile__ ( " .align 32,0x90 ;\n" " 1: ;\n" " movq (%1), %%mm0 ;\n" " movq 8(%1), %%mm1 ;\n" " pxor (%2), %%mm0 ;\n" " movq 16(%1), %%mm2 ;\n" " pxor 8(%2), %%mm1 ;\n" " pxor (%3), %%mm0 ;\n" " pxor 16(%2), %%mm2 ;\n" " pxor 8(%3), %%mm1 ;\n" " pxor (%4), %%mm0 ;\n" " movq 24(%1), %%mm3 ;\n" " pxor 16(%3), %%mm2 ;\n" " pxor 8(%4), %%mm1 ;\n" " movq %%mm0, (%1) ;\n" " movq 32(%1), %%mm4 ;\n" " pxor 24(%2), %%mm3 ;\n" " pxor 16(%4), %%mm2 ;\n" " movq %%mm1, 8(%1) ;\n" " movq 40(%1), %%mm5 ;\n" " pxor 32(%2), %%mm4 ;\n" " pxor 24(%3), %%mm3 ;\n" " movq %%mm2, 16(%1) ;\n" " pxor 40(%2), %%mm5 ;\n" " pxor 32(%3), %%mm4 ;\n" " pxor 24(%4), %%mm3 ;\n" " movq %%mm3, 24(%1) ;\n" " movq 56(%1), %%mm7 ;\n" " movq 48(%1), %%mm6 ;\n" " pxor 40(%3), %%mm5 ;\n" " pxor 32(%4), %%mm4 ;\n" " pxor 48(%2), %%mm6 ;\n" " movq %%mm4, 32(%1) ;\n" " pxor 56(%2), %%mm7 ;\n" " pxor 40(%4), %%mm5 ;\n" " pxor 48(%3), %%mm6 ;\n" " pxor 56(%3), %%mm7 ;\n" " movq %%mm5, 40(%1) ;\n" " pxor 48(%4), %%mm6 ;\n" " pxor 56(%4), %%mm7 ;\n" " movq %%mm6, 48(%1) ;\n" " movq %%mm7, 56(%1) ;\n" " addl $64, %1 ;\n" " addl $64, %2 ;\n" " addl $64, %3 ;\n" " addl $64, %4 ;\n" " decl %0 ;\n" " jnz 1b ;\n" : : "r" (lines), "r" (p1), "r" (p2), "r" (p3), "r" (p4) : "memory"); FPU_RESTORE;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -