aes-amd64.s

来自「Fast and transparent file system and swap encryption package for linux」· S 代码 · 共 894 行 · 第 1/2 页
894 行
//
// Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
// All rights reserved.
//
// TERMS
//
//  Redistribution and use in source and binary forms, with or without
//  modification, are permitted subject to the following conditions:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The copyright holder's name must not be used to endorse or promote
//     any products derived from this software without his specific prior
//     written permission.
//
//  This software is provided 'as is' with no express or implied warranties
//  of correctness or fitness for purpose.

// Modified by Jari Ruusu,  December 24 2001
//  - Converted syntax to GNU CPP/assembler syntax
//  - C programming interface converted back to "old" API
//  - Minor portability cleanups and speed optimizations

// Modified by Jari Ruusu,  April 11 2002
//  - Added above copyright and terms to resulting object code so that
//    binary distributions can avoid legal trouble

// Modified by Jari Ruusu,  June 12 2004
//  - Converted 32 bit x86 code to 64 bit AMD64 code
//  - Re-wrote encrypt and decrypt code from scratch

// An AES (Rijndael) implementation for the AMD64. This version only
// implements the standard AES block length (128 bits, 16 bytes). This code
// does not preserve the rax, rcx, rdx, rsi, rdi or r8-r11 registers or the
// arithmetic status flags. However, the rbx, rbp and r12-r15 registers are
// preserved across calls.

// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])

// Syntax: AT&T (GNU as), run through the C preprocessor first.
// Arguments arrive in rdi, rsi, rdx, rcx in that order (see the register
// lists in front of each routine).
// NOTE(review): the lookup tables are addressed as label(,index,4), i.e.
// with an absolute displacement rather than RIP-relative, so the object
// code is not position independent - confirm this suits the link target.

#if defined(USE_UNDERLINE)
# define aes_set_key _aes_set_key
# define aes_encrypt _aes_encrypt
# define aes_decrypt _aes_decrypt
#endif

#if !defined(ALIGN64BYTES)
# define ALIGN64BYTES 64
#endif

	.file	"aes-amd64.S"
	.globl	aes_set_key
	.globl	aes_encrypt
	.globl	aes_decrypt

// Copyright and licence terms as NUL-terminated strings, so that they are
// carried in the read-only data of the resulting object code.

	.section .rodata
copyright:
	.ascii "    \000"
	.ascii "Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.\000"
	.ascii "All rights reserved.\000"
	.ascii "    \000"
	.ascii "TERMS\000"
	.ascii "    \000"
	.ascii " Redistribution and use in source and binary forms, with or without\000"
	.ascii " modification, are permitted subject to the following conditions:\000"
	.ascii "    \000"
	.ascii " 1. Redistributions of source code must retain the above copyright\000"
	.ascii "    notice, this list of conditions and the following disclaimer.\000"
	.ascii "    \000"
	.ascii " 2. Redistributions in binary form must reproduce the above copyright\000"
	.ascii "    notice, this list of conditions and the following disclaimer in the\000"
	.ascii "    documentation and/or other materials provided with the distribution.\000"
	.ascii "    \000"
	.ascii " 3. The copyright holder's name must not be used to endorse or promote\000"
	.ascii "    any products derived from this software without his specific prior\000"
	.ascii "    written permission.\000"
	.ascii "    \000"
	.ascii " This software is provided 'as is' with no express or implied warranties\000"
	.ascii " of correctness or fitness for purpose.\000"
	.ascii "    \000"

#define tlen	1024	// length of each of 4 'xor' arrays (256 32-bit words)

// offsets in context structure

#define nkey	0	// key length, size 4
#define nrnd	4	// number of rounds, size 4
#define ekey	8	// encryption key schedule base address, size 256
#define dkey	264	// decryption key schedule base address, size 256

// This macro performs a forward encryption cycle. It is entered with
// the first previous round column values in I1E, I2E, I3E and I4E and
// exits with the final values OU1, OU2, OU3 and OU4 registers.
//
// Arguments:
//   p1        label of four consecutive lookup tables, tlen bytes each
//   p2        byte offset from rbp of the 16-byte round key to mix in
//   InE, InB  32-bit name and low-byte name of input column n
//   I1H, I2H  second-byte names (ah/bh/ch/dh) of input columns 1 and 2
//   I3R, I4R  64-bit names of input columns 3 and 4, used as table index
//   OU1..OU4  32-bit output column registers
//
// Scratch registers: edi, esi, r8d and r13d. All four input registers are
// shifted right in the process, so their contents are destroyed.

#define fwd_rnd(p1,p2,I1E,I1B,I1H,I2E,I2B,I2H,I3E,I3B,I3R,I4E,I4B,I4R,OU1,OU2,OU3,OU4) \
	movl	p2(%rbp),OU1		;\
	movl	p2+4(%rbp),OU2		;\
	movl	p2+8(%rbp),OU3		;\
	movl	p2+12(%rbp),OU4		;\
	movzbl	I1B,%edi		;\
	movzbl	I2B,%esi		;\
	movzbl	I3B,%r8d		;\
	movzbl	I4B,%r13d		;\
	shrl	$8,I3E			;\
	shrl	$8,I4E			;\
	xorl	p1(,%rdi,4),OU1		;\
	xorl	p1(,%rsi,4),OU2		;\
	xorl	p1(,%r8,4),OU3		;\
	xorl	p1(,%r13,4),OU4		;\
	movzbl	I2H,%esi		;\
	movzbl	I3B,%r8d		;\
	movzbl	I4B,%r13d		;\
	movzbl	I1H,%edi		;\
	shrl	$8,I3E			;\
	shrl	$8,I4E			;\
	xorl	p1+tlen(,%rsi,4),OU1	;\
	xorl	p1+tlen(,%r8,4),OU2	;\
	xorl	p1+tlen(,%r13,4),OU3	;\
	xorl	p1+tlen(,%rdi,4),OU4	;\
	shrl	$16,I1E			;\
	shrl	$16,I2E			;\
	movzbl	I3B,%r8d		;\
	movzbl	I4B,%r13d		;\
	movzbl	I1B,%edi		;\
	movzbl	I2B,%esi		;\
	xorl	p1+2*tlen(,%r8,4),OU1	;\
	xorl	p1+2*tlen(,%r13,4),OU2	;\
	xorl	p1+2*tlen(,%rdi,4),OU3	;\
	xorl	p1+2*tlen(,%rsi,4),OU4	;\
	shrl	$8,I4E			;\
	movzbl	I1H,%edi		;\
	movzbl	I2H,%esi		;\
	shrl	$8,I3E			;\
	xorl	p1+3*tlen(,I4R,4),OU1	;\
	xorl	p1+3*tlen(,%rdi,4),OU2	;\
	xorl	p1+3*tlen(,%rsi,4),OU3	;\
	xorl	p1+3*tlen(,I3R,4),OU4

// This macro performs an inverse encryption cycle. It is entered with
// the first previous round column values in I1E, I2E, I3E and I4E and
// exits with the final values OU1, OU2, OU3 and OU4 registers.
//
// Arguments mirror the forward macro, except that here columns 1 and 2
// are passed with their 64-bit names (I1R, I2R, used as table index) and
// columns 3 and 4 with their second-byte names (I3H, I4H).
//
// Scratch registers: edi, esi, r8d and r13d. All four input registers are
// shifted right in the process, so their contents are destroyed.

#define inv_rnd(p1,p2,I1E,I1B,I1R,I2E,I2B,I2R,I3E,I3B,I3H,I4E,I4B,I4H,OU1,OU2,OU3,OU4) \
	movl	p2+12(%rbp),OU4		;\
	movl	p2+8(%rbp),OU3		;\
	movl	p2+4(%rbp),OU2		;\
	movl	p2(%rbp),OU1		;\
	movzbl	I4B,%edi		;\
	movzbl	I3B,%esi		;\
	movzbl	I2B,%r8d		;\
	movzbl	I1B,%r13d		;\
	shrl	$8,I2E			;\
	shrl	$8,I1E			;\
	xorl	p1(,%rdi,4),OU4		;\
	xorl	p1(,%rsi,4),OU3		;\
	xorl	p1(,%r8,4),OU2		;\
	xorl	p1(,%r13,4),OU1		;\
	movzbl	I3H,%esi		;\
	movzbl	I2B,%r8d		;\
	movzbl	I1B,%r13d		;\
	movzbl	I4H,%edi		;\
	shrl	$8,I2E			;\
	shrl	$8,I1E			;\
	xorl	p1+tlen(,%rsi,4),OU4	;\
	xorl	p1+tlen(,%r8,4),OU3	;\
	xorl	p1+tlen(,%r13,4),OU2	;\
	xorl	p1+tlen(,%rdi,4),OU1	;\
	shrl	$16,I4E			;\
	shrl	$16,I3E			;\
	movzbl	I2B,%r8d		;\
	movzbl	I1B,%r13d		;\
	movzbl	I4B,%edi		;\
	movzbl	I3B,%esi		;\
	xorl	p1+2*tlen(,%r8,4),OU4	;\
	xorl	p1+2*tlen(,%r13,4),OU3	;\
	xorl	p1+2*tlen(,%rdi,4),OU2	;\
	xorl	p1+2*tlen(,%rsi,4),OU1	;\
	shrl	$8,I1E			;\
	movzbl	I4H,%edi		;\
	movzbl	I3H,%esi		;\
	shrl	$8,I2E			;\
	xorl	p1+3*tlen(,I1R,4),OU4	;\
	xorl	p1+3*tlen(,%rdi,4),OU3	;\
	xorl	p1+3*tlen(,%rsi,4),OU2	;\
	xorl	p1+3*tlen(,I2R,4),OU1

// AES (Rijndael) Encryption Subroutine

// rdi = pointer to AES context
// rsi = pointer to input plaintext bytes
// rdx = pointer to output ciphertext bytes
//
// Saves and restores rbp, rbx, r13, r14 and r15 on the stack.
// Overwrites rax, rcx, rdx, rsi, rdi, r8-r11 and the flags.
//
// The state columns alternate between the register sets
// (eax, ecx, r10d, r11d) and (ebx, edx, r14d, r15d) from round to round.
// rbp is positioned so that the last ten rounds always use offsets 0..144;
// the round count read from the context selects the entry point:
//   10 rounds enter at aes_15, 12 rounds enter at aes_13, and any other
//   count falls through and runs all 14 rounds.
// The final round uses aes_fl_tab in place of aes_ft_tab.

	.text
	.align	ALIGN64BYTES
aes_encrypt:
	movl	(%rsi),%eax		// read in plaintext
	movl	4(%rsi),%ecx
	movl	8(%rsi),%r10d
	movl	12(%rsi),%r11d

	pushq	%rbp
	leaq	ekey+16(%rdi),%rbp	// encryption key pointer
	movq	%rdx,%r9		// pointer to out block
	movl	nrnd(%rdi),%edx		// number of rounds
	pushq	%rbx
	pushq	%r13
	pushq	%r14
	pushq	%r15

	xorl	-16(%rbp),%eax		// xor in first round key
	xorl	-12(%rbp),%ecx
	xorl	-8(%rbp),%r10d
	xorl	-4(%rbp),%r11d

	subl	$10,%edx
	je	aes_15
	addq	$32,%rbp
	subl	$2,%edx
	je	aes_13
	addq	$32,%rbp

	fwd_rnd(aes_ft_tab,-64,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
	fwd_rnd(aes_ft_tab,-48,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
	jmp	aes_13			// jump over the alignment padding
	.align	ALIGN64BYTES
aes_13:
	fwd_rnd(aes_ft_tab,-32,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
	fwd_rnd(aes_ft_tab,-16,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
	jmp	aes_15			// jump over the alignment padding
	.align	ALIGN64BYTES
aes_15:
	fwd_rnd(aes_ft_tab,0,  %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
	fwd_rnd(aes_ft_tab,16, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
	fwd_rnd(aes_ft_tab,32, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
	fwd_rnd(aes_ft_tab,48, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
	fwd_rnd(aes_ft_tab,64, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
	fwd_rnd(aes_ft_tab,80, %ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
	fwd_rnd(aes_ft_tab,96, %eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
	fwd_rnd(aes_ft_tab,112,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)
	fwd_rnd(aes_ft_tab,128,%eax,%al,%ah,%ecx,%cl,%ch,%r10d,%r10b,%r10,%r11d,%r11b,%r11,%ebx,%edx,%r14d,%r15d)
	fwd_rnd(aes_fl_tab,144,%ebx,%bl,%bh,%edx,%dl,%dh,%r14d,%r14b,%r14,%r15d,%r15b,%r15,%eax,%ecx,%r10d,%r11d)

	popq	%r15
	popq	%r14
	popq	%r13
	popq	%rbx
	popq	%rbp

	movl	%eax,(%r9)		// move final values to the output array.
	movl	%ecx,4(%r9)
	movl	%r10d,8(%r9)
	movl	%r11d,12(%r9)
	ret

// AES (Rijndael) Decryption Subroutine

// rdi = pointer to AES context
// rsi = pointer to input ciphertext bytes
// rdx = pointer to output plaintext bytes
//
// Saves and restores rbp, rbx, r13, r14 and r15 on the stack.
// Overwrites rax, rcx, rdx, rsi, rdi, r8-r11 and the flags.
//
// Same structure as the encryption routine, with these differences: the
// four input words are loaded (and the results stored) in reverse register
// order, the key schedule at dkey is used, and the rounds use aes_it_tab
// with aes_il_tab for the final round.
// 10 rounds enter at aes_25, 12 rounds enter at aes_23, and any other
// count falls through and runs all 14 rounds.

	.align	ALIGN64BYTES
aes_decrypt:
	movl	12(%rsi),%eax		// read in ciphertext
	movl	8(%rsi),%ecx
	movl	4(%rsi),%r10d
	movl	(%rsi),%r11d

	pushq	%rbp
	leaq	dkey+16(%rdi),%rbp	// decryption key pointer
	movq	%rdx,%r9		// pointer to out block
	movl	nrnd(%rdi),%edx		// number of rounds
	pushq	%rbx
	pushq	%r13
	pushq	%r14
	pushq	%r15

	xorl	-4(%rbp),%eax		// xor in first round key
	xorl	-8(%rbp),%ecx
	xorl	-12(%rbp),%r10d
	xorl	-16(%rbp),%r11d

	subl	$10,%edx
	je	aes_25
	addq	$32,%rbp
	subl	$2,%edx
	je	aes_23
	addq	$32,%rbp

	inv_rnd(aes_it_tab,-64,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
	inv_rnd(aes_it_tab,-48,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
	jmp	aes_23			// jump over the alignment padding
	.align	ALIGN64BYTES
aes_23:
	inv_rnd(aes_it_tab,-32,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
	inv_rnd(aes_it_tab,-16,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
	jmp	aes_25			// jump over the alignment padding
	.align	ALIGN64BYTES
aes_25:
	inv_rnd(aes_it_tab,0,  %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
	inv_rnd(aes_it_tab,16, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
	inv_rnd(aes_it_tab,32, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
	inv_rnd(aes_it_tab,48, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
	inv_rnd(aes_it_tab,64, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
	inv_rnd(aes_it_tab,80, %r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
	inv_rnd(aes_it_tab,96, %r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
	inv_rnd(aes_it_tab,112,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)
	inv_rnd(aes_it_tab,128,%r11d,%r11b,%r11,%r10d,%r10b,%r10,%ecx,%cl,%ch,%eax,%al,%ah,%r15d,%r14d,%edx,%ebx)
	inv_rnd(aes_il_tab,144,%r15d,%r15b,%r15,%r14d,%r14b,%r14,%edx,%dl,%dh,%ebx,%bl,%bh,%r11d,%r10d,%ecx,%eax)

	popq	%r15
	popq	%r14
	popq	%r13
	popq	%rbx
	popq	%rbp

	movl	%eax,12(%r9)		// move final values to the output array.
	movl	%ecx,8(%r9)
	movl	%r10d,4(%r9)
	movl	%r11d,(%r9)
	ret

// AES (Rijndael) Key Schedule Subroutine

// This macro performs a column mixing operation on an input 32-bit
// word to give a 32-bit result. It uses each of the 4 bytes in the
// input column to index 4 different tables of 256 32-bit words
// that are xored together to form the output value.
//
// Input word in ebx, result in eax, ecx is scratch. ebx is left rotated
// right by 16 bits relative to its value on entry.

#define mix_col(p1)			 \
	movzbl	%bl,%ecx		;\
	movl	p1(,%rcx,4),%eax	;\
	movzbl	%bh,%ecx		;\
	ror	$16,%ebx		;\
	xorl	p1+tlen(,%rcx,4),%eax	;\
	movzbl	%bl,%ecx		;\
	xorl	p1+2*tlen(,%rcx,4),%eax	;\
	movzbl	%bh,%ecx		;\
	xorl	p1+3*tlen(,%rcx,4),%eax

// Key Schedule Macros
//
// Each macro emits one expansion step for a 4, 6 or 8 word key and stores
// 16, 24 or 32 bytes at offset 16*p1, 24*p1 or 32*p1 from rdi. p1 also
// selects the 32-bit entry of aes_rcon_tab that is mixed in.
// On entry and exit esi, ebp, edx and ebx hold the last four key words
// written (ebx being the most recent); eax and ecx are scratch.

#define ksc4(p1)			 \
	rol	$24,%ebx		;\
	mix_col(aes_fl_tab)		;\
	ror	$8,%ebx			;\
	xorl	4*p1+aes_rcon_tab,%eax	;\
	xorl	%eax,%esi		;\
	xorl	%esi,%ebp		;\
	movl	%esi,16*p1(%rdi)	;\
	movl	%ebp,16*p1+4(%rdi)	;\
	xorl	%ebp,%edx		;\
	xorl	%edx,%ebx		;\
	movl	%edx,16*p1+8(%rdi)	;\
	movl	%ebx,16*p1+12(%rdi)

#define ksc6(p1)			 \
	rol	$24,%ebx		;\
	mix_col(aes_fl_tab)		;\
	ror	$8,%ebx			;\
	xorl	4*p1+aes_rcon_tab,%eax	;\
	xorl	24*p1-24(%rdi),%eax	;\
	movl	%eax,24*p1(%rdi)	;\
	xorl	24*p1-20(%rdi),%eax	;\
	movl	%eax,24*p1+4(%rdi)	;\
	xorl	%eax,%esi		;\
	xorl	%esi,%ebp		;\
	movl	%esi,24*p1+8(%rdi)	;\
	movl	%ebp,24*p1+12(%rdi)	;\
	xorl	%ebp,%edx		;\
	xorl	%edx,%ebx		;\
	movl	%edx,24*p1+16(%rdi)	;\
	movl	%ebx,24*p1+20(%rdi)

#define ksc8(p1)			 \
	rol	$24,%ebx		;\
	mix_col(aes_fl_tab)		;\
	ror	$8,%ebx			;\
	xorl	4*p1+aes_rcon_tab,%eax	;\
	xorl	32*p1-32(%rdi),%eax	;\
	movl	%eax,32*p1(%rdi)	;\
	xorl	32*p1-28(%rdi),%eax	;\
	movl	%eax,32*p1+4(%rdi)	;\
	xorl	32*p1-24(%rdi),%eax	;\
	movl	%eax,32*p1+8(%rdi)	;\
	xorl	32*p1-20(%rdi),%eax	;\
	movl	%eax,32*p1+12(%rdi)	;\
	pushq	%rbx			;\
	movl	%eax,%ebx		;\
	mix_col(aes_fl_tab)		;\
	popq	%rbx			;\
	xorl	%eax,%esi		;\
	xorl	%esi,%ebp		;\
	movl	%esi,32*p1+16(%rdi)	;\
	movl	%ebp,32*p1+20(%rdi)	;\
	xorl	%ebp,%edx		;\
	xorl	%edx,%ebx		;\
	movl	%edx,32*p1+24(%rdi)	;\
	movl	%ebx,32*p1+28(%rdi)

// rdi = pointer to AES context
// rsi = pointer to key bytes
// rdx = key length, bytes or bits
// rcx = ed_flag, 1=encrypt only, 0=both encrypt and decrypt
//
// Key length handling: a value of 128 or more is taken as a bit count and
// divided by 8; anything that is then neither 32 nor 24 bytes is treated
// as 16 bytes. The length in 32-bit words is stored at nkey and that
// value plus 6 at nrnd.

	.align	ALIGN64BYTES
aes_set_key:
	pushfq				// NOTE(review): presumably saved because of the cld below - confirm
	pushq	%rbp
	pushq	%rbx

	movq	%rcx,%r11		// ed_flg
	movq	%rdx,%rcx		// key length
	movq	%rdi,%r10		// AES context

	cmpl	$128,%ecx
	jb	aes_30
	shrl	$3,%ecx
aes_30:
	cmpl	$32,%ecx
	je	aes_32
	cmpl	$24,%ecx
	je	aes_32
	movl	$16,%ecx
aes_32:
	shrl	$2,%ecx
	movl	%ecx,nkey(%r10)
	leaq	6(%rcx),%rax		// 10/12/14 for 4/6/8 32-bit key length
	movl	%eax,nrnd(%r10)
	leaq	ekey(%r10),%rdi		// key position in AES context
	cld
	movl	%ecx,%eax		// save key length in eax
	rep ;	movsl			// words in the key schedule
	movl	-4(%rsi),%ebx		// put some values in registers
	movl	-8(%rsi),%edx		// to allow faster code
	movl	-12(%rsi),%ebp
	movl	-16(%rsi),%esi

	cmpl	$4,%eax			// jump on key size
	je	aes_36
	cmpl	$6,%eax
	je	aes_35

	ksc8(0)
	ksc8(1)
	ksc8(2)
	ksc8(3)
	ksc8(4)
	ksc8(5)
aes-amd64.s - 源码说明

本页面展示了「Fast and transparent file system and swap encryption package for linux. No source code changes to li」中的 aes-amd64.s 源码文件，采用 S 编程语言编写，共 894 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与transparent相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?