📄 idea.cpp

📁 idea算法从speakfs提取
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* idea.c - C source code for IDEA block cipher. IDEA (International Data 
 * Encryption Algorithm), formerly known as IPES (Improved Proposed Encryption
 * Standard). Algorithm developed by Xuejia Lai and James L. Massey, of ETH 
 * Zurich. This implementation modified and derived from original C code 
 * developed by Xuejia Lai. Zero-based indexing added, names changed from IPES
 * to IDEA. CFB functions added. Random number routines added. Optimized for 
 * speed 21 Oct 92 by Colin Plumb <colin@nsq.gts.org>. This code assumes that 
 * each pair of 8-bit bytes comprising a 16-bit word in the key and in the 
 * cipher block are externally represented with the Most Significant Byte 
 * (MSB) first, regardless of internal native byte order of the target CPU.  */

#ifdef TEST
#include <stdio.h>
#include <time.h>
#endif

#ifdef sgi
#define HIGHFIRST
#endif

#ifdef sun
#define HIGHFIRST
#define const
#endif

#include "idea.h"

/* FAR is mapped onto IFAR, which this file does not define; presumably it
 * comes from idea.h -- TODO confirm. */
#define FAR	IFAR

/* Implementation selection: a non-Windows build with _M_I86 defined gets
 * USE_ASM (the inline-assembly cipher_idea()); any _WIN32 build gets IDEA32,
 * i.e. temporaries that may be wider than 16 bits (see uint16 and low16
 * further down in this section). */
#ifndef _WIN32
#ifdef _M_I86
#define USE_ASM
#endif
#else
#define IDEA32
#endif

/* Smaller of x and y.  This is a function-like macro: the selected argument
 * is evaluated twice, so do not pass expressions with side effects. */
#define min(x, y) (((x) < (y)) ? (x) : (y))

#define TRUE	1
#define FALSE	0

/* Cipher geometry.  KEYLEN counts 16-bit subkeys: six per round plus four
 * more, i.e. 52 with ROUNDS == 8. */
#define IDEABLOCKSIZE 8
#define ROUNDS      8               /* Don't change this value, should be 8 */
#define KEYLEN		(6*ROUNDS+4)	/* length of key schedule */

/* Basic type names.  These are macros, not typedefs, so in a declaration
 * such as "byteptr a, b;" only a would be a pointer. */
#define byte	unsigned char
#define word16	unsigned short
#define boolean int
#define word32	unsigned long
#define byteptr unsigned char *

/* A complete key schedule: KEYLEN 16-bit subkeys. */
typedef word16 IDEAkey[KEYLEN];

/* uint16 is the type used for temporaries.  With IDEA32 it is a plain
 * unsigned int and low16() masks a value down to its low 16 bits; without
 * IDEA32 it is word16 itself and low16() is the identity. */
#ifdef IDEA32 /* Use >16-bit temporaries */
#define low16(x) ((x) & 0xFFFF)
typedef unsigned int uint16;		/* at LEAST 16 bits, maybe more */
#else
#define low16(x) (x)                /* this is only ever applied to uint16's */
typedef word16 uint16;
#endif

#ifdef __GNUC__
/* CONST marks a function whose result depends only on its arguments and
 * which has no side effects, which is useful info for the gcc optimizer.
 * The compiler-defined macro is __GNUC__ (two underscores on each side);
 * the attribute form is used because a bare __const__ in front of a
 * function only qualifies its return type on current compilers. */
#define CONST __attribute__((__const__))
#else
/* Other compilers: CONST expands to nothing. */
#define CONST
#endif

static void en_key_idea(word16 *userkey, word16 *Z);
static void de_key_idea(IDEAkey Z, IDEAkey DK);

/* Multiplication, modulo (2**16)+1. Note that this code is structured like 
 * this on the assumption that untaken branches are cheaper than taken 
 * branches, and the compiler doesn't schedule branches. */

#ifdef SMALL_CACHE
/* Multiply a by b modulo (2**16)+1, where an operand of 0 stands for 2**16.
 * Compiled only when SMALL_CACHE is defined.  The return value is not passed
 * through low16(), so when uint16 is wider than 16 bits it may carry bits
 * above bit 15. */
CONST static uint16 mul(register uint16 a, register uint16 b)
{
	register word32 product;

	/* A zero operand means 2**16 == -1 (mod 2**16+1), so the product is
	 * the negated other operand, i.e. 1 - other once reduced. */
	if (!a)
		return 1-b;
	if (!b)
		return 1-a;

	product = (word32)a * b;
	b = low16(product);		/* low half of the product  */
	a = product>>16;		/* high half of the product */
	/* low - high, plus one when the subtraction borrowed */
	return b - a + (b < a);
}
#endif /* SMALL_CACHE */

/* Compute multiplicative inverse of x, modulo (2**16)+1, using Euclid's GCD 
 * algorithm. It is unrolled twice to avoid swapping the meaning of the 
 * registers each iteration; some subtracts of t have been changed to adds.  */

/* Return the multiplicative inverse of x modulo 0x10001 (2**16 + 1).
 * 0 and 1 are returned unchanged.  The Euclid loop is unrolled twice so the
 * two remainders (x and rem) and the two coefficients (coef_even, coef_odd)
 * never have to swap roles. */
CONST static uint16 inv(uint16 x)
{
	uint16 coef_even, coef_odd;
	uint16 quot, rem;

	if (x <= 1)
		return x;			/* 0 and 1 are self-inverse */

	coef_odd = 0x10001 / x;	/* x >= 2, so this fits into 16 bits */
	rem = 0x10001 % x;

	if (rem != 1) {
		coef_even = 1;
		do {
			/* first half-step: reduce x by rem */
			quot = x / rem;
			x %= rem;
			coef_even += quot * coef_odd;
			if (x == 1)
				return coef_even;
			/* second half-step: reduce rem by x */
			quot = rem / x;
			rem %= x;
			coef_odd += quot * coef_even;
		} while (rem != 1);
	}
	/* Finishing on rem leaves the coefficient negated; 1 - c undoes that. */
	return low16(1-coef_odd);
}

/*	   Compute IDEA encryption subkeys Z */

/* Expand the eight 16-bit words at userkey into the KEYLEN encryption
 * subkeys at Z.
 *
 * Z[0..7] are the user key words.  Every later subkey is built from two
 * neighbouring words of the preceding group of eight (indices wrapping
 * within that group): the word one position ahead shifted left by 9, OR-ed
 * with the word two positions ahead shifted right by 7.  This amounts to
 * reading the previous group rotated left by 25 bits. */
static void en_key_idea(word16 *userkey, word16 *Z)
{
	int k;

	for (k = 0; k < 8; k++)
		Z[k] = userkey[k];

	for (k = 8; k < KEYLEN; k++) {
		/* start of the group of eight that precedes subkey k's group */
		word16 *prev = Z + ((k - 8) & ~7);

		Z[k] = (prev[(k + 1) & 7] << 9) | (prev[(k + 2) & 7] >> 7);
	}
}

/*	   Compute IDEA decryption subkeys DK from encryption subkeys Z */
/* Note: these buffers *may* overlap! */

/* Derive the decryption key schedule DK from the encryption schedule Z.
 *
 * Z is read front to back while a private scratch schedule T is filled from
 * its end towards its start, so the round groups come out in reverse order.
 * Multiplicative subkeys are replaced by inv() of themselves, additive
 * subkeys by their negation, and the two subkeys between those groups are
 * copied unchanged.  In the seven middle groups the two additive subkeys
 * also trade places; in the first and last group they do not.
 *
 * DK is written only after all of Z has been consumed, so Z and DK may be
 * the same buffer.  T holds key material, so it is cleared before returning;
 * the clearing stores go through a volatile pointer so that the compiler
 * does not drop them as dead stores to a local that is about to vanish. */
static void de_key_idea(IDEAkey Z, IDEAkey DK)
{
	int j;
	uint16 t1, t2, t3;
	IDEAkey T;
	word16 *p = T + KEYLEN;
	volatile word16 *wipe = T;

	/* First group of Z becomes the last group of T. */
	t1 = inv(*Z++);
	t2 = -*Z++;
	t3 = -*Z++;
	*--p = inv(*Z++);
	*--p = t3;
	*--p = t2;
	*--p = t1;

	for (j = 1; j < ROUNDS; j++) {
		/* the two subkeys copied as-is */
		t1 = *Z++;
		*--p = *Z++;
		*--p = t1;

		/* inverted group; note t2 is stored before t3 here, which
		 * swaps the two additive subkeys */
		t1 = inv(*Z++);
		t2 = -*Z++;
		t3 = -*Z++;
		*--p = inv(*Z++);
		*--p = t2;
		*--p = t3;
		*--p = t1;
	}

	/* Last pair and last group of Z become the start of T. */
	t1 = *Z++;
	*--p = *Z++;
	*--p = t1;

	t1 = inv(*Z++);
	t2 = -*Z++;
	t3 = -*Z++;
	*--p = inv(*Z++);
	*--p = t3;
	*--p = t2;
	*--p = t1;

	/* Hand the finished schedule to the caller ... */
	for (j = 0; j < KEYLEN; j++)
		DK[j] = T[j];

	/* ... and destroy the temporary copy. */
	for (j = 0; j < KEYLEN; j++)
		wipe[j] = 0;
}

/* MUL(x,y) computes x = x*y, modulo 0x10001. Requires two temps, t16 and t32.
 * x must be a side-effect-free lvalue. y may be anything, but unlike x, must 
 * be strictly 16 bits even if low16() is #defined. All of these are 
 * equivalent; see which is faster on your machine.  */

/* All three variants use the temporaries t16 and t32 (the first one only
 * indirectly, not at all); they must be declared by the code that expands
 * MUL. */
#ifdef SMALL_CACHE
/* Out-of-line variant: mask x with low16() and let mul() do the work. */
#define MUL(x,y) (x = mul(low16(x),y))
#else
#ifdef AVOID_JUMPS
/* Branch-free variant.  It works on x-1 and y-1, using
 * (x-1)*(y-1) + (x-1) + (y-1) + 1 == x*y, so an operand of 0 turns into
 * 0xFFFF and needs no test.  The 32-bit result is then reduced as low half
 * minus high half, plus one if that subtraction borrowed.
 * NOTE(review): when both operands are 0 the sum reaches 2**32; if word32 is
 * exactly 32 bits wide that wraps to 0 and the macro yields 0 where 1 is
 * expected -- confirm before enabling AVOID_JUMPS. */
#define MUL(x,y) (x = low16(x-1), t16 = low16((y)-1), \
             t32 = (word32)x*t16+x+t16+1, x = low16(t32), \
			 t16 = t32>>16, x = x-t16+(x<t16) )
#else
/* Default variant.  y is copied into t16 and tested first: if it is zero the
 * result is 1-x.  Otherwise x is masked and tested: if it is zero the result
 * is 1-t16.  Only when both are non-zero is the 32-bit product formed and
 * reduced as low half minus high half, plus one on borrow. */
#define MUL(x,y) ((t16 = (y)) ? (x=low16(x)) ? \
        t32 = (word32)x*t16, x = low16(t32), t16 = t32>>16, \
        x = x-t16+(x<t16) : \
		(x = 1-t16) : (x = 1-x))
#endif
#endif

#ifdef USE_ASM

static void cipher_idea(word16 *inblock, word16 *outblock, IDEAkey zkey)
{
	word16 sx1, sx4, skk, done8;
	__asm {
;A while ago I posted a message claiming a speed of 238,000
;bytes/sec for an implementation of IDEA on a 33Mh 486.  Below is
;an explanation and some code to show how it works.  The basic
;trick should be useful on many (but not all) processors.  I
;expect only those familiar with IDEA and its reference
;implementation will be able to follow the discussion.	See:
;
;Lai, Xuejia and Massey, James L.  A Proposal for a New Block
;Encryption Standard, Eurocrypt 90
;
;For those who have been asking for the code, sorry I kept
;putting it off.  I wanted to get it out of Turbo Pascal
;ideal-mode, but I never had the time.
;
;Colin Plumb wrote IDEA-386 code which is included in PGP
;2.3a and uses the same tricks.  I don't know whose is
;faster, but I expect they will be very close.	Now
;here's how it's done.
;
;A major bottleneck in software IDEA is the mul() routine, which
;is used 34 times per 64 bit block.  The routine performs
;multiplication in the multiplicative group mod 2^16+1.  The two
;factors are each in a 16 bit word, and the output is also in a 16
;bit word.	Note that 0 is not a member of the multiplicative
;group and 2^16 does not fit in 16 bits. We therefore use the 0
;word to represent 2^16.  Now group elements map one to one onto
;all possible 16 bit words, since 2^16+1 is prime.
;
;Here is (essentially) the reference implementation from [Lai].
;
;
;unsigned mul( unsigned a, unsigned b ) {
;  long int p ;
;  long unsigned q ;
;		if( a==0 ) p= 0x00010001 - b ;
;		else if( b==0 ) p= 0x00010001 - a ;
;		else {
;				q= a*b;
;				p= (q & 0xffff) - (q>>16)
;				if( p<0 ) p= p + 0x00010001 ;
;		  }
;		return (unsigned)(p & 0xffff) ;
;}
;
;
;Note the method of reducing a 32 bit word modulo 2^16+1.  We
;subtract the high word from the low word, and add the modulus
;back if the result is less than 0.  [Lai] contains a proof that
;this works, and you can convince yourself fairly easily.
;
;To speed up this routine, we note that the tests for a=0 and b=0
;will rarely be true.	With the possible exception of the first 2
;of the 34 multiplications, 0 should be no more likely than any of
;the other 65535 numbers.  Note that if (and only if) either a or
;b is 0 then q will also be 0, and we can check for this in one
;instruction if our processor sets a zero flag for multiplication
;(as the 68000 does but 80x86 does not).
;
;Fortunately p will also be zero after the subtraction if and only
;if either a or b is 0.  Proof: r will be zero when the high order
;word of q equals the low order word, and that happens when q is
;divisible by 00010001 hex.  Since 00010001h = 2^16+1 is prime,
;this happens if either a or b is a multiple of 2^16+1, and 0 is
;the only such multiple which will fit in a 16 bit word.
;
;The speed-up strategy is to proceed under the assumption that a
;and b are not 0, check to be sure in one instruction, and
;recompute if the assumption was wrong.  Here's some 8086
;assembler code:
;
;		mov  ax, [a]
;		mul  [b]		; ax is implied. q is now in DX AX
;		sub  ax, dx 	; mod 2^16+1
;		jnz  not0		; Jump if neither op was 0. Usually taken.
;
;		mov  ax, 1		; recompute result knowing one op is 0.
;		sub  ax, [a]
;		sub  ax, [b]
;		jmp  out		; Just jump over adding the carry.
;not0:
;		adc  ax, 0		; If r<0 add 1, otherwise do nothing.
;out:				 ; Result is now in ax
;
;
;Note that when r<0 we add 1 instead of 2^16+1 since the 2^16 part
;overflows out of the result.  The "adc  ax, 0" does all the work
;of checking for a negative result and adding the modulus if
;needed.
;
;The multiplication takes 9 instructions, 4 of which are rarely
;executed.	I believe similar tricks are possible on many
;processors.  The one drawback to the check-after-multiply tactic
;is that we can't let the multiply overwrite the only copy of an
;operand.
;
;Note that most software implementations of IDEA will run at
;slightly different speeds when 0's come up in the multiply
;routine.  The reference implementation is faster on 0, this one
;is faster on non-zero.  This may be a problem for some real-time
;stuff, and also suggests an attack based on timing.
;
;Finally, below is an implementation of the complete encryption
;function in 8086 assembler, to replace the cipher_idea() function
;in PGP.  It takes the same parameters as the function from PGP,
;and uses the c language calling conventions.  I tested it using
;the debug features of the idea.c file in PGP.	You will need to
;add segment/assume directives.  This version uses no global data
;and should be reentrant.
;
;The handling of zero multipliers is outside the inner loop so
;that a short conditional jump can loop back to the beginning.
;Forward conditional jumps are usually not taken and backward
;jumps are usually taken, which is consistent with 586 branch
;prediction (or so I've heard).  Stalls where the output of one
;instruction is needed for the next seem unavoidable.
;
;Last I heard, IDEA was patent pending.  My code is up for grabs,
;although I would get a kick out being credited if you use it.
;On the other hand Colin's code is already tested and ready
;to assemble and link with PGP.
;
;--Bryan
;
;____________________CODE STARTS BELOW THIS LINE_________

;  Called as: cipher_idea(inbuff, outbuff, zkey)
;  All arguments must be near pointers addressed off DS.

 ;		push ax 	; My compiler assumes these are not saved.
 ;		push bx
 ;		push cx
 ;		push dx

		push si
		push di

; Put the 16 bit sub-blocks in registers and/or local variables
		mov  si, [inblock]
		mov  ax, [si]
		mov  [sx1], ax		 ; x1  is in ax and sx1
		mov  di, [si+2] 	 ; x2  is in di
		mov  bx, [si+4] 	 ; x3  is in bx
		mov  dx, [si+6]
		mov  [sx4], dx		 ; x4  is in sx4

		mov  si, [zkey] 	 ; si points to next subkey
		mov  [done8], si
		add  [done8], 96	 ; we will be finished with 8 rounds
							 ; when si=done8

LLloop: 					 ; 8 rounds of this
		add  di, [si+2] 	 ; x2+=zkey[2]	is in di
		add  bx, [si+4] 	 ; x3+=zkey[4]	is in bx

		mul  [Word  ptr si]	 ;x1 *= zkey[0]
		sub  ax, dx
		jz	LLx1			 ; if 0, use special case multiply
		adc  ax, 0
LLx1out:
		mov  [sx1], ax		 ; x1 is in ax and sx1

		xor  ax, bx 		 ; ax= x1^x3
		mul  [Word ptr si+8] ; compute kk
		sub  ax, dx 		 ; if 0, use special case multiply
		jz	LLkk
		adc  ax, 0
LLkkout:
		mov  cx, ax 		 ; kk is in cx

		mov  ax, [sx4]		 ; x4 *= zkey[6]
		mul  [Word ptr si+6]
		sub  ax, dx
		jz	 LLx4			 ; if 0, use special case multiply
		adc  ax, 0
LLx4out:
		mov  [sx4], ax		 ; x4 is in sx4 and ax

		xor  ax, di 		 ; x4^x2
		add  ax, cx 		 ; kk+(x2^x4)
		mul  [Word ptr si+10]; compute t1
		sub  ax, dx
		jz	LLt1			 ; if 0, use special case multiply
		adc  ax, 0
LLt1out:					 ; t1 is in ax

		add  cx, ax 		 ; t2 is in cx	 kk+t1

		xor  [sx4], cx		 ; x4 in sx4
		xor  di, cx 		 ; new x3 in di
		xor  bx, ax 		 ; new x2 in bx
		xchg bx, di 		 ; x2 in di, x3 in bx
		xor  ax, [sx1]		 ; x1 in ax
		mov  [sx1], ax		 ; and [sx1]

		add  si, 12 		 ; point to next subkey
12 下一页
💿 文件大小 12 K
👤 上传用户 jxyw163
📂 所属分类加密解密
📄 代码行数 819 行
💻 语言类型 C++
🏷️ 相关标签

#speakfs #idea #算法
更多speakfs资源 →
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -