/* twofish2.c */
/***************************************************************************
TWOFISH2.C -- Optimized C API calls for TWOFISH AES submission
Submitters:
Bruce Schneier, Counterpane Systems
Doug Whiting, Hi/fn
John Kelsey, Counterpane Systems
Chris Hall, Counterpane Systems
David Wagner, UC Berkeley
Code Author: Doug Whiting, Hi/fn
Version 1.00 April 1998
Copyright 1998, Hi/fn and Counterpane Systems. All rights reserved.
Notes:
* Optimized version
* Tab size is set to 4 characters in this file
***************************************************************************/
#include "aes.h"
#include "table.h"
#include <memory.h>
#include <assert.h>
#include <stddef.h>
#include <string.h>
/* Accept lowercase spellings of the keying-mode switches: if min_key, part_key
   or zero_key is defined (and its uppercase twin is not), define the uppercase
   form that the rest of this file tests.  The tests are chained with #elif, so
   only the first lowercase switch that matches is converted. */
#if defined(min_key) && !defined(MIN_KEY)
#define MIN_KEY 1 /* toupper() */
#elif defined(part_key) && !defined(PART_KEY)
#define PART_KEY 1
#elif defined(zero_key) && !defined(ZERO_KEY)
#define ZERO_KEY 1
#endif
/* Declarations for the optional assembly-language back end.  Everything here
   is only declared (extern); the definitions live outside this file.
   NOTE(review): the _86 suffix suggests x86 routines -- confirm. */
#ifdef USE_ASM
extern int useAsm; /* ok to use ASM code? */
/* CipherProc and KeySetupProc are FUNCTION types (not pointer types); the
   objects declared below are pointers to functions of those types. */
typedef int CipherProc
(cipherInstance *cipher, keyInstance *key,BYTE *input,int inputLen,BYTE *outBuffer);
typedef int KeySetupProc(keyInstance *key);
extern CipherProc *blockEncrypt_86; /* ptr to ASM functions */
extern CipherProc *blockDecrypt_86;
extern KeySetupProc *reKey_86;
extern DWORD TwofishAsmCodeSize(void);
#endif
/*
+*****************************************************************************
* Constants/Macros/Tables
-****************************************************************************/
/* CONST expands to nothing, so declarations written "CONST type x" are plain
   (writable) declarations in this file. */
#define CONST /* help syntax from C++, NOP here */
CONST fullSbox MDStab; /* not actually const. Initialized ONE time */
/* Starts at 1; the code that builds MDStab and clears this flag is not in this
   part of the file. */
int needToBuildMDS=1; /* is MDStab initialized yet? */
/* number of rounds for various key sizes: 128, 192, 256 */
/* (ignored for now in optimized code!) */
/* Slot 0 is a placeholder; setRounds() writes slots 1, 2 and 3 for 128-, 192-
   and 256-bit keys respectively. */
int numRounds[4]= {0,ROUNDS_128,ROUNDS_192,ROUNDS_256};
/* _sBox_ names the key-dependent S-box table.  When REENTRANT is nonzero it
   expands to key->sBox8x32, so every use site must have a variable named
   "key" in scope; otherwise a single file-static table is used. */
#if REENTRANT
#define _sBox_ key->sBox8x32
#else
static fullSbox _sBox_; /* permuted MDStab based on keys */
#endif
/* Byte-wise view of _sBox_: pointer to the N-th consecutive 256-byte slice. */
#define _sBox8_(N) (((BYTE *) _sBox_) + (N)*256)
/*------- see what level of S-box precomputation we need to do -----*/
/* Exactly one of the four branches below is compiled.  Each one supplies
   MOD_STRING (a description of the keying mode), the Fe32_* lookup macro(s)
   and GetSboxKey; every branch except ZERO_KEY also supplies sbSet.
   The helpers _b(), b0()..b3() and p8() are defined outside this file section.
   NOTE(review): macro arguments are substituted without parentheses (e.g.
   "i+J", "2*i", "N&2"), so callers must pass simple operands -- confirm at the
   call sites before passing compound expressions. */
#if defined(ZERO_KEY)
#define MOD_STRING "(Zero S-box keying)"
/* ZERO_KEY: no per-key table is read.  Each macro chains the fixed p8()
   tables, XORing in one byte of SKEY[] per stage, and ends with an MDStab
   lookup; the four column results are XORed together.  The 128-, 192- and
   256-bit variants use two, three and four p8() stages (SKEY[0..1],
   SKEY[0..2], SKEY[0..3]) respectively. */
#define Fe32_128(x,R) \
( MDStab[0][p8(01)[p8(02)[_b(x,R )]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
MDStab[1][p8(11)[p8(12)[_b(x,R+1)]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
MDStab[2][p8(21)[p8(22)[_b(x,R+2)]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
MDStab[3][p8(31)[p8(32)[_b(x,R+3)]^b3(SKEY[1])]^b3(SKEY[0])] )
#define Fe32_192(x,R) \
( MDStab[0][p8(01)[p8(02)[p8(03)[_b(x,R )]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
MDStab[1][p8(11)[p8(12)[p8(13)[_b(x,R+1)]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
MDStab[2][p8(21)[p8(22)[p8(23)[_b(x,R+2)]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
MDStab[3][p8(31)[p8(32)[p8(33)[_b(x,R+3)]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
#define Fe32_256(x,R) \
( MDStab[0][p8(01)[p8(02)[p8(03)[p8(04)[_b(x,R )]^b0(SKEY[3])]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
MDStab[1][p8(11)[p8(12)[p8(13)[p8(14)[_b(x,R+1)]^b1(SKEY[3])]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
MDStab[2][p8(21)[p8(22)[p8(23)[p8(24)[_b(x,R+2)]^b2(SKEY[3])]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
MDStab[3][p8(31)[p8(32)[p8(33)[p8(34)[_b(x,R+3)]^b3(SKEY[3])]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
/* Declares a local SKEY[4] and fills it from key->sboxKeys (needs "key" in
   scope).  The expansion is a declaration followed by a statement and already
   ends with ';'. */
#define GetSboxKey DWORD SKEY[4]; /* local copy */ \
memcpy(SKEY,key->sboxKeys,sizeof(SKEY));
/*----------------------------------------------------------------*/
#elif defined(MIN_KEY)
#define MOD_STRING "(Minimal keying)"
/* MIN_KEY: a byte is read from the per-key byte table _sBox8_(n), passed
   through one p8() stage, XORed with a byte of SKEY0, then looked up in
   MDStab. */
#define Fe32_(x,R)(MDStab[0][p8(01)[_sBox8_(0)[_b(x,R )]] ^ b0(SKEY0)] ^ \
MDStab[1][p8(11)[_sBox8_(1)[_b(x,R+1)]] ^ b1(SKEY0)] ^ \
MDStab[2][p8(21)[_sBox8_(2)[_b(x,R+2)]] ^ b2(SKEY0)] ^ \
MDStab[3][p8(31)[_sBox8_(3)[_b(x,R+3)]] ^ b3(SKEY0)])
/* Store byte v at index i+J of byte table N. */
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
/* Declares local SKEY0 as a copy of key->sboxKeys[0]; no trailing ';'. */
#define GetSboxKey DWORD SKEY0 = key->sboxKeys[0] /* local copy */
/*----------------------------------------------------------------*/
#elif defined(PART_KEY)
#define MOD_STRING "(Partial keying)"
/* PART_KEY: the per-key byte table _sBox8_(n) feeds MDStab directly; no SKEY
   material is used at lookup time. */
#define Fe32_(x,R)(MDStab[0][_sBox8_(0)[_b(x,R )]] ^ \
MDStab[1][_sBox8_(1)[_b(x,R+1)]] ^ \
MDStab[2][_sBox8_(2)[_b(x,R+2)]] ^ \
MDStab[3][_sBox8_(3)[_b(x,R+3)]])
/* Store byte v at index i+J of byte table N. */
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
/* Nothing to fetch in this mode: expands to nothing. */
#define GetSboxKey
/*----------------------------------------------------------------*/
#else /* default is FULL_KEY */
#ifndef FULL_KEY
#define FULL_KEY 1
#endif
#ifdef COMPILE_KEY
#define MOD_STRING "(Compiled subkeys)"
#else
#define MOD_STRING "(Full keying)"
#endif
/* Fe32_ does a full S-box + MDS lookup. Need to #define _sBox_ before use.
Note that we "interleave" 0,1, and 2,3 to avoid cache bank collisions
in optimized assembly language.
*/
/* Layout used by both macros below: boxes 0 and 1 share row _sBox_[0] (even
   and odd entries), boxes 2 and 3 share row _sBox_[2] in the same way. */
#define Fe32_(x,R) (_sBox_[0][2*_b(x,R )] ^ _sBox_[0][2*_b(x,R+1)+1] ^ \
_sBox_[2][2*_b(x,R+2)] ^ _sBox_[2][2*_b(x,R+3)+1])
/* set a single S-box value, given the input byte */
/* Stores MDStab[N][v] in row N&2 at column 2*i+(N&1)+2*J. */
#define sbSet(N,i,J,v) { _sBox_[N&2][2*i+(N&1)+2*J]=MDStab[N][v]; }
/* Nothing to fetch in this mode: expands to nothing. */
#define GetSboxKey
#endif
/* Identification strings for this build.  modeString carries MOD_STRING, i.e.
   the description of whichever keying mode was selected by the preprocessor
   branches earlier in this file. */
CONST char *moduleDescription ="Optimized C ";
CONST char *modeString =MOD_STRING;
/* macro(s) for debugging help */
/* CHECK_TABLE gates compilation of the slow f32() routine in this file;
   VALIDATE_PARMS gates the pointer-alignment check in ParseHexDword().
   NOTE(review): both are defined before debug.h is included, presumably so
   that header can test them as well -- confirm against debug.h. */
#define CHECK_TABLE 0 /* nonzero --> compare against "slow" table */
#define VALIDATE_PARMS 0 /* disable for full speed */
#include "debug.h" /* debug display macros */
/* end of debug macros */
extern DWORD Here(DWORD x); /* return caller's address! */
/* Returns the value of Here(0).  Per the declaration above, Here() reports its
   caller's address, so the result is an address inside this function.
   NOTE(review): presumably used to measure code size -- confirm with callers.
   (The stray ';' that used to follow the body was an empty file-scope
   declaration, which ISO C does not permit.) */
DWORD TwofishCodeStart(void) { return Here(0); }
/*
+*****************************************************************************
*
* Function Name: setRounds
*
* Function: Override the Twofish round count for one key size
*
* Arguments: keyLen = key size whose entry is changed (128, 192 or 256)
* nRounds = new round count: even, and in the range 2..MAX_ROUNDS
*
* Return: TRUE if both arguments are acceptable and the entry was stored;
* FALSE otherwise, in which case nothing is modified
*
* Notes: The value is stored in the global numRounds[] table (slot 1, 2
* or 3), which is used in setting up the key schedule.
*
-****************************************************************************/
int setRounds(int keyLen,int nRounds)
{
    int slot; /* index into numRounds[] for this key size */

    /* reject odd counts and counts outside 2..MAX_ROUNDS */
    if (nRounds < 2)
        return FALSE;
    if (nRounds > MAX_ROUNDS)
        return FALSE;
    if (nRounds & 1)
        return FALSE;

    if (keyLen == 128)
        slot = 1;
    else if (keyLen == 192)
        slot = 2;
    else if (keyLen == 256)
        slot = 3;
    else
        return FALSE; /* unsupported key size */

    numRounds[slot] = nRounds;
    return TRUE;
}
/*
+*****************************************************************************
*
* Function Name: TableOp
*
* Function: Handle table use checking
*
* Arguments: op = requested operation (see TAB_* defns in AES.H)
*
* Return: FALSE for a TAB_QUERY while fewer than TAB_MIN_QUERY queries
* have been counted since the last TAB_RESET; TRUE in every
* other case, including unrecognized op values
*
* Notes: This routine is for use in generating the tables KAT file.
* The optimized version does not track real table usage (the
* macros would become incredibly ugly); it simply counts queries
* and reports "done" after a fixed number of them.
* TAB_DISABLE and TAB_ENABLE are accepted but do nothing.
* The counter is a function-local static, shared by all callers.
*
-****************************************************************************/
int TableOp(int op)
{
    static int queriesSeen=0; /* TAB_QUERY calls since the last TAB_RESET */

    if (op == TAB_RESET)
    {
        queriesSeen=0;
    }
    else if (op == TAB_QUERY)
    {
        queriesSeen++;
        if (queriesSeen < TAB_MIN_QUERY)
            return FALSE; /* not enough queries yet */
    }
    /* TAB_DISABLE, TAB_ENABLE and any other value: nothing to do */
    return TRUE;
}
/*
+*****************************************************************************
*
* Function Name: ParseHexDword
*
* Function: Parse ASCII hex nibbles and fill in key/iv dwords
*
* Arguments: bits = # bits to read (one hex character per 4 bits)
* srcTxt = ASCII source
* d = ptr to dwords to fill in
* dstTxt = where to make a copy of ASCII source
* (NULL ok)
*
* Return: Zero if no error. Nonzero --> error:
* BAD_ENDIAN = ADDR_XOR does not match this machine's byte order
* BAD_ALIGN32 = d is not 4-byte aligned (checked only when both
* VALIDATE_PARMS and ALIGN32 are nonzero)
* BAD_KEY_MAT = a non-hex character was found (this includes the
* terminating NUL of a too-short string)
*
* Notes: Note that the parameter d is a DWORD array, not a byte array.
* This routine is coded to work both for little-endian and big-endian
* architectures. The character stream is interpreted as a LITTLE-ENDIAN
* byte stream, since that is how the Pentium works, but the conversion
* happens automatically below.
*
* Each character is copied to dstTxt before it is validated, so on
* BAD_KEY_MAT the copy includes the offending character. No terminator
* is appended to dstTxt on success.
*
-****************************************************************************/
int ParseHexDword(int bits,CONST char *srcTxt,DWORD *d,char *dstTxt)
{
    int i;
    char c;
    DWORD b;
    union /* make sure LittleEndian is defined correctly */
    {
        BYTE b[4];
        DWORD d[1];
    } v;

    v.d[0]=1;
    if (v.b[0 ^ ADDR_XOR] != 1)
        return BAD_ENDIAN; /* make sure compile-time switch is set ok */

#if VALIDATE_PARMS
#if ALIGN32
    /* Test the low address bits through size_t: converting a pointer to int
       can truncate it (and is undefined if the value is unrepresentable) on
       targets where pointers are wider than int. */
    if (((size_t)d) & 3)
        return BAD_ALIGN32;
#endif
#endif

    for (i=0;i*32<bits;i++)
        d[i]=0; /* first, zero the field */

    for (i=0;i*4<bits;i++) /* parse one nibble at a time */
    { /* case out the hexadecimal characters */
        c=srcTxt[i];
        if (dstTxt) dstTxt[i]=c;
        if ((c >= '0') && (c <= '9'))
            b=c-'0';
        else if ((c >= 'a') && (c <= 'f'))
            b=c-'a'+10;
        else if ((c >= 'A') && (c <= 'F'))
            b=c-'A'+10;
        else
            return BAD_KEY_MAT; /* invalid hex character */
        /* works for big and little endian! */
        /* Eight characters fill one dword.  (i^1) swaps each character pair so
           the first character of a pair becomes the high nibble of its byte;
           byte k of the text lands at bits 8k..8k+7 of the dword VALUE, which
           is why no memory byte order is involved. */
        d[i/8] |= b << (4*((i^1)&7));
    }

    return 0; /* no error */
}
#if CHECK_TABLE
/*
+*****************************************************************************
*
* Function Name: f32
*
* Function: Run four bytes through keyed S-boxes and apply MDS matrix
*
* Arguments: x = input to f function
* k32 = pointer to key dwords
* keyLen = total key length (k32 --> keyLen/2 bits)
*
* Return: The output of the keyed permutation applied to x.
*
* Notes:
* This function is a keyed 32-bit permutation. It is the major building
* block for the Twofish round function, including the four keyed 8x8
* permutations and the 4x4 MDS matrix multiply. This function is used
* both for generating round subkeys and within the round function on the
* block being encrypted.
*
* This version is fairly slow and pedagogical, although a smartcard would
* probably perform the operation exactly this way in firmware. For
* ultimate performance, the entire operation can be completed with four
* lookups into four 256x32-bit tables, with three dword xors.
*
* The MDS matrix is defined in TABLE.H. To multiply by Mij, just use the
* macro Mij(x).
*
* The stage selector is ((keyLen + 63)/64) & 3: 0 runs all stages, 3 skips
* the k32[3] stage, 2 additionally skips the k32[2] stage, and 1 applies
* no S-box stage at all before the MDS multiply.
*
-****************************************************************************/
DWORD f32(DWORD x,CONST DWORD *k32,int keyLen)
{
    BYTE b[4];
    DWORD t;

    /* Run each byte thru 8x8 S-boxes, xoring with key byte at each stage. */
    /* Note that each byte goes through a different combination of S-boxes.*/

    /* make b[0] = LSB, b[3] = MSB.  The bytes are copied with memcpy rather
       than stored through a (DWORD *) cast of b: b is a byte array, so such a
       store breaks the aliasing rules and may be misaligned. */
    t = Bswap(x);
    memcpy(b,&t,sizeof(b));

    switch (((keyLen + 63)/64) & 3)
    {
    case 0: /* 256 bits of key */
        b[0] = p8(04)[b[0]] ^ b0(k32[3]);
        b[1] = p8(14)[b[1]] ^ b1(k32[3]);
        b[2] = p8(24)[b[2]] ^ b2(k32[3]);
        b[3] = p8(34)[b[3]] ^ b3(k32[3]);
        /* fall thru, having pre-processed b[0]..b[3] with k32[3] */
    case 3: /* 192 bits of key */
        b[0] = p8(03)[b[0]] ^ b0(k32[2]);
        b[1] = p8(13)[b[1]] ^ b1(k32[2]);
        b[2] = p8(23)[b[2]] ^ b2(k32[2]);
        b[3] = p8(33)[b[3]] ^ b3(k32[2]);
        /* fall thru, having pre-processed b[0]..b[3] with k32[2] */
    case 2: /* 128 bits of key */
        b[0] = p8(00)[p8(01)[p8(02)[b[0]] ^ b0(k32[1])] ^ b0(k32[0])];
        b[1] = p8(10)[p8(11)[p8(12)[b[1]] ^ b1(k32[1])] ^ b1(k32[0])];
        b[2] = p8(20)[p8(21)[p8(22)[b[2]] ^ b2(k32[1])] ^ b2(k32[0])];
        b[3] = p8(30)[p8(31)[p8(32)[b[3]] ^ b3(k32[1])] ^ b3(k32[0])];
        break;
    default: /* selector 1: bytes pass to the MDS multiply unchanged */
        break;
    }

    /* Now perform the MDS matrix multiply inline. */
    return ((M00(b[0]) ^ M01(b[1]) ^ M02(b[2]) ^ M03(b[3])) ) ^
           ((M10(b[0]) ^ M11(b[1]) ^ M12(b[2]) ^ M13(b[3])) << 8) ^
           ((M20(b[0]) ^ M21(b[1]) ^ M22(b[2]) ^ M23(b[3])) << 16) ^
           ((M30(b[0]) ^ M31(b[1]) ^ M32(b[2]) ^ M33(b[3])) << 24) ;
}
#endif /* CHECK_TABLE */
/*
+*****************************************************************************
*
* Function Name: RS_MDS_Encode
*
* Function: Use (12,8) Reed-Solomon code over GF(256) to produce
* a key S-box dword from two key material dwords.
*
* Arguments: k0 = 1st dword
* k1 = 2nd dword
*
* Return: Remainder polynomial generated using RS code
*
* Notes:
* k1 is fed in first and k0 second; after each dword is merged in, the
* RS_rem() step is applied four times (one per byte).
*
* Since this computation is done only once per reKey per 64 bits of key,
* the performance impact of this routine is imperceptible. The RS code
* chosen has "simple" coefficients to allow smartcard/hardware implementation
* without lookup tables.
*
-****************************************************************************/
DWORD RS_MDS_Encode(DWORD k0,DWORD k1)
{
    DWORD r;
    int step;

    r = k1; /* first 32 key bits */
    for (step=0;step<4;step++) /* shift one byte at a time */
    {
        RS_rem(r);
    }

    r ^= k0; /* merge in 32 more key bits */
    for (step=0;step<4;step++)
    {
        RS_rem(r);
    }

    return r;
}
/*
+*****************************************************************************
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -