/* cast5.cpp — CAST5 (CAST-128) x86 assembly implementation */
mov right, 8+[esp+4] /* U */
mov esi, 8+[esp+12] /* V */
push left /* U */
push ebx /* V */
mov left, [right] /* U */
mov right, [right+4] /* V */
bswap right /* NP */
bswap left /* NP */
mov eax, [esi] /* U */
xor ebx,ebx /* V */
mov ecx, [esi+4] /* U */
ROUND(right, left, xor, sub, add, 0)
ROUND(left, right, sub, add, xor, 1)
ROUND(right, left, add, xor, sub, 2)
ROUND(left, right, xor, sub, add, 3)
ROUND(right, left, sub, add, xor, 4)
ROUND(left, right, add, xor, sub, 5)
ROUND(right, left, xor, sub, add, 6)
ROUND(left, right, sub, add, xor, 7)
ROUND(right, left, add, xor, sub, 8)
ROUND(left, right, xor, sub, add, 9)
ROUND(right, left, sub, add, xor,10)
ROUND(left, right, add, xor, sub,11)
ROUND(right, left, xor, sub, add,12)
ROUND(left, right, sub, add, xor,13)
ROUND(right, left, add, xor, sub,14)
/* ROUND(left, right, xor, sub, add,15)
* Last round: omit loading of keys for next round
* Fetch out pointer and store data there instead
*/
add eax, left /* 1 U */
xor edx, edx /* V */
rol eax, cl /*2-5NP */
mov bl, ah /* 6 U */
mov dl, al /* V */
shr eax, 16 /* 7 Ux */
mov esi, 16+[esp+8] /* V */
mov ecx, S1[ebx*4] /* 8 U */
mov bl, ah /* V */
mov edx, S2[edx*4] /* 9 U */
and eax, 255 /* V */
xor ecx, edx /*10 U */
mov edx, S3[ebx*4] /* V */
sub ecx, edx /*11 U */
mov eax, S4[eax*4] /* V */
add ecx, eax /*12 U */
pop ebx /* V */
bswap left /*13 NP */
xor right, ecx /*14 U */
mov [esi+4], left /* V */
bswap right /* NP */
pop left /* U */
mov [esi], right /* V */
pop right /* U */
pop esi /* V */
ret /* NP */
}
}
#define left ebp
#define right edi
/*
 * void CAST5Decrypt(const PGPByte *in, PGPByte *out, const PGPUInt32 *xkey)
 *
 * Decrypt one 8-byte CAST5 block from "in" to "out" using the expanded
 * key schedule "xkey" (a mask/rotate subkey dword pair per round).
 * Naked cdecl function: the prologue/epilogue below are hand-written and
 * save/restore all callee-saved registers used (esi, edi, ebp, ebx).
 * "left" = ebp and "right" = edi via the #defines above.  ROUND2 is the
 * decrypt round macro defined earlier in this file (not visible here);
 * it is assumed to preload the preceding round's subkeys into eax/ecx —
 * TODO confirm against the macro definition.
 * The trailing U / V / NP comments record intended Pentium pipe pairing
 * (U-pipe, V-pipe, non-pairable).
 */
__declspec(naked)
void
__cdecl
CAST5Decrypt(const PGPByte *in, PGPByte *out, const PGPUInt32 *xkey)
{
__asm
{
ALIGN 16
push esi /* U - save callee-saved esi */
push right /* V - save callee-saved edi */
mov right, 8+[esp+4] /* U - right = in pointer (arg 1; +8 for two pushes) */
mov esi, 8+[esp+12] /* V - esi = xkey (arg 3) */
push left /* U - save callee-saved ebp */
push ebx /* V - save callee-saved ebx */
mov left, [right] /* U - load first ciphertext word */
mov right, [right+4] /* V - load second ciphertext word */
bswap right /* NP - CAST5 operates big-endian; swap on load */
bswap left /* NP */
mov eax, [esi+8*15] /* U - preload last round's mask subkey (index 15) */
xor ebx,ebx /* V - ebx stays zero so "mov bl, x" yields a clean index */
mov ecx, 4+[esi+8*15] /* U - preload last round's rotate subkey */
/* 16 rounds with subkeys in reverse order; the op triple rotates per round */
ROUND2(right, left, xor, sub, add,15)
ROUND2(left, right, add, xor, sub,14)
ROUND2(right, left, sub, add, xor,13)
ROUND2(left, right, xor, sub, add,12)
ROUND2(right, left, add, xor, sub,11)
ROUND2(left, right, sub, add, xor,10)
ROUND2(right, left, xor, sub, add,9)
ROUND2(left, right, add, xor, sub,8)
ROUND2(right, left, sub, add, xor,7)
ROUND2(left, right, xor, sub, add,6)
ROUND2(right, left, add, xor, sub,5)
ROUND2(left, right, sub, add, xor,4)
ROUND2(right, left, xor, sub, add,3)
ROUND2(left, right, add, xor, sub,2)
ROUND2(right, left, sub, add, xor,1)
/* ROUND2(left, right, xor, sub, add,0)
 * Last round: omit loading of keys for next round
 * Fetch out pointer and store data there instead
 */
add eax, left /* 1 U - t = Km + left (start of round function) */
xor edx, edx /* V - clear edx for byte extraction below */
rol eax, cl /*2-5NP - t = t <<< Kr */
mov bl, ah /* 6 U - extract a byte of t as S-box index */
mov dl, al /* V - extract low byte of t */
shr eax, 16 /* 7 Ux - expose the high two bytes of t */
mov esi, 16+[esp+8] /* V - esi = out pointer (arg 2; +16 for four pushes) */
mov ecx, S1[ebx*4] /* 8 U - S-box lookups, one byte of t each */
mov bl, ah /* V */
mov edx, S2[edx*4] /* 9 U */
and eax, 255 /* V - isolate remaining byte of t */
xor ecx, edx /*10 U - combine per this round's op pattern */
mov edx, S3[ebx*4] /* V */
sub ecx, edx /*11 U */
mov eax, S4[eax*4] /* V */
add ecx, eax /*12 U - ecx = ((S1 ^ S2) - S3) + S4 */
pop ebx /* V - restore ebx */
bswap left /*13 NP - back to external byte order */
xor right, ecx /*14 U - final half-round: right ^= f(left) */
mov [esi+4], left /* V - store second output word */
bswap right /* NP */
pop left /* U - restore ebp */
mov [esi], right /* V - store first output word */
pop right /* U - restore edi */
pop esi /* V - restore esi */
ret
}
}
/*
* asm void
* CAST5encryptCFBdbl(
* register word32 const *xkey, // esp+pushes+ 4
* register word32 in0, // esp+pushes+ 8
* register word32 in1, // esp+pushes+12
* register word32 in2, // esp+pushes+16
* register word32 in3, // esp+pushes+20
* register word32 const *src, // esp+pushes+24
* register word32 *dest, // esp+pushes+28
* register word32 len) // esp+pushes+32
*
* Note that "len" is the number of 16-byte units to encrypt.
* Since this function only encrypts one block per time
* around the loop, it has to be doubled.
*
* Doing the dbl part...
 * We use the argument slots on the stack as IV registers, but
* actually only use one IV, in the in2/in3 slots.
* Each iteration, we fetch from them the data to be encrypted
* for the next iteration before storing the current
 * ciphertext for the iteration after the next, thus achieving the
* necessary interleaving.
*/
#define left ebp
#define right edi
/*
 * CAST5 CFB-mode encryption, interleaved two blocks deep ("dbl").
 * See the block comment above for the argument layout and the trick of
 * reusing the iv2/iv3 argument slots as the delayed-feedback IV store.
 * "len" counts 16-byte units, so it is doubled on entry and each loop
 * iteration then handles one 8-byte block.
 * Naked cdecl; "left" = ebp, "right" = edi (see #defines above).
 * ROUND is the encrypt round macro defined earlier in this file (not
 * visible here).  U/V/NP tags are Pentium pipe-pairing annotations.
 */
__declspec(naked)
void
__cdecl
CAST5EncryptCFBdbl(
const PGPUInt32 *xkey,
PGPUInt32 iv0,
PGPUInt32 iv1,
PGPUInt32 iv2,
PGPUInt32 iv3,
const PGPUInt32 *src,
PGPUInt32 *dest,
PGPUInt32 len)
{
__asm
{
ALIGN 16
push esi /* save callee-saved esi */
push right /* save callee-saved edi */
mov esi, 8+[esp+4] /* U - load key schedule pointer */
push left /* V - save callee-saved ebp */
mov left, 12+[esp+8] /* U - load in0 as left */
mov right, 12+[esp+12] /* V - load in1 as right */
push ebx /* U - save callee-saved ebx */
xor ebx,ebx /* V - ebx stays zero so "mov bl, x" yields a clean index */
shl dword ptr 16+[esp+32], 1 /* NP - double loop counter */
encryptloop:
mov eax, [esi] /* U - preload key material */
mov ecx, [esi+4] /* V - preload key material */
bswap right /* NP - CAST5 operates big-endian */
bswap left /* NP */
ROUND(right, left, xor, sub, add, 0)
ROUND(left, right, sub, add, xor, 1)
ROUND(right, left, add, xor, sub, 2)
ROUND(left, right, xor, sub, add, 3)
ROUND(right, left, sub, add, xor, 4)
ROUND(left, right, add, xor, sub, 5)
ROUND(right, left, xor, sub, add, 6)
ROUND(left, right, sub, add, xor, 7)
ROUND(right, left, add, xor, sub, 8)
ROUND(left, right, xor, sub, add, 9)
ROUND(right, left, sub, add, xor,10)
ROUND(left, right, add, xor, sub,11)
ROUND(right, left, xor, sub, add,12)
ROUND(left, right, sub, add, xor,13)
ROUND(right, left, add, xor, sub,14)
/* ROUND(left, right, xor, sub, add,15)
 * Last round: omit loading of keys for next round
 * Instead, start the CFB operations. Including the
 * swap of the halves, that ends up as:
 *
 * %eax = src[0] ^ bswap(right)
 * %ecx = src[1] ^ bswap(left)
 * src += 8bytes
 * left = bswap(in2)
 * right = bswap(in3)
 * in2 = %eax
 * dest[0] = %eax
 * in3 = %ecx
 * dest[1] = %ecx
 * dest += 8bytes
 *
 * For the '486, we can just use bswap. For the '386, it's
 * xchg ah,al
 * rol $16,eax
 * xchg ah,al
 *
 * Annoyingly, this really *sucks* on a PPro, due to fierce
 * partial-register stalls. So it pretty much has to go two ways.
 * Options are:
 * - Duplicate the entire encryption loop?
 * - Do it in a pre-pass and a post-pass. Makes life easy, but we
 * end up being memory bound.
 * - Something truly weird?
 */
add eax, left /* 1 U - t = Km + left (start of round function) */
xor edx, edx /* V - clear edx for byte extraction below */
rol eax, cl /*2-5NP - t = t <<< Kr */
mov bl, ah /* 6 U - extract a byte of t as S-box index */
mov dl, al /* V - extract low byte of t */
shr eax, 16 /* 7 Ux - expose the high two bytes of t */
nop /* V - pairing filler */
mov ecx, S1[ebx*4] /* 8 U - S-box lookups, one byte of t each */
mov bl, ah /* V */
mov edx, S2[edx*4] /* 9 U */
and eax, 255 /* V - isolate remaining byte of t */
xor ecx, edx /*10 U - combine per this round's op pattern */
mov edx, S3[ebx*4] /* V */
sub ecx, edx /*11 U */
mov edx, S4[eax*4] /* V */
add ecx, edx /*12 U - ecx = ((S1 ^ S2) - S3) + S4 */
mov ebx, 16+[esp+24] /* V - fetch src pointer */
xor right, ecx /*13 U - final half-round: right ^= f(left) */
add ebx, 8 /* V - increment src ptr */
bswap left /* NP - back to external byte order */
bswap right /* NP */
mov eax, [ebx-8] /* U - get src word */
mov ecx, [ebx-4] /* V - other src word */
mov 16+[esp+24], ebx /* U - store src pointer back */
mov edx, 16+[esp+28] /* V - fetch dest pointer */
xor eax, right /* U - ciphertext word 0 = src ^ keystream */
xor ecx, left /* V - ciphertext word 1 */
mov left, 16+[esp+16] /* U - fetch in2 for new left */
mov right, 16+[esp+20] /* V - fetch in3 for new right */
mov 16+[esp+16], eax /* U - store ciphertext for next time */
mov 16+[esp+20], ecx /* V - store ciphertext for next time */
mov [edx+0], eax /* U - store result */
mov [edx+4], ecx /* V - store result */
add edx, 8 /* U - increment dest ptr */
xor ebx, ebx /* V - clear %ebx for next iteration */
dec dword ptr 16+[esp+32] /* U - decrement loop counter (set ZF) */
mov 16+[esp+28], edx /* V - store dest pointer back */
/* Pairing opportunity lost, sigh */
jnz encryptloop
/* And we're done! Pop registers and return */
pop ebx /* U */
pop left /* V */
pop right /* U */
pop esi /* V */
ret /* NP */
}
}
/*
* dbl decode... plain[i] = cipher[i] ^ CRYPT(cipher[i-2])
* Can we do this with only one temporary (and still allow in-place)?
* Crypt cipher[i-2] -> plain[i]
* Load cipher[i-1] from temp
* Load cipher[i]
* Store cipher[i] into temp, plain[i] ^= cipher[i] (or vice-versa)
* Store plain[i]
* i++
* Um... do we have enough regs? It seems that we have cipher[i],
* cipher[i-1] and plain[i] in registers at the same time.
* Try this order of operations:
* Crypt cipher[i-2] -> plain[i]
* Load cipher[i]
* plain[i] ^= cipher[i]
* store plain[i]
* load temp into plain[i], which is now cipher[i-1] (cipher[i-2] after i++)
* store cipher[i] into temp
* increment i
* That works, and doesn't use an excessive number of registers.
*
* Written out in full,
* %eax = src[0];
* %ecx = src[1];
* src += 8bytes
* BSWAP(left)
* BSWAP(right)
* right ^= %eax
* left ^= %ecx
* dest[0] = right
* dest[1] = left
* dest += 8bytes
* left = in2
* right = in3
* in2 = %eax
* in3 = %ecx
*/
/*
 * CAST5 CFB-mode decryption, interleaved two blocks deep ("dbl").
 * CFB decryption still runs the block cipher in the ENCRYPT direction
 * (note the forward-order ROUND sequence); only the feedback wiring
 * differs from CAST5EncryptCFBdbl: plain[i] = cipher[i] ^ E(cipher[i-2]),
 * per the comment block above.  "len" counts 16-byte units, so it is
 * doubled on entry and each loop iteration handles one 8-byte block.
 * The iv2/iv3 argument slots on the stack hold the delayed ciphertext.
 * Naked cdecl; "left" = ebp, "right" = edi (see #defines above).
 * ROUND is the encrypt round macro defined earlier in this file (not
 * visible here).  U/V/NP tags are Pentium pipe-pairing annotations.
 */
__declspec(naked)
void
__cdecl
CAST5DecryptCFBdbl(
const PGPUInt32 *xkey,
PGPUInt32 iv0,
PGPUInt32 iv1,
PGPUInt32 iv2,
PGPUInt32 iv3,
const PGPUInt32 *src,
PGPUInt32 *dest,
PGPUInt32 len)
{
__asm
{
ALIGN 16
push esi /* save callee-saved esi */
push right /* save callee-saved edi */
mov esi, 8+[esp+4] /* U - load key schedule pointer */
push left /* V - save callee-saved ebp */
mov left, 12+[esp+8] /* U - load in0 as left */
mov right, 12+[esp+12] /* V - load in1 as right */
push ebx /* U - save callee-saved ebx */
xor ebx,ebx /* V - ebx stays zero so "mov bl, x" yields a clean index */
shl dword ptr 16+[esp+32], 1 /* NP - double loop counter */
decryptloop:
mov eax, [esi] /* U - preload key material */
mov ecx, [esi+4] /* V - preload key material */
bswap right /* NP - CAST5 operates big-endian */
bswap left /* NP */
ROUND(right, left, xor, sub, add, 0)
ROUND(left, right, sub, add, xor, 1)
ROUND(right, left, add, xor, sub, 2)
ROUND(left, right, xor, sub, add, 3)
ROUND(right, left, sub, add, xor, 4)
ROUND(left, right, add, xor, sub, 5)
ROUND(right, left, xor, sub, add, 6)
ROUND(left, right, sub, add, xor, 7)
ROUND(right, left, add, xor, sub, 8)
ROUND(left, right, xor, sub, add, 9)
ROUND(right, left, sub, add, xor,10)
ROUND(left, right, add, xor, sub,11)
ROUND(right, left, xor, sub, add,12)
ROUND(left, right, sub, add, xor,13)
ROUND(right, left, add, xor, sub,14)
/* ROUND(left, right, xor, sub, add,15)
 * Last round: omit loading of keys for next round
 * Instead, start the CFB operations. Including the
 * swap of the halves, that ends up as:
 *
 * %eax = src[0];
 * %ecx = src[1];
 * src += 8bytes
 * BSWAP(left)
 * BSWAP(right)
 * right ^= %eax
 * left ^= %ecx
 * dest[0] = right
 * dest[1] = left
 * dest += 8bytes
 * left = in2
 * right = in3
 * in2 = %eax
 * in3 = %ecx
 */
add eax, left /* 1 U - t = Km + left (start of round function) */
xor edx, edx /* V - clear edx for byte extraction below */
rol eax, cl /*2-5NP - t = t <<< Kr */
mov bl, ah /* 6 U - extract a byte of t as S-box index */
mov dl, al /* V - extract low byte of t */
shr eax, 16 /* 7 Ux - expose the high two bytes of t */
nop /* V - pairing filler */
mov ecx, S1[ebx*4] /* 8 U - S-box lookups, one byte of t each */
mov bl, ah /* V */
mov edx, S2[edx*4] /* 9 U */
and eax, 255 /* V - isolate remaining byte of t */
xor ecx, edx /*10 U - combine per this round's op pattern */
mov edx, S3[ebx*4] /* V */
sub ecx, edx /*11 U */
mov edx, S4[eax*4] /* V */
add ecx, edx /*12 U - ecx = ((S1 ^ S2) - S3) + S4 */
mov ebx, 16+[esp+24] /* V - fetch src pointer */
xor right, ecx /*13 U - final half-round: right ^= f(left) */
add ebx, 8 /* V - increment src ptr */
bswap left /* NP - back to external byte order */
bswap right /* NP */
mov eax, [ebx-8] /* U - get src word */
mov ecx, [ebx-4] /* V - other src word */
mov 16+[esp+24], ebx /* U - store src pointer back */
mov edx, 16+[esp+28] /* V - fetch dest pointer */
xor right, eax /* U - plaintext word 0 = keystream ^ ciphertext */
xor left, ecx /* V - plaintext word 1 */
mov [edx+0], right /* U - store result */
mov [edx+4], left /* V - store result */
mov left, 16+[esp+16] /* U - fetch in2 for new left */
mov right, 16+[esp+20] /* V - fetch in3 for new right */
mov 16+[esp+16], eax /* U - store ciphertext for next time */
mov 16+[esp+20], ecx /* V - store ciphertext for next time */
add edx, 8 /* U - increment dest ptr */
xor ebx, ebx /* V - clear %ebx for next iteration */
dec dword ptr 16+[esp+32] /* U - decrement loop counter (set ZF) */
mov 16+[esp+28], edx /* V - store dest pointer back */
/* Pairing opportunity lost, sigh */
jnz decryptloop
/* And we're done! Pop registers and return */
pop ebx /* U */
pop left /* V */
pop right /* U */
pop esi /* V */
ret /* NP */
}
}
#endif // !USE_CAST5_ASSEMBLY