📄 square.c
字号:
/*
* The Square block cipher.
*
* Algorithm developed by Joan Daemen <Daemen.J@banksys.com> and
* Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
*
* This public domain implementation by Paulo S.L.M. Barreto
* <pbarreto@uninet.com.br> and George Barwood <george.barwood@dial.pipex.com>
* based on software originally written by Vincent Rijmen.
*
* Caveat: this code assumes 32-bit words and probably will not work
* otherwise.
*
* Version 2.6 (1997.05.24)
*
* =============================================================================
*
* Differences from version 2.5 (1997.04.25)
*
* - Improved key scheduling setup to enhance hashing performance. Modules
* sqgen.c and sqtest.c have been updated accordingly. The new scheme
* (suggested by Vincent Rijmen) uses tables instead of explicit GF(2^8)
* multiplications. Increase in size of the generated code is very small.
*
* - Hashing scheme (Matyas-Meyer-Oseas) added.
*
* =============================================================================
*
* Differences from version 2.4 (1997.04.09):
*
* - Changed all initialization functions so that the IV (when applicable)
* is separately loaded.
*
* - Ciphertext Stealing (CTS) mode added.
*
* - Output Feedback (OFB) mode added.
*
* - Cipher Block Chaining (CBC) mode added.
*
* - Split square.c into several files according to the specific functionality
* (basic functions, modes, testing).
*
* - Flipped tables according to the endianness of the underlying platform
* for best performance.
*
* - Changed "maketabs.c" to "sqgen.c" for compatibility with the Pegwit system.
*
* =============================================================================
*
* Differences from version 2.3 (1997.04.09):
*
* - Defined function squareExpandKey() to enhance performance of both CFB
* initialization and hash computation (not yet implemented).
*
* - Changed definition of function squareTransform() to accept a single in-out
* parameter, and optimized function squareGenerateRoundKeys accordingly.
*
* =============================================================================
*
* Differences from version 2.2 (1997.03.03):
*
* - Cipher feedback (CFB) mode added (heavily based on an old public domain CFB
* shell written by Colin Plumb for the IDEA cipher).
*
* - Fixed word size problem (64 bits rather than 32) arising on the Alpha.
*
* - Reformatted indented sections of compiler directives for use with old,
* non-ANSI compliant compilers.
*
* Differences from version 2.1 (1997.03.03):
*
* - Added optional Microsoft x86 assembler version, which can boost performance
* by up to 20% depending on the target machine, and generates smaller code.
*
* Differences from version 2.0 (1997.02.11):
*
* - Added typecasts to the build-up of out[] in function squareTransform()
* to make it portable to 16-bit (MSDOS) systems.
*
* - Truncated alogtab[] back to 256 elements and changed the mul() macro
* accordingly. Using an extended table to avoid a division seemed an
* unnecessary storage overhead (it could be useful to speed up hash
* functions derived from Square, but other optimizations are likely to be
* more effective).
*
* Differences from version 2.0 (1997.02.11):
*
* - Updated definition of Square algorithm (version 1.0 implemented an
* embryonic form of Square).
*
* ==============================================================================
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "square.h"
/*
 * R is the number of cipher rounds. It defaults to 8 when nothing earlier in
 * the translation unit has defined it; any other value is rejected at compile
 * time because the code below is written for exactly 8 rounds.
 */
#ifndef R
#define R 8
#endif /* R */
#if R != 8
#error "This implementation is optimized for (and assumes) exactly 8 rounds"
#endif
/*
 * Basic integer types, declared only when no earlier header has already
 * provided them (which is signalled by USUAL_TYPES being defined).
 *
 * word32 has to be exactly 32 bits wide: the rotation and byte-extraction
 * macros in this file hard-code that width. 'unsigned long' is 64 bits wide
 * on the Alpha and on every other LP64 target, so 'unsigned int' is selected
 * there; all remaining targets keep 'unsigned long' as before.
 */
#ifndef USUAL_TYPES
# define USUAL_TYPES
typedef unsigned char byte; /* 8 bit */
typedef unsigned short word16; /* 16 bit */
# if defined(__alpha) || defined(__LP64__) || defined(_LP64)
typedef unsigned int word32; /* 32 bit ('long' is 64 bits wide here) */
# else /* !(__alpha || __LP64__ || _LP64) */
typedef unsigned long word32; /* 32 bit */
# endif /* ?(__alpha || __LP64__ || _LP64) */
#endif /* ?USUAL_TYPES */
/*
 * Platform endianness.
 *
 * Exactly one of LITTLE_ENDIAN / BIG_ENDIAN must end up defined; the byte
 * access macros further down are selected with #ifdef LITTLE_ENDIAN.
 * When the build does not define either one, the byte order is deduced from:
 *   - the Microsoft x86 and Alpha predefined macros (always little-endian);
 *   - the __BYTE_ORDER__ predefined macro offered by GCC-compatible compilers.
 * If none of these is available the build stops with an error, as it does
 * when both macros are defined at once.
 * NOTE(review): some system headers define both names as numeric constants
 * under certain feature-test settings; that case is still rejected here.
 */
#if !defined(LITTLE_ENDIAN) && !defined(BIG_ENDIAN)
# if defined(_M_IX86) || defined(_M_I86) || defined(__alpha)
# define LITTLE_ENDIAN
# elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
# define LITTLE_ENDIAN
# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
# define BIG_ENDIAN
# else
# error "Either LITTLE_ENDIAN or BIG_ENDIAN must be defined"
# endif
#elif defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
# error "LITTLE_ENDIAN and BIG_ENDIAN must not be simultaneously defined"
#endif /* !LITTLE_ENDIAN && !BIG_ENDIAN */
/*
 * Microsoft C / Intel x86 optimizations:
 *   HARDWARE_ROTATIONS makes ROTL/ROTR expand to _lrotl/_lrotr instead of the
 *                      portable shift-and-or expressions;
 *   ASSEMBLER_CORE     enables the #ifdef ASSEMBLER_CORE section of this file
 *                      and adds a line to squareBanner.
 */
#if defined(_MSC_VER) && defined(_M_IX86)
# define HARDWARE_ROTATIONS
# define ASSEMBLER_CORE
#endif /* ?(_MSC_VER && _M_IX86) */
#include "square.tab" /* substitution boxes */
/*
 * Identification string for this build. It is assembled at compile time by
 * string-literal concatenation: the version and compilation date/time come
 * first, followed by one extra line for each of ASSEMBLER_CORE and
 * DESTROY_TEMPORARIES that is defined.
 */
const char *squareBanner =
"Square cipher v.2.6 (compiled on " __DATE__ " " __TIME__ ").\n"
#ifdef ASSEMBLER_CORE
"Using assembler core functions.\n"
#endif /* ?ASSEMBLER_CORE */
#ifdef DESTROY_TEMPORARIES
"Destroying temporaries after use.\n"
#endif /* ?DESTROY_TEMPORARIES */
; /* squareBanner */
/*
 * ROTL(val, cnt) / ROTR(val, cnt): rotate val left / right by cnt bits.
 *
 * The portable form combines two shifts and hard-codes a width of 32 bits.
 * It evaluates both arguments twice, and cnt must lie strictly between 0 and
 * 32 (the complementary shift would otherwise be by the full width of a
 * 32-bit operand). With HARDWARE_ROTATIONS the _lrotl/_lrotr routines are
 * used instead.
 */
#ifndef HARDWARE_ROTATIONS
# define ROTL(val, cnt) (((val) << (cnt)) | ((val) >> (32 - (cnt))))
# define ROTR(val, cnt) (((val) >> (cnt)) | ((val) << (32 - (cnt))))
#else /* HARDWARE_ROTATIONS */
# define ROTL(val, cnt) (_lrotl ((val), (cnt)))
# define ROTR(val, cnt) (_lrotr ((val), (cnt)))
#endif /* ?HARDWARE_ROTATIONS */
/*
 * Byte-level access to a 32-bit word.
 *
 *   GETBn(x)  yields byte n of the word x;
 *   PUTBn(x)  yields a word32 holding the byte x at byte position n.
 *
 * Byte 0 is the least significant byte when LITTLE_ENDIAN is defined and the
 * most significant byte otherwise, i.e. the numbering follows the order in
 * which the bytes of a word are stored in memory.
 *
 * MASKED_BYTE_EXTRACTION picks how a byte is isolated after the shift:
 * masking with 0xffU when defined, a cast to 'byte' when not.
 *
 * PSI_ROTL / PSI_ROTR rotate in terms of that byte numbering rather than in
 * terms of bit significance: PSI_ROTL by 8 moves byte n+1 into position n
 * (byte 0 wraps around to position 3) and PSI_ROTR does the opposite, so on
 * little-endian builds they map to the opposite machine-level rotation.
 */
#ifdef MASKED_BYTE_EXTRACTION
# define SQ_BYTE_AT(x, sh) (((x) >> (sh)) & 0xffU)
#else /* !MASKED_BYTE_EXTRACTION */
# define SQ_BYTE_AT(x, sh) ((byte) ((x) >> (sh)))
#endif /* ?MASKED_BYTE_EXTRACTION */
#define SQ_WORD_AT(x, sh) ((word32) (x) << (sh))
#ifdef LITTLE_ENDIAN
# define GETB0(x) SQ_BYTE_AT (x, 0)
# define GETB1(x) SQ_BYTE_AT (x, 8)
# define GETB2(x) SQ_BYTE_AT (x, 16)
# define GETB3(x) SQ_BYTE_AT (x, 24)
# define PUTB0(x) SQ_WORD_AT (x, 0)
# define PUTB1(x) SQ_WORD_AT (x, 8)
# define PUTB2(x) SQ_WORD_AT (x, 16)
# define PUTB3(x) SQ_WORD_AT (x, 24)
# define PSI_ROTL(x, s) ROTR(x, s)
# define PSI_ROTR(x, s) ROTL(x, s)
#else /* !LITTLE_ENDIAN */
# define GETB0(x) SQ_BYTE_AT (x, 24)
# define GETB1(x) SQ_BYTE_AT (x, 16)
# define GETB2(x) SQ_BYTE_AT (x, 8)
# define GETB3(x) SQ_BYTE_AT (x, 0)
# define PUTB0(x) SQ_WORD_AT (x, 24)
# define PUTB1(x) SQ_WORD_AT (x, 16)
# define PUTB2(x) SQ_WORD_AT (x, 8)
# define PUTB3(x) SQ_WORD_AT (x, 0)
# define PSI_ROTL(x, s) ROTL(x, s)
# define PSI_ROTR(x, s) ROTR(x, s)
#endif /* ?LITTLE_ENDIAN */
/*
 * mul(a, b): GF(2^8) product of a and b computed through the log/antilog
 * tables: 0 when either operand is 0, otherwise
 * alogtab[(logtab[a] + logtab[b]) reduced modulo 255].
 *
 * The macro needs an assignable integer named 'mtemp' in scope at the point
 * of use, plus the tables logtab[] and alogtab[] (neither is defined in this
 * part of the file). Each argument is evaluated more than once, so arguments
 * must be free of side effects. Arguments are fully parenthesized so that
 * operator expressions such as mul(x ^ y, z) are grouped as written.
 */
#define mul(a, b) (((a) && (b)) ? alogtab[(mtemp = logtab[(a)] + logtab[(b)]) >= 255 ? mtemp - 255 : mtemp] : 0)
/*
 * D(addr): view the storage at addr as an array of word32.
 * The cast drops any const qualifier and performs no alignment check.
 * NOTE(review): callers presumably pass suitably aligned buffers -- confirm.
 */
#define D(addr) ((word32 *) (addr))
/*
 * COPY_BLOCK(dst, src): copy elements 0..3 of src into dst, one index at a
 * time and in ascending order. Both arguments are evaluated four times.
 * The expansion is a plain braced compound statement (not do/while), so a
 * call followed by ';' must not be the unbraced body of an if that has an
 * else branch.
 */
#define COPY_BLOCK(dst, src) \
{ \
    (dst)[0] = (src)[0]; (dst)[1] = (src)[1]; \
    (dst)[2] = (src)[2]; (dst)[3] = (src)[3]; \
} /* COPY_BLOCK */
/*
 * Applies the theta diffusion function, in place, to one round key.
 *
 * Each of the four words is replaced independently: its four bytes are used
 * as indices into the phi[] table, and the entries for bytes 1, 2 and 3 are
 * rotated (PSI_ROTR) by 8, 16 and 24 bits before all four are XORed together.
 * phi[] is not defined in this part of the file.
 *
 * roundKey: the four words to transform; overwritten with the result.
 */
static void squareTransform (word32 roundKey[4])
{
    int col;

    for (col = 0; col < 4; col++) {
        /* snapshot the word: all four look-ups use its old value */
        const word32 w = roundKey[col];

        roundKey[col] = phi[GETB0 (w)] ^
            PSI_ROTR (phi[GETB1 (w)], 8) ^
            PSI_ROTR (phi[GETB2 (w)], 16) ^
            PSI_ROTR (phi[GETB3 (w)], 24);
    }
} /* squareTransform */
/*
 * Builds the encryption and the decryption key schedules from a cipher key.
 *
 * key:         the cipher key; its storage is read as four word32 values.
 * roundKeys_e: receives the encryption schedule (R+1 round keys).
 * roundKeys_d: receives the decryption schedule (R+1 round keys).
 *
 * Round key 0 is the cipher key itself. Every following round key is derived
 * from its predecessor by the key evolution function: word 0 is XORed with
 * the predecessor's word 3 rotated by 8 bits (PSI_ROTL) and with offset[t-1];
 * words 1..3 are each XORed with the freshly computed preceding word.
 *
 * On return:
 *   roundKeys_e[0..R-1] have been passed through squareTransform();
 *   roundKeys_e[R] is left as evolved;
 *   roundKeys_d[R-t] holds the evolved (untransformed) key t, for t = 1..R;
 *   roundKeys_d[R] is a copy of the transformed roundKeys_e[0].
 */
void squareGenerateRoundKeys (const squareBlock key,
    squareKeySchedule roundKeys_e, squareKeySchedule roundKeys_d)
{
    int round;

    COPY_BLOCK (roundKeys_e[0], D(key));
    for (round = 1; round <= R; round++) {
        word32 *prev = roundKeys_e[round - 1];
        word32 *next = roundKeys_e[round];
        word32 *mirror = roundKeys_d[R - round];
        int k;

        /* key evolution; each word is mirrored into the decryption schedule */
        next[0] = prev[0] ^ PSI_ROTL (prev[3], 8) ^ offset[round - 1];
        mirror[0] = next[0];
        for (k = 1; k < 4; k++) {
            next[k] = prev[k] ^ next[k - 1];
            mirror[k] = next[k];
        }
        /* the predecessor is no longer needed as evolution input: diffuse it */
        squareTransform (prev);
    }
    COPY_BLOCK (roundKeys_d[R], roundKeys_e[0]);
} /* squareGenerateRoundKeys */
/*
 * Builds only the encryption key schedule from a cipher key.
 *
 * key:         the cipher key; its storage is read as four word32 values.
 * roundKeys_e: receives the R+1 round keys.
 *
 * The derivation matches the encryption half of squareGenerateRoundKeys():
 * round key 0 is the cipher key, each later key is evolved from its
 * predecessor, and once a key has served as evolution input it is passed
 * through squareTransform(). Hence roundKeys_e[0..R-1] end up transformed
 * while roundKeys_e[R] is left as evolved.
 */
void squareExpandKey (const squareBlock key, squareKeySchedule roundKeys_e)
{
    int round;

    COPY_BLOCK (roundKeys_e[0], D(key));
    for (round = 1; round <= R; round++) {
        word32 *prev = roundKeys_e[round - 1];
        word32 *next = roundKeys_e[round];
        int k;

        /* key evolution: word 0 mixes in the rotated last word and a round constant */
        next[0] = prev[0] ^ PSI_ROTL (prev[3], 8) ^ offset[round - 1];
        for (k = 1; k < 4; k++) {
            next[k] = prev[k] ^ next[k - 1];
        }
        /* theta diffusion of the key that has just been consumed */
        squareTransform (prev);
    }
} /* squareExpandKey */
#ifdef ASSEMBLER_CORE
/* Microsoft x86 version by George Barwood */
/* About 15-20% faster, using less code */
/* Notes:
Calculate 4 outputs of each round in parallel using esi, edx, ecx, edi
eax is used to hold the sub-round input
ebx is used as a byte index register (and also to address text)
ebp is used to address roundKeys and tables
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -