📄 gf128mul.h

📁 加密认证联合模式的VS2005工程文件
💻 H
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 ---------------------------------------------------------------------------
 Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved.

 LICENSE TERMS

 The free distribution and use of this software in both source and binary
 form is allowed (with or without changes) provided that:

   1. distributions of this source code include the above copyright
      notice, this list of conditions and the following disclaimer;

   2. distributions in binary form include the above copyright
      notice, this list of conditions and the following disclaimer
      in the documentation and/or other associated materials;

   3. the copyright holder's name is not used to endorse products
      built using this software without specific written permission.

 ALTERNATIVELY, provided that this notice is retained in full, this product
 may be distributed under the terms of the GNU General Public License (GPL),
 in which case the provisions of the GPL apply INSTEAD OF those given above.

 DISCLAIMER

 This software is provided 'as is' with no explicit or implied warranties
 in respect of its properties, including, but not limited to, correctness
 and/or fitness for purpose.
 ---------------------------------------------------------------------------
 Issue Date: 13/10/2006

 An implementation of field multiplication in the Galois field GF(2^128)
*/

#ifndef GF128MUL_H
#define GF128MUL_H

#include <stdlib.h>
#include <string.h>

#include "mode_hdr.h"

/*  Table sizes for GF(2^128) multiplication.  Larger tables normally give
    higher speed, but cache loading might change this.  Normally only
    one table size (or none at all) will be specified here.
*/

#if 0
#  define TABLES_64K
#endif
#if 1
#  define TABLES_8K
#endif
#if 0
#  define TABLES_4K
#endif
#if 0
#  define TABLES_256
#endif

/*  Use of inlines is preferred but code blocks can also be expanded inline
    using 'defines'.  But the latter approach will typically generate a LOT
    of code and is not recommended. 
*/
#if 0
#  define USE_INLINES
#endif

/*  Speed critical loops can be unrolled to gain speed but consume more
    memory
*/
#if 0
#  define UNROLL_LOOPS
#endif

/*  Multiply a GF128 field element by x. Field elements are held in arrays
    of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower
    indexed bits placed in the more numerically significant bit positions
    within bytes.

    On little endian machines the bit indexes translate into the bit
    positions within four 32-bit words in the following way

    MS            x[0]           LS  MS            x[1]           LS
    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
    24...31 16...23 08...15 00...07  56...63 48...55 40...47 32...39

    MS            x[2]           LS  MS            x[3]           LS
    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
    88...95 80...87 72...79 64...71  120.127 112.119 104.111 96..103

    On big endian machines the bit indexes translate into the bit
    positions within four 32-bit words in the following way

    MS            x[0]           LS  MS            x[1]           LS
    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
    00...07 08...15 16...23 24...31  32...39 40...47 48...55 56...63

    MS            x[2]           LS  MS            x[3]           LS
    ms   ls ms   ls ms   ls ms   ls  ms   ls ms   ls ms   ls ms   ls
    64...71 72...79 80...87 88...95  96..103 104.111 112.119 120.127
*/

#define GF_BYTE_LEN 16

/*  When USE_INLINES is defined, gf_inline is the storage/inline specifier
    placed in front of the inline multiply functions below: '__inline' for
    Microsoft compilers, 'static inline' for GNU compilers and plain
    'static' for anything else.  It is left undefined otherwise, in which
    case the macro ('defines') versions are used instead.
*/
#if defined( USE_INLINES )
#  if defined( _MSC_VER )
#    define gf_inline __inline
#  elif defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
#    define gf_inline static inline
#  else
#    define gf_inline static
#  endif
#endif

#if defined(__cplusplus)
extern "C"
{
#endif

/*  These functions multiply a field element by x, by x^4 and by x^8 in the
    polynomial field representation. They use 64-bit, 32-bit or byte
    operations (selected by BFR_UNIT) to gain speed but compensate for
    machine endianness and hence work correctly on both styles of machine.
*/
extern const unsigned short gf_tab[256];

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN

/*  This section is not needed as GF(128) multiplication is now implemented
    but is left in place as it provides a template for an alternative little
    endian implementation approach based on conversion to and from big endian
    format
*/
#if 0

/*  This is a template for mul_x.  The mul_x4 and mul_x8 little endian
    alternative implementations (and their defined versions) follow the 
    big endian functions below in the same way.
*/

/*  Disabled (#if 0) template: multiply the field element at x by x and
    store the result at r, working on four 32-bit words.
    NOTE(review): bswap32_block() and bswap_32() are defined outside this
    file -- presumably 32-bit byte swaps (block and single word); confirm
    against mode_hdr.h before enabling this code.
*/
gf_inline void mul_x(void *r, const void *x)
{   uint_32t _tt;
    bswap32_block(r, x, 4); 
    /* table value indexed by the lowest bit of word 3, moved up to bit 7 */
    _tt = gf_tab[(ui32_ptr(r)[3] << 7) & 0xff];
    /* shift the four words right by one bit, carrying from word n-1 into word n */
    ui32_ptr(r)[3] = (ui32_ptr(r)[3] >> 1) | (ui32_ptr(r)[2] << 31);
    ui32_ptr(r)[2] = (ui32_ptr(r)[2] >> 1) | (ui32_ptr(r)[1] << 31);
    ui32_ptr(r)[1] = (ui32_ptr(r)[1] >> 1) | (ui32_ptr(r)[0] << 31);
    /* fold the table value into word 0 */
    ui32_ptr(r)[0] = (ui32_ptr(r)[0] >> 1) ^ bswap_32(_tt);
    bswap32_block(r, r, 4);
}

#endif

/*  Masks that select the top bit (0x80) or the top four bits (0xf0) of
    every byte in a 32-bit or a 64-bit word.  Defining VERSION_1 selects
    the mask based mul_x4 code below in preference to the alternative
    that is built on byte swapping.
*/
#define MSK32_80 0x80808080ul
#define MSK32_F0 0xf0f0f0f0ul
#define MSK64_80 0x8080808080808080ull
#define MSK64_F0 0xf0f0f0f0f0f0f0f0ull
#define VERSION_1

#if defined( USE_INLINES )

#if BFR_UNIT == 64

    /*  Multiply the field element at x by x and store the product at r
        (little endian machines, two 64-bit words).
    */
    gf_inline void mul_x(void *r, const void *x)
    {
        const uint_64t lo = ui64_ptr(x)[0];
        const uint_64t hi = ui64_ptr(x)[1];
        /* table value indexed by bit 56 of the high word, moved to bit 7 */
        const uint_64t fold = gf_tab[(hi >> 49) & MSK64_80];
        /* every byte shifted right by one, top bit of each byte cleared */
        const uint_64t hi_shifted = (hi >> 1) & ~MSK64_80;
        const uint_64t lo_shifted = (lo >> 1) & ~MSK64_80;

        /* the MSK64_80 terms supply the bit carried in from the previous byte */
        ui64_ptr(r)[1] = hi_shifted | (((hi << 15) | (lo >> 49)) & MSK64_80);
        ui64_ptr(r)[0] = (lo_shifted | ((lo << 15) & MSK64_80)) ^ fold;
    }

  #if defined( VERSION_1 )

    /*  Multiply the field element at x by x^4 in place (little endian
        machines, two 64-bit words, mask based version).
    */
    gf_inline void mul_x4(void *x)
    {
        const uint_64t lo = ui64_ptr(x)[0];
        const uint_64t hi = ui64_ptr(x)[1];
        /* table value indexed by bits 56..59 of the high word, moved to bits 4..7 */
        const uint_64t fold = gf_tab[(hi >> 52) & MSK64_F0];
        /* four bits carried into the top of each byte from the previous byte */
        const uint_64t hi_carry = ((hi << 12) | (lo >> 52)) & MSK64_F0;
        const uint_64t lo_carry = (lo << 12) & MSK64_F0;

        ui64_ptr(x)[1] = ((hi >> 4) & ~MSK64_F0) | hi_carry;
        ui64_ptr(x)[0] = (((lo >> 4) & ~MSK64_F0) | lo_carry) ^ fold;
    }

  #else

    /*  Multiply the field element at x by x^4 in place (little endian
        machines, two 64-bit words, byte swapping version used when
        VERSION_1 is not defined).
    */
    gf_inline void mul_x4(void *x)
    {
        /* the table value is taken before the words are byte swapped */
        const uint_64t fold = gf_tab[(ui64_ptr(x)[1] >> 52) & 0xf0];
        uint_64t lo, hi;

        bswap64_block(x, x, 2);
        lo = ui64_ptr(x)[0];
        hi = ui64_ptr(x)[1];

        /* shift the swapped 128-bit value right by four and swap each word back */
        ui64_ptr(x)[1] = bswap_64((hi >> 4) | (lo << 60));
        ui64_ptr(x)[0] = bswap_64(lo >> 4) ^ fold;
    }

  #endif

    /*  Multiply the field element at x by x^8 in place (little endian
        machines, two 64-bit words).
    */
    gf_inline void mul_x8(void *x)
    {
        const uint_64t lo = ui64_ptr(x)[0];
        const uint_64t hi = ui64_ptr(x)[1];
        /* table value indexed by the most significant byte of the high word */
        const uint_64t fold = gf_tab[hi >> 56];

        /* move everything up by one byte, then fold the table value in */
        ui64_ptr(x)[1] = (hi << 8) | (lo >> 56);
        ui64_ptr(x)[0] = (lo << 8) ^ fold;
    }

#elif BFR_UNIT == 32

    /*  Multiply the field element at x by x and store the product at r
        (little endian machines, four 32-bit words).
    */
    gf_inline void mul_x(void *r, const void *x)
    {
        /* table value indexed by bit 24 of word 3, moved to bit 7 */
        const uint_32t fold = gf_tab[(ui32_ptr(x)[3] >> 17) & MSK32_80];
        uint_32t cur, prev;
        int i;

        /* words 3 down to 1: per-byte shift right by one, with the bit
           crossing each byte boundary supplied by the MSK32_80 term */
        for(i = 3; i > 0; --i)
        {
            cur  = ui32_ptr(x)[i];
            prev = ui32_ptr(x)[i - 1];
            ui32_ptr(r)[i] = ((cur >> 1) & ~MSK32_80)
                           | (((cur << 15) | (prev >> 17)) & MSK32_80);
        }

        /* word 0 has no lower word to carry from; the table value is folded in */
        cur = ui32_ptr(x)[0];
        ui32_ptr(r)[0] = (((cur >> 1) & ~MSK32_80) | ((cur << 15) & MSK32_80)) ^ fold;
    }

  #if defined( VERSION_1 )

    /*  Multiply the field element at x by x^4 in place (little endian
        machines, four 32-bit words, mask based version).
    */
    gf_inline void mul_x4(void *x)
    {
        /* table value indexed by bits 24..27 of word 3, moved to bits 4..7 */
        const uint_32t fold = gf_tab[(ui32_ptr(x)[3] >> 20) & MSK32_F0];
        uint_32t cur, prev;
        int i;

        /* words 3 down to 1: per-byte shift right by four, with the four
           bits crossing each byte boundary supplied by the MSK32_F0 term */
        for(i = 3; i > 0; --i)
        {
            cur  = ui32_ptr(x)[i];
            prev = ui32_ptr(x)[i - 1];
            ui32_ptr(x)[i] = ((cur >> 4) & ~MSK32_F0)
                           | (((cur << 12) | (prev >> 20)) & MSK32_F0);
        }

        /* word 0 has no lower word to carry from; the table value is folded in */
        cur = ui32_ptr(x)[0];
        ui32_ptr(x)[0] = (((cur >> 4) & ~MSK32_F0) | ((cur << 12) & MSK32_F0)) ^ fold;
    }

  #else

    /*  Multiply the field element at x by x^4 in place (little endian
        machines, four 32-bit words, byte swapping version used when
        VERSION_1 is not defined).
    */
    gf_inline void mul_x4(void *x)
    {
        /* the table value is taken before the words are byte swapped */
        const uint_32t fold = gf_tab[(ui32_ptr(x)[3] >> 20) & 0xf0];
        int i;

        bswap32_block(x, x, 4);

        /* words 3 down to 1: shift right by four with carry from the word
           below, swapping each result back as it is stored */
        for(i = 3; i > 0; --i)
        {
            ui32_ptr(x)[i] = bswap_32((ui32_ptr(x)[i] >> 4) | (ui32_ptr(x)[i - 1] << 28));
        }

        ui32_ptr(x)[0] = bswap_32(ui32_ptr(x)[0] >> 4) ^ fold;
    }

  #endif

    /*  Multiply the field element at x by x^8 in place (little endian
        machines, four 32-bit words).
    */
    gf_inline void mul_x8(void *x)
    {
        /* table value indexed by the most significant byte of word 3 */
        const uint_32t fold = gf_tab[ui32_ptr(x)[3] >> 24];
        int i;

        /* words 3 down to 1: move up by one byte, taking the top byte of
           the word below as the new bottom byte */
        for(i = 3; i > 0; --i)
        {
            ui32_ptr(x)[i] = (ui32_ptr(x)[i] << 8) | (ui32_ptr(x)[i - 1] >> 24);
        }

        ui32_ptr(x)[0] = (ui32_ptr(x)[0] << 8) ^ fold;
    }

#else

    /*  Multiply the field element at x by x and store the product at r,
        working one byte at a time.
    */
    gf_inline void mul_x(void *r, const void *x)
    {
        /* 0xe1 is folded into byte 0 when the lowest bit of byte 15 is set */
        const uint_8t fold = (ui8_ptr(x)[15] & 1) ? 0xe1 : 0x00;
        int i;

        /* bytes 15 down to 1: shift right by one, taking the low bit of
           the byte below as the new top bit */
        for(i = 15; i > 0; --i)
        {
            ui8_ptr(r)[i] = (ui8_ptr(x)[i] >> 1) | (ui8_ptr(x)[i - 1] << 7);
        }

        ui8_ptr(r)[0] = (ui8_ptr(x)[0] >> 1) ^ fold;
    }

    /*  Multiply the field element at x by x^4 in place, working one byte
        at a time.
    */
    gf_inline void mul_x4(void *x)
    {
        /* table value indexed by the low four bits of byte 15, moved to bits 4..7 */
        const uint_16t fold = gf_tab[(ui8_ptr(x)[15] << 4) & 0xff];
        int i;

        /* bytes 15 down to 2: shift right by four, taking the low four
           bits of the byte below as the new top four bits */
        for(i = 15; i > 1; --i)
        {
            ui8_ptr(x)[i] = (ui8_ptr(x)[i] >> 4) | (ui8_ptr(x)[i - 1] << 4);
        }

        /* bytes 1 and 0 also take the high and low bytes of the table value */
        ui8_ptr(x)[1] = ((ui8_ptr(x)[1] >> 4) | (ui8_ptr(x)[0] << 4)) ^ (fold >> 8);
        ui8_ptr(x)[0] =  (ui8_ptr(x)[0] >> 4) ^ (fold & 0xff);
    }

    /*  Multiply the field element at x by x^8 in place, working one byte
        at a time.
    */
    gf_inline void mul_x8(void *x)
    {
        /* table value indexed by byte 15, which is about to be overwritten */
        const uint_16t fold = gf_tab[ui8_ptr(x)[15]];

        /* move bytes 0..14 up to positions 1..15 (the regions overlap) */
        memmove(ui8_ptr(x) + 1, ui8_ptr(x), 15);

        /* byte 0 becomes the low byte of the table value; the high byte
           is folded into byte 1 */
        ui8_ptr(x)[0] = (fold & 0xff);
        ui8_ptr(x)[1] ^= (fold >> 8);
    }

#endif

#else   /* DEFINES */

#if BFR_UNIT == 64

    /*  Macro form of mul_x (used when USE_INLINES is not defined) for
        little endian machines with two 64-bit words: multiplies the field
        element at x by x and stores the product at r.  Both r and x are
        expanded several times, so they should be free of side effects.
        The do { } while(0) wrapper lets the macro be used as one statement.
    */
    #define mul_x(r, x) do { uint_64t  _tt = gf_tab[(ui64_ptr(x)[1] >> 49) & MSK64_80]; \
        ui64_ptr(r)[1] =  (ui64_ptr(x)[1] >> 1) & ~MSK64_80                             \
            | ((ui64_ptr(x)[1] << 15) | (ui64_ptr(x)[0] >> 49)) & MSK64_80;             \
        ui64_ptr(r)[0] = ((ui64_ptr(x)[0] >> 1) & ~MSK64_80                             \
            |  (ui64_ptr(x)[0] << 15) & MSK64_80) ^ _tt;                                \
    } while(0)

  #if defined( VERSION_1 )

    /*  Macro form of mul_x4 (used when USE_INLINES is not defined) for
        little endian machines with two 64-bit words, mask based version:
        multiplies the field element at x by x^4 in place.  x is expanded
        several times, so it should be free of side effects.
    */
    #define mul_x4(x) do { uint_64t   _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & MSK64_F0];  \
        ui64_ptr(x)[1] =  (ui64_ptr(x)[1] >> 4) & ~MSK64_F0 | ((ui64_ptr(x)[1] << 12)   \
            | (ui64_ptr(x)[0] >> 52)) & MSK64_F0;                                       \
        ui64_ptr(x)[0] = ((ui64_ptr(x)[0] >> 4) & ~MSK64_F0                             \
            |  (ui64_ptr(x)[0] << 12) & MSK64_F0) ^ _tt;                                \
    } while(0)

  #else

    #define mul_x4(x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & 0xf0];        \
        bswap64_block(x, x, 2);                                                         \
        ui64_ptr(x)[1] = bswap_64((ui64_ptr(x)[1] >> 4) | (ui64_ptr(x)[0] << 60));      \
        ui64_ptr(x)[0] = bswap_64((ui64_ptr(x)[0] >> 4)) ^ _tt;                         \
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -