📄 gf128mul.h

📁 加密认证联合模式的VS2005工程文件
💻 H
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
/*  tab64k(x) views the opaque table pointer x as a gf_t64k so that
    it can be indexed as table[byte position][byte value].
    xor_64k(i,a,t,r) xors into the block r the entry of table t that
    is selected by byte position i and the byte value a[i].
    Every macro parameter is parenthesised so that expression
    arguments (e.g. 'buf + 16' or 'c ? t0 : t1') expand as intended.
    Note that i is evaluated twice, so it must have no side effects.
*/
#define tab64k(x)           ((gf_t64k)(x))
#define xor_64k(i,a,t,r)    xor_block_aligned((r), tab64k(t)[(i)][(a)[(i)]])

#if defined( USE_INLINES )

#if defined( UNROLL_LOOPS )

/*  gf_mul_64k, inline form with the loop written out in full.
    The table entry for byte position 0 is copied into r, the
    entries for positions 1 to 15 are xored in, and the result
    is then copied back over a[].
      a   input block, overwritten with the result
      t   table pointer, accessed through tab64k()
      r   work block, which also holds the result on return
*/
gf_inline void gf_mul_64k(unsigned char a[], void *t, void *r)
{
    /* position 0 seeds the accumulator */
    move_block_aligned(r, tab64k(t)[0][a[0]]);

    /* fold in the remaining fifteen positions */
    xor_64k( 1, a, t, r);
    xor_64k( 2, a, t, r);
    xor_64k( 3, a, t, r);
    xor_64k( 4, a, t, r);
    xor_64k( 5, a, t, r);
    xor_64k( 6, a, t, r);
    xor_64k( 7, a, t, r);
    xor_64k( 8, a, t, r);
    xor_64k( 9, a, t, r);
    xor_64k(10, a, t, r);
    xor_64k(11, a, t, r);
    xor_64k(12, a, t, r);
    xor_64k(13, a, t, r);
    xor_64k(14, a, t, r);
    xor_64k(15, a, t, r);

    /* return the result in the caller's buffer */
    move_block_aligned(a, r);
}

#else

/*  gf_mul_64k, inline form using a loop.
    The table entry for byte position 0 is copied into r, the
    entries for positions 1 to GF_BYTE_LEN - 1 are xored in, and
    the result is then copied back over a[].
      a   input block, overwritten with the result
      t   table pointer, accessed through tab64k()
      r   work block, which also holds the result on return
*/
gf_inline void gf_mul_64k(unsigned char a[], void *t, void *r)
{
    int pos = 0;

    move_block_aligned(r, tab64k(t)[0][a[0]]);

    /* xor_64k is a function-like macro, so the loop body is braced
       (as in the other loop variants) to keep the whole expansion
       inside the loop whatever xor_block_aligned expands to */
    while(++pos < GF_BYTE_LEN)
    {
        xor_64k(pos, a, t, r);
    }

    move_block_aligned(a, r);
}

#endif

#else

/*  Macro forms of gf_mul_64k(a, t, r), used when USE_INLINES is not
    defined.  The table entry for byte position 0 is copied into r,
    the entries for the remaining positions are xored in, and the
    result is copied back over a.  The unrolled body is selected when
    UNROLL_LOOPS is defined, matching the inline versions and the
    other table sizes.
    The arguments are parenthesised on every use and the loop index
    has a name that is unlikely to clash with an identifier in the
    caller's argument expressions.  Each argument is evaluated more
    than once, so arguments must not have side effects.
*/
#if defined( UNROLL_LOOPS )

#define gf_mul_64k(a, t, r) do {                            \
    move_block_aligned((r), tab64k((t))[0][(a)[0]]);        \
    xor_64k( 1, (a), (t), (r));                             \
    xor_64k( 2, (a), (t), (r)); xor_64k( 3, (a), (t), (r)); \
    xor_64k( 4, (a), (t), (r)); xor_64k( 5, (a), (t), (r)); \
    xor_64k( 6, (a), (t), (r)); xor_64k( 7, (a), (t), (r)); \
    xor_64k( 8, (a), (t), (r)); xor_64k( 9, (a), (t), (r)); \
    xor_64k(10, (a), (t), (r)); xor_64k(11, (a), (t), (r)); \
    xor_64k(12, (a), (t), (r)); xor_64k(13, (a), (t), (r)); \
    xor_64k(14, (a), (t), (r)); xor_64k(15, (a), (t), (r)); \
    move_block_aligned((a), (r));                           \
} while(0)

#else

#define gf_mul_64k(a, t, r) do { int gf_pos_;               \
    move_block_aligned((r), tab64k((t))[0][(a)[0]]);        \
    for(gf_pos_ = 1; gf_pos_ < GF_BYTE_LEN; ++gf_pos_)      \
    {   xor_64k(gf_pos_, (a), (t), (r));                    \
    }                                                       \
    move_block_aligned((a), (r));                           \
} while(0)

#endif

#endif

/*  This version uses 8k bytes of table space on the stack.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(2^128).  If we consider a GF(2^128) value in
    the buffer's lowest 4 bits, we can construct a table of
    the 16 16-byte values that result from the 16 values
    of these 4 bits. This requires 256 bytes. But we also
    need a table for each of the 31 higher 4-bit groups,
    which makes 32 tables and 8 kbytes in total.
*/

/*  Declared here and defined elsewhere.  Presumably this builds, for
    the 16 byte value g, the table at t that gf_mul_8k() reads - confirm
    against the definition.
*/
void init_8k_table(unsigned char g[], void *t);

/*  Pointer to the first of a series of tables, each of which holds 16
    entries of (GF_BYTE_LEN >> 2) uint_32t words.  The code below
    indexes it as table[2 * byte position + (0 or 1)][nibble value].
*/
typedef uint_32t    (*gf_t8k)[16][GF_BYTE_LEN >> 2];
/*  tab8k(x) views the opaque table pointer x as a gf_t8k.
    xor_8k(i,a,t,r) xors into the block r the two entries of table t
    that belong to byte i of a[]: table 2i indexed by the low nibble
    and table 2i + 1 indexed by the high nibble.
    The two statements are wrapped in do { } while(0) so that the
    macro is a single statement (safe as an unbraced if/for body),
    and the parameters are parenthesised so that expression arguments
    expand as intended.  i and a are evaluated more than once and so
    must not have side effects.
*/
#define tab8k(x)    ((gf_t8k)(x))
#define xor_8k(i,a,t,r) do {                                            \
    xor_block_aligned((r), tab8k(t)[(i) + (i)][(a)[(i)] & 15]);         \
    xor_block_aligned((r), tab8k(t)[(i) + (i) + 1][(a)[(i)] >> 4]);     \
} while(0)

#if defined( USE_INLINES )

#if defined( UNROLL_LOOPS )

/*  gf_mul_8k, inline form with the loop written out in full.
    Each byte of a[] selects two table entries, one for its low
    nibble and one for its high nibble.  The low nibble entry for
    byte 0 is copied into r, all the other entries are xored in,
    and the result is copied back over a[].
      a   input block, overwritten with the result
      t   table pointer, accessed through tab8k()
      r   work block, which also holds the result on return
*/
gf_inline void gf_mul_8k(unsigned char a[], void *t, void *r)
{
    /* byte 0: tables 0 (low nibble) and 1 (high nibble) */
    move_block_aligned(r, tab8k(t)[0][a[0] & 15]);
    xor_block_aligned(r, tab8k(t)[1][a[0] >> 4]);

    /* bytes 1 to 15: two tables for each byte */
    xor_8k( 1, a, t, r);
    xor_8k( 2, a, t, r);
    xor_8k( 3, a, t, r);
    xor_8k( 4, a, t, r);
    xor_8k( 5, a, t, r);
    xor_8k( 6, a, t, r);
    xor_8k( 7, a, t, r);
    xor_8k( 8, a, t, r);
    xor_8k( 9, a, t, r);
    xor_8k(10, a, t, r);
    xor_8k(11, a, t, r);
    xor_8k(12, a, t, r);
    xor_8k(13, a, t, r);
    xor_8k(14, a, t, r);
    xor_8k(15, a, t, r);

    /* return the result in the caller's buffer */
    move_block_aligned(a, r);
}

#else

/*  gf_mul_8k, inline form using a loop.
    Each byte of a[] selects two table entries, one for its low
    nibble and one for its high nibble.  The low nibble entry for
    byte 0 is copied into r, all the other entries are xored in,
    and the GF_BYTE_LEN byte result is copied back over a[].
      a   input block, overwritten with the result
      t   table pointer, accessed through tab8k()
      r   work block, which also holds the result on return
    NOTE(review): this variant copies with memcpy() where the
    unrolled variant uses move_block_aligned() - confirm whether
    the difference is intended.
*/
gf_inline void gf_mul_8k(unsigned char a[], void *t, void *r)
{
    int pos = 1;

    /* byte 0: tables 0 (low nibble) and 1 (high nibble) */
    memcpy(r, tab8k(t)[0][a[0] & 15], GF_BYTE_LEN);
    xor_block_aligned(r, tab8k(t)[1][a[0] >> 4]);

    /* remaining bytes: two tables for each byte */
    while(pos < GF_BYTE_LEN)
    {
        xor_8k(pos, a, t, r);
        ++pos;
    }

    /* return the result in the caller's buffer */
    memcpy(a, r, GF_BYTE_LEN);
}

#endif

#else

/*  Macro forms of gf_mul_8k(a, t, r), used when USE_INLINES is not
    defined.  Each byte of a selects two table entries (low nibble and
    high nibble); the low nibble entry for byte 0 is copied into r,
    all the other entries are xored in, and the result is copied back
    over a.
    The arguments are parenthesised on every use and the loop index
    has a name that is unlikely to clash with an identifier in the
    caller's argument expressions.  Each argument is evaluated more
    than once, so arguments must not have side effects.
*/
#if defined( UNROLL_LOOPS )

#define gf_mul_8k(a, t, r) do {                             \
    move_block_aligned((r), tab8k((t))[0][(a)[0] & 15]);    \
    xor_block_aligned((r), tab8k((t))[1][(a)[0] >> 4]);     \
    xor_8k( 1, (a), (t), (r)); xor_8k( 2, (a), (t), (r));   \
    xor_8k( 3, (a), (t), (r)); xor_8k( 4, (a), (t), (r));   \
    xor_8k( 5, (a), (t), (r)); xor_8k( 6, (a), (t), (r));   \
    xor_8k( 7, (a), (t), (r)); xor_8k( 8, (a), (t), (r));   \
    xor_8k( 9, (a), (t), (r)); xor_8k(10, (a), (t), (r));   \
    xor_8k(11, (a), (t), (r)); xor_8k(12, (a), (t), (r));   \
    xor_8k(13, (a), (t), (r)); xor_8k(14, (a), (t), (r));   \
    xor_8k(15, (a), (t), (r));                              \
    move_block_aligned((a), (r));                           \
} while(0)

#else

#define gf_mul_8k(a, t, r) do { int gf_pos_;                \
    memcpy((r), tab8k((t))[0][(a)[0] & 15], GF_BYTE_LEN);   \
    xor_block_aligned((r), tab8k((t))[1][(a)[0] >> 4]);     \
    for(gf_pos_ = 1; gf_pos_ < GF_BYTE_LEN; ++gf_pos_)      \
    {   xor_8k(gf_pos_, (a), (t), (r));                     \
    }                                                       \
    memcpy((a), (r), GF_BYTE_LEN);                          \
} while(0)

#endif

#endif

/*  This version uses 4k bytes of table space on the stack.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(2^128).  If we consider a GF(2^128) value in a
    single byte, we can construct a table of the 256 16-byte
    values that result from the 256 values of this byte.
    This requires 4096 bytes. If we take the highest byte in
    the buffer and use this table to get the result, we then
    have to multiply by x^120 to get the final value. For the
    next highest byte the result has to be multiplied by x^112
    and so on. But we can do this by accumulating the result
    in an accumulator starting with the result for the top
    byte.  We repeatedly multiply the accumulator value by
    x^8 and then add in (i.e. xor) the 16-byte table entry for
    the next lower byte in the buffer, stopping when we reach
    the lowest byte. This requires a 4096 byte table.
*/

/*  Declared here and defined elsewhere.  Presumably this builds, for
    the 16 byte value g, the table at t that gf_mul_4k() reads - confirm
    against the definition.
*/
void init_4k_table(unsigned char g[], void *t);

/*  Pointer to the first of a series of entries, each of which holds
    (GF_BYTE_LEN >> 2) uint_32t words.  The code below indexes it as
    table[byte value].
*/
typedef uint_32t        (*gf_t4k)[GF_BYTE_LEN >> 2];
/*  tab4k(x) views the opaque table pointer x as a gf_t4k.
    xor_4k(i,a,t,r) applies mul_x8() to the block r and then xors in
    the entry of table t selected by the byte value a[i].
    The two statements are wrapped in do { } while(0) so that the
    macro is a single statement (safe as an unbraced if/for body),
    and the parameters are parenthesised so that expression arguments
    expand as intended.  r is evaluated twice and so must not have
    side effects.
*/
#define tab4k(x)        ((gf_t4k)(x))
#define xor_4k(i,a,t,r) do {                            \
    mul_x8((r));                                        \
    xor_block_aligned((r), tab4k(t)[(a)[(i)]]);         \
} while(0)

#if defined( USE_INLINES )

#if defined( UNROLL_LOOPS )

/*  gf_mul_4k, inline form with the loop written out in full.
    The table entry for byte 15 of a[] is copied into r; then, for
    bytes 14 down to 0, r is passed through mul_x8() and the table
    entry for that byte is xored in.  The result is copied back
    over a[].
      a   input block, overwritten with the result
      t   table pointer, accessed through tab4k()
      r   work block, which also holds the result on return
*/
gf_inline void gf_mul_4k(unsigned char a[], void *t, void *r)
{
    /* byte 15 seeds the accumulator */
    move_block_aligned(r, tab4k(t)[a[15]]);

    /* bytes 14 down to 0 */
    xor_4k(14, a, t, r);
    xor_4k(13, a, t, r);
    xor_4k(12, a, t, r);
    xor_4k(11, a, t, r);
    xor_4k(10, a, t, r);
    xor_4k( 9, a, t, r);
    xor_4k( 8, a, t, r);
    xor_4k( 7, a, t, r);
    xor_4k( 6, a, t, r);
    xor_4k( 5, a, t, r);
    xor_4k( 4, a, t, r);
    xor_4k( 3, a, t, r);
    xor_4k( 2, a, t, r);
    xor_4k( 1, a, t, r);
    xor_4k( 0, a, t, r);

    /* return the result in the caller's buffer */
    move_block_aligned(a, r);
}

#else

/*  gf_mul_4k, inline form using a loop.
    The table entry for byte 15 of a[] is copied into r; then, for
    bytes 14 down to 0, r is passed through mul_x8() and the table
    entry for that byte is xored in.  The result is copied back
    over a[].
      a   input block, overwritten with the result
      t   table pointer, accessed through tab4k()
      r   work block, which also holds the result on return
*/
gf_inline void gf_mul_4k(unsigned char a[], void *t, void *r)
{
    int pos;

    /* byte 15 seeds the accumulator */
    move_block_aligned(r, tab4k(t)[a[15]]);

    /* work down from byte 14 to byte 0 */
    for(pos = 14; pos >= 0; --pos)
    {
        xor_4k(pos, a, t, r);
    }

    /* return the result in the caller's buffer */
    move_block_aligned(a, r);
}

#endif

#else

/*  Macro forms of gf_mul_4k(a, t, r), used when USE_INLINES is not
    defined.  The table entry for byte 15 of a is copied into r; then,
    for bytes 14 down to 0, r is passed through mul_x8() and the table
    entry for that byte is xored in.  The result is copied back over a.
    The arguments are parenthesised on every use and the loop index
    has a name that is unlikely to clash with an identifier in the
    caller's argument expressions (a local called 'i' would shadow a
    caller's 'i' before the arguments are first used).  Each argument
    is evaluated more than once, so arguments must not have side
    effects.
*/
#if defined( UNROLL_LOOPS )

#define gf_mul_4k(a, t, r) do {                             \
    move_block_aligned((r), tab4k((t))[(a)[15]]);           \
    xor_4k(14, (a), (t), (r)); xor_4k(13, (a), (t), (r));   \
    xor_4k(12, (a), (t), (r)); xor_4k(11, (a), (t), (r));   \
    xor_4k(10, (a), (t), (r)); xor_4k( 9, (a), (t), (r));   \
    xor_4k( 8, (a), (t), (r)); xor_4k( 7, (a), (t), (r));   \
    xor_4k( 6, (a), (t), (r)); xor_4k( 5, (a), (t), (r));   \
    xor_4k( 4, (a), (t), (r)); xor_4k( 3, (a), (t), (r));   \
    xor_4k( 2, (a), (t), (r)); xor_4k( 1, (a), (t), (r));   \
    xor_4k( 0, (a), (t), (r));                              \
    move_block_aligned((a), (r));                           \
} while(0)

#else

#define gf_mul_4k(a, t, r) do { int gf_pos_ = 15;           \
    move_block_aligned((r), tab4k((t))[(a)[15]]);           \
    while(gf_pos_--)                                        \
    {   xor_4k(gf_pos_, (a), (t), (r));                     \
    }                                                       \
    move_block_aligned((a), (r));                           \
} while(0)

#endif

#endif

/*  This version uses 256 bytes of table space on the stack.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(2^128).  If we consider a GF(2^128) value in a
    single 4-bit nibble, we can construct a table of the 16
    16-byte values that result from the 16 values of this
    nibble.  This requires 256 bytes. If we take the highest
    4-bit nibble in the buffer and use this table to get the
    result, we then have to multiply by x^124 to get the
    final value. For the next highest nibble the result has to
    be multiplied by x^120 and so on. But we can do this by
    accumulating the result in an accumulator starting with
    the result for the top nibble.  We repeatedly multiply
    the accumulator value by x^4 and then add in (i.e. xor)
    the 16-byte table entry for the next lower nibble in the
    buffer, stopping when we reach the lowest nibble. This
    uses a 256 byte table.
*/

/*  Declared here and defined elsewhere.  Presumably this builds, for
    the 16 byte value g, the table at t that gf_mul_256() reads -
    confirm against the definition.
*/
void init_256_table(unsigned char g[], void *t);

/*  Pointer to the first of a series of entries, each of which holds
    (GF_BYTE_LEN >> 2) uint_32t words.  The code below indexes it as
    table[nibble value].
*/
typedef uint_32t    (*gf_t256)[GF_BYTE_LEN >> 2];
/*  tab256(t) views the opaque table pointer t as a gf_t256.
    xor_256(i,a,t,r) processes byte i of a[] in two steps: r is passed
    through mul_x4() and the table entry for the low nibble is xored
    in, then r is passed through mul_x4() again and the entry for the
    high nibble is xored in.
    The four statements are wrapped in do { } while(0) so that the
    macro is a single statement (safe as an unbraced if/for body),
    and the parameters are parenthesised so that expression arguments
    expand as intended.  The arguments are evaluated more than once
    and so must not have side effects.
*/
#define tab256(t)   ((gf_t256)(t))
#define xor_256(i,a,t,r) do {                                   \
    mul_x4((r));                                                \
    xor_block_aligned((r), tab256(t)[(a)[(i)] & 15]);           \
    mul_x4((r));                                                \
    xor_block_aligned((r), tab256(t)[(a)[(i)] >> 4]);           \
} while(0)

#if defined( USE_INLINES )

#if defined( UNROLL_LOOPS )

/*  gf_mul_256, inline form with the loop written out in full.
    The table entry for the low nibble of byte 15 of a[] is copied
    into r, r is passed through mul_x4() and the entry for the high
    nibble of byte 15 is xored in.  Bytes 14 down to 0 are then
    processed with xor_256() and the result is copied back over a[].
      a   input block, overwritten with the result
      t   table pointer, accessed through tab256()
      r   work block, which also holds the result on return
*/
gf_inline void gf_mul_256(unsigned char a[], void *t, void *r)
{
    /* byte 15: low nibble seeds the accumulator, then the high nibble */
    move_block_aligned(r, tab256(t)[a[15] & 15]);
    mul_x4(r);
    xor_block_aligned(r, tab256(t)[a[15] >> 4]);

    /* bytes 14 down to 0 */
    xor_256(14, a, t, r);
    xor_256(13, a, t, r);
    xor_256(12, a, t, r);
    xor_256(11, a, t, r);
    xor_256(10, a, t, r);
    xor_256( 9, a, t, r);
    xor_256( 8, a, t, r);
    xor_256( 7, a, t, r);
    xor_256( 6, a, t, r);
    xor_256( 5, a, t, r);
    xor_256( 4, a, t, r);
    xor_256( 3, a, t, r);
    xor_256( 2, a, t, r);
    xor_256( 1, a, t, r);
    xor_256( 0, a, t, r);

    /* return the result in the caller's buffer */
    move_block_aligned(a, r);
}

#else

/*  gf_mul_256, inline form using a loop.
    The table entry for the low nibble of byte 15 of a[] is copied
    into r, r is passed through mul_x4() and the entry for the high
    nibble of byte 15 is xored in.  Bytes 14 down to 0 are then
    processed with xor_256() and the result is copied back over a[].
      a   input block, overwritten with the result
      t   table pointer, accessed through tab256()
      r   work block, which also holds the result on return
*/
gf_inline void gf_mul_256(unsigned char a[], void *t, void *r)
{
    int pos;

    /* byte 15: low nibble seeds the accumulator, then the high nibble */
    move_block_aligned(r, tab256(t)[a[15] & 15]);
    mul_x4(r);
    xor_block_aligned(r, tab256(t)[a[15] >> 4]);

    /* work down from byte 14 to byte 0 */
    for(pos = 14; pos >= 0; --pos)
    {
        xor_256(pos, a, t, r);
    }

    /* return the result in the caller's buffer */
    move_block_aligned(a, r);
}

#endif

#else

/*  Macro forms of gf_mul_256(a, t, r), used when USE_INLINES is not
    defined.  The table entry for the low nibble of byte 15 of a is
    copied into r, r is passed through mul_x4() and the entry for the
    high nibble of byte 15 is xored in.  Bytes 14 down to 0 are then
    processed with xor_256() and the result is copied back over a.
    The arguments are parenthesised on every use and the loop index
    has a name that is unlikely to clash with an identifier in the
    caller's argument expressions (a local called 'i' would shadow a
    caller's 'i' before the arguments are first used).  Each argument
    is evaluated more than once, so arguments must not have side
    effects.
*/
#if defined( UNROLL_LOOPS )

#define gf_mul_256(a, t, r) do {                                \
    move_block_aligned((r), tab256((t))[(a)[15] & 15]);         \
    mul_x4((r));                                                \
    xor_block_aligned((r), tab256((t))[(a)[15] >> 4]);          \
    xor_256(14, (a), (t), (r)); xor_256(13, (a), (t), (r));     \
    xor_256(12, (a), (t), (r)); xor_256(11, (a), (t), (r));     \
    xor_256(10, (a), (t), (r)); xor_256( 9, (a), (t), (r));     \
    xor_256( 8, (a), (t), (r)); xor_256( 7, (a), (t), (r));     \
    xor_256( 6, (a), (t), (r)); xor_256( 5, (a), (t), (r));     \
    xor_256( 4, (a), (t), (r)); xor_256( 3, (a), (t), (r));     \
    xor_256( 2, (a), (t), (r)); xor_256( 1, (a), (t), (r));     \
    xor_256( 0, (a), (t), (r));                                 \
    move_block_aligned((a), (r));                               \
} while(0)

#else

#define gf_mul_256(a, t, r) do { int gf_pos_ = 15;              \
    move_block_aligned((r), tab256((t))[(a)[15] & 15]);         \
    mul_x4((r));                                                \
    xor_block_aligned((r), tab256((t))[(a)[15] >> 4]);          \
    while(gf_pos_--)                                            \
    {   xor_256(gf_pos_, (a), (t), (r));                        \
    }                                                           \
    move_block_aligned((a), (r));                               \
} while(0)

#endif

#endif

#if defined(__cplusplus)
}
#endif

#endif
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -