📄 cwc.c

📁 加密认证联合模式的源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 ---------------------------------------------------------------------------
 Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved.

 LICENSE TERMS

 The free distribution and use of this software in both source and binary
 form is allowed (with or without changes) provided that:

   1. distributions of this source code include the above copyright
      notice, this list of conditions and the following disclaimer;

   2. distributions in binary form include the above copyright
      notice, this list of conditions and the following disclaimer
      in the documentation and/or other associated materials;

   3. the copyright holder's name is not used to endorse products
      built using this software without specific written permission.

 ALTERNATIVELY, provided that this notice is retained in full, this product
 may be distributed under the terms of the GNU General Public License (GPL),
 in which case the provisions of the GPL apply INSTEAD OF those given above.

 DISCLAIMER

 This software is provided 'as is' with no explicit or implied warranties
 in respect of its properties, including, but not limited to, correctness
 and/or fitness for purpose.
 ---------------------------------------------------------------------------
 Issue Date: 13/06/2006

 This file contains the code for implementing encryption and authentication
 using AES with a Carter-Wegamn hash function.  Note that it uses Microsoft
 calls to set the FPU into specific operating conditions
*/


#include "cwc.h"
#include "mode_hdr.h"

#if defined(__cplusplus)
extern "C"
{
#endif

#define CBLK_LEN    CWC_CBLK_SIZE
#define ABLK_LEN    CWC_ABLK_SIZE
#define CBLK_MASK  (CBLK_LEN - 1)

#define be_inc(x,n) !++((x)[n+3]) && !++((x)[n+2]) && !++((x)[n+1]) &&  ++((x)[n])
#define le_inc(x,n) !++((x)[n]) && !++((x)[n+1]) && !++((x)[n+2]) &&  ++((x)[n+3])

/* define an unsigned 32-bit type */

#if defined( USE_FLOATS )

#include <float.h>

/*
    Floating Point Unit Control

    _MCW_RC     Rounding Control Mask
    _RC_NEAR    near
    _RC_DOWN    down
    _RC_UP      up
    _RC_CHOP    chop

    _MCW_PC     Precision Control Mask
    _PC_64      64 bits
    _PC_53      53 bits
    _PC_24      24 bits

    Note this only conrols the normal FPU - special intrinsics are needed
    to control the SSE/SSE2 FPU
*/

/* set to 53 bit precision and truncate towards zero */
#define set_FPU     _controlfp(_PC_53 | _RC_CHOP, _MCW_PC | _MCW_RC)

/* set to default state                             */
#define reset_FPU   _controlfp(CW_DEFAULT, 0xffff);

/*
    The main cost in CWC hash calculation is in multiplying two 127 bit
    numbers mod 2^127-1. This can be implemented quite efficiently using
    floating point operations by splitting the two values into 24 bit
    chunks so that product terms are 48 bits and sums of product terms
    still fit into 53 bit double precision values. If b is 2^24 we have:

        x = x[5]*b^5 + x[4]*b^4 + x[3]*b^3 + x[2]*b^2 + x[1]*b + x[0]
        z = z[5]*b^5 + z[4]*b^4 + z[3]*b^3 + z[2]*b^2 + z[1]*b + z[0]

    with the product terms:

        r[ 0] = (x[0] * z[0])
        r[ 1] = (x[0] * z[1] + x[1] * z[0]) * b
        ......
        r[ 9] = (x[4] * z[5] + x[5] * z[4]) * b^9
        r[10] = (x[5] * z[5]) * b ^ 10

    Now we need to compute the modulus of each term mod 2^127 - 1. so
    using the r[9] term (= v * b^9) as an example we can calculate:

        b^9 = (2^24)^9 = 2^216 = 2^127 * 2^89 = 2^127 * b^3 * 2^17

        v * b^9 = 2^127 * v * b^3 * 2^17
                = (2^127 - 1) * v * b^3 * 2^17 + b^3 * (2^17 * v)

        (v * b^9) mod (2^127 - 1) = (2^17 * v) * b^3;

    So we can account for this term in the result by adding an extra
    value to the r[3] term.

    But v is a 48 bit value and we are adding 17 more bits with the
    2^17 term so the result will overflow 53 bit arithmetic. So we
    must split the result so that

        (2^17 * v) * b^3 = v_hi * b^4 + v_lo * b^3

    and add the low part to r[3] and the high part to r[4]. We hence
    need the low 7 bits of v for v_lo and the high bits for v_hi

    When the low bit of a 53 bit floating point number represents
    the value 2^n the high bit represents 2^(52 + n) so if the low
    bit represents 2^7 the high bit represents 2^59.  So if we take
    a number below 2^59 and add 2^59 to it we know that all bits
    that represent values below 2^7 will be lost (however we must
    ensure that the result is truncated and not rounded). So if we
    do the computations

            top = (v + 2^59) - 2^59
            bot = v - top;

    we have
            2^17 * v = 2^17 * (top + bot)
                     = 2^24 * (2^-7 * top) + (2^17 * bot)
    and
            v_hi = 2^-7 * top and v_lo = 2^17 * bot

    We can do the same calculations for r[6] to r[10]. For r[5] we
    need to represent it as:

        r[5] * b^5 = 2^120 * v = 2^127 * v_hi + 2^120 * v_lo
        r[5] mod (2^127 - 1) = v_lo * b^5 + v_hi

    where v_hi and v_lo are extracted as described above. We
    then set r[5] from v_lo and add v_hi to r[0].

    We have now got the values into r[0] to r[5] but they
    are 48+ bit values that are offset by 24 bits as follows:

        r[0]                              xxxxxxxxxxxx
        r[1]                        xxxxxxxxxxxx
        r[2]                  xxxxxxxxxxxx
        .....

    To get the 24 bit components we must hence carry the top
    24+ bits of r[i] into r[i + 1] or i = 0..4. In this case
    the values are split using (v + 2^76) - 2^76).

    When we ripple the carries through like this we may find
    that r[5] is larger than 2^127 - 1, in which case we have
    to reduce r[5] once more.  This might produce more carries
    so if we want all results to fit in 24 bits we may have to
    repeat the carry process.

    However it turns out that we can work the next iteration of
    the product without insisting that the top 29 bits of all
    values are zero.  These values only have to be small enough
    to ensure that the sums of product terms in the next steps
    don't overflow 53 bits.

    Bernstein calls a reduction that ensures that the top 29
    bits of all values are zero a 'freeze' operation. If the
    resulting 'carries' are small, but not necessarily zero,
    he calls the operation a 'squeeze'.

    In fact the carry operations can be performed in many
    different orders and this freedom can be used to optimise
    the calculation. For more details see Daniel Bernstein's
    paper "Floating Point Arithmetic and Message Authentication"
    in the Journal of Cryptology (submitted in March 2000).
*/

static double   tm24 = 1.0 / (65536.0 * 256.0);
static double   tm07 = 2.0 / 256.0;
static double   tp17 = 2.0 * 65536.0;
static double   tp59 = 2048.0 * 65536.0 * 65536.0 * 65536.0;
static double   tp76 = 4096.0 * 65536.0 * 65536.0 * 65536.0 * 65536.0;

/* reduce a value to its canonical form */

void freeze(double h[], int full)
{   double  f, p;

    p = h[0];
    f = (p + tp76) - tp76;
    h[0] = p - f;
    p = h[1] += tm24 * f;
    f = (p + tp76) - tp76;
    h[1] = p - f;
    p = h[2] += tm24 * f;
    f = (p + tp76) - tp76;
    h[2] = p - f;
    p = h[3] += tm24 * f;
    f = (p + tp76) - tp76;
    h[3] = p - f;
    p = h[4] += tm24 * f;
    f = (p + tp76) - tp76;
    h[4] = p - f;
    h[5] += tm24 * f;

    /* do modular reduction step    */
    /* if the value must be fully   */
    /* canonical then ripple any    */
    /* new carries produced by the  */
    /* modular reduction step       */
    if((f = ((h[5] + tp59) - tp59)) != 0.0)
    {   int i = 0;

        h[0] += tm07 * f; h[5] -= f;

        if(full)
            while(i < 5 && (f = ((h[i] + tp76) - tp76)) != 0.0)
            {
                h[i] -= f;
                h[++i] += tm24 * f;
            }
    }
}

/* There are two implementations of cwc to choose from  */

#if 1

void do_cwc(uint_32t in[], cwc_ctx ctx[1])
{   uint_32t   data[3];
    double  a[6], v, f, p;

    /* set FPU to operate in 53 bit precision and   */
    /* in truncate to zero mode                     */
    set_FPU;

    data[2] = bswap_32(in[2]);
    data[1] = bswap_32(in[1]);
    data[0] = bswap_32(in[0]);

    /* split input data into 24 bit double values   */
    a[0] =  data[2] & 0x00ffffff;
    a[1] = (data[2] >> 24) | ((data[1] & 0x0000ffff) << 8);
    a[2] = (data[1] >> 16) | ((data[0] & 0x000000ff) << 16);
    a[3] =  data[0] >> 8;

    /* add into the running hash value              */
    a[0] += ctx->hash[0]; a[1] += ctx->hash[1];
    a[2] += ctx->hash[2]; a[3] += ctx->hash[3];
    a[4] = ctx->hash[4]; a[5] = ctx->hash[5];

    /* calculate the low five terms of the product  */
    ctx->hash[0] = a[0] * ctx->zval[0];
    ctx->hash[1] = a[1] * ctx->zval[0]
                 + a[0] * ctx->zval[1];
    ctx->hash[2] = a[2] * ctx->zval[0]
                 + a[1] * ctx->zval[1]
                 + a[0] * ctx->zval[2];
    ctx->hash[3] = a[3] * ctx->zval[0]
                 + a[2] * ctx->zval[1]
                 + a[1] * ctx->zval[2]
                 + a[0] * ctx->zval[3];
    ctx->hash[4] = a[4] * ctx->zval[0]
                 + a[3] * ctx->zval[1]
                 + a[2] * ctx->zval[2]
                 + a[1] * ctx->zval[3]
                 + a[0] * ctx->zval[4];
    ctx->hash[5] = a[5] * ctx->zval[0]
                 + a[4] * ctx->zval[1]
                 + a[3] * ctx->zval[2]
                 + a[2] * ctx->zval[3]
                 + a[1] * ctx->zval[4]
                 + a[0] * ctx->zval[5];

    v = a[5] * ctx->zval[5];    /* add in r[10] term */
    f = (v + tp59) - tp59;
    ctx->hash[4] += tp17 * (v - f);
    ctx->hash[5] += tm07 * f;

    f = (ctx->hash[5] + tp59) - tp59;
    ctx->hash[5] -= f;          /* modular reduction */
    ctx->hash[0] += tm07 * f;

    v = a[5] * ctx->zval[1]
      + a[4] * ctx->zval[2]
      + a[3] * ctx->zval[3]
      + a[2] * ctx->zval[4]
      + a[1] * ctx->zval[5];    /* add in r[6] term  */
    f = (v + tp59) - tp59;
    p = ctx->hash[0] + tp17 * (v - f);
    v = (p + tp76) - tp76;
    ctx->hash[0] = p - v;
    ctx->hash[1] += tm07 * f + tm24 * v;

    v = a[5] * ctx->zval[2]
      + a[4] * ctx->zval[3]
      + a[3] * ctx->zval[4]
      + a[2] * ctx->zval[5];    /* add in r[7] term  */
    f = (v + tp59) - tp59;
    p = ctx->hash[1] + tp17 * (v - f);
    v = (p + tp76) - tp76;
    ctx->hash[1] = p - v;
    ctx->hash[2] += tm07 * f + tm24 * v;

    v = a[5] * ctx->zval[3]
      + a[4] * ctx->zval[4]
      + a[3] * ctx->zval[5];    /* add in r[8] term  */
    f = (v + tp59) - tp59;
    p = ctx->hash[2] + tp17 * (v - f);
    v = (p + tp76) - tp76;
    ctx->hash[2] = p - v;
    ctx->hash[3] += tm07 * f + tm24 * v;
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -