📄 cwc.c
字号:
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2006, Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The free distribution and use of this software in both source and binary
form is allowed (with or without changes) provided that:
1. distributions of this source code include the above copyright
notice, this list of conditions and the following disclaimer;
2. distributions in binary form include the above copyright
notice, this list of conditions and the following disclaimer
in the documentation and/or other associated materials;
3. the copyright holder's name is not used to endorse products
built using this software without specific written permission.
ALTERNATIVELY, provided that this notice is retained in full, this product
may be distributed under the terms of the GNU General Public License (GPL),
in which case the provisions of the GPL apply INSTEAD OF those given above.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 13/06/2006
This file contains the code for implementing encryption and authentication
using AES with a Carter-Wegamn hash function. Note that it uses Microsoft
calls to set the FPU into specific operating conditions
*/
#include "cwc.h"
#include "mode_hdr.h"
#if defined(__cplusplus)
extern "C"
{
#endif
#define CBLK_LEN CWC_CBLK_SIZE
#define ABLK_LEN CWC_ABLK_SIZE
#define CBLK_MASK (CBLK_LEN - 1)
#define be_inc(x,n) !++((x)[n+3]) && !++((x)[n+2]) && !++((x)[n+1]) && ++((x)[n])
#define le_inc(x,n) !++((x)[n]) && !++((x)[n+1]) && !++((x)[n+2]) && ++((x)[n+3])
/* define an unsigned 32-bit type */
#if defined( USE_FLOATS )
#include <float.h>
/*
Floating Point Unit Control
_MCW_RC Rounding Control Mask
_RC_NEAR near
_RC_DOWN down
_RC_UP up
_RC_CHOP chop
_MCW_PC Precision Control Mask
_PC_64 64 bits
_PC_53 53 bits
_PC_24 24 bits
Note this only conrols the normal FPU - special intrinsics are needed
to control the SSE/SSE2 FPU
*/
/* set to 53 bit precision and truncate towards zero */
#define set_FPU _controlfp(_PC_53 | _RC_CHOP, _MCW_PC | _MCW_RC)
/* set to default state */
#define reset_FPU _controlfp(CW_DEFAULT, 0xffff);
/*
The main cost in CWC hash calculation is in multiplying two 127 bit
numbers mod 2^127-1. This can be implemented quite efficiently using
floating point operations by splitting the two values into 24 bit
chunks so that product terms are 48 bits and sums of product terms
still fit into 53 bit double precision values. If b is 2^24 we have:
x = x[5]*b^5 + x[4]*b^4 + x[3]*b^3 + x[2]*b^2 + x[1]*b + x[0]
z = z[5]*b^5 + z[4]*b^4 + z[3]*b^3 + z[2]*b^2 + z[1]*b + z[0]
with the product terms:
r[ 0] = (x[0] * z[0])
r[ 1] = (x[0] * z[1] + x[1] * z[0]) * b
......
r[ 9] = (x[4] * z[5] + x[5] * z[4]) * b^9
r[10] = (x[5] * z[5]) * b ^ 10
Now we need to compute the modulus of each term mod 2^127 - 1. so
using the r[9] term (= v * b^9) as an example we can calculate:
b^9 = (2^24)^9 = 2^216 = 2^127 * 2^89 = 2^127 * b^3 * 2^17
v * b^9 = 2^127 * v * b^3 * 2^17
= (2^127 - 1) * v * b^3 * 2^17 + b^3 * (2^17 * v)
(v * b^9) mod (2^127 - 1) = (2^17 * v) * b^3;
So we can account for this term in the result by adding an extra
value to the r[3] term.
But v is a 48 bit value and we are adding 17 more bits with the
2^17 term so the result will overflow 53 bit arithmetic. So we
must split the result so that
(2^17 * v) * b^3 = v_hi * b^4 + v_lo * b^3
and add the low part to r[3] and the high part to r[4]. We hence
need the low 7 bits of v for v_lo and the high bits for v_hi
When the low bit of a 53 bit floating point number represents
the value 2^n the high bit represents 2^(52 + n) so if the low
bit represents 2^7 the high bit represents 2^59. So if we take
a number below 2^59 and add 2^59 to it we know that all bits
that represent values below 2^7 will be lost (however we must
ensure that the result is truncated and not rounded). So if we
do the computations
top = (v + 2^59) - 2^59
bot = v - top;
we have
2^17 * v = 2^17 * (top + bot)
= 2^24 * (2^-7 * top) + (2^17 * bot)
and
v_hi = 2^-7 * top and v_lo = 2^17 * bot
We can do the same calculations for r[6] to r[10]. For r[5] we
need to represent it as:
r[5] * b^5 = 2^120 * v = 2^127 * v_hi + 2^120 * v_lo
r[5] mod (2^127 - 1) = v_lo * b^5 + v_hi
where v_hi and v_lo are extracted as described above. We
then set r[5] from v_lo and add v_hi to r[0].
We have now got the values into r[0] to r[5] but they
are 48+ bit values that are offset by 24 bits as follows:
r[0] xxxxxxxxxxxx
r[1] xxxxxxxxxxxx
r[2] xxxxxxxxxxxx
.....
To get the 24 bit components we must hence carry the top
24+ bits of r[i] into r[i + 1] or i = 0..4. In this case
the values are split using (v + 2^76) - 2^76).
When we ripple the carries through like this we may find
that r[5] is larger than 2^127 - 1, in which case we have
to reduce r[5] once more. This might produce more carries
so if we want all results to fit in 24 bits we may have to
repeat the carry process.
However it turns out that we can work the next iteration of
the product without insisting that the top 29 bits of all
values are zero. These values only have to be small enough
to ensure that the sums of product terms in the next steps
don't overflow 53 bits.
Bernstein calls a reduction that ensures that the top 29
bits of all values are zero a 'freeze' operation. If the
resulting 'carries' are small, but not necessarily zero,
he calls the operation a 'squeeze'.
In fact the carry operations can be performed in many
different orders and this freedom can be used to optimise
the calculation. For more details see Daniel Bernstein's
paper "Floating Point Arithmetic and Message Authentication"
in the Journal of Cryptology (submitted in March 2000).
*/
static double tm24 = 1.0 / (65536.0 * 256.0);
static double tm07 = 2.0 / 256.0;
static double tp17 = 2.0 * 65536.0;
static double tp59 = 2048.0 * 65536.0 * 65536.0 * 65536.0;
static double tp76 = 4096.0 * 65536.0 * 65536.0 * 65536.0 * 65536.0;
/* reduce a value to its canonical form */
void freeze(double h[], int full)
{ double f, p;
p = h[0];
f = (p + tp76) - tp76;
h[0] = p - f;
p = h[1] += tm24 * f;
f = (p + tp76) - tp76;
h[1] = p - f;
p = h[2] += tm24 * f;
f = (p + tp76) - tp76;
h[2] = p - f;
p = h[3] += tm24 * f;
f = (p + tp76) - tp76;
h[3] = p - f;
p = h[4] += tm24 * f;
f = (p + tp76) - tp76;
h[4] = p - f;
h[5] += tm24 * f;
/* do modular reduction step */
/* if the value must be fully */
/* canonical then ripple any */
/* new carries produced by the */
/* modular reduction step */
if((f = ((h[5] + tp59) - tp59)) != 0.0)
{ int i = 0;
h[0] += tm07 * f; h[5] -= f;
if(full)
while(i < 5 && (f = ((h[i] + tp76) - tp76)) != 0.0)
{
h[i] -= f;
h[++i] += tm24 * f;
}
}
}
/* There are two implementations of cwc to choose from */
#if 1
void do_cwc(uint_32t in[], cwc_ctx ctx[1])
{ uint_32t data[3];
double a[6], v, f, p;
/* set FPU to operate in 53 bit precision and */
/* in truncate to zero mode */
set_FPU;
data[2] = bswap_32(in[2]);
data[1] = bswap_32(in[1]);
data[0] = bswap_32(in[0]);
/* split input data into 24 bit double values */
a[0] = data[2] & 0x00ffffff;
a[1] = (data[2] >> 24) | ((data[1] & 0x0000ffff) << 8);
a[2] = (data[1] >> 16) | ((data[0] & 0x000000ff) << 16);
a[3] = data[0] >> 8;
/* add into the running hash value */
a[0] += ctx->hash[0]; a[1] += ctx->hash[1];
a[2] += ctx->hash[2]; a[3] += ctx->hash[3];
a[4] = ctx->hash[4]; a[5] = ctx->hash[5];
/* calculate the low five terms of the product */
ctx->hash[0] = a[0] * ctx->zval[0];
ctx->hash[1] = a[1] * ctx->zval[0]
+ a[0] * ctx->zval[1];
ctx->hash[2] = a[2] * ctx->zval[0]
+ a[1] * ctx->zval[1]
+ a[0] * ctx->zval[2];
ctx->hash[3] = a[3] * ctx->zval[0]
+ a[2] * ctx->zval[1]
+ a[1] * ctx->zval[2]
+ a[0] * ctx->zval[3];
ctx->hash[4] = a[4] * ctx->zval[0]
+ a[3] * ctx->zval[1]
+ a[2] * ctx->zval[2]
+ a[1] * ctx->zval[3]
+ a[0] * ctx->zval[4];
ctx->hash[5] = a[5] * ctx->zval[0]
+ a[4] * ctx->zval[1]
+ a[3] * ctx->zval[2]
+ a[2] * ctx->zval[3]
+ a[1] * ctx->zval[4]
+ a[0] * ctx->zval[5];
v = a[5] * ctx->zval[5]; /* add in r[10] term */
f = (v + tp59) - tp59;
ctx->hash[4] += tp17 * (v - f);
ctx->hash[5] += tm07 * f;
f = (ctx->hash[5] + tp59) - tp59;
ctx->hash[5] -= f; /* modular reduction */
ctx->hash[0] += tm07 * f;
v = a[5] * ctx->zval[1]
+ a[4] * ctx->zval[2]
+ a[3] * ctx->zval[3]
+ a[2] * ctx->zval[4]
+ a[1] * ctx->zval[5]; /* add in r[6] term */
f = (v + tp59) - tp59;
p = ctx->hash[0] + tp17 * (v - f);
v = (p + tp76) - tp76;
ctx->hash[0] = p - v;
ctx->hash[1] += tm07 * f + tm24 * v;
v = a[5] * ctx->zval[2]
+ a[4] * ctx->zval[3]
+ a[3] * ctx->zval[4]
+ a[2] * ctx->zval[5]; /* add in r[7] term */
f = (v + tp59) - tp59;
p = ctx->hash[1] + tp17 * (v - f);
v = (p + tp76) - tp76;
ctx->hash[1] = p - v;
ctx->hash[2] += tm07 * f + tm24 * v;
v = a[5] * ctx->zval[3]
+ a[4] * ctx->zval[4]
+ a[3] * ctx->zval[5]; /* add in r[8] term */
f = (v + tp59) - tp59;
p = ctx->hash[2] + tp17 * (v - f);
v = (p + tp76) - tp76;
ctx->hash[2] = p - v;
ctx->hash[3] += tm07 * f + tm24 * v;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -