📄 math.c
字号:
uint8_t multiply_1 (uint8p_t *w, uint8x_t *x, uint8_t n, uint8_t y_)
{ // (n-byte) w = (n-byte) x * (uint8_t) y;
uint8_16_t t;
w += n - 1; // Point to LSB of 'w'.
x += n - 1; // Point to LSB of 'x'.
t.c[ HI ] = 0; // Initial carry is zero (0).
do // (Carry,Product) = (x * y) + carry.
{ // (uint16_t) t = (uint8_t) x * (uint8_t) y + (uint8_t) carry;
t.i = (uint16_t) (*x * y_) + t.c[ HI ];
*w = t.c[ LO ]; w--; x--; // w[n] = LSB of product.
} while (--n);
return (t.c[ HI ]); // MSB of (n+1)-byte product.
}
//---------------------------------------------------------------------------//
// Binary multiplicaton of 64-bit 'x',
// by 32-bit 'y'.
// leaving result in 96-bit 'w'.
// *w = *x * *y;
#if MATH_FAST
void multiply_8_4 (uint8x_t *w, uint8x_t *x, uint8p_t *y)
{ // (U96) w = (uint64_t) x * (uint32_t) y;
uint8_t pdata t[9];
w += 4 - 1; // Point to low-order 8 bytes of product 'w'.
y += 4 - 1; // Point to y[3] (i.e. LSB of 'y').
memset_x (w, 0, 9); // Initially 9-byte partial product is zero (0).
// (U80) w += (uint64_t) x * y.
*(w - 1) = macp8 (w, x, *y, t); w--; y--; // Point to w[2] & y[2].
*(w - 1) = macp8 (w, x, *y, t); w--; y--; // Point to w[1] & y[1].
*(w - 1) = macp8 (w, x, *y, t); w--; y--; // Point to w[0] & y[0].
macp8 (w, x, *y, t); // (uint72_t) w += (uint64_t) x * y; No carry possible.
}
// Multiply and accumulate positive 64-bit partial product into 'w'.
// *w += *x * y_; 't' is a scratchpad for partial product.
uint8_t macp8 (uint8x_t *w, uint8x_t *x, uint8_t y_, uint8p_t *t)
{ // (uint64_t) w += (uint64_t) x * (uint8_t) y;
*t = multiply_8_1 (t + 1, x, y_); // (uint72_t) t = (uint64_t) x * (uint8_t) y;
return ((uint8_t) add (w, t, 9)); // Return carry; (uint72_t) w += (uint72_t) t;
}
// Binary multiplicaton of 64-bit 'x',
// by 8-bit 'y_',
// leaving result in 64-bit 'w' with a returned carry.
// *w = *x * y_;
uint8_t multiply_8_1 (uint8p_t *w, uint8x_t *x, uint8_t y)
{ // (uint64_t) w = (uint64_t) x * (uint8_t) y;
uint8_16_t t;
w += 8 - 1; // Point to LSB of 'w'.
x += 8 - 1; // Point to LSB of 'x'.
// Compute two-byte partial products.
t.i = (uint16_t) (*x * y); *w = t.c[ LO ]; w--; x--; // w[7].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; w--; x--; // w[6].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; w--; x--; // w[5].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; w--; x--; // w[4].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; w--; x--; // w[3].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; w--; x--; // w[2].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; w--; x--; // w[1].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; // w[0].
return (t.c[ HI ]); // Return MSB of (uint72_t) product.
}
#else
void multiply_8_4 (uint8x_t *w, uint8x_t *x, uint8p_t *y)
{ // (U96) w = (uint64_t) x * (uint32_t) y;
uint8_t pdata t[9];
multiply (w, x, 8, y, 4, t);
}
#endif // MATH FAST.
#if VA_ELEMENT || VA_SUMS
// Binary multiplicaton of 32-bit 'x',
// by 32-bit 'y'.
// leaving result in 64-bit 'w'.
// (uint64x_t) w = (uint32i_t) x * (uint32x_t) y;
#if EXTRAS
#if MATH_FAST
void multiply_4_4 (uint8x_t *w, uint8x_t *x, uint8p_t *y)
{ // (uint64_t) w = (uint32_t) x * (uint32_t) y;
uint8_t pdata t[5];
w += 4 - 1; // Point to low-order 4 bytes of product 'w'.
y += 4 - 1; // Point to y[3] (i.e. LSB of 'y').
memset_x (w, 0, 5); // Initially 5-byte partial product is zero (0).
// (U48) w += (uint32_t) x * y.
*(w - 1) = macp4 (w, x, *y, t); w--; y--; // Point to w[2] & y[2].
*(w - 1) = macp4 (w, x, *y, t); w--; y--; // Point to w[1] & y[1].
*(w - 1) = macp4 (w, x, *y, t); w--; y--; // Point to w[0] & y[0].
macp4 (w, x, *y, t); // (U40) w += (uint32_t) x * y; No carry possible.
}
// Multiply and accumulate a 32-bit partial product 'w'.
// (uint32x_t) w += (uint32i_t) x * (uint8_t) y;
uint8_t macp4 (uint8x_t *w, uint8x_t *x, uint8_t y_, uint8p_t *t)
{ // (uint32_t) w += (uint32_t) x * (uint8_t) y;
*t = multiply_4_1 (t + 1, x, y_); // (U40) t = (uint32_t) x * (uint8_t) y;
return ((uint8_t) add (w, t, 5)); // Return carry; (U40) w += (U40) t;
}
// (uint32x_t) w = (uint32x_t) x * (uint8x_t) y;
uint8_t multiply_4_1 (uint8p_t *w, uint8x_t *x, uint8_t y)
{ // (uint32_t) w = (uint32_t) x * (uint8_t) y;
uint8_16_t t;
w += 4 - 1;
x += 4 - 1;
// Compute two-byte partial products.
t.i = (uint16_t) (*x * y); *w = t.c[ LO ]; w--; x--; // w[3].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; w--; x--; // w[2].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; w--; x--; // w[1].
t.i = (uint16_t) (*x * y) + t.c[ HI ]; *w = t.c[ LO ]; // w[0].
return (t.c[ HI ]); // Return MSB of (U40) product.
}
#else
void multiply_4_4 (uint8x_t *w, uint8x_t *x, uint8p_t *y)
{ // (uint64_t) w = (uint32_t) x * (uint32_t) y;
uint8_t pdata t[5];
multiply (w, x, 4, y, 4, t);
}
#endif // MATH_FAST.
#endif // VA_ELEMENT/VA_SUMS.
#endif
#if EXTRAS
// About twice as fast as general multiply.
// *w = *x ** 2; Square n-byte 'x' into a 2n-byte
void square (uint8x_t *w, uint8p_t *x, uint8_t n)
{ // About twice as fast as general multiply.
uint8p_t *px;
uint8x_t *pw;
uint8_t i, j, x_;
uint8_16_t k, s;
uint8_16_8_32_t t;
w += n + n - 1;
x += n - 1;
pw = w; px = x;
i = n;
do
{
k.i = *px * *px; px--; // x[i]^2.
*pw = k.c[ LO ]; pw--; // LSB of product.
*pw = k.c[ HI ]; pw--; // MSB of product.
} while (--i);
w--;
j = n;
do
{
pw = w;
x_ = *x; x--;
px = x;
k.i = 0;
i = j - 1;
do
{
s.i = *px * x_; // Make sure we use minimum precision.
t.l = (uint32_t) s.i + (uint32_t) s.i + *pw + k.i;
*pw = t.c.c; // =1byte Sum.
k.i = t.c.i; // =2byte Carry.
pw--; px--; // Next.
} while (--i);
k.i += *pw;
*pw = k.c[ LO ]; pw--; // = Carry.
*pw += k.c[ HI ]; //
w -= 2;
} while (--j > 1);
}
#endif
//===========================================================================//
// Binary divide of unnormalized 8m-bit 'u',
// by unnormalized 8n-bit 'v',
// leaving quotient & remainder back in 'u'.
// *u /= *v; 't' = temporary storage, needs to be at least size of dividend.
// The dividend 'u' becomes the quotient and remainder;
// Remainder is in the low order part of 'u'.
// divide() returns how many bytes are in the quotient.
#define unnormalize divide_1 // Unnormalize remainder.
uint8_t divide (uint8x_t *u, uint8_t m, uint8x_t *v, uint8_t n, uint8p_t *t)
{ // ((m-byte) u /= (n-byte) v.
uint8_t d_; // When calling make sure (u - 1) is allocated.
do // Align divisor.
{
if (*v != 0) break; // Found first significant digit.
v++; // Reduce divisor.
} while (--n);
if ((m >= n) && (n > 0))
{
d_ = normalize (u, m, v, n, t); // Normalize dividend and divisor.
divide_ (u - 1, m, t, n, t + n);// Do normalized divide.
unnormalize (u + m - n, n, d_); // Unnormalize remainder.
}
return ((m >= n) ? m - n : 0); // Size of quotient (in bytes).
}
//===========================================================================//
// Takes a 64-bit 'w',
// Returns a 32-bit number containing the least significant 6 decimal digits.
#if EXTRAS
uint32_t Mod10_6 (uint8x_t *w)
{ // 'w' is a 8-byte quantity.
uint8_t xdata x[9]; // Need extra digit for divide.
uint32_t xdata scale;
uint8_t pdata t[9];
memcpy_xx (x + 1, w, 8); // Copy, divide is destructive.
scale = 1000000L; // Modulo ONE_MILLION.
divide (x + 1, 8, (uint8x_t *)&scale, 4, (uint8p_t *)&t[0]); // x / 1,000,000;
x[5] = 0; // Clear MSB of 4-byte result.
return (* (uint8x_t *) &x[5]); // Return 32-bit (x % 1,000,000).
}
#endif // PULSE_CNT
// Normalize 8m-bit dividend 'u' and 8n-bit divisor 'v' so division is possible.
// It makes the divisor 'v' such that the first digit is..
// ..greater than half the first digit of the dividend 'u';
// 't' = temporary storage, allocate at least 'm' bytes.
uint8_t normalize (uint8x_t *u, uint8_t m, uint8x_t *v, uint8_t n, uint8p_t *t)
{
uint8_t d_;
d_ = 0x100 / (*v + 1); // Value needed to normalize divisor.
*(u - 1) = multiply_1 (t, u, m, d_);// Normalize dividend (i.e. u *= d).
memcpy_xp (u, t, m);
multiply_1 (t, v, n, d_); // Normalize divisor (i.e. t = v * d).
return (d_); // Return 'd' (normalizing multiplier).
}
// Binary divide of normalized 8m-bit 'u',
// by normalized 8n-bit 'v',
// leaving quotient & remainder back in 'u'.
// *u /= *v; like divide, except it assumes everything is already normalized.
void divide_ (uint8x_t *u, uint8_t m, uint8p_t *v, uint8_t n, uint8p_t *v0)
{ // We're already normalized!
uint8_t j, q_, vn1, vn2; // Dividend replaced w/ quotient and remainder.
uint16_t t;
uint8_16_8_32_t r;
// Point to MSB of dividend.
vn1 = v[ 0 ]; // Fetch two MSBs of divisor.
vn2 = v[ 1 ];
r.c.f = 0; // Clear MSB of 'r'.
j = m - n; // Loop 'm - n + 1' times.
do
{
t = * (int16x_t *) u; // Fetch two MSBs of current dividend.
r.c.c = *(u + 2); // Fetch next MSB of dividend.
if (*u == vn1)
q_ = 0xFF; // base - 1.
else
q_ = t / vn1; // Guess at digit of quotient.
r.c.i = t - (q_ * vn1);
while ((uint32_t) (vn2 * q_) > r.l)
{
q_--; // Adjust 'q_' guess.
r.c.i = t - (q_ * vn1);
}
if (macn (u, v, n, q_, v0) != 0) // u -= v * q_;
{
q_--; // Quotient off by 1.
*u += (uint8_t) add (u + 1, v, n);
} // u += v (i.e. add back).
*u = q_; // Store quotient.
u++; // Scan down dividend.
} while (j--); // Loop 'm+1' times.
}
// Multiply and accumulate positive 8n-bit partial product into 'w'.
// Multiplication of 8n-bit 'v',
// and 8-bit 'q_', accumulated positive into 'w'.
// *u -= *v * q_; q_ is the intermediate quotient, a single byte,
// 't' is a scratchpad for partial products, allocate at least 'n' bytes.
uint8_t macn (uint8x_t *u, uint8p_t *v, uint8_t n, uint8_t q_, uint8p_t *t)
{
uint8_t xdata v0[4];
memcpy_xp (v0, v, n);
*t = multiply_1 (t + 1, v0, n, q_);
return ((uint8_t) sub (u, t, n + 1));
}
// *u /= v;
void divide_1 (uint8x_t *u, uint8_t n, uint8_t v)
{ // (n-byte) u /= (n-byte) v;
uint8_16_t t;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -