📄 sc_nbutils.cpp
字号:
}// Compute u -= v, where u is vectors, and v is a scalar.void vec_sub_small_on(int ulen, sc_digit *u, sc_digit v){#ifdef DEBUG_SYSTEMC assert((ulen > 0) && (u != NULL));#endif for (register int i = 0; i < ulen; ++i) { v = (u[i] + DIGIT_RADIX) - v; u[i] = v & DIGIT_MASK; v = 1 - (v >> BITS_PER_DIGIT); }#ifdef DEBUG_SYSTEMC assert(v == 0);#endif}// Compute w = u * v, where w, u, and v are vectors.voidvec_mul(int ulen, const sc_digit *u, int vlen, const sc_digit *vbegin, sc_digit *wbegin){ /* Consider u = Ax + B and v = Cx + D where x is equal to HALF_DIGIT_RADIX. In other words, A is the higher half of u and B is the lower half of u. The interpretation for v is similar. Then, we have the following picture: u_h u_l u: -------- -------- A B v_h v_l v: -------- -------- C D result (d): carry_before: -------- -------- carry_h carry_l result_before: -------- -------- -------- -------- R1_h R1_l R0_h R0_l -------- -------- BD_h BD_l -------- -------- AD_h AD_l -------- -------- BC_h BC_l -------- -------- AC_h AC_l result_after: -------- -------- -------- -------- R1_h' R1_l' R0_h' R0_l' prod_l = R0_h|R0_l + B * D + 0|carry_l = R0_h|R0_l + BD_h|BD_l + 0|carry_l prod_h = A * D + B * C + high_half(prod_l) + carry_h = AD_h|AD_l + BC_h|BC_l + high_half(prod_l) + 0|carry_h carry = A * C + high_half(prod_h) = AC_h|AC_l + high_half(prod_h) R0_l' = low_half(prod_l) R0_h' = low_half(prod_h) R0 = high_half(prod_h)|low_half(prod_l) where '|' is the concatenation operation and the suffixes 0 and 1 show the iteration number, i.e., 0 is the current iteration and 1 is the next iteration. NOTE: sc_max(prod_l, prod_h, carry) <= 2 * x^2 - 1, so any of these numbers can be stored in a digit. NOTE: low_half(u) returns the lower BITS_PER_HALF_DIGIT of u, whereas high_half(u) returns the rest of the bits, which may contain more bits than BITS_PER_HALF_DIGIT. */#ifdef DEBUG_SYSTEMC assert((ulen > 0) && (u != NULL)); assert((vlen > 0) && (vbegin != NULL)); assert(wbegin != NULL);#endif#define prod_h carry const sc_digit *uend = (u + ulen); const sc_digit *vend = (vbegin + vlen); while (u < uend) { sc_digit u_h = (*u++); // A|B sc_digit u_l = low_half(u_h); // B u_h = high_half(u_h); // A#ifdef DEBUG_SYSTEMC // The overflow bits must be zero. assert(u_h == (u_h & HALF_DIGIT_MASK));#endif register sc_digit carry = 0; register sc_digit *w = (wbegin++); register const sc_digit *v = vbegin; while (v < vend) { sc_digit v_h = (*v++); // C|D sc_digit v_l = low_half(v_h); // D v_h = high_half(v_h); // C#ifdef DEBUG_SYSTEMC // The overflow bits must be zero. assert(v_h == (v_h & HALF_DIGIT_MASK));#endif sc_digit prod_l = (*w) + u_l * v_l + low_half(carry); prod_h = u_h * v_l + u_l * v_h + high_half(prod_l) + high_half(carry); (*w++) = concat(low_half(prod_h), low_half(prod_l)); carry = u_h * v_h + high_half(prod_h); } (*w) = carry; }#undef prod_h}// Compute w = u * v, where w and u are vectors, and v is a scalar. // - 0 < v < HALF_DIGIT_RADIX.voidvec_mul_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *w){#ifdef DEBUG_SYSTEMC assert((ulen > 0) && (u != NULL)); assert(w != NULL); assert((0 < v) && (v < HALF_DIGIT_RADIX));#endif#define prod_h carry const sc_digit *uend = (u + ulen); register sc_digit carry = 0; while (u < uend) { sc_digit u_AB = (*u++);#ifdef DEBUG_SYSTEMC // The overflow bits must be zero. assert(high_half(u_AB) == high_half_masked(u_AB));#endif sc_digit prod_l = v * low_half(u_AB) + low_half(carry); prod_h = v * high_half(u_AB) + high_half(prod_l) + high_half(carry); (*w++) = concat(low_half(prod_h), low_half(prod_l)); carry = high_half(prod_h); } (*w) = carry;#undef prod_h}// Compute u = u * v, where u is a vector, and v is a scalar.// - 0 < v < HALF_DIGIT_RADIX. voidvec_mul_small_on(int ulen, sc_digit *u, sc_digit v){#ifdef DEBUG_SYSTEMC assert((ulen > 0) && (u != NULL)); assert((0 < v) && (v < HALF_DIGIT_RADIX));#endif#define prod_h carry register sc_digit carry = 0; for (register int i = 0; i < ulen; ++i) {#ifdef DEBUG_SYSTEMC // The overflow bits must be zero. assert(high_half(u[i]) == high_half_masked(u[i]));#endif sc_digit prod_l = v * low_half(u[i]) + low_half(carry); prod_h = v * high_half(u[i]) + high_half(prod_l) + high_half(carry); u[i] = concat(low_half(prod_h), low_half(prod_l)); carry = high_half(prod_h); }#undef prod_h#ifdef DEBUG_SYSTEMC if( carry != 0 ) { SC_REPORT_WARNING( sc_core::SC_ID_WITHOUT_MESSAGE_, "vec_mul_small_on( int, sc_digit*, unsigned " "long ) : " "result of multiplication is wrapped around" ); }#endif}// Compute w = u / v, where w, u, and v are vectors. // - u and v are assumed to have at least two digits as uchars.voidvec_div_large(int ulen, const sc_digit *u, int vlen, const sc_digit *v, sc_digit *w){#ifdef DEBUG_SYSTEMC assert((ulen > 0) && (u != NULL)); assert((vlen > 0) && (v != NULL)); assert(w != NULL); assert(BITS_PER_DIGIT >= 3 * BITS_PER_BYTE);#endif // We will compute q = x / y where x = u and y = v. The reason for // using x and y is that x and y are BYTE_RADIX copies of u and v, // respectively. The use of BYTE_RADIX radix greatly simplifies the // complexity of the division operation. These copies are also // needed even when we use DIGIT_RADIX representation. int xlen = BYTES_PER_DIGIT * ulen + 1; int ylen = BYTES_PER_DIGIT * vlen;#ifdef SC_MAX_NBITS uchar x[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)]; uchar y[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)]; uchar q[DIV_CEIL2(SC_MAX_NBITS, BITS_PER_BYTE)];#else uchar *x = new uchar[xlen]; uchar *y = new uchar[ylen]; uchar *q = new uchar[xlen - ylen + 1];#endif // q corresponds to w. // Set (uchar) x = (sc_digit) u. xlen = vec_to_char(ulen, u, xlen, x); // Skip all the leading zeros in x. while ((--xlen >= 0) && (! x[xlen])); xlen++; // Set (uchar) y = (sc_digit) v. ylen = vec_to_char(vlen, v, ylen, y); // Skip all the leading zeros in y. while ((--ylen >= 0) && (! y[ylen])); ylen++;#ifdef DEBUG_SYSTEMC assert(xlen > 1); assert(ylen > 1);#endif // At this point, all the leading zeros are eliminated from x and y. // Zero the last digit of x. x[xlen] = 0; // The first two digits of y. register sc_digit y2 = (y[ylen - 1] << BITS_PER_BYTE) + y[ylen - 2]; const sc_digit DOUBLE_BITS_PER_BYTE = 2 * BITS_PER_BYTE; // Find each q[k]. for (register int k = xlen - ylen; k >= 0; --k) { // qk is a guess for q[k] such that q[k] = qk or qk - 1. register sc_digit qk; // Find qk by just using 2 digits of y and 3 digits of x. The // following code assumes that sizeof(sc_digit) >= 3 BYTEs. int k2 = k + ylen; qk = ((x[k2] << DOUBLE_BITS_PER_BYTE) + (x[k2 - 1] << BITS_PER_BYTE) + x[k2 - 2]) / y2; if (qk >= BYTE_RADIX) // qk cannot be larger than the largest qk = BYTE_RADIX - 1; // digit in BYTE_RADIX. // q[k] = qk or qk - 1. The following if-statement determines which: if (qk) { register uchar *xk = (x + k); // A shortcut for x[k]. // x = x - y * qk : register sc_digit carry = 0; for (register int i = 0; i < ylen; ++i) { carry += y[i] * qk; sc_digit diff = (xk[i] + BYTE_RADIX) - (carry & BYTE_MASK); xk[i] = (uchar)(diff & BYTE_MASK); carry = (carry >> BITS_PER_BYTE) + (1 - (diff >> BITS_PER_BYTE)); } // If carry, qk may be one too large. if (carry) { // 2's complement the last digit. carry = (xk[ylen] + BYTE_RADIX) - carry; xk[ylen] = (uchar)(carry & BYTE_MASK); carry = 1 - (carry >> BITS_PER_BYTE); if (carry) { // qk was one too large, so decrement it. --qk; // Since qk was decreased by one, y must be added to x: // That is, x = x - y * (qk - 1) = x - y * qk + y = x_above + y. carry = 0; for (register int i = 0; i < ylen; ++i) { carry += xk[i] + y[i]; xk[i] = (uchar)(carry & BYTE_MASK); carry >>= BITS_PER_BYTE; } if (carry) xk[ylen] = (uchar)((xk[ylen] + 1) & BYTE_MASK); } // second if carry } // first if carry } // if qk q[k] = (uchar)qk; } // for k // Set (sc_digit) w = (uchar) q. vec_from_char(xlen - ylen + 1, q, ulen, w);#ifndef SC_MAX_NBITS delete [] x; delete [] y; delete [] q;#endif}// Compute w = u / v, where u and w are vectors, and v is a scalar.// - 0 < v < HALF_DIGIT_RADIX. Below, we rename w to q.void vec_div_small(int ulen, const sc_digit *u, sc_digit v, sc_digit *q){ // Given (u = u_1u_2...u_n)_b = (q = q_1q_2...q_n) * v + r, where b // is the base, and 0 <= r < v. Then, the algorithm is as follows: // // r = 0; // for (j = 1; j <= n; j++) { // q_j = (r * b + u_j) / v; // r = (r * b + u_j) % v; // } // // In our case, b = DIGIT_RADIX, and u = Ax + B and q = Cx + D where // x = HALF_DIGIT_RADIX. Note that r < v < x and b = x^2. Then, a // typical situation is as follows: // // ---- ---- // 0 r // ---- ---- // A B // ---- ---- ---- // r A B = r * b + u // // Hence, C = (r|A) / v. // D = (((r|A) % v)|B) / v // r = (((r|A) % v)|B) % v#ifdef DEBUG_SYSTEMC assert((ulen > 0) && (u != NULL)); assert(q != NULL); assert((0 < v) && (v < HALF_DIGIT_RADIX));#endif#define q_h r register sc_digit r = 0; const sc_digit *ubegin = u; u += ulen; q += ulen; while (ubegin < u) { sc_digit u_AB = (*--u); // A|B#ifdef DEBUG_SYSTEMC // The overflow bits must be zero. assert(high_half(u_AB) == high_half_masked(u_AB));#endif register sc_digit num = concat(r, high_half(u_AB)); // num = r|A q_h = num / v; // C num = concat((num % v), low_half(u_AB)); // num = (((r|A) % v)|B) (*--q) = concat(q_h, num / v); // q = C|D r = num % v; }#undef q_h}// Compute w = u % v, where w, u, and v are vectors. // - u and v are assumed to have at least two digits as uchars.voidvec_rem_large(int ulen, const sc_digit *u, int vlen, const sc_digit *v, sc_digit *w){#ifdef DEBUG_SYSTEMC assert((ulen > 0) && (u != NULL)); assert((vlen > 0) && (v != NULL)); assert(w != NULL); assert(BITS_PER_DIGIT >= 3 * BITS_PER_BYTE);#endif // This function is adapted from vec_div_large. int xlen = BYTES_PER_DIGIT * ulen + 1; int ylen = BYTES_PER_DIGIT * vlen;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -