📄 adjlogs.c
字号:
int type;{int i;if (type == ALOG_EVENT_PAIR_A1) return 1;for (i=0; i<a1p; i++) if (type == a1event[i]) return 1;return 0;}int is_a2_event( type )int type;{int i;if (type == ALOG_EVENT_PAIR_A2) return 1;for (i=0; i<a2p; i++) if (type == a2event[i]) return 1;return 0;}int is_b1_event( type )int type;{int i;if (type == ALOG_EVENT_PAIR_B1) return 1;for (i=0; i<b1p; i++) if (type == b1event[i]) return 1;return 0;}unsigned long GlobalTime( time, p, nsync )unsigned long time;int p, nsync;{unsigned long gtime, stime1, stime2;unsigned long frac;unsigned long tdiff;unsigned long ScaleLong();/* Problem: since times are UNSIGNED, we have to be careful about how they are adjusted. time - synctime may not be positive. We make sure that all of the subexpressions are unsigned longs */if (time >= globaloffset[p]) { tdiff = time - globaloffset[p]; frac = ScaleLong( numer[p], denom[p], tdiff ); gtime = frac + globaloffset[0]; }else { tdiff = globaloffset[p] - time; frac = ScaleLong( numer[p], denom[p], tdiff ); if (frac > globaloffset[0]) printf( "Oops!\n" ); gtime = globaloffset[0] - frac; }return gtime;}/* This routine takes offset events and solves for the offsets. The approach is: Let the global time be given by (local_time - offset)*scale , with a different offset and scale on each processor. Each processor originates exactly one communication event (except processor 0), generating an a1 and a2 event. A corresponding number of b2 events are generated, but note that one processor may have more than 1 b2 event (if using Dunnigan's synchronization, there will be np-1 b2 events on processor 0, and none anywhere else). These events are: pi a1 (send to nbr) (recv) a2 pj (recv) b1 (send back) We base the analysis on the assumption that in the GLOBAL time repreresentation, a2-a1 is twice the time to do a (send) and a (recv). This is equivalent to assuming that global((a1+a2)/2) == global(b1). Then, with the unknowns the offsets (the scales are assumed known from the syncevent calculation), the matrix is 1 -s0 s1 .... -sj ... si where si is the scale for the i'th processor (note s0 = 1). The right hand sides are (1/2)(a1(i)+a2(i)) *s(i) - b1(j)*s(j). Because of the triangular nature of the matrix, this reduces to o(i) = (a1(i)+a2(i))/2 - (s(j)/s(i)) * (b1(j)-o(j)) Note that if s(i)==s(j) and b1 == (a1+a2)/2, this gives o(i)==o(j). */void ComputeOffsets( np )int np;{int i, j;unsigned long d1, delta;unsigned long ScaleLong();/* If there aren't enough events, return */if (noffsetevents != np - 1) { if (noffsetevents != 0) fprintf( stderr, "Incorrect number of offset events to compute clock offsets\n" ); else fprintf( stderr, "No clock offset events\n" ); return; }/* Take globaloffset[0] from sync */for (i=1; i<np; i++) { /* o(i) = (a1(i)+a2(i))/2 - (s(j)/s(i)) * (b1(j)-o(j)) */ j = offsetevents[i].p1; /* Compute a1(i)+a2(i)/2. Do this by adding half the difference; this insures that we avoid overflow */ d1 = (offsetevents[i].a2 - offsetevents[i].a1)/2; d1 = offsetevents[i].a1 + d1; /* We form (b1-o(j))(s(j)/s(i)) by noting that s(j)/s(i) == denom(i)/denom(j) (since numer(i)==numer(j)) */ delta = ScaleLong( denom[i], denom[j], offsetevents[i].b1 - globaloffset[j] ); globaloffset[i] = d1 - delta; }}#include <mp.h>static MINT *prod, *qq, *rr;static int mpallocated = 0;unsigned long ScaleLong( n, d, v )unsigned long n, d, v;{char buf[40];char *s;MINT *nn, *dd, *vv;unsigned long q, r;if (!mpallocated) { prod = itom(0); if (!prod) { fprintf( stderr, "Could not allocate mp int\n" ); exit(0); } qq = itom(0); if (!qq) { fprintf( stderr, "Could not allocate mp int\n" ); exit(0); } rr = itom(0); if (!rr) { fprintf( stderr, "Could not allocate mp int\n" ); exit(0); } mpallocated = 1; }sprintf( buf, "%x", n );nn = xtom(buf);if (!nn) { fprintf( stderr, "Could not allocate mp int\n" ); exit(0); }sprintf( buf, "%x", v );vv = xtom(buf);if (!vv) { fprintf( stderr, "Could not allocate mp int\n" ); exit(0); }sprintf( buf, "%x", d );dd = xtom(buf);if (!dd) { fprintf( stderr, "Could not allocate mp int\n" ); exit(0); }mult(nn,vv,prod);mdiv(prod,dd,qq,rr);s = mtox(qq);sscanf( s, "%x", &q );free( s );s = mtox(rr);sscanf( s, "%x", &r );free( s );/* Free the locals */mfree( nn );mfree( dd );mfree( vv );return q;}/* Here is not-quite working code for multiple precision arithmetic */#ifdef DO_MULTIPLE_ARITH/* This routine takes a value v and scales it by (n/d). This routine handles integer overflow by using the following formulas: Let h(u) = high 16 bits of u, and l(u) = low 16 bits of u. Then v *(n/d) = (l(v)+h(v))*(l(u)+h(u))/d = l(v)l(n)/d + (l(n)h(v)+l(v)h(n))/d + h(v)h(n)/d == a1/d + a2/d + a3/d In order to keep the values in-range, we define low(u)=l(u) and high(u) = h(u) >> 16. Then this formula becomes (with high substituted for h): a1/d + (a2<<16)/d + (a3<<32)/d Now, when doing the integer division, we need to propagate the remainders. Let the result be r. Then rd = a1 + (a2<<16) + (a3<<32) if a1 = k1 d + b1, (a2 << 16) = (k2 d + b2), and (a3 << 32) = (k3 d + b3), then r d = (k1 d + b1) + (k2 d + b2) + (k3 d + b3); = (k1 + k2 + k3) d + (b1 + b2 + b3) and so r = (k1 + k2 + k3) + (b1 + b2 + b3) / d To compute (k2,b2) and (k3,b3), we do: (a2<<16)/d: a2 = p2 d + c2 a2 << 16 = p2 d << 16 + c2 << 16 = (p2 << 16) d + c2 << 16 Let c2 << 16 = r2 d + s2, then (finally!) a2 << 16 = (p2 << 16 + r2) d + s2 (a3 << 32)/d: a3 = p3 d + c3 a3 << 32 = p3 d << 32 + c3 << 32 = (p3 << 32) d + c3 << 32 Computing c3 << 32 = r3 d + s3 is a challange, particularly since we need only the low 32 bits (the high 32 bits will be 0) We do this in stages as well: c3 << 32 = (c3 << 16) << 16; (c3 << 16) = t3 d + u3 (c3 << 32) = (t3 << 16)d + u3 << 16 = (t3 << 16 + y3)d + z3, == r3 d + s3 where u3 << 16 = y3 d + z3 Then a3 << 32 = (p3 << 32 + r3) d + s3 */void DivLong();/* ScaleDecomp - convert (a << p) = alpha d + beta, with beta < d This works by recursively: a = b d + r, a<<p = (b << p)d + (r<<p) then process r<<p to bd + r' etc until b == 0 */void ScaleDecomp( a, p, d, alpha, beta )int p;unsigned long a, d, *alpha, *beta;{unsigned long b, r;unsigned long Alpha, Beta;int p1;Alpha = 0; Beta = 0;b = a / d;r = a % d;Alpha = b << p;/* We need to gingerly deal with r, since shifting it by much may make it too large, particularly if d is nearly 32 bits. What we need is r << p = gamma d + delta, with r < d. This is really the hard part. We can not assume that d is much smaller than 32 bits, so this is tricky. */DivLong( r, d, (unsigned long)(1 << p), &b, &r );Alpha += b;*beta = r;#ifdef FOOwhile (p > 1 && r > 0) { p1 = p/2; r = (r << p1); b = r / d; r = r % d; Alpha += b << (p-p1); p = (p - p1); }*alpha = Alpha;*beta = r << p;#endif}#define LOWBITS(a) (unsigned long)((a)&lowmask)#define HIGHBITS(a) (unsigned long)( ((a) >> 16 ) & lowmask )#include <mp.h>unsigned long ScaleLong( n, d, v )unsigned long n, d, v;{#ifdef FOO#define LOWBITS(a) (unsigned long)((a)&lowmask)#define HIGHBITS(a) (unsigned long)( ((a) >> 16 ) & lowmask )unsigned long a1, a21, a22, a3, k1, k21, k22, k3, b1, b21, b22, b3;DivLong( n, d, v, &k1, &b1 );return k1 + b1/d;a1 = LOWBITS(v)*LOWBITS(n);a21 = LOWBITS(v)*HIGHBITS(n);a22 = LOWBITS(n)*HIGHBITS(v);a3 = HIGHBITS(v)*HIGHBITS(n);k1 = a1 / d;b1 = a1 % d;ScaleDecomp( a21, 16, d, &k21, &b21 );ScaleDecomp( a22, 16, d, &k22, &b22 );ScaleDecomp( a3, 32, d, &k3, &b3 );return (k1 + k21 + k22 + k3) + (b1 + b21 + b22 + b3) / d;#elsechar buf[40];MINT *nn, *dd, *vv, *prod, *qq, *rr;unsigned long q, r;sprintf( buf, "%x", n );nn = xtom(buf);sprintf( buf, "%x", v );vv = xtom(buf);sprintf( buf, "%x", d );dd = xtom(buf);prod = itom(0);qq = itom(0);rr = itom(0);mult(nn,vv,prod);mdiv(prod,dd,qq,rr);sscanf( mtox(qq), "%x", &q );sscanf( mtox(rr), "%x", &r );return q;#endif}/* Represent nv = alpha d + beta */void DivLong( n, d, v, alpha, beta )unsigned long n, d, v;unsigned long *alpha, *beta;{unsigned long a1, a21, a22, a3, k1, k21, k22, k3, b1, b21, b22, b3;a1 = LOWBITS(v)*LOWBITS(n);a21 = LOWBITS(v)*HIGHBITS(n);a22 = LOWBITS(n)*HIGHBITS(v);a3 = HIGHBITS(v)*HIGHBITS(n);k1 = a1 / d;b1 = a1 % d;ScaleDecomp( a21, 16, d, &k21, &b21 );ScaleDecomp( a22, 16, d, &k22, &b22 );ScaleDecomp( a3, 32, d, &k3, &b3 );*alpha = k1 + k21 + k22 + k3;*beta = b1 + b21 + b22 + b3;}#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -