📄 group.mx
字号:
} cnt[h]++; } } } /* postprocess by dividing sums by counts */ BATloopFast(bn, p, q, xx) { int h = (int)(*(oid*) BUNhead(bn,p)) - min; @1 *dst = (@1*) BUN@2(bn, p); if (cnt[h] == 0 || sums[h] == @1_nil) { *dst = @1_nil; } else { *dst = sums[h]/cnt[h]; } } GDKfree(sums); GDKfree(cnt);@= arithpumpgroup_export str GRPsum_@1(int *retval, int *bid,int *eid, bit *ignore_nils);str GRPsum_@1(int *retval, int *bid,int *eid, bit *ignore_nils){ BAT *b = NULL, *e = NULL; BAT *bn = BATnew(e->htype, BATttype(b), BATcount(e)); @1 zero = (@1) 0, *sums; BUN p, q, r; int xx,i,range,min,max; (void) ignore_nils; /* fool compiler */ if( (b= BATdescriptor(*bid)) == NULL ){ throw(MAL, "group.sum", "Cannot access descriptor"); } @:chkHeader(b,GRPsum_@1)@ if( (e= BATdescriptor(*eid)) == NULL ){ BBPreleaseref(b->batCacheid); throw(MAL, "group.sum", "Cannot access descriptor"); } @:chkHeader(e,GRPsum_@1)@ /* init: set all sums to zero */ bn->tsorted = bn->hsorted = 0; ALIGNsetH(bn, e); if (BAThordered(e)&1) { min = (int)(*(oid*)BUNhead(e, BUNfirst(e))); BATloopFast(e, p, q, xx) { BUNfastins(bn, BUNhead(e,p), &zero); } max = (int)(*(oid*)BUNhead(e, BUNlast(e)-BUNsize(e))); } else { min = max = (int)(*(oid*)BUNhead(e, BUNfirst(e))); BATloopFast(e, p, q, xx) { BUNfastins(bn, BUNhead(e,p), &zero); i = (int)(*(oid*)BUNhead(e, p)); if (i < min) min = i; else if (i > max) max = i; } } range = max - min + 1; /* scan b, and add values to sums in-place */ if (range > SMALL_AGGR_MAX) { ALGODEBUG THRprintf(GDKout, "#GRPsum_@1: range(=%d) > SMALL_AGGR_MAX(=%d) => large_aggr_sum\n",range,SMALL_AGGR_MAX); @:large_aggr_sum(@1,@2)@ } else { ALGODEBUG THRprintf(GDKout, "#GRPsum_@1: range(=%d) <= SMALL_AGGR_MAX(=%d) => small_aggr_sum\n",range,SMALL_AGGR_MAX); @:small_aggr_sum(@1,@2)@ } *retval= bn->batCacheid; BBPkeepref(*retval); BBPreleaseref(b->batCacheid); BBPreleaseref(e->batCacheid); return MAL_SUCCEED;}group_export str GRPavg_@1(int *retval, int *bid,int *eid);str GRPavg_@1(int *retval, int *bid,int *eid){ BAT *b = NULL, *e = NULL; BAT *bn = BATnew(e->htype, BATttype(b), BATcount(e)); int xx, yy = 0, off = BUNindex(bn,BUNfirst(bn)); int *cnt = (int*) GDKmalloc(BATcount(e)*sizeof(int)); @1 zero = (@1) 0; BUN p, q, r; if( (b= BATdescriptor(*bid)) == NULL ){ throw(MAL, "group.@1", "Cannot access descriptor"); } @:chkHeader(b,GRPsum_@1)@ if( (e= BATdescriptor(*eid)) == NULL ){ BBPreleaseref(b->batCacheid); throw(MAL, "group.@1", "Cannot access descriptor"); } @:chkHeader(e,GRPsum_@1)@ /* init sums and counts to zero */ bn->tsorted = bn->hsorted = 0; ALIGNsetH(bn, e); memset(cnt, 0, BATcount(e)*sizeof(int)); BATloopFast(e, p, q, xx) { BUNfastins(bn, BUNhead(e,p), &zero); } /* scan b, adding sums, and incrementing counts */ (void) BATprepareHash(bn); BATloopFast(b, p, q, xx) { oid *h = (oid*) BUNhead(b,p); @1 *t = (@1*) BUNtail(b,p); BUNfndOID(r,bn,h); if (r) { @1 *dst = (@1*) BUNtloc(bn, r); if (*dst != @1_nil) { if (*t == @1_nil) { *dst = @1_nil; } else { *dst += *t; } cnt[BUNindex(bn,r)-off]++; } } } /* postprocess by dividing sums by counts */ BATloopFast(bn, p, q, xx) { @1 *dst = (@1*) BUNtail(bn, p); if (cnt[yy] == 0) { *dst = @1_nil; } else if (*dst != @1_nil) { *dst /= cnt[yy]; } yy++; } GDKfree(cnt); *retval= bn->batCacheid; BBPkeepref(*retval); BBPreleaseref(b->batCacheid); BBPreleaseref(e->batCacheid); return MAL_SUCCEED;}@c@:arithpump(sht,tloc)@@:arithpump(int,tloc)@@:arithpump(lng,tloc)@@:arithpump(flt,tloc)@@:arithpump(dbl,tloc)@@= extremegroup_export str GRP@1(int *retval, int *bid,int *eid);str GRP@1(int *retval, int *bid,int *eid){ BAT *b = NULL, *e = NULL; BAT *bn; int (*cmp)(ptr,ptr); ptr nil; int xx, yy, off; ptr *extremes; BUN p, q, r; if( (b= BATdescriptor(*bid)) == NULL ){ throw(MAL, "group.@1", "Cannot access descriptor"); } @:chkHeader(b,GRP@1)@ if( (e= BATdescriptor(*eid)) == NULL ){ BBPreleaseref(b->batCacheid); throw(MAL, "group.@1", "Cannot access descriptor"); } @:chkHeader(e,GRP@1)@ bn = BATnew(e->htype, BATttype(b), BATcount(e)); cmp = BATatoms[bn->ttype].atomCmp; nil = ATOMnilptr(bn->ttype); yy = 0; off = BUNindex(e,BUNfirst(e)); extremes = (ptr*) GDKmalloc(BATcount(e)*sizeof(ptr)); /* init: set all extremes to the zero pointer */ memset(extremes, 0, BATcount(e)*sizeof(ptr)); /* scan b and replace totals by the extreme value (just pointers to vals in b) */ (void) BATprepareHash(e); BATloopFast(b, p, q, xx) { oid *h = (oid*) BUNhead(b,p); ptr t = BUNtail(b,p); BUNfndOID(r,e,h); if (r) { ptr *val = extremes + (BUNindex(e,r) - off); if (*val != nil) { if ((*cmp)(t,nil) == 0) { *val = nil; } else if (*val == NULL || (*cmp)(t,*val) @2 0) { *val = t; } } } } /* insert the extreme values into the result */ BATloopFast(e, p, q, xx) { ptr val = extremes[yy++]; BUNfastins(bn, BUNhead(e,p), val?val:nil); } bn->tsorted = bn->hsorted = 0; ALIGNsetH(bn, e); GDKfree(extremes); *retval= bn->batCacheid; BBPkeepref(*retval); BBPreleaseref(b->batCacheid); BBPreleaseref(e->batCacheid); return MAL_SUCCEED;}@c@:extreme(min,<)@@:extreme(max,>)@@-The simple extreme operations exploit the grouping and orderproperties to speed up the process.@= xqMinMaxImplgroup_export str GRP@1_oid_@2(int *retval, int *bid);str GRP@1_oid_@2(int *retval, int *bid){ BAT *b; BAT *bn; oid grp= oid_nil; @2 *val= 0; int xx; BUN p, q; if( (b= BATdescriptor(*bid)) == NULL ){ throw(MAL, "group.@1", "Cannot access descriptor"); } /* use expensive operation when the BAT is not ordered on the head */ if( !(b->hsorted&1) ){ /* determine groups first */ BBPreleaseref(b->batCacheid); throw(MAL, "group","not yet implemented"); } @:chkHeader(b,GRP@1_oid_@2)@ bn = BATnew(BAThtype(b), BATttype(b), BATcount(b)); BATloopFast(b, p, q, xx) { oid h = *(oid*) BUNhead(b,p); @2 t = *(@2*) BUNtail(b,p); if( h != grp){ /* switch to a new group */ BUNins(bn, &h, &t, FALSE); grp= h; val = (@2*) BUNtail(bn,BUNlast(bn)-xx); } else if( t @3 *val ) *val = t; } bn->hsorted = b->hsorted ; bn->tsorted = b->tsorted ; BBPkeepref(*retval= bn->batCacheid); BBPreleaseref(b->batCacheid); return MAL_SUCCEED;}@c @:xqMinMaxImpl(min,oid,<)@ @:xqMinMaxImpl(min,sht,<)@ @:xqMinMaxImpl(min,int,<)@ @:xqMinMaxImpl(min,lng,<)@ @:xqMinMaxImpl(min,flt,<)@ @:xqMinMaxImpl(min,dbl,<)@ @:xqMinMaxImpl(max,oid,>)@ @:xqMinMaxImpl(max,sht,>)@ @:xqMinMaxImpl(max,int,>)@ @:xqMinMaxImpl(max,lng,>)@ @:xqMinMaxImpl(max,flt,>)@ @:xqMinMaxImpl(max,dbl,>)@@-@= large_aggr_count (void) BATprepareHash(bn); BATloopFast(b, p, q, xx) { oid *h = (oid*) BUNhead(b,p); BUNfndOID(r, bn, h); if (r) { ptr t = BUNtail(b,p); (void) t ; /* fool compiler */ /*if (ATOMcmp(btt,t,bt_nil)) {*/ @1 { int *dst = (int*) BUNtloc(bn, r); (*dst)++; } } }@= small_aggr_count cnt = (int*) GDKmalloc(range*sizeof(int)); memset(cnt, 0, range*sizeof(int)); BATloopFast(b, p, q, xx) { int h = (int)(*(oid*) BUNhead(b,p)) - min; if (h >= 0 && h < range) { ptr t = BUNtail(b,p); (void)t; /* fool compiler */ /* if (ATOMcmp(btt,t,bt_nil)) {*/ @1 { cnt[h]++; } } } BATloopFast(bn, p, q, xx) { int h = (int)(*(oid*) BUNhead(bn,p)) - min; *(int*)BUNtloc(bn, p) = cnt[h]; } GDKfree(cnt);@cgroup_export str GRPaggr_count(int *retval, int *bid, int *eid, bit *ignore_nils);strGRPaggr_count(int *retval, int *bid, int *eid, bit *ignore_nils){ BAT *b = NULL, *e = NULL; BAT *bn; int zero = 0, *cnt; BUN p, q, r; int xx, i, range, min, max; int btt; ptr bt_nil; if ((b = BATdescriptor(*bid)) == NULL) { throw(MAL, "group.count", "Cannot access descriptor"); } @:chkHeader(b,GRPaggr_count)@ if ((e = BATdescriptor(*eid)) == NULL) { BBPreleaseref(b->batCacheid); throw(MAL, "group.count", "Cannot access descriptor"); } @:chkHeader(e,GRPaggr_count)@ bn = BATnew(e->htype, TYPE_int, BATcount(e)); btt = b->ttype; bt_nil = ATOMnilptr(b->ttype); /* init: set all sums to zero */ bn->tsorted = bn->hsorted = 0; ALIGNsetH(bn, e); if (BAThordered(e) & 1) { min = (int) (*(oid *) BUNhead(e, BUNfirst(e))); BATloopFast(e, p, q, xx) { BUNfastins(bn, BUNhead(e, p), &zero); } max = (int) (*(oid *) BUNhead(e, BUNlast(e) - BUNsize(e))); } else { min = max = (int) (*(oid *) BUNhead(e, BUNfirst(e))); BATloopFast(e, p, q, xx) { BUNfastins(bn, BUNhead(e, p), &zero); i = (int) (*(oid *) BUNhead(e, p)); if (i < min) min = i; else if (i > max) max = i; } } range = max - min + 1; /* scan b, and add increment totals for true values */ if (range > SMALL_AGGR_MAX) { if (*ignore_nils) { ALGODEBUG THRprintf(GDKout, "#CMDaggr_count: range(=%lu) > SMALL_AGGR_MAX(=%d) => large_aggr_count(if (ATOMcmp(btt,t,bt_nil)))\n", (unsigned long) range, SMALL_AGGR_MAX); @:large_aggr_count(if (ATOMcmp(btt, t, bt_nil)))@ } else { ALGODEBUG THRprintf(GDKout, "#CMDaggr_count: range(=%lu) > SMALL_AGGR_MAX(=%d) => large_aggr_count()\n", (unsigned long) range, SMALL_AGGR_MAX); @:large_aggr_count()@ } } else { if (*ignore_nils) { ALGODEBUG THRprintf(GDKout, "#CMDaggr_count: range(=%lu) <= SMALL_AGGR_MAX(=%d) => small_aggr_count(if (ATOMcmp(btt,t,bt_nil)))\n", (unsigned long) range, SMALL_AGGR_MAX); @:small_aggr_count(if (ATOMcmp(btt, t, bt_nil)))@ } else { ALGODEBUG THRprintf(GDKout, "#CMDaggr_count: range(=%lu) <= SMALL_AGGR_MAX(=%d) => small_aggr_count()\n", (unsigned long) range, SMALL_AGGR_MAX); @:small_aggr_count()@ } } *retval = bn->batCacheid; BBPkeepref(*retval); BBPreleaseref(b->batCacheid); BBPreleaseref(e->batCacheid); return MAL_SUCCEED;}group_export str GRPsize(int *retval, int *bid, int *eid);strGRPsize(int *retval, int *bid, int *eid){ BAT *b = NULL, *e = NULL; BAT *bn; int zero = 0; BUN p, q, r; int xx; if ((b = BATdescriptor(*bid)) == NULL) { throw(MAL, "group.size", "Cannot access descriptor"); } @:chkHeader(b,GRPsize)@ if ((e = BATdescriptor(*eid)) == NULL) { BBPreleaseref(b->batCacheid); throw(MAL, "group.size", "Cannot access descriptor"); } @:chkHeader(e,GRPsize)@ bn = BATnew(e->htype, TYPE_int, BATcount(e)); /* init: set all sums to zero */ bn->tsorted = bn->hsorted = 0; ALIGNsetH(bn, e); BATloopFast(e, p, q, xx) { BUNfastins(bn, BUNhead(e, p), &zero); } /* scan b, and add increment totals for true values */ (void) BATprepareHash(bn); BATloopFast(b, p, q, xx) { if (*(bit *) BUNtloc(b, p) == TRUE) { oid *h = (oid *) BUNhead(b, p); BUNfndOID(r, bn, h); if (r) { int *dst = (int *) BUNtloc(bn, r); (*dst)++; } } } *retval = bn->batCacheid; BBPkeepref(*retval); BBPreleaseref(b->batCacheid); BBPreleaseref(e->batCacheid); return MAL_SUCCEED;}@-The window aggregate functions@= windowsumgroup_export str GRPslidingsum_@1(int *retval, int *bid, int *size, int *shift);str GRPslidingsum_@1(int *retval, int *bid, int *size, int *shift){ BAT *b, *bn; @2 sum; oid o= oid_nil; BUN p, q; int xx,i; if( *shift <=0){ throw(MAL, "group.sum", "Illegal shift value"); } if( (b= BATdescriptor(*bid)) == NULL ){ throw(MAL, "group.sum", "Cannot access descriptor"); } @:chkHeader(b,GRPwindowsum_@1)@ /* init: set all sums to zero */ bn= BATnew( TYPE_void, TYPE_@2, BATcount(b)/ *size +1); bn->tsorted = bn->hsorted = 0; BATseqbase(bn,0); i= *size; sum =0; BATloopFast(b, p, q, xx) { sum += *(@1*) BUNtail(b,p); if( --i == 0){ BUNfastins(bn, &o, &sum); /* slide here */ if( *size != *shift ){ p-= (*size-*shift)*xx; } i= *size; sum =0; } } if(i != *size) BUNfastins(bn, &o, &sum); *retval= bn->batCacheid; BBPkeepref(*retval); BBPreleaseref(b->batCacheid); return MAL_SUCCEED;}group_export str GRPwindowsum_@1(int *retval, int *bid, int *size);str GRPwindowsum_@1(int *retval, int *bid, int *size){ return GRPslidingsum_@1(retval,bid,size,size);}@c@:windowsum(sht,lng)@@:windowsum(int,lng)@@:windowsum(lng,lng)@@:windowsum(flt,dbl)@@:windowsum(dbl,dbl)@@}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -