📄 gdk_batop.mx
字号:
} else if (BATtvoid(b) || (BATtordered(b) & 1)) { size_t l, h = 0, o = BUNindex(b, BUNfirst(b)); for (yy = 0; yy < n; yy++) { l = h; if (yy == n - 1) { r = BUNlast(m); h = BUNindex(b, r) - o; } else if (BATtdense(b)) { h = ((*(oid *) seps[yy]) - b->tseqbase) + 1; } else { r = SORTfndlast(m, seps[yy]); h = BUNindex(b, r) - o; } bf = BATslice(bn, l, h); /* produces view bat */ if (BUNins(metabat, seps[yy], &bf->batCacheid, FALSE) == NULL) goto bunins_failed; }@-CASE 3: full scan.If it ain't sorted then we gotta scan the whole thing and split.This is heavy stuff so we optimize execution according to datatype.@DELETED: simple walk-through list approach for(yy=0; yy<n-1; yy++) { if (@@1_LE(val, seps[yy], @@3)) break; } bunfastins(bats[yy], BUNhead(b,r), val);@We now use binary search for getting to the right bucket, sothat this routine also works efficiently on many buckets.@= rangesplit BATloopFast(b, r, s, xx) { ptr val = BUNt@2(b,r); size_t lo = 0; size_t hi = n-1; for (;;) { yy = (lo+hi)/2; if (yy < n-1 && @1_GT(val, seps[yy], @3)) { lo = yy + 1; if (lo >= hi) { yy = hi; break; } } else if (yy > 0 && @1_LE(val, seps[yy-1], @3)) { hi = yy - 1; if (hi <= lo) { yy = lo; break; } } else { break; } } bunfastins(bats[yy], BUNhead(b,r), val); }@c } else { BAT **bats = (BAT **) GDKmalloc(n * sizeof(BAT *)); for (yy = 0; yy < n; yy++) { bats[yy] = part_bat(bn, BAThtype(bn), bn->ttype, sizes[yy], TRUE); if (bats[yy] == NULL) { BBPreclaim(metabat); return NULL; } if (BUNins(metabat, seps[yy], &bats[yy]->batCacheid, FALSE) == NULL) goto bunins_failed; } switch (tpe = ATOMstorage(b->ttype)) {#ifndef NOEXPAND_CHR case TYPE_chr: @:rangesplit(simple,loc,chr)@ break;#endif#ifndef NOEXPAND_BTE case TYPE_bte: @:rangesplit(simple,loc,bte)@ break;#endif#ifndef NOEXPAND_SHT case TYPE_sht: @:rangesplit(simple,loc,sht)@ break;#endif#ifndef NOEXPAND_INT case TYPE_int: @:rangesplit(simple,loc,int)@ break;#endif#ifndef NOEXPAND_FLT case TYPE_flt: @:rangesplit(simple,loc,flt)@ break;#endif#ifndef NOEXPAND_DBL case TYPE_dbl: @:rangesplit(simple,loc,dbl)@ break;#endif#ifndef NOEXPAND_LNG case TYPE_lng: @:rangesplit(simple,loc,lng)@ break;#endif default: if (b->tvarsized) { @:rangesplit(atom,var,tpe)@ } else { @:rangesplit(atom,loc,tpe)@ } break; } for (yy = 0; yy < n; yy++) BBPunfix(bats[yy]->batCacheid); GDKfree(bats); } for (yy = 0; yy < n - 1; yy++) GDKfree(seps[yy]); GDKfree(seps); GDKfree(sizes); return metabat; bunins_failed: BBPreclaim(metabat); return NULL;}@}@+ Introducing OID ColumnsThe @%BATmark@ operation is normally used to prepare a class of queryresults. Likewise, @%BATnumber@ is heavily used in the SQL front-end.@{@cBAT *BATmark(BAT *b, oid oid_base){ BAT *bn; BATcheck(b, "BATmark"); bn = VIEWhead(b); if (bn) { BATseqbase(BATmirror(bn), oid_base); if (BATrestricted(b) != BAT_READ) { BAT *v = bn; bn = BATcopy(v, v->htype, v->ttype, TRUE); BBPreclaim(v); } } return bn;}#define BUNnumber(bx,hx,tx) bunfastins_nocheck(bx, r, hx, (ptr)&i, yy); r += yy; i++;BAT *BATnumber(BAT *b){/* 64bit: BATnumber should return a [any,wrd] bat instead of [any,int] */ int i = 0, yy; BAT *bn; BUN r; BATcheck(b, "BATnumber"); /* assert(BATcount(b) <= MAXINT); */ bn = BATnew(b->htype, TYPE_int, BATcount(b)); if (bn == NULL) return NULL; r = BUNfirst(bn); yy = BUNsize(bn); @:updateloop(bn,b,BUNnumber)@ ALIGNsetH(bn, b); BATsetprop_wrd(bn, GDK_AGGR_CARD, i); /* 64bit: no (wrd) cast to remind us */ bn->hsorted = BAThordered(b); bn->tsorted = GDK_SORTED; return bn; bunins_failed: BBPreclaim(bn); return NULL;}BAT *BATgroup(BAT *b, int start, int incr, int grpsize){/* 64bit: this should probably use wrd instead of int */ BUN p, q, r; int ngroups = 1, i = 0, xx, yy; BAT *bn; BATcheck(b, "BATgroup"); bn = BATnew(b->htype, TYPE_int, BATcount(b)); if (bn == NULL) return NULL; r = BUNfirst(bn); yy = BUNsize(bn); ALIGNsetH(bn, b); BATloopFast(b, p, q, xx) { bunfastins_nocheck(bn, r, BUNhead(b, p), (ptr) &start, yy); r += yy; if (i == grpsize - 1) { start += incr; i = 0; ngroups++; } else { i++; } } if (i == 0) ngroups--; BATsetprop_wrd(bn, GDK_AGGR_CARD, ngroups); bn->hsorted = BAThordered(b); bn->tsorted = GDK_SORTED; return bn; bunins_failed: BBPreclaim(bn); return NULL;}BAT *BATconst(BAT *b, int tailtype, ptr v){ BAT *bn; BUN p, q, r; int xx, yy; BATcheck(b, "BATconst"); if (BATrestricted(b) == BAT_READ && tailtype == TYPE_void) { oid seqbase = (tailtype == TYPE_void) ? oid_nil : *(oid *) v; bn = VIEWhead(b); BATseqbase(BATmirror(bn), seqbase); return bn; } if (tailtype == TYPE_bat) { v = &((BAT *) v)->batCacheid; } bn = BATnew(b->htype, tailtype, BATcount(b)); if (bn == NULL) { return bn; } r = BUNfirst(bn); yy = BUNsize(bn); BATloopFast(b, p, q, xx) { bunfastins_nocheck(bn, r, BUNhead(b, p), v, yy); r += yy; } ALIGNsetH(bn, b); bn->tsorted = GDK_SORTED; BATsetprop_wrd(bn, GDK_AGGR_CARD, (wrd) (BATcount(b) > 0)); if (tailtype == TYPE_bit) { BATsetprop_wrd(bn, GDK_AGGR_SIZE, (*(bit *) v == TRUE) ? BATcount(b) : 0); } return bn; bunins_failed: BBPreclaim(bn); return NULL;}@}@+ BAT AggregatesWe retain the size() and card() aggregate results in the column descriptor.We would like to have such functionality in an extensible way for many aggregates,for DD (1) we do not want to change the binary BAT format on disk and (2) aggrand size are the most relevant aggregates.It is all hacked into the aggr[3] records; three adjacent integers thatwere left over in the column record. We refer to these as if it where an int aggr[3] array.The below routines set and retrieve the aggregate values from the tail of the BAT, as manyaggregate-manipulating BAT functions work on tail.The rules are as follows: aggr[0] contains the alignment ID of the column (if set i.e. nonzero).Hence, if this value is nonzero and equal to b->talign, the precomputed aggregate values inaggr[GDK_AGGR_SIZE] and aggr[GDK_AGGR_CARD] hold. However, only one of them may be setat the time. This is encoded by the value int_nil, which cannot occur in these two aggregates.This was now extended to record the property whether we know there is a nil value presentby mis-using the highest bits of both GDK_AGGR_SIZE and GDK_AGGR_CARD.@{@c#define GDK_NIL_BIT 0x80000000 /* (1 << 31) */voidPROPdestroy(PROPrec *p){ PROPrec *n; while(p){ n = p->next; if (p->v.vtype == TYPE_str) GDKfree(p->v.val.sval); GDKfree(p); p = n; }}static PROPrec *BATgetprop(BAT *b, int idx){ PROPrec *p = b->T->props; while(p) { if (p->id == idx) return p; p = p -> next; } return NULL;}bitBATgetprop_bit(BAT *b, int idx){ PROPrec *p = BATgetprop(b, idx); return (p)?p->v.val.cval[0]:bit_nil;}intBATgetprop_int(BAT *b, int idx){ PROPrec *p = BATgetprop(b, idx); return (p)?p->v.val.ival:int_nil;}wrdBATgetprop_wrd(BAT *b, int idx){ PROPrec *p = BATgetprop(b, idx); return (p)?p->v.val.wval:wrd_nil;}const char *BATgetprop_str(BAT *b, int idx){ PROPrec *p = BATgetprop(b, idx); return (p)?p->v.val.pval:str_nil;}static void BATsetprop( BAT *b, int idx, int type, void *v){ ValRecord vr; PROPrec *p = BATgetprop(b, idx); if (!p) { p = (PROPrec*)GDKmalloc(sizeof(PROPrec)); p->id = idx; p->next = b->T->props; b->T->props = p; } VALset(&vr, type, v); VALcopy(&p->v, &vr); b->batDirtydesc = TRUE;}voidBATsetprop_bit(BAT *b, int idx, bit val){ BATsetprop(b, idx, TYPE_bit, &val);}voidBATsetprop_int(BAT *b, int idx, int val){ BATsetprop(b, idx, TYPE_int, &val);}voidBATsetprop_wrd(BAT *b, int idx, wrd val){ BATsetprop(b, idx, TYPE_wrd, &val);}voidBATsetprop_str(BAT *b, int idx, str val){ BATsetprop(b, idx, TYPE_str, val);}voidBATpropagate(BAT *dst, BAT *src, int idx) { PROPrec *p = BATgetprop(src, idx); if (p) BATsetprop(dst, idx, p->v.vtype, VALget(&p->v));}@}@-The @%BAThistogram@ function calculates the frequency distribution of thetail values in its operand bat. Notice, that updates on the result donot affect the delta administration.Construction of a histogram over a string (or complex object)can be sped up using the reference information in the BUNand bulk copying the heap.There are separate versions for each type, and for each ahash- and a merge-algorithms.@{@= histoloop_inner if (b->tkey) { yy=1; BATloopFast(b, p, q, xx) bunfastins(bn, BUNt@1(b,p), &yy); } else if (!(BATtordered(b)&1)) { BATloopFast(b, p, q, xx) { ptr v = (ptr) BUNt@1(b,p); if (BATprepareHash(bn)) goto bunins_failed; HASHloop@2(bn, bn->hhash, tt, v, r) goto found@3@2; if (BUNins(bn, v, &yy, FALSE) == NULL) goto bunins_failed; r = BUNlast(bn) - BUNsize(bn);found@3@2: (*(int*) BUNtloc(bn,r))++; } HASHdestroy(bn); } else if (BATcount(b)) { ptr prev = (ptr) BUNt@1(b, BUNfirst(b)); BATloopFast(b, p, q, xx) { ptr v = (ptr) BUNt@1(b,p); if (@3_CMP(v, prev, @4) == 0) { yy++; } else { bunfastins(bn, prev, &yy); yy = 1; } prev = v; } bunfastins(bn, prev, &yy); }@= histoloop{ hash_t tt; int xx, yy = 0; BUN p, q, r; switch(ATOMstorage(tt=bn->htype)) {#ifndef NOEXPAND_CHR case TYPE_chr: @:histoloop_inner(loc,_chr,simple,chr)@ break;#endif#ifndef NOEXPAND_BTE case TYPE_bte: @:histoloop_inner(loc,_bte,simple,bte)@ break;#endif#ifndef NOEXPAND_SHT case TYPE_sht: @:histoloop_inner(loc,_sht,simple,sht)@ break;#endif#if !defined(NOEXPAND_INT) || !defined(NOEXPAND_FLT) case TYPE_int: case TYPE_flt: @:histoloop_inner(loc,_int,simple,int)@ break;#endif#if !defined(NOEXPAND_LNG) || !defined(NOEXPAND_DBL) case TYPE_lng: case TYPE_dbl: @:histoloop_inner(loc,_lng,simple,lng)@ break;#endif default: if (bn->hvarsized) { @:histoloop_inner(var,var,atom,tt)@ } else { @:histoloop_inner(loc,loc,atom,tt)@ } break; }}@cBAT *BAThistogram(BAT *b){ BAT *bn; int tricky = 0; BATcheck(b, "BAThistogram"); tricky = (b->ttype == TYPE_str && strElimDoubles(b->theap)); if (b->talign == 0) { b->talign = OIDnew(1); } bn = BATnew(tricky ? TYPE_var : b->ttype, TYPE_int, 200); if (bn == NULL) { return bn; } @:histoloop()@@-And now correct the interpretation of the values encounteredby bulk copying the heap as well@c if (tricky) { BAT *bm; bn->hheap = (Heap*)GDKzalloc(sizeof(Heap)); if (bn->hheap && b->theap->filename) { char *nme = BBP_physical(bn->batCacheid); bn->hheap->filename = (str) GDKmalloc(strlen(nme) + 12); GDKfilepath(bn->hheap->filename, NULL, nme, "hheap"); } if (HEAPcopy(bn->hheap, b->theap) < 0) { bunins_failed: BBPreclaim(bn); return NULL; } bm = BATmirror(bn); bm->ttype = bn->htype = b->ttype; bm->tvarsized = bn->hvarsized = 1; } bn->hsorted = (BATcount(bn) < 2 ? GDK_SORTED : BATtordered(b)); bn->tsorted = (BATcount(bn) < 2 ? GDK_SORTED : FALSE); bn->halign = NOID_AGGR(b->talign); if (BATcount(bn) == BATcount(b)) { BAT *bm = BATmirror(b); ALIGNsetH(bn, bm); } BATkey(bn, TRUE); BATkey(BATmirror(bn), BATcount(bn) < 2); if (b->ttype == TYPE_bit) { bit trueval = TRUE; BUN p = BUNfnd(bn, &trueval); BATsetprop_wrd(b, GDK_AGGR_SIZE, p ? *(int *) BUNtloc(bn, p) : 0); } BATsetprop_wrd(b, GDK_AGGR_CARD, BATcount(bn)); return bn;}@-The @%BATcount_no_nil@ function counts all BUN in a BAT that have a non-nil tail value.@= cntloop BATloopFast(b, p, q, xx) { if (!@1_EQ(nil,BUNt@2(b, p),@3)) { cnt++; } }@csize_tBATcount_no_nil(BAT *b){ size_t cnt = (size_t) 0; BUN p, q; int xx; hash_t tt; ptr nil; BATcheck(b, "BATcnt"); tt = b->ttype; nil = ATOMnilptr(tt); switch (ATOMstorage(tt)) {#ifndef NOEXPAND_CHR case TYPE_chr: @:cntloop(simple,loc,chr)@ break;#endif#ifndef NOEXPAND_BTE case TYPE_bte: @:cntloop(simple,loc,bte)@ break;#endif#ifndef NOEXPAND_SHT case TYPE_sht: @:cntloop(simple,loc,sht)@ break;#endif#ifndef NOEXPAND_INT case TYPE_int: @:cntloop(simple,loc,int)@ break;#endif#ifndef NOEXPAND_FLT case TYPE_flt: @:cntloop(simple,loc,flt)@ break;#endif#ifndef NOEXPAND_LNG case TYPE_lng: @:cntloop(simple,loc,lng)@ break;#endif#ifndef NOEXPAND_DBL case TYPE_dbl: @:cntloop(simple,loc,dbl)@ break;#endif default: if (b->tvarsized) { @:cntloop(atom,var,tt)@ } else { @:cntloop(atom,loc,tt)@ } break; } return cnt;}@}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -