📄 gdk_bat.mx
字号:
do { BUNdelete(b, p, force); } while ((p = BUNfnd(b, x)) != NULL); } return b;}@Deletion of strings leads to garbage on the variable stack.This can be removed by compaction of the BAT through copying it.@- BUN replaceThe last operation in this context is BUN replace. It assumes thatthe header denotes a key. The old value association is destroyed (if itexists in the first place) and the new value takes its place.In order to make updates on void columns workable; replaces on them are always done in-place. Performing them without bun-movements greatly simplifies the problem. The 'downside' is that when transactionmanagement has to be performed, replaced values should be saved explicitly.@= uncommit_replace @:tacc_update(del,t@1,p,pit)@ ATOMreplace(b->ttype, b->theap, BUNtloc(b,p), t); @:tacc_update(ins,t@1,p,pit)@ if (BATtordered(b)&1 || BATtordered(b)==(bit)GDK_SORTED_REV) { int bs = BUNsize(b), tt = b->ttype; BUN prv = p - bs; BUN nxt = p + bs; if (prv < b->batFirst) prv = NULL; if (nxt > last) nxt = NULL; if (BATtordered(b)&1) { if ((prv && ATOMcmp(tt, t, BUNt@1(b,prv)) < 0) || (nxt && ATOMcmp(tt, t, BUNt@1(b,nxt)) > 0)) { b->tsorted = FALSE; b->T->nosorted = pit; } else if (b->ttype != TYPE_void && b->tdense) { if (((prv && 1 + *(oid*) BUNtloc(b,prv) != *(oid*) t) || (nxt && *(oid*) BUNtloc(b,nxt) != 1 + *(oid*) t))) { b->tdense = FALSE; b->T->nodense = pit; } else if (!prv && !nxt) { bm->hseqbase = b->tseqbase = *(oid*)t; } } } else { if ((prv && ATOMcmp(tt, t, BUNt@1(b,prv)) > 0) || (nxt && ATOMcmp(tt, t, BUNt@1(b,nxt)) < 0)) { b->tsorted = FALSE; b->T->nosorted_rev = pit; } } }@cBAT *BUNinplace(BAT *b, BUN p, ptr h, ptr t, bit force){ if (p >= b->batInserted || force) { /* uncommitted BUN elements */ BUN last = BUNlast(b) - BUNsize(b); BAT *bm = BBP_cache(-b->batCacheid); size_t pit = BUNindex(b, p); ALIGNinp(b, "BUNreplace", force); /* zap alignment info */ if (b->tvarsized) { size_t tsize = b->theap->size; @:uncommit_replace(var)@ if (b->thash && tsize != b->theap->size) HEAPwarm(b->theap); } else { @:uncommit_replace(loc)@ } if (((b->ttype != TYPE_void) & b->tkey & !(b->tkey & BOUND2BTRUE)) && BATcount(b) > 1) { BATkey(bm, FALSE); } b->batDirtybuns = b->theapdirty = TRUE; } else { /* committed BUN */ BUNdelete(b, p, force); if (BUNins(b, h, t, force) == NULL) { bunins_failed: return NULL; } } return b;}BAT *BUNreplace(BAT *b, ptr h, ptr t, bit force){ BUN p; BATcheck(b, "BUNreplace\n"); BATcheck(h, "BUNreplace: head value is nil\n"); BATcheck(t, "BUNreplace: tail value is nil\n"); if (!(p = BUNfnd(b, h))) return b; if ((b->tkey & BOUND2BTRUE) && BUNfnd(BATmirror(b), t)) { return b; } if (b->ttype == TYPE_void) { size_t i; /* no need to materialize if value doesn't change */ if (b->tseqbase == oid_nil || * (oid *) BUNtpos(b, p) == * (oid *) t) return b; i = BUNindex(b, p); b = BATmaterializet(b, BATcount(b)); if (b == NULL) return NULL; p = BUNptr(b, i); } return BUNinplace(b, p, h, t, force);}intvoid_inplace(BAT *b, oid id, ptr val, bit force){ int res = GDK_SUCCEED; BUN p = NULL; BUN oldInserted = b->batInserted; assert(b->htype == TYPE_void); assert(b->hseqbase != oid_nil); assert(BATcount(b) > (id -b->hseqbase)); b->batInserted = NULL; BUNfndVOID(p, b, (ptr) &id); assert(p >= b->batInserted); /* we don't want delete/ins */ assert(force || !b->batRestricted); if (!BUNinplace(b, p, (ptr) &id, val, force)) res = GDK_FAIL; b->batInserted = oldInserted; return res;}ssize_tvoid_replace_bat(BAT *b, BAT *u, bit force){ size_t nr = 0; BUN r, s; BATloop(u, r, s) { oid updid = *(oid *) BUNhead(u, r); ptr val = BUNtail(u, r); if (void_inplace(b, updid, val, force) == GDK_FAIL) return -1; nr++; } return nr;}@- BUN LookupLocation of a BUN using a value should use the available indexesto speed up access. If indexes are lacking then a hash indexis constructed under the assumption that 1) multiple access to the BAT can be expected and 2) building the hash is only slightly more expensivethan the full linear scan.NULL is returned if no such element could be found.In those cases where the type is known and a hash index is available,one should use the inline functions to speed-up processing.@cBUNBUNfnd(BAT *b, ptr v){ BUN r; BATcheck(b, "BUNfnd"); if (BAThvoid(b)) { BUNfndVOID(r, b, v); return r; } if (!b->hhash) { if (BAThordered(b) & 1) return (BUN) SORTfnd(b, v); } switch (ATOMstorage(b->htype)) { case TYPE_chr: HASHfnd_chr(r, b, v); break; case TYPE_bte: HASHfnd_bte(r, b, v); break; case TYPE_sht: HASHfnd_sht(r, b, v); break; case TYPE_int: case TYPE_flt: HASHfnd_int(r, b, v); break; case TYPE_dbl: case TYPE_lng: HASHfnd_lng(r, b, v); break; case TYPE_str: HASHfnd_str(r, b, v); break; default: HASHfnd(r, b, v); } return r;}@= swapif if (@1) { int (*_cmp) (ptr, ptr); ptr _p; _cmp = hcmp; hcmp = tcmp; tcmp = _cmp; _p = x; x = y; y = _p; b = BATmirror(b); if (v) v = BATmirror(v); } @= dohash ATOMstorage(@1->@2type) != TYPE_chr && (ATOMstorage(@1->@2type) != TYPE_str || !GDK_ELIMDOUBLES(@1->@2heap)) @cBUNBUNlocate(BAT *b, ptr x, ptr y){ int (*hcmp) (ptr, ptr) = BATatoms[b->htype].atomCmp; int (*tcmp) (ptr, ptr) = BATatoms[b->ttype].atomCmp; int htpe, ttpe, hint = 0, tint = 0, hlng = 0, tlng = 0, xx; var_t hidx, tidx; BUN p, q; BAT *v = NULL; BATcheck(b, "BUNlocate: BAT parameter"); BATcheck(x, "BUNlocate: value parameter"); p = BUNfirst(b); q = BUNlast(b); xx = BUNsize(b); if (p == q) return NULL; /* empty bat */ /* sometimes BUNlocate is just about a single column */ @:swapif(y && ((BAThordered(b)&1) && (*hcmp)(x,BUNhead(b,p)) == 0 && (*hcmp)(x,BUNhead(b,q-xx)) == 0))@ if (y == NULL || ((BATtordered(b)&1) && (*tcmp)(y,BUNtail(b,p)) == 0 && (*tcmp)(y,BUNtail(b,q-xx)) == 0)) { return BUNfnd(b, x); } /* positional lookup is always the best choice */ @:swapif(BATtdense(b))@ if (BAThdense(b)) { oid i = *(oid *) x - b->hseqbase; if ((size_t) i < BATcount(b)) { i += BUNindex(b, BUNfirst(b)); p = BUNptr(b, i); if ((*tcmp) (y, BUNtail(b, p)) == 0) return p; } return NULL; } /* next, try to restrict the range using sorted columns */ if (BATtordered(b) & 1) { p = SORTfndfirst(b,y); q = SORTfndlast(b,y); } if (BAThordered(b) & 1) { BUN mp = SORTfndfirst(BATmirror(b),x); BUN mq = SORTfndlast(BATmirror(b),x); if (mp > p) p = mp; if (mq < p) q = mq; } if (p >= q) return NULL; /* value combination cannot occur */ /* if the range is still larger than 32 BUNs, consider investing in a hash table */ if ((q-p) > (xx<<5)) { /* regrettably MonetDB support only single-column hashes * strategy: create a hash on both columns, and select the column with the best distribution */ @:swapif((b->thash && b->hhash == NULL) || !(@:dohash(b,h)@))@ if (b->hhash == NULL && (v = VIEWcreate_(b, TRUE)) != NULL) { /* As we are going to remove the worst hash table later, we must do everything * in a view, as it is not permitted to remove a hash table from a read-only * operation (like BUNlocate). Other threads might then crash. */ if (@:dohash(v,h)@) (void) BATprepareHash(v); if (@:dohash(v,t)@) (void) BATprepareHash(BATmirror(v)); if (v->hhash && v->thash) { /* we can choose between two hash tables */ size_t hcnt = 0, tcnt = 0, i; for(i=0; i<=v->hhash->mask; i++) hcnt += (v->hhash->hash[i] != HASH_MAX); for(i=0; i<=v->thash->mask; i++) tcnt += (v->thash->hash[i] != HASH_MAX); @:swapif(hcnt < tcnt)@ /* remove the least selective hash table */ HASHremove(BATmirror(v)); } @:swapif(v->hhash == NULL)@ if (v->hhash) { gdk_set_lock(GDKhashLock[ABS(b->batCacheid) & BBPLOCKMASK], "BUNlocate"); if (b->hhash == NULL) { /* give it to the parent */ b->hhash = BATmirror(b)->thash = v->hhash; } gdk_unset_lock(GDKhashLock[ABS(b->batCacheid) & BBPLOCKMASK], "BUNlocate"); } BBPreclaim(v); v = NULL; } } /* exploit string double elimination, when present */ htpe = ATOMstorage(b->htype); ttpe = ATOMstorage(b->ttype); if (htpe == TYPE_str && GDK_ELIMDOUBLES(b->hheap)) { hidx = strLocate(b->hheap, x); if (hidx == 0) return NULL; /* x does not occur */ if (b->hhash == NULL) { htpe = TYPE_oid; x = &hidx; } } if (ttpe == TYPE_str && GDK_ELIMDOUBLES(b->theap)) { tidx = strLocate(b->theap, y); if (tidx == 0) return NULL; /* y does not occur */ ttpe = TYPE_oid; y = &tidx; } /* type analysis. For equi-lookup {flt,dbl,wrd,oid} can all be treated as either int or lng */ if (!ATOMvarsized(htpe)) { hint = (ATOMsize(htpe) == sizeof(int)); hlng = (ATOMsize(htpe) == sizeof(lng)); } if (!ATOMvarsized(ttpe)) { tint = (ATOMsize(ttpe) == sizeof(int)); tlng = (ATOMsize(ttpe) == sizeof(lng)); } /* hashloop over head values, check tail values */ if (b->hhash) { hash_t h; if (hint && tint) { HASHloop_int(b, b->hhash, h, x, p) if (*(int*) y == *(int*) BUNtloc(b, p)) return p; } else if (hint && tlng) { HASHloop_int(b, b->hhash, h, x, p) if (*(lng*) y == *(lng*) BUNtloc(b, p)) return p; } else if (hlng && tint) { HASHloop_lng(b, b->hhash, h, x, p) if (*(int*) y == *(int*) BUNtloc(b, p)) return p; } else if (hlng && tlng) { HASHloop_lng(b, b->hhash, h, x, p) if (*(lng*) y == *(lng*) BUNtloc(b, p)) return p; } else { HASHloop(b, b->hhash, h, x) if ((*tcmp) (y, BUNtail(b, p = BUNptr(b,h))) == 0) return p; } return NULL; } /* linear check; we get here for small ranges, [chr,chr] bats, and hash alloc failure */ if (ATOMstorage(b->htype) == TYPE_chr && ATOMstorage(b->ttype) == TYPE_chr) { for(;p < q; p+=xx) if (*(chr*) BUNhloc(b, p) == *(chr*) x && *(chr*) BUNtloc(b, p) == *(chr*) y) return p; } else if (hint && tint) { for(;p < q; p+=xx) if (*(int*) BUNhloc(b, p) == *(int*) x && *(int*) BUNtloc(b, p) == *(int*) y) return p; } else if (hint && tlng) { for(;p < q; p+=xx) if (*(int*) BUNhloc(b, p) == *(int*) x && *(lng*) BUNtloc(b, p) == *(lng*) y) return p; } else if (hlng && tint) { for(;p < q; p+=xx) if (*(lng*) BUNhloc(b, p) == *(lng*) x && *(int*) BUNtloc(b, p) == *(int*) y) return p; } else if (hlng && tlng) { for(;p < q; p+=xx) if (*(lng*) BUNhloc(b, p) == *(lng*) x && *(lng*) BUNtloc(b, p) == *(lng*) y) return p; } else { for(;p < q; p+=xx) if ((*hcmp) (x, BUNhead(b, p)) == 0 && (*tcmp) (y, BUNtail(b, p)) == 0) return p; } return NULL;}@}@+ BAT Property ManagementThe function @%BATcount@ returns the number of active elements in a BAT.Counting is type independent.It can be implemented quickly, because the system ensures a denseBUN list.@{@csize_tBATcount(BAT *b){ BATcheck(b, "BATcount"); return b->batCount;}@-The alternative routine is @%BATbuncount@, which calculates thetotal buns in use.@csize_tBATbuncount(BAT *b){ size_t f; BATcheck(b, "BATbuncount"); f = b->batBuns->size - (BUNfirst(b) - b->batBuns->base); return f / BUNsize(b);}size_tBATvmsize(BAT *b, int dirty){ BATcheck(b, "BATvmsize"); if (b->batDirty || (b->batPersistence != TRANSIENT && !b->batCopiedtodisk)) dirty = 0; return ((dirty == 0 || b->batDirtybuns) ? HEAPvmsize(b->batBuns) : 0) + (((dirty == 0 || b->batDirtybuns) && b->hhash) ? HEAPvmsize(b->hhash->heap) : 0) + (((dirty == 0 || b->batDirtybuns) && b->thash) ? HEAPvmsize(b->thash->heap) : 0) + (((dirty == 0 || b->hheapdirty) && b->hheap) ? HEAPvmsize(b->hheap) : 0) + (((dirty == 0 || b->theapdirty) && b->theap) ? HEAPvmsize(b->theap) : 0);}size_tBATmemsize(BAT *b, int dirty){ BATcheck(b, "BATmemsize"); if (b->batDirty || (b->batPersistence != TRANSIENT && !b->batCopiedtodisk)) dirty = 0; return ((dirty == 0 || b->batDirtydesc) ? sizeof(BATstore) : 0) + ((dirty == 0 || b->batDirtybuns) ? HEAPmemsize(b->batBuns) : 0) + (((dirty == 0 || b->batDirtybuns) && b->hhash) ? HEAPmemsize(b->hhash->heap) : 0) + (((dirty == 0 || b->batDirtybuns) && b->thash) ? HEAPmemsize(b->thash->heap) : 0) + (((dirty == 0 || b->hheapdirty) && b->hheap) ? HEAPmemsize(b->hheap) : 0) + (((dirty == 0 || b->theapdirty) && b->theap) ? HEAPmemsize(b->theap) : 0);}@@}@-The key and name properties can be changed at any time.Keyed dimensions are automatically supported by an auxiliary hash-basedaccess structure to speed up searching. Turning off the key integrity property does not cause the index to disappear. It can still be used tospeed-up retrieval. The routine @%BATkey@ sets the key property of theassociation head. @{@cBAT *BATkey(BAT *b, int flag){ bat parent; BATcheck(b, "BATkey"); parent = VIEWparentcol(b); if (b->htype == TYPE_void) { if (b->hseqbase == oid_nil && flag == BOUND2BTRUE) { GDKerror("BATkey: nil-column cannot be kept unique.\n"); } if (b->hseqbase != oid_nil && flag == FALSE) { GDKerror("BATkey: dense column must be unique.\n"); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -