📄 gdk_batop.mx
字号:
@' The contents of this file are subject to the MonetDB Public License@' Version 1.1 (the "License"); you may not use this file except in@' compliance with the License. You may obtain a copy of the License at@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html@'@' Software distributed under the License is distributed on an "AS IS"@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the@' License for the specific language governing rights and limitations@' under the License.@'@' The Original Code is the MonetDB Database System.@'@' The Initial Developer of the Original Code is CWI.@' Portions created by CWI are Copyright (C) 1997-2007 CWI.@' All Rights Reserved.@f gdk_batop@a M. L. Kersten, P. Boncz, S. Manegold@* Common BAT OperationsThis module contains the following BAT algebra operations:@itemize @item bulk updatesmulti-insert, multi-delete, multi-replace@item common aggregatesmin, max and histogram@item oid column manipulationsmark, number and split.@item bat selectionsselect, slice, sample, fragment and restrict.Note: non hash-/index-supported scanselects have been "outsourced"to gdk_scanselect.mx as the fully expanded code grows too large to be(conveniently) compiled in a single file.@item bat partitioninghash partition, range partitioning@end itemizeWe factor out all possible overhead by inlining code.This includes the macros BUNhead and BUNtail,which do a test to see whether the atom resides in the buns or in avariable storage heap. The updateloop(dstbat, srcbat, operation) macroinvokes operation(dstbat, BUNhead(srcbat), BUNtail(srcbat)) on all bunsof the srcbat, but testing only once where they reside.@{@c#include "monetdb_config.h"#include "gdk.h"#include "gdk_scanselect.h"@= updateloop{ BUN p1, p2; int xx; BATloopFast(@2, p1, p2, xx) { @3(@1, BUNhead(@2, p1), BUNtail(@2, p1)); }}@}@+ BAT insert/delete/replaceThe content of a BAT can be appended to (removed from) another using@%BATins@ (@%BATdel@).@{@c#define bunins(b,h,t) if (BUNins(b,h,t,force) == NULL) return NULL;BAT *BATins(BAT *b, BAT *n, bit force){ BAT *tmp = NULL, *res = NULL; ssize_t needed = 0; int fastpath = 0; int countonly = (b->htype == TYPE_void && b->ttype == TYPE_void); if (b == NULL || n == NULL || BATcount(n) == 0) { return b; } ALIGNins(b, "BATins", force); BATcompatible(b, n); if (b->htype != TYPE_void && (b->ttype == TYPE_void || (!b->hhash && b->thash && ATOMstorage(b->ttype) == TYPE_int))) { /* OIDDEPEND */ return BATmirror(BATins(BATmirror(b), BATmirror(n), force)); } if (b->htype == TYPE_void && b->hseqbase != oid_nil) { oid t = *(oid *) BUNhead(b, BUNlast(b) - BUNsize(b)); oid h = *(oid *) BUNhead(n, BUNfirst(n)); if (BATcount(b) == 0 && (BATcount(n) == 1 || BAThdense(n))) { b->hseqbase = h; BATmirror(b)->tseqbase = h; } else if ((t + 1) != h || !BAThdense(n)) { b = BATmaterialize(b, BATcount(b) + BATcount(n)); countonly=0; if (b == NULL) return NULL; } } if (b->ttype == TYPE_void && b->tseqbase != oid_nil) { oid t = *(oid *) BUNtail(b, BUNlast(b) - BUNsize(b)); oid h = *(oid *) BUNtail(n, BUNfirst(n)); if (BATcount(b) == 0 && (BATcount(n) == 1 || BATtdense(n))) { b->tseqbase = h; BATmirror(b)->hseqbase = h; } else if ((t + 1) != h || !BATtdense(n)) { b = BATmaterializet(b, BATcount(b) + BATcount(n)); countonly=0; if (b == NULL) return NULL; } } if (b->thash == NULL && b->batSet == 0 && (b->tkey & BOUND2BTRUE) == 0 && ((b->hkey & BOUND2BTRUE) == 0 || n->hkey) && (b->hhash == NULL || ATOMstorage(b->htype) == TYPE_int)) { if ((b->hkey & BOUND2BTRUE)) { tmp = n = BATkdiff(n, b); if (n == NULL) return NULL; } fastpath = 1; } needed = BATcount(n) - (((b)->batBuns->size - (b)->batBuns->free) / BUNsize(b)); if (needed > 0) { /* if needed exceeds a normal growth extend just with needed */ size_t ncap = BATcapacity(b) + needed; size_t grows = BATgrows(b); if (ncap > grows) grows = ncap; if (BATextend(b, grows) == NULL) fastpath = 0; } if (b->hhash == NULL && b->hkey & BOUND2BTRUE) { BAThash(b, BATcount(b) + BATcount(n)); } if (b->thash == NULL && b->tkey & BOUND2BTRUE) { BAThash(BATmirror(b), BATcount(b) + BATcount(n)); } b->batDirty = 1; if (fastpath) { BUN p, q, r = BUNlast(b); int xx, yy = BUNsize(b); if (BATcount(b) == 0) { ALIGNset(b, n); } else if (BATcount(n)) { BUN last = BUNlast(b) - BUNsize(b); size_t idx = BUNindex(b, BUNlast(b)); xx = ATOMcmp(b->htype, BUNhead(n, BUNfirst(n)), BUNhead(b, last)); if ((BAThordered(b) & 1) && ((BAThordered(n) & 1) == 0 || xx < 0)) { b->hsorted = FALSE; b->H->nosorted = idx; if (b->hdense & 1) { b->hdense = FALSE; b->H->nodense = idx; } } if ((BAThordered(b) == (bit)GDK_SORTED_REV) && ((BAThordered(n) != (bit)GDK_SORTED_REV) || xx > 0)) { b->hsorted = FALSE; b->H->nosorted_rev = idx; } if (b->hkey && (b->hkey & BOUND2BTRUE) == 0 && ((BAThordered(b) & 1) == 0 || n->hkey == 0 || xx == 0)) { /* StM: GDK_SORTED_REV ? */ BATkey(b, FALSE); } if (b->htype != TYPE_void && (b->hsorted & b->hdense & 1) && (BAThdense(n) == 0 || *(oid *) BUNhloc(b, last) != 1 + *(oid *) BUNhead(n, BUNfirst(n)))) { b->hdense = FALSE; b->H->nodense = idx; } xx = ATOMcmp(b->ttype, BUNtail(n, BUNfirst(n)), BUNtail(b, last)); if ((BATtordered(b) & 1) && ((BATtordered(n) & 1) == 0 || xx < 0)) { b->tsorted = FALSE; b->T->nosorted = idx; if (b->tdense & 1) { b->tdense = FALSE; b->T->nodense = idx; } } if ((BATtordered(b) == (bit)GDK_SORTED_REV) && ((BATtordered(n) != (bit)GDK_SORTED_REV) || xx > 0)) { b->tsorted = FALSE; b->T->nosorted_rev = idx; } if ((BATtordered(b) & 1) == 0 || n->tkey == 0 || xx == 0) { /* StM: GDK_SORTED_REV ? */ if (b->tkey) BATkey(BATmirror(b), FALSE); } if (b->ttype != TYPE_void && (b->tsorted & b->tdense & 1) && (BATtdense(n) == 0 || *(oid *) BUNtloc(b, last) != 1 + *(oid *) BUNtail(n, BUNfirst(n)))) { b->tdense = FALSE; b->T->nodense = idx; } } if (countonly) { b->batBuns->free += n->batCount; BATsetcount(b, b->batCount + n->batCount); } else if (b->htype == TYPE_void) { BATloopFast(n, p, q, xx) { bunfastins_nocheck(b, r, NULL, BUNtail(n, p), yy); r += yy; } } else if (b->hhash) { size_t i = BUNindex(b, BUNlast(b)); BATloopFast(n, p, q, xx) { ptr v = BUNhead(n, p); bunfastins_nocheck(b, r, v, BUNtail(n, p), yy); HASHins_int(b->hhash, (hash_t) i, v); r += yy; i++; } } else { BATloopFast(n, p, q, xx) { bunfastins_nocheck(b, r, BUNhead(n, p), BUNtail(n, p), yy); r += yy; } } } else { @:updateloop(b,n,bunins)@ } res = b; bunins_failed: if (tmp) BBPreclaim(tmp); return res;}BAT*BATappend(BAT *b, BAT *n, bit force){ ssize_t sz = BATcount(n); ssize_t needed = 0; int fastpath = 1; int countonly = (b->htype == TYPE_void && b->ttype == TYPE_void); if (b == NULL || n == NULL || sz == 0) { return b; } ALIGNapp(b, "BATappend", force); BATcompatible(b, n); b->batDirty = 1; needed = sz - (((b)->batBuns->size - (b)->batBuns->free) / BUNsize(b)); if (!countonly && needed > 0) { /* if needed exceeds a normal growth extend just with needed */ size_t ncap = BATcapacity(b) + needed; size_t grows = BATgrows(b); if (ncap > grows) grows = ncap; if (BATextend(b, grows) == NULL) fastpath = 0; } /* append two void,void bats */ if (b->ttype == TYPE_void && BATtdense(b)) { oid f = n->tseqbase; if (b->ttype != TYPE_void) f = *(oid*)BUNtloc(n, BUNfirst(n)); if (BATcount(b) == 0 && f != oid_nil) BATseqbase(BATmirror(b), f); if (BATtdense(n) && BATcount(b) + b->tseqbase == f) { if (b->htype != TYPE_void) { BUN r = BUNlast(b); int yy = BUNsize(b); oid m; f = MAXoid(b); f++; m = f + sz; for(; f<m; f++, r+= yy) { bunfastins_nocheck(b, r, (ptr)&f, NULL, yy); } } else { sz += BATcount(b); BATsetcount(b, sz); b->batBuns->free = sz*BUNsize(b); } return b; } /* we need to materialize the tail */ BATmaterializet(b, BATcount(b)+sz); } /* a hash is useless for void bats */ if (b->hhash) HASHremove(b); if (b->thash && (2* b->thash->mask) < (BATcount(b)+sz)) { HASHremove(BATmirror(b)); } if (!(b->thash == NULL && b->batSet == 0 && (b->tkey & BOUND2BTRUE) == 0 && (b->hhash == NULL || ATOMstorage(b->htype) == ATOMstorage(TYPE_oid)))) fastpath = 0; if (fastpath) { BUN p, q, r = BUNlast(b); int xx, yy = BUNsize(b); if (BATcount(b) == 0) { BAT *bm = BATmirror(b); ALIGNsetH(bm, BATmirror(n)); b->tseqbase = bm->hseqbase = n->tseqbase; if (n->tdense && n->ttype == TYPE_oid) { b->tseqbase = bm->hseqbase = *(oid*)BUNtail(n, BUNfirst(n)); } b->tdense = bm->hdense = n->tdense; b->T->nodense = bm->H->nodense = n->T->nodense; b->tkey = bm->hkey |= (n->tkey & TRUE); b->T->nokey[0] = bm->H->nokey[0] = n->T->nokey[0]; b->T->nokey[1] = bm->H->nokey[1] = n->T->nokey[1]; } else { BUN last = BUNlast(b) - BUNsize(b); size_t idx = BUNindex(b, BUNlast(b)); xx = ATOMcmp(b->ttype, BUNtail(n, BUNfirst(n)), BUNtail(b, last)); if ((BATtordered(b) & 1) && ((BATtordered(n) & 1) == 0 || xx < 0)) { b->tsorted = FALSE; b->T->nosorted = idx; if (b->tdense & 1) { b->tdense = FALSE; b->T->nodense = idx; } } if ((BATtordered(b) == (bit)GDK_SORTED_REV) && ((BATtordered(n) != (bit)GDK_SORTED_REV) || xx > 0)) { b->tsorted = FALSE; b->T->nosorted_rev = idx; } if ((BATtordered(b) & 1) == 0 || n->tkey == 0 || xx == 0) { /* StM: GDK_SORTED_REV ? */ if (b->tkey) BATkey(BATmirror(b), FALSE); } if (b->ttype != TYPE_void && (b->tsorted & b->tdense & 1) && (BATtdense(n) == 0 || *(oid *) BUNtloc(b, last) != 1 + *(oid *) BUNtail(n, BUNfirst(n)))) { b->tdense = FALSE; b->T->nodense = idx; } } if (b->htype == TYPE_void) { BATloopFast(n, p, q, xx) { bunfastins_nocheck(b, r, NULL, BUNtail(n, p), yy); r += yy; } } else { oid o = MAXoid(b); o++; BATloopFast(n, p, q, xx) { bunfastins_nocheck(b, r, &o, BUNtail(n, p), yy); o++; r += yy; } } } else { BUN p, q; int xx; size_t i = BUNindex(b, BUNlast(b)); BAT *bm = BBP_cache(-b->batCacheid); if (b->tkey & BOUND2BTRUE) { b->tdense = b->tsorted = 0; if (b->hseqbase != oid_nil) { oid h = b->hseqbase + i; BATloopFast(n, p, q, xx) { ptr t = BUNtail(n, p); if (!BUNfnd(bm, t)) { bunfastins(b ,&h, t); if (b->thash) { HASHins(bm, (hash_t) i, t); } h++; i++; } } } else { oid on = oid_nil; BATloopFast(n, p, q, xx) { ptr t = BUNtail(n, p); if (!BUNfnd(bm, t)) { bunfastins(b ,&on, t); if (b->thash) { HASHins(bm, (hash_t) i, t); } i++; } } BATkey(b, FALSE); b->hdense = b->hsorted = 0; } } else if (b->hseqbase != oid_nil) { oid h = b->hseqbase + BATcount(b); BATloopFast(n, p, q, xx) { ptr t = BUNtail(n, p); bunfastins(b ,&h, t); if (b->thash) { HASHins(bm, (hash_t) i, t); } i++; h++; } BATkey(BATmirror(b), FALSE); b->tdense = b->tsorted = 0; } else { oid on = oid_nil; BATloopFast(n, p, q, xx) { ptr t = BUNtail(n, p); bunfastins(b ,&on, t); if (b->thash) { HASHins(bm, (hash_t) i, t); } i++; } BATkey(b, FALSE); BATkey(BATmirror(b), FALSE); b->hdense = b->hsorted = 0; b->tdense = b->tsorted = 0; } } return b;bunins_failed: return NULL;}#define bundel(b,h,t) if (BUNdel(b,h,t,force) == NULL) return NULL;BAT *BATdel(BAT *b, BAT *n, bit force){ ERRORcheck(b == NULL, "set:BAT required\n"); ERRORcheck(n == NULL, "set:BAT required\n"); if (BATcount(n) == 0) { return b; } ALIGNdel(b, "BATdel", force); TYPEcheck(b->htype, n->htype); TYPEcheck(b->ttype, n->ttype); @:updateloop(b,n,bundel)@ return b;}#define bundelhead(b,h,t) if (BUNdelHead(b,h,force) == NULL) return NULL;BAT *BATdelHead(BAT *b, BAT *n, bit force){ ERRORcheck(b == NULL, "set:BAT required\n"); ERRORcheck(n == NULL, "set:BAT required\n"); if (BATcount(n) == 0) { return b; } ALIGNdel(b, "BATdelHead", force); TYPEcheck(b->htype, n->htype); @:updateloop(b,n,bundelhead)@ return b;}@-The last in this series is a BATreplace, which replaces all thebuns mentioned.@c#define bunreplace(b,h,t) if (BUNreplace(b,h,0) == NULL) return NULL;BAT *BATreplace(BAT *b, BAT *n, bit force){ if (b == NULL || n == NULL || BATcount(n) == 0) { return b; } BATcompatible(b, n);#define BUNreplace_force(a,b,c) BUNreplace(a,b,c,force) @:updateloop(b,n,BUNreplace_force)@ return b;}@}@+ BAT SelectionsThe BAT selectors are among the most heavily used operators.Their efficient implementation is therefore mandatory.The interface supports seven operations: @%BATslice@, @%BATselect@,@%BATfragment@, @%BATsample@, @%BATproject@, @%BATrestrict@.@- BAT sliceThis function returns a horizontal slice from a BAT. It optimizesexecution by avoiding to copy when the BAT is memory mapped (in thiscase, an independent submap is created) or else when it is read-only,then a VIEW bat is created as a result.If a new copy has to be created, this function takes care to preservevoid-columns (in this case, the seqbase has to be recomputed in the result).Note that the BATslice() is used indirectly as well as a specialcase for BATselect (range selection on sorted column), BATrangesplit(fragmentation on sorted column) and BATsemijoin (when two dense columnsare semijoined).NOTE new semantics, the selected range is excluding the high value.@{@cBAT *BATslice(BAT *b, size_t l, size_t h){ size_t low = l; int xx; BAT *bn; BATcheck(b, "BATslice"); if (h > BATcount(b)) h = BATcount(b); if (h < l) h = l; l += BUNindex(b, BUNfirst(b)); h += BUNindex(b, BUNfirst(b));@-If the source BAT is readonly, then we can obtain a VIEWthat just reuses the memory of the source.@c if (BATrestricted(b) == BAT_READ) { bn = VIEWcreate_(b, TRUE); bn->batBuns->base = BUNptr(b, l); bn->batFirst = bn->batDeleted = bn->batInserted = bn->batBuns->base; bn->batBuns->maxsize = bn->batBuns->size = bn->batBuns->free = BUNptr(b, h) - bn->batBuns->base; BATsetcount(bn, bn->batBuns->free/BUNsize(bn));@-We have to do it: create a new BAT and put everything into it.@c } else { BUN p = BUNptr(b, l); BUN q = BUNptr(b, h); bn = BATnew(b->htype, b->ttype, h - l); if (bn == NULL) { return bn; } if (b->htype != b->ttype || b->htype != TYPE_void) { for (xx = BUNsize(b); p < q; p += xx) { bunfastins(bn, BUNhead(b, p), BUNtail(b, p)); } } else { BATsetcount(bn, h-l); bn->batBuns->free = (h-l); } } bn->hsorted = BAThordered(b); bn->tsorted = BATtordered(b); BATkey(bn, BAThkey(b)); BATkey(BATmirror(bn), BATtkey(b)); if (BAThdense(b)) { bn->hdense = TRUE; BATseqbase(bn, b->hseqbase + low); } else if (bn->hkey && bn->htype == TYPE_oid) { if (BATcount(bn) == 0) { bn->hdense = TRUE; BATseqbase(bn, 0); } else if (* (oid *) BUNhloc(bn, BUNfirst(bn)) + BATcount(bn) - 1 == * (oid *) BUNhloc(bn, BUNlast(bn) - BUNsize(bn))) { bn->hdense = TRUE; BATseqbase(bn, * (oid *) BUNhloc(bn, BUNfirst(bn))); } } if (BATtdense(b)) { bn->tdense = TRUE; BATseqbase(BATmirror(bn), b->tseqbase + low); } else if (bn->tkey && bn->ttype == TYPE_oid) { if (BATcount(bn) == 0) { bn->tdense = TRUE; BATseqbase(BATmirror(bn), 0); } else if (* (oid *) BUNtloc(bn, BUNfirst(bn)) + BATcount(bn) - 1 == * (oid *) BUNtloc(bn, BUNlast(bn) - BUNsize(bn))) { bn->tdense = TRUE; BATseqbase(BATmirror(bn), * (oid *) BUNtloc(bn, BUNfirst(bn))); } } return bn; bunins_failed: BBPreclaim(bn); return NULL;}@- Value SelectionsThe string search is optimized for the degenerated casethat th = tl, and double elimination in the string heap.We allow value selections on the nil atom. This is formallynot correct, as in MIL (nil = nil) != true. However, we doneed an implementation for selecting nil (in MIL, this is donethrough is the "isnil" predicate). So we implement it here.@= valselect HASHloop@2(b, b->hhash, i, tl, p) { if (q < r) bunfastins_nocheck(bn, q, BUNt@1(b, p), tl, bs); q += bs; }@= stringselect if (strElimDoubles(b->hheap)) { size_t j; HASHloop_fstr(b, b->hhash, i, j, tl) { p = BUNptr(b, i); if (q < r)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -