📄 gdk_bat.mx
字号:
@' The contents of this file are subject to the MonetDB Public License@' Version 1.1 (the "License"); you may not use this file except in@' compliance with the License. You may obtain a copy of the License at@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html@'@' Software distributed under the License is distributed on an "AS IS"@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the@' License for the specific language governing rights and limitations@' under the License.@'@' The Original Code is the MonetDB Database System.@'@' The Initial Developer of the Original Code is CWI.@' Portions created by CWI are Copyright (C) 1997-2007 CWI.@' All Rights Reserved.@f gdk_bat@a M. L. Kersten, P. Boncz@* BAT ModuleIn this Chapter we describe the BAT implementation in more detail.The routines mentioned are primarily meant to simplify the libraryimplementation.@+ BAT ConstructionBATs are implemented in several blocks of memory, prepared for diskstorage and easy shipment over a network.The BAT starts with a descriptor, which indicates the required BATlibrary version and the BAT administration details. In particular, itdescribes the binary relationship maintained and the location offields required for storage.The general layout of the BAT in this implementation is as follows.Each BAT comes with a heap for the loc-size buns and, optionally,with heaps to manage the variable-sized data items of bothdimensions. The buns are assumed to be stored as loc-sizeobjects. This is essentially an array of structs to store theassociations. The size is determined at BAT creation time using anupper bound on the number of elements to be accommodated. In case ofoverflow, its storage space is extended automatically.The capacity of a BAT places an upper limit on the number of BUNs tobe stored initially. The actual space set aside may be quite large.Moreover, the size is aligned to int boundaries to speedup access andavoid some machine limitations.Initialization of the variable parts rely on type specific routinescalled atomHeap.@{ @h#ifndef _GDK_BAT_H_#define _GDK_BAT_H_#include "gdk.h"gdk_export void BATinit_idents(BAT *bn);gdk_export ssize_t void_replace_bat(BAT *b, BAT *u, bit force);gdk_export int void_inplace(BAT *b, oid id, ptr val, bit force);extern int default_ident( char *s ); extern oid MAXoid(BAT *i);#endif /* _GDK_BAT_H_ */@c#include "monetdb_config.h"#include "gdk.h"#ifdef ALIGN#undef ALIGN#endif#define ALIGN(n,b) ((b)?(b)*(1+(((n)-1)/(b))):n)#define ATOMneedheap(tpe) (BATatoms[tpe].atomHeap != NULL)char *BATstring_h = "h";char *BATstring_t = "t";int default_ident( char *s){ return ((s) == BATstring_h || (s) == BATstring_t);}voidBATinit_idents(BAT * bn) { bn->hident = (char*)BATstring_h; bn->tident = (char*)BATstring_t;}BAT *BATcreatedesc(int ht, int tt, int heapnames){ BAT *bn;@-Alloc space for the BAT and its dependent records.@c BATstore *bs = (BATstore *) GDKzalloc(sizeof(BATstore));@-assert needed in the kernel to get symbol eprintf resolved.Else modules using assert fail to load.@c assert(ht >= 0 && tt >= 0); bn = &bs->B; bn->H = &bs->H; bn->T = &bs->T; bn->P = &bs->P; bn->U = &bs->U;@-Fill in basic column info@c bn->htype = ht; bn->ttype = tt; bn->hkey = FALSE; bn->tkey = FALSE; bn->hsorted = ((bit) ATOMlinear(ht) ? GDK_SORTED : FALSE); bn->tsorted = ((bit) ATOMlinear(tt) ? GDK_SORTED : FALSE); bn->GDKversion = GDKLIBRARY; bn->batBuns = &bn->U->buns; bn->hident = (char*)BATstring_h; bn->tident = (char*)BATstring_t; bn->halign = OIDnew(2); bn->talign = bn->halign + 1; bn->hseqbase = (ht == TYPE_void) ? oid_nil : 0; bn->tseqbase = (tt == TYPE_void) ? oid_nil : 0; bn->batPersistence = TRANSIENT; bn->H->props = bn->T->props = NULL; bn->void_tid = -1; bn->void_cnt = 0; bn->void_seq1 = oid_nil; bn->void_seq2 = oid_nil;@-add to BBP@c BBPinsert(bn);@-fill in heap names, so HEAPallocs can resort to disk for very large writes.@c bn->batBuns->filename = NULL; bn->batMapbuns = STORE_UNSET; if (heapnames) { str nme = BBP_physical(bn->batCacheid); bn->batBuns->filename = (str) GDKmalloc(strlen(nme) + 12); GDKfilepath(bn->batBuns->filename, NULL, nme, "buns"); if (ATOMneedheap(ht)) { bn->hheap = (Heap*)GDKzalloc(sizeof(Heap)); bn->hheap->filename = (str) GDKmalloc(strlen(nme) + 12); GDKfilepath(bn->hheap->filename, NULL, nme, "hheap"); bn->batMaphheap = STORE_UNSET; } if (ATOMneedheap(tt)) { bn->theap = (Heap*)GDKzalloc(sizeof(Heap)); bn->theap->filename = (str) GDKmalloc(strlen(nme) + 12); GDKfilepath(bn->theap->filename, NULL, nme, "theap"); bn->batMaptheap = STORE_UNSET; } } bn->batDirty = TRUE; return bn;}intBATelmshift(BAT *b){ int sh, i = BUNsize(b) >> 1; for (sh = 0; i != 0; sh++) { i >>= 1; } if (BUNsize(b) != ((size_t) 1 << sh)) { return -1; } return sh;}voidBATsetdims(BAT *b){ if (ATOMalign(b->htype) >= ATOMalign(b->ttype)) { b->tloc = ALIGN(ATOMsize(b->htype), ATOMalign(b->htype)); b->hloc = 0; b->dims.bunwidth = b->tloc + ATOMsize(b->ttype); if (b->ttype == TYPE_void) b->tloc = 0; } else { b->hloc = ALIGN(ATOMsize(b->ttype), ATOMalign(b->htype)); b->tloc = 0; b->dims.bunwidth = b->hloc + ATOMsize(b->htype); if (b->htype == TYPE_void) b->hloc = 0; } if (b->dims.bunwidth == 0) b->dims.bunwidth = 1; b->dims.bunwidth = ALIGN(b->dims.bunwidth, ATOMalign(b->htype)); b->dims.bunwidth = ALIGN(b->dims.bunwidth, ATOMalign(b->ttype)); b->dims.bunshift = BATelmshift(b); b->hvarsized = BATatoms[b->htype].varsized; b->tvarsized = BATatoms[b->ttype].varsized;}@- BAT allocation Allocate BUN heap and variable-size atomheaps (see e.g. strHeap).We now initialize new BATs with their heapname such that the modifiedHEAPalloc/HEAPextend primitives can possibly use memory mapped filesas temporary heap storage.In case of huge bats, we want HEAPalloc to write a file to disk, and memory mapit. To make this possible, we must provide it with filenames.@cBAT *BATnewstorage(int ht, int tt, size_t cap){ BAT *bn, *recycled; int vv = (!ht && !tt); bn = recycled = BBPrecycle(ht,tt,cap); if (!bn) bn = BATcreatedesc(ht, tt, (ht||tt)); if (bn == NULL) return NULL; if (!recycled) { BATsetdims(bn); /* alloc the main heaps */ if ((ht||tt) && HEAPalloc(bn->batBuns, cap, bn->dims.bunwidth) < 0) { return NULL; } if (ATOMheap(ht, bn->hheap, cap) < 0) { HEAPfree(bn->batBuns); GDKfree(bn->hheap); if (bn->theap) GDKfree(bn->theap); return NULL; } if (ATOMheap(tt, bn->theap, cap) < 0) { HEAPfree(bn->batBuns); if (bn->hheap) { HEAPfree(bn->hheap); GDKfree(bn->hheap); } GDKfree(bn->theap); return NULL; } DELTAinit(bn); BBPcacheit(bn); } if (vv) { bn->batBuns->base = (char*)1; DELTAinit(bn); } return bn;}BAT *BATnew(int ht, int tt, size_t cap){ ERRORcheck((ht < 0) || (ht > GDKatomcnt), "BATnew:ht error\n"); ERRORcheck((tt < 0) || (tt > GDKatomcnt), "BATnew:tt error\n"); /* determine the minimum size. BATTINY bats will be cached */ cap = MAX(BATTINY, cap); if (ht == TYPE_void && tt == TYPE_void) cap = 1; return BATnewstorage(ht, tt, cap);}@-The routine BATclone creates a bat with the same types as b.@cBAT *BATclone(BAT *b, size_t cap){ BAT *c = BATnew(b->htype, b->ttype, cap); if (c->htype == TYPE_void && b->hseqbase != oid_nil) BATseqbase(c, b->hseqbase); if (c->ttype == TYPE_void && b->tseqbase != oid_nil) BATseqbase(BATmirror(c), b->tseqbase); return c;}@- If the BAT runs out of storage for BUNS it will reallocate space.For memory mapped BATs we simple extend the administration afterhaving an assurance that the BAT still can be safely stored away.@-Most BAT operations use a BAT to assemble the result. In several casesit is rather difficult to give a precise estimate of the required space.The routine @%BATguess@ is used internally for this purpose.It balances the cost of small BATs with their probability of occurrence.Small results BATs are more likely then 100M BATs.Likewise, the routine @%BATgrows@ provides a heuristic to enlarge the space.@csize_tBATguess(BAT *b){ size_t newcap; BATcheck(b, "BATguess"); newcap = BATcount(b); if (newcap < 10 * BATTINY) return newcap; if (newcap < 50 * BATTINY) return newcap / 2; if (newcap < 100 * BATTINY) return newcap / 10; return newcap / 100;}size_tBATgrows(BAT *b){ size_t oldcap, newcap; BATcheck(b, "BATgrows"); newcap = oldcap = BATcapacity(b); if (newcap < BATTINY) newcap = 2 * BATTINY; else if (newcap < 10 * BATTINY) newcap = 4 * newcap; else if (newcap < 50 * BATTINY) newcap = 2 * newcap; else newcap = (size_t) ((double) newcap * BATMARGIN); if (newcap == oldcap) newcap += 10; /* if we can extend in reserved space, do not miss the opportunity */ if (b->batBuns->size + BUNsize(b) <= b->batBuns->maxsize) { size_t rescap = oldcap + (b->batBuns->maxsize - b->batBuns->size) / BUNsize(b); newcap = MIN(newcap, rescap); } return newcap;}@-The routine should ensure that the BAT keeps its locationin the BAT buffer.Overflow in the other heaps are dealt with in the atom routines.Here we merely copy their references into the new administration space.@cBAT *BATextend(BAT *b, size_t newcap){ size_t heap_size; int ret; assert(b->htype || b->ttype); BATcheck(b, "BATextend");@- The main issue is to properly predict the new BAT size.storage overflow. The assumption taken is that capacityoverflow is rare. It is changed only when the positionof the next available BUN surpasses the free area marker.Be aware that the newcap should be greater than the oldvalue, otherwise you may easily corrupt the administration ofmalloc.@c if (newcap <= BATbuncount(b)) { return b; } heap_size = BUNsize(b) * newcap; DELTAsave(b); ret = HEAPextend(b->batBuns, heap_size); DELTAload(b); if (ret < 0) return NULL; HASHdestroy(b); return b;}@} @+ BAT destruction@-BATclear quickly removes all elements from a BAT. It must respect thetransaction rules; so stable elements must be moved to the "deleted" section of the BAT (they cannot be fully deleted yet). For the elementsthat really disappear, we must free heapspace and unfix the atoms ifthey have fix/unfix handles. As an optimization, in the case of no stableelements, we quickly empty the heaps by copying a standard small empty image over them.@c@{BAT *BATclear(BAT *b){ int bs; BUN p, q; int voidbat; BAT *bm; BATcheck(b, "BATclear"); bs = BUNsize(b); voidbat = 0; bm = BATmirror(b); if (BAThdense(b) && b->htype == TYPE_void) { voidbat = 1; } if (BATtdense(b) && b->ttype == TYPE_void) { voidbat = 1; } /* small BAT: delete all elements by hand */ if (!voidbat && BATcount(b) < 20) { BATloopDEL(b, p, q, bs) { p = BUNdelete(b, p, FALSE); } return b; } /* kill all search accelerators */ if (b->hhash) { HASHremove(b); } if (b->thash) { HASHremove(bm); } /* we must dispose of all inserted atoms */ if (b->batDeleted == b->batInserted && BATatoms[b->htype].atomDel == NULL && BATatoms[b->ttype].atomDel == NULL) { Heap hh, th; /* no stable elements: we do a quick heap clean */ /* need to clean heap which keep data even though the BUNs got removed. This means reinitialize when free > 0 */ size_t cap = 0; hh.filename = th.filename = NULL; if (b->hheap && b->hheap->free > 0) { if (ATOMheap(b->htype, &hh, cap) < 0) return NULL; } if (b->theap && b->theap->free > 0 && ATOMheap(b->ttype, &th, cap) < 0) { if (b->hheap && b->hheap->free > 0) HEAPfree(&hh); return NULL; } if (b->hheap && b->hheap->free > 0) { HEAPfree(b->hheap); *b->hheap = hh; } if (b->theap && b->theap->free > 0) { HEAPfree(b->theap); *b->theap = th; } } else { /* do heap-delete of all inserted atoms */ void (*hatmdel)(Heap*,var_t*) = BATatoms[b->htype].atomDel; void (*tatmdel)(Heap*,var_t*) = BATatoms[b->ttype].atomDel; if (hatmdel || tatmdel) { for(p = b->batInserted, q = BUNlast(b); p < q; p += bs) { if (hatmdel)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -