📄 gdk_search.c
字号:
#line 260 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#include "monetdb_config.h"#include "gdk.h"hash_tHASHmask(size_t cnt){ size_t m = 8; /* minumum size */ while (m + m < cnt) m += m; if (m + m - cnt < 2 * (cnt - m)) m += m; return m;}static voidHASHclear(Hash *h){ hash_t *i, *j; for (i = h->hash, j = i + h->mask; i <= j; i++) { *i = HASH_MAX; }}Hash *HASHnew(Heap *hp, int tpe, size_t size, hash_t mask){ Hash *h = NULL; if (HEAPalloc(hp, mask + size, sizeof(hash_t)) < 0) return NULL; h = (Hash*)GDKmalloc(sizeof(Hash)); if (!h) return h; h->lim = size; h->mask = mask - 1; h->link = (hash_t *) hp->base; h->hash = h->link + h->lim; h->type = tpe; h->heap = hp; HASHclear(h); /* zero the mask */ return h;}#line 330 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"BAT *BAThash(BAT *b, size_t masksize){ gdk_set_lock(GDKhashLock[ABS(b->batCacheid) & BBPLOCKMASK], "BAThash"); if (b->hhash == NULL) { unsigned int xx, tpe = ATOMstorage(b->htype); size_t i, cnt = BATcount(b); hash_t mask; BUN p = BUNfirst(b), q = BUNlast(b), r; Hash *h = NULL; Heap *hp = NULL; str nme = BBP_physical(b->batCacheid); /* cnt = 0, hopefully there is a proper capacity from which * we can derive enough information */ if (!cnt) cnt = BATcapacity(b); if (b->htype == TYPE_void) { BATDEBUG GDKwarning("BAThash: creating hash-table on void column..\n"); tpe = TYPE_void; } /* determine hash mask size */ if (masksize > 0) { mask = HASHmask(masksize); /* p = first; so no dynamic scheme */ } else if (b->hkey) { mask = HASHmask(cnt); /* p = first; so no dynamic scheme */ } else { /* dynamic hash: we start with HASHmask(cnt/64); if there are too many collisions * we try HASHmask(cnt/16), then HASHmask(cnt/4), and finally HASHmask(cnt). */ mask = HASHmask(cnt >> 6); p += (cnt >> 2) * BUNsize(b); /* try out on first 25% of b */ if (p > q) p = q; } if (mask < 1024) mask = 1024; do { size_t nslots = mask >> 3; /* 1/2 full is too full */ r = BUNfirst(b); i = BUNindex(b, r); if (hp) { HEAPfree(hp); GDKfree(hp); } if (h) GDKfree(h); /* create the hash structures */ hp = (Heap *) GDKmalloc(sizeof(Heap)); hp->filename = GDKmalloc(strlen(nme) + 12); sprintf(hp->filename, "%s.%chash", nme, b->batCacheid > 0 ? 'h' : 't'); if ((h = HASHnew(hp, ATOMtype(b->htype), BATcapacity(b), (size_t) mask)) == NULL) { gdk_unset_lock(GDKhashLock[ABS(b->batCacheid) & BBPLOCKMASK], "BAThash"); GDKfree(hp->filename); GDKfree(hp); return NULL; } if (hp->storage & STORE_MMAP) MT_mmap_pin(hp->base, hp->maxsize); switch (tpe) {#ifndef NOEXPAND_CHR case TYPE_chr: #line 305 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); r < p; r += xx) { ptr v = BUNhloc(b, r); hash_t c = hash_chr(h, v); if (h->hash[c] == HASH_MAX && nslots-- == 0) { break; /* mask too full */ } h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 394 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif#ifndef NOEXPAND_BTE case TYPE_bte: #line 305 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); r < p; r += xx) { ptr v = BUNhloc(b, r); hash_t c = hash_bte(h, v); if (h->hash[c] == HASH_MAX && nslots-- == 0) { break; /* mask too full */ } h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 398 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif#ifndef NOEXPAND_SHT case TYPE_sht: #line 305 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); r < p; r += xx) { ptr v = BUNhloc(b, r); hash_t c = hash_sht(h, v); if (h->hash[c] == HASH_MAX && nslots-- == 0) { break; /* mask too full */ } h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 402 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif#if !defined(NOEXPAND_INT) || !defined(NOEXPAND_FLT) case TYPE_int: case TYPE_flt: #line 305 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); r < p; r += xx) { ptr v = BUNhloc(b, r); hash_t c = hash_int(h, v); if (h->hash[c] == HASH_MAX && nslots-- == 0) { break; /* mask too full */ } h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 407 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif#if !defined(NOEXPAND_DBL) || !defined(NOEXPAND_LNG) case TYPE_dbl: case TYPE_lng: #line 305 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); r < p; r += xx) { ptr v = BUNhloc(b, r); hash_t c = hash_lng(h, v); if (h->hash[c] == HASH_MAX && nslots-- == 0) { break; /* mask too full */ } h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 412 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif default: #line 305 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); r < p; r += xx) { ptr v = BUNhead(b, r); hash_t c = hash_any(h, v); if (h->hash[c] == HASH_MAX && nslots-- == 0) { break; /* mask too full */ } h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 415 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" } } while (r < p && mask < cnt && (mask <<= 2)); /* finish the hashtable with the current mask */ p = r; switch (tpe) {#ifndef NOEXPAND_CHR case TYPE_chr: #line 317 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); p < q; p += xx) { ptr v = BUNhloc(b, p); hash_t c = hash_chr(h, v); h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 424 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif#ifndef NOEXPAND_BTE case TYPE_bte: #line 317 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); p < q; p += xx) { ptr v = BUNhloc(b, p); hash_t c = hash_bte(h, v); h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 428 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif#ifndef NOEXPAND_SHT case TYPE_sht: #line 317 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); p < q; p += xx) { ptr v = BUNhloc(b, p); hash_t c = hash_sht(h, v); h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 432 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif#if !defined(NOEXPAND_INT) || !defined(NOEXPAND_FLT) case TYPE_int: case TYPE_flt: #line 317 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); p < q; p += xx) { ptr v = BUNhloc(b, p); hash_t c = hash_int(h, v); h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 437 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif#if !defined(NOEXPAND_DBL) || !defined(NOEXPAND_LNG) case TYPE_dbl: case TYPE_lng: #line 317 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); p < q; p += xx) { ptr v = BUNhloc(b, p); hash_t c = hash_lng(h, v); h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 442 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#endif default: #line 317 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" for (xx = BUNsize(b); p < q; p += xx) { ptr v = BUNhead(b, p); hash_t c = hash_any(h, v); h->link[i] = h->hash[c]; h->hash[c] = (hash_t) i++; } break;#line 445 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx" } b->hhash = BATmirror(b)->thash = h; } gdk_unset_lock(GDKhashLock[ABS(b->batCacheid) & BBPLOCKMASK], "BAThash"); return b;}#line 457 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"hash_tHASHprobe(Hash *h, ptr v){ switch (ATOMstorage(h->type)) {#ifndef NOEXPAND_CHR case TYPE_chr: return hash_chr(h, v);#endif#ifndef NOEXPAND_BTE case TYPE_bte: return hash_bte(h, v);#endif#ifndef NOEXPAND_SHT case TYPE_sht: return hash_sht(h, v);#endif#if !defined(NOEXPAND_INT) || !defined(NOEXPAND_FLT) case TYPE_int: case TYPE_flt: return hash_int(h, v);#endif#if !defined(NOEXPAND_DBL) || !defined(NOEXPAND_LNG) case TYPE_dbl: case TYPE_lng: return hash_lng(h, v);#endif default: return hash_any(h, v); }}BAT *HASHprint(BAT *b){#if SIZEOF_OID == SIZEOF_INT BAT *bn = BATnew(BAThtype(b), TYPE_int, BATcount(b));#else BAT *bn = BATnew(BAThtype(b), TYPE_lng, BATcount(b));#endif BUN p, q; if (bn == NULL) return NULL; if (!(b && b->hhash)) return 0; BATloop(b, p, q) { size_t i = HASHprobe(b->hhash, BUNhead(b, p)); bunfastins(bn, BUNhead(b, p), &i); } bn->hsorted = BAThordered(b); bn->tsorted = FALSE; return bn; bunins_failed: BBPreclaim(bn); return NULL;}hash_tHASHlist(Hash *h, size_t i){ hash_t j; hash_t c = 1; while ((j = h->link[i]) != HASH_MAX) { c++; i = j; if (i > h->lim) { stream_printf(GDKout, "hash inconsistency link " SZFMT "\n", i); break; } } return c;}voidHASHremove(BAT *b){ if (b->hhash) { BAT *p = BBP_cache(VIEWparent(b)); BAT *m = BATmirror(b); if (p && b->hhash == p->hhash) { GDKerror("HASHremove: view-bat cannot remove parent hash-table\n"); assert(0); } else { HEAPfree(b->hhash->heap); if (b->hhash->heap->storage & STORE_MMAP) HEAPdelete(b->hhash->heap, BBP_physical(b->batCacheid), (b->batCacheid > 0) ? "hhash" : "thash"); GDKfree(b->hhash->heap); GDKfree(b->hhash); } if (m) m->thash = NULL; b->hhash = NULL; }}voidHASHdestroy(BAT *b){ HASHremove(b); if (BATmirror(b)) HASHremove(BATmirror(b));}int HASHgonebad(BAT *b, ptr v){ Hash *h = b->hhash; if (h == NULL) return 1; /* no hash is bad hash? */ if (h->mask*2 < BATcount(b)) { int (*cmp)(ptr, ptr) = BATatoms[b->htype].atomCmp; hash_t cnt, hit, i = h->hash[HASHprobe(h, v)]; for(cnt=hit=1; i != HASH_MAX; i = h->link[i], cnt++) hit += ((*cmp)(v,BUNhead(b,BUNptr(b,i))) == 0); if (cnt/hit > 4) return 1; /* linked list too long */ /* in this case, linked lists are long but contain the desired values * such hash tables may be useful for locating all duplicates */ } return 0; /* a-ok */}#line 583 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#line 800 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"#line 659 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_search.mx"BUNSORTfndfirst_chr(BAT *b, ptr v){ BUN end = BUNfirst(b), cur = end; size_t lo = BUNindex(b,end), hi = BUNindex(b,BUNlast(b)); int cmp = 1, bunsize = BUNsize(b); if (lo >= hi || simple_CMP(BUNtloc(b, cur), v, chr)>=0) { /* shortcut: if BAT is empty or first (and hence all) tail * value is >= v, we're done. */ return cur; } while (lo < hi) { size_t mid = (lo+hi)>>1; cur = BUNptr(b, mid); cmp = simple_CMP(BUNtloc(b, cur), v, chr); if (cmp < 0) { lo = ++mid; cur += bunsize; } else if (cmp > 0) { hi = mid; } else { break; } } if (cmp == 0 && b->tkey == 0) { /* shift over multiple equals */ while((cur-=bunsize) >= end) { if (!simple_EQ(BUNtloc(b,cur),v,chr)) break; } cur += bunsize; } return cur;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -