📄 gdk_relop.c
字号:
#line 361 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" if (BAThdense(r) || (swap && BATtdense(l))) { /* batmergejoin can't handle void tail columns at all (fetchjoin is better anyway) */ return batfetchjoin(l, r, estimate, swap, FALSE); } if (swap && (((BAThordered(r) & 1) == 0) || ((BATtordered(l) & 1) && (BATcount(l) > BATcount(r))))) { /* reverse join if required (r not sorted) or if l is larger (quick jump through l with binary search) */ BAT *bn = mergejoin(BATmirror(r), BATmirror(l), NULL, NULL, estimate); return bn ? BATmirror(bn) : NULL; } return mergejoin(l, r, NULL, NULL, estimate);}BAT *BATmergejoin(BAT *l, BAT *r, size_t estimate){ /* allows swapping of left and right input for faster processing */ return batmergejoin(l, r, estimate, TRUE);}BAT *BATleftmergejoin(BAT *l, BAT *r, size_t estimate){ /* do not swap left and right input, and hence maintain order of left head in result */ return batmergejoin(l, r, estimate, FALSE);}#line 414 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"BAT *BAThashjoin(BAT *l, BAT *r, size_t estimate){ ptr v, nil = ATOMnilptr(r->htype); BUN p, q, w; int any; BAT *bn = NULL; #line 51 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" ERRORcheck(l == NULL, "BAThashjoin: invalid left operand"); ERRORcheck(r == NULL, "BAThashjoin: invalid right operand"); ERRORcheck(TYPEerror(l->ttype, r->htype), "BAThashjoin: type conflict\n");#line 422 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" #line 135 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" { size_t _estimate = estimate; #line 55 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" if ( _estimate == (size_t) oid_nil) { size_t _lcount = BATcount(l); size_t _rcount = BATcount(r); size_t _slices = 0; /* limit estimate with simple bounds first; only spend effort if the join result might be big */ if (JOIN_EQ == JOIN_EQ) { if (l->tkey) _estimate = r->hkey ? MIN(_rcount, _lcount) : _rcount; else if (r->hkey) _estimate = _lcount; } if ( _estimate == oid_nil) { size_t _heuristic = 3 * MIN(_lcount, _rcount); if (_heuristic <= (1 << SAMPLE_TRESHOLD_LOG)) _estimate = _heuristic; } if ( _estimate == oid_nil) { size_t _idx; for (_idx = _lcount; _idx > 0; _idx >>= 1) _slices++; } if (_slices > SAMPLE_TRESHOLD_LOG) { /* use cheapo sampling by taking a number of slices and joining those with the algo */ size_t _idx = 0, _tot = 0, _step, _lo, _avg, _sample, *_cnt; BAT *_tmp1 = l, *_tmp2, *_tmp3 = NULL; _step = _lcount / (_slices -= SAMPLE_TRESHOLD_LOG); _sample = _slices * SAMPLE_SLICE_SIZE; _cnt = (size_t *) alloca(_slices * sizeof(size_t)); for (_lo = 0; _idx < _slices; _lo += _step) { size_t _size = 0, _hi = _lo + SAMPLE_SLICE_SIZE; l = BATslice(_tmp1, _lo, _hi); /* slice keeps all parent properties */ if (l == NULL) return NULL; _tmp2 = BAThashjoin(l,r,oid_nil); /* BAThashjoin(l,r,oid_nil) = e.g. BATXjoin(l,r) */ if (_tmp2) { _size = BATcount(_tmp2); BBPreclaim(_tmp2); } _tot += (_cnt[_idx++] = _size); BBPreclaim(l); } /* do outlier detection on sampling results; this guards against skew */ if (JOIN_EQ == JOIN_EQ) { for (_avg = _tot / _slices, _idx = 0; _idx < _slices; _idx++) { size_t _diff = _cnt[_idx] - _avg; if (_avg > _cnt[_idx]) _diff = _avg - _cnt[_idx]; if (_diff > MAX(SAMPLE_SLICE_SIZE, _avg)) break; } if (_idx < _slices) { /* outliers detected, compute a real sample on at most 1% of the data */ _sample = MIN(_lcount / 100, (1 << SAMPLE_TRESHOLD_LOG) / 3); _tmp2 = BATsample(_tmp1, _sample); if (_tmp2) { _tmp3 = BATjoin(_tmp2, r, oid_nil); /* might be expensive */ if (_tmp3) { _tot = BATcount(_tmp3); BBPreclaim(_tmp3); } BBPreclaim(_tmp2); } if (_tmp3 == NULL) return NULL; } } /* overestimate always by 5% */ _estimate = (size_t) ((double) (((lng) _tot) * ((lng) _lcount)) / (0.95 * (double) _sample)); l = _tmp1; } else { _estimate = MAX(_lcount,_rcount); } }#line 138 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" bn = BATnew(BAThtype(l), BATttype(r), _estimate); if (bn == NULL) { return bn; } }#line 423 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" BATmmap_pin(r); if (BATprepareHash(r)) return NULL; switch (any = ATOMstorage(l->ttype)) {#ifndef NOEXPAND_CHR case TYPE_chr: #line 395 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" { int xx; hash_t yy; BATloopFast(l, p, q, xx) { v = BUNtloc(l,p); if (simple_EQ(v,nil,chr)) { continue; /* skip nil */ } HASHloop_chr(r, r->hhash, yy, v, w) { bunfastins(bn, BUNhead(l,p), BUNtail(r,w)); } } /* set sorted flags by hand, because we used BUNfastins() */ bn->hsorted = BAThordered(l); bn->tsorted = FALSE; break; }#line 431 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#endif#ifndef NOEXPAND_BTE case TYPE_bte: #line 395 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" { int xx; hash_t yy; BATloopFast(l, p, q, xx) { v = BUNtloc(l,p); if (simple_EQ(v,nil,bte)) { continue; /* skip nil */ } HASHloop_bte(r, r->hhash, yy, v, w) { bunfastins(bn, BUNhead(l,p), BUNtail(r,w)); } } /* set sorted flags by hand, because we used BUNfastins() */ bn->hsorted = BAThordered(l); bn->tsorted = FALSE; break; }#line 435 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#endif#ifndef NOEXPAND_SHT case TYPE_sht: #line 395 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" { int xx; hash_t yy; BATloopFast(l, p, q, xx) { v = BUNtloc(l,p); if (simple_EQ(v,nil,sht)) { continue; /* skip nil */ } HASHloop_sht(r, r->hhash, yy, v, w) { bunfastins(bn, BUNhead(l,p), BUNtail(r,w)); } } /* set sorted flags by hand, because we used BUNfastins() */ bn->hsorted = BAThordered(l); bn->tsorted = FALSE; break; }#line 439 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#endif#if !defined(NOEXPAND_INT) || !defined(NOEXPAND_FLT) case TYPE_int: case TYPE_flt: #line 395 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" { int xx; hash_t yy; BATloopFast(l, p, q, xx) { v = BUNtloc(l,p); if (simple_EQ(v,nil,int)) { continue; /* skip nil */ } HASHloop_int(r, r->hhash, yy, v, w) { bunfastins(bn, BUNhead(l,p), BUNtail(r,w)); } } /* set sorted flags by hand, because we used BUNfastins() */ bn->hsorted = BAThordered(l); bn->tsorted = FALSE; break; }#line 444 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#endif#if !defined(NOEXPAND_DBL) || !defined(NOEXPAND_LNG) case TYPE_dbl: case TYPE_lng: #line 395 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" { int xx; hash_t yy; BATloopFast(l, p, q, xx) { v = BUNtloc(l,p); if (simple_EQ(v,nil,lng)) { continue; /* skip nil */ } HASHloop_lng(r, r->hhash, yy, v, w) { bunfastins(bn, BUNhead(l,p), BUNtail(r,w)); } } /* set sorted flags by hand, because we used BUNfastins() */ bn->hsorted = BAThordered(l); bn->tsorted = FALSE; break; }#line 449 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#endif default: #line 395 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" { int xx; hash_t yy; BATloopFast(l, p, q, xx) { v = BUNtail(l,p); if (atom_EQ(v,nil,any)) { continue; /* skip nil */ } HASHloop_any(r, r->hhash, yy, v, w) { bunfastins(bn, BUNhead(l,p), BUNtail(r,w)); } } /* set sorted flags by hand, because we used BUNfastins() */ bn->hsorted = BAThordered(l); bn->tsorted = FALSE; break; }#line 452 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" } BATmmap_unpin(r); /* propagate alignment info */ bn->hsorted = BAThordered(l); if (BAThkey(r)) { if (BATcount(bn) == BATcount(l)) ALIGNsetH(bn, l); if (BAThkey(l)) BATkey(bn, TRUE); } ESTIDEBUG THRprintf(GDKout, "#BAThashjoin: actual resultsize: " SZFMT "\n", BATcount(bn)); return bn; bunins_failed: BBPreclaim(bn); BATmmap_unpin(r); return NULL;}#line 475 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 494 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#define HLATOMput(bn, dst) ATOMput(bn->htype, bn->hheap, dst, BUNhloc(l,l_cur))#define HVATOMput(bn, dst) ATOMput(bn->htype, bn->hheap, dst, BUNhvar(l,l_cur))#define TLATOMput(bn, dst) ATOMput(bn->ttype, bn->theap, dst, BUNtloc(r,r_cur))#define TVATOMput(bn, dst) ATOMput(bn->ttype, bn->theap, dst, BUNtvar(r,r_cur))#line 503 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 500 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#define Hchrput(bn,dst) *(chr*) (dst) = *(chr*) (BUNhloc(l,l_cur))#define Tchrput(bn,dst) *(chr*) (dst) = *(chr*) (BUNtloc(r,r_cur))#line 503 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 500 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#define Hbteput(bn,dst) *(bte*) (dst) = *(bte*) (BUNhloc(l,l_cur))#define Tbteput(bn,dst) *(bte*) (dst) = *(bte*) (BUNtloc(r,r_cur))#line 504 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 500 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#define Hshtput(bn,dst) *(sht*) (dst) = *(sht*) (BUNhloc(l,l_cur))#define Tshtput(bn,dst) *(sht*) (dst) = *(sht*) (BUNtloc(r,r_cur))#line 505 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 500 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#define Hintput(bn,dst) *(int*) (dst) = *(int*) (BUNhloc(l,l_cur))#define Tintput(bn,dst) *(int*) (dst) = *(int*) (BUNtloc(r,r_cur))#line 506 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 500 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#define Hlngput(bn,dst) *(lng*) (dst) = *(lng*) (BUNhloc(l,l_cur))#define Tlngput(bn,dst) *(lng*) (dst) = *(lng*) (BUNtloc(r,r_cur))#line 507 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 513 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 619 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 628 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 620 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" #line 514 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"static BAT *densefetchjoin_chr_chr(BAT *bn, BAT *l, BAT *r){ BUN l_cur, l_end, r_cur, dst; ssize_t offset; size_t base, xx; int yy, zz; BAT *ret = NULL; zz = BUNsize(bn); yy = BUNsize(r); dst = BUNfirst(bn); base = BUNindex(r, BUNfirst(r)); offset = (ssize_t) (base - r->hseqbase); r_cur = BUNptr(r, offset + *(oid *) BUNtail(l, BUNfirst(l))); ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: densefetchjoin(chr,chr);\n"); BATloopFast(l, l_cur, l_end, xx) { #line 510 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" Hchrput(bn, BUNhloc(bn, dst)); Tchrput(bn, BUNtloc(bn, dst));#line 533 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" r_cur += yy; dst += zz; } ret = bn; goto bunins_failed; bunins_failed: bn->batBuns->free = dst - bn->batBuns->base; BATsetcount(bn,bn->batBuns->free/zz); if (!ret) BBPreclaim(bn); return ret;}static BAT *orderedfetchjoin_chr_chr(BAT *bn, BAT *l, BAT *r){ BUN l_cur, l_end, r_cur, dst; ssize_t offset; size_t base, xx, yy; int zz; BAT *ret = NULL; zz = BUNsize(bn); dst = BUNfirst(bn); base = BUNindex(r, BUNfirst(r)); offset = (ssize_t) (base - r->hseqbase); ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: orderedfetchjoin(chr,chr);\n"); BATloopFast(l, l_cur, l_end, xx) { yy = offset + *(oid *) BUNtail(l, l_cur); r_cur = BUNptr(r, yy); #line 510 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" Hchrput(bn, BUNhloc(bn, dst)); Tchrput(bn, BUNtloc(bn, dst));#line 566 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" dst += zz; } ret = bn; goto bunins_failed; bunins_failed: bn->batBuns->free = dst - bn->batBuns->base; BATsetcount(bn,bn->batBuns->free/zz); if (!ret) BBPreclaim(bn); return ret;}static BAT *defaultfetchjoin_chr_chr(BAT *bn, BAT *l, BAT *r){ BUN l_cur, l_end, r_cur, dst; ssize_t offset; size_t xx, yy, base, end; int zz; BAT *ret = NULL; zz = BUNsize(bn);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -