📄 gdk_relop.c
字号:
#line 26 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#include "monetdb_config.h"#include "gdk.h"#define SAMPLE_TRESHOLD_LOG 17#define SAMPLE_SLICE_SIZE 1000#line 32 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 223 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"/* serves both normal equi-join (nil_on_miss==NULL) and outerjoin (nil_on_miss=nil) */static BAT *mergejoin(BAT *l, BAT *r, BAT *bn, ptr nil_on_miss, size_t estimate){ ptr nil = ATOMnilptr(r->htype); int r_scan = -1; /* no scanning in r */ BAT *rr = BATmirror(r); BUN l_last, r_last; /* last BUN of the BAT */ BUN l_start, r_start; /* start of current chunk */ BUN l_end, r_end; /* end of current chunk */ int r_next = BUNsize(r); int l_next = BUNsize(l); int l_key = l->tkey; int r_key = r->hkey; BUN r_cur, r_lim; int loc, var, hasnils = 0; if (BATtordered(l) & 1) { size_t i; int logr = 4; /* 4*log2(r.count) = estimation of the cost of binary search in units of scan comparisons */ for (i = BATcount(r); i > 0; logr++) i >>= 1; r_scan = logr * BUNsize(r); /* opportunistic scan window in r */ } if (!(BAThordered(r) & 1)) { GDKerror("mergejoin: right input is not sorted.\n"); return NULL; } if (bn == NULL) { #line 135 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" { size_t _estimate = estimate; #line 55 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" if ( _estimate == (size_t) oid_nil) { size_t _lcount = BATcount(l); size_t _rcount = BATcount(r); size_t _slices = 0; /* limit estimate with simple bounds first; only spend effort if the join result might be big */ if (JOIN_EQ == JOIN_EQ) { if (l->tkey) _estimate = r->hkey ? MIN(_rcount, _lcount) : _rcount; else if (r->hkey) _estimate = _lcount; } if ( _estimate == oid_nil) { size_t _heuristic = 3 * MIN(_lcount, _rcount); if (_heuristic <= (1 << SAMPLE_TRESHOLD_LOG)) _estimate = _heuristic; } if ( _estimate == oid_nil) { size_t _idx; for (_idx = _lcount; _idx > 0; _idx >>= 1) _slices++; } if (_slices > SAMPLE_TRESHOLD_LOG) { /* use cheapo sampling by taking a number of slices and joining those with the algo */ size_t _idx = 0, _tot = 0, _step, _lo, _avg, _sample, *_cnt; BAT *_tmp1 = l, *_tmp2, *_tmp3 = NULL; _step = _lcount / (_slices -= SAMPLE_TRESHOLD_LOG); _sample = _slices * SAMPLE_SLICE_SIZE; _cnt = (size_t *) alloca(_slices * sizeof(size_t)); for (_lo = 0; _idx < _slices; _lo += _step) { size_t _size = 0, _hi = _lo + SAMPLE_SLICE_SIZE; l = BATslice(_tmp1, _lo, _hi); /* slice keeps all parent properties */ if (l == NULL) return NULL; _tmp2 = mergejoin(l,r,NULL,nil_on_miss,oid_nil); /* mergejoin(l,r,NULL,nil_on_miss,oid_nil) = e.g. BATXjoin(l,r) */ if (_tmp2) { _size = BATcount(_tmp2); BBPreclaim(_tmp2); } _tot += (_cnt[_idx++] = _size); BBPreclaim(l); } /* do outlier detection on sampling results; this guards against skew */ if (JOIN_EQ == JOIN_EQ) { for (_avg = _tot / _slices, _idx = 0; _idx < _slices; _idx++) { size_t _diff = _cnt[_idx] - _avg; if (_avg > _cnt[_idx]) _diff = _avg - _cnt[_idx]; if (_diff > MAX(SAMPLE_SLICE_SIZE, _avg)) break; } if (_idx < _slices) { /* outliers detected, compute a real sample on at most 1% of the data */ _sample = MIN(_lcount / 100, (1 << SAMPLE_TRESHOLD_LOG) / 3); _tmp2 = BATsample(_tmp1, _sample); if (_tmp2) { _tmp3 = BATjoin(_tmp2, r, oid_nil); /* might be expensive */ if (_tmp3) { _tot = BATcount(_tmp3); BBPreclaim(_tmp3); } BBPreclaim(_tmp2); } if (_tmp3 == NULL) return NULL; } } /* overestimate always by 5% */ _estimate = (size_t) ((double) (((lng) _tot) * ((lng) _lcount)) / (0.95 * (double) _sample)); l = _tmp1; } else { _estimate = MAX(_lcount,_rcount); } }#line 138 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" bn = BATnew(BAThtype(l), BATttype(r), _estimate); if (bn == NULL) { return bn; } }#line 254 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" } /* the algorithm */ loc = ATOMstorage(l->ttype); l_last = BUNlast(l); r_last = BUNlast(r); l_start = l_end = BUNfirst(l); r_start = r_end = BUNfirst(r); switch (loc) {#ifndef NOEXPAND_CHR case TYPE_chr: #line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" if (((!BATtvoid(l)) || l->tseqbase != oid_nil) && ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) { assert(r->htype != TYPE_void); while (l_start < l_last) { ptr v2, v1 = BUNtloc(l, l_start); int neq = 1; /* lookup range in l */ l_end = l_start; if (l_key) { l_end += l_next; } else do { if ((l_end += l_next) >= l_last) break; v2 = BUNtloc(l, l_end); } while (simple_EQ(v1, v2, chr)); /* lookup value in r (if not nil, that is) */ if (!simple_EQ(v1, nil, chr)) { if (r_scan > 0) { /* first try scanning; but give up after a while */ for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) { v2 = BUNhloc(r, r_end); neq = simple_CMP(v1, v2, chr); if (neq <= 0) break; } r_start = r_end; } if (neq == 1) { /* use binary search after failed scan or if scanning is impossible (l not sorted) */ if (r_scan < 0 || r_start < r_last) { /* if merge not ended (or if no merge at all) */ r_start = (BUN) SORTfndfirst_chr(rr, v1); } if (r_start < r_last) { v2 = BUNhloc(r, r_start); neq = !simple_EQ(v1, v2, chr); } else if (r_scan >= 0) { /* r is already at end => break off merge join */ break; } } } if (neq == 0) { /* lookup range in r */ r_end = r_start + r_next; if (r_key == 0) while (r_end < r_last) { v2 = BUNhloc(r, r_end); if (!simple_EQ(v1, v2, chr)) break; r_end += r_next; } /* generate match-product as join result */ for (; l_start < l_end; l_start += l_next) for (r_cur = r_start; r_cur < r_end; r_cur += r_next) bunfastins(bn, BUNhead(l, l_start), BUNtail(r, r_cur)); } else if (nil_on_miss) { /* outerjoin inserts nils on a miss */ hasnils = 1; for (; l_start < l_end; l_start += l_next) bunfastins(bn, BUNhead(l, l_start), nil_on_miss); } else { l_start = l_end; /* no match found in equi-join */ } } }#line 268 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"; break;#endif#ifndef NOEXPAND_BTE case TYPE_bte: #line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" if (((!BATtvoid(l)) || l->tseqbase != oid_nil) && ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) { assert(r->htype != TYPE_void); while (l_start < l_last) { ptr v2, v1 = BUNtloc(l, l_start); int neq = 1; /* lookup range in l */ l_end = l_start; if (l_key) { l_end += l_next; } else do { if ((l_end += l_next) >= l_last) break; v2 = BUNtloc(l, l_end); } while (simple_EQ(v1, v2, bte)); /* lookup value in r (if not nil, that is) */ if (!simple_EQ(v1, nil, bte)) { if (r_scan > 0) { /* first try scanning; but give up after a while */ for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) { v2 = BUNhloc(r, r_end); neq = simple_CMP(v1, v2, bte); if (neq <= 0) break; } r_start = r_end; } if (neq == 1) { /* use binary search after failed scan or if scanning is impossible (l not sorted) */ if (r_scan < 0 || r_start < r_last) { /* if merge not ended (or if no merge at all) */ r_start = (BUN) SORTfndfirst_bte(rr, v1); } if (r_start < r_last) { v2 = BUNhloc(r, r_start); neq = !simple_EQ(v1, v2, bte); } else if (r_scan >= 0) { /* r is already at end => break off merge join */ break; } } } if (neq == 0) { /* lookup range in r */ r_end = r_start + r_next; if (r_key == 0) while (r_end < r_last) { v2 = BUNhloc(r, r_end); if (!simple_EQ(v1, v2, bte)) break; r_end += r_next; } /* generate match-product as join result */ for (; l_start < l_end; l_start += l_next) for (r_cur = r_start; r_cur < r_end; r_cur += r_next) bunfastins(bn, BUNhead(l, l_start), BUNtail(r, r_cur)); } else if (nil_on_miss) { /* outerjoin inserts nils on a miss */ hasnils = 1; for (; l_start < l_end; l_start += l_next) bunfastins(bn, BUNhead(l, l_start), nil_on_miss); } else { l_start = l_end; /* no match found in equi-join */ } } }#line 273 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"; break;#endif#ifndef NOEXPAND_SHT case TYPE_sht: #line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" if (((!BATtvoid(l)) || l->tseqbase != oid_nil) && ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) { assert(r->htype != TYPE_void); while (l_start < l_last) { ptr v2, v1 = BUNtloc(l, l_start); int neq = 1; /* lookup range in l */ l_end = l_start; if (l_key) { l_end += l_next; } else do { if ((l_end += l_next) >= l_last) break; v2 = BUNtloc(l, l_end); } while (simple_EQ(v1, v2, sht)); /* lookup value in r (if not nil, that is) */ if (!simple_EQ(v1, nil, sht)) { if (r_scan > 0) { /* first try scanning; but give up after a while */ for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) { v2 = BUNhloc(r, r_end); neq = simple_CMP(v1, v2, sht); if (neq <= 0) break; } r_start = r_end; } if (neq == 1) { /* use binary search after failed scan or if scanning is impossible (l not sorted) */ if (r_scan < 0 || r_start < r_last) { /* if merge not ended (or if no merge at all) */ r_start = (BUN) SORTfndfirst_sht(rr, v1); } if (r_start < r_last) { v2 = BUNhloc(r, r_start); neq = !simple_EQ(v1, v2, sht); } else if (r_scan >= 0) { /* r is already at end => break off merge join */ break; } } } if (neq == 0) { /* lookup range in r */ r_end = r_start + r_next; if (r_key == 0) while (r_end < r_last) { v2 = BUNhloc(r, r_end); if (!simple_EQ(v1, v2, sht)) break; r_end += r_next; } /* generate match-product as join result */ for (; l_start < l_end; l_start += l_next) for (r_cur = r_start; r_cur < r_end; r_cur += r_next) bunfastins(bn, BUNhead(l, l_start), BUNtail(r, r_cur)); } else if (nil_on_miss) { /* outerjoin inserts nils on a miss */ hasnils = 1; for (; l_start < l_end; l_start += l_next) bunfastins(bn, BUNhead(l, l_start), nil_on_miss); } else { l_start = l_end; /* no match found in equi-join */ } } }#line 278 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"; break;#endif#ifndef NOEXPAND_INT case TYPE_int: #line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" if (((!BATtvoid(l)) || l->tseqbase != oid_nil) && ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) { assert(r->htype != TYPE_void); while (l_start < l_last) { ptr v2, v1 = BUNtloc(l, l_start); int neq = 1; /* lookup range in l */ l_end = l_start; if (l_key) { l_end += l_next; } else do { if ((l_end += l_next) >= l_last) break; v2 = BUNtloc(l, l_end); } while (simple_EQ(v1, v2, int)); /* lookup value in r (if not nil, that is) */ if (!simple_EQ(v1, nil, int)) { if (r_scan > 0) { /* first try scanning; but give up after a while */ for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) { v2 = BUNhloc(r, r_end); neq = simple_CMP(v1, v2, int); if (neq <= 0) break; } r_start = r_end; } if (neq == 1) { /* use binary search after failed scan or if scanning is impossible (l not sorted) */ if (r_scan < 0 || r_start < r_last) { /* if merge not ended (or if no merge at all) */ r_start = (BUN) SORTfndfirst_int(rr, v1); } if (r_start < r_last) { v2 = BUNhloc(r, r_start); neq = !simple_EQ(v1, v2, int); } else if (r_scan >= 0) { /* r is already at end => break off merge join */ break; } } } if (neq == 0) { /* lookup range in r */ r_end = r_start + r_next; if (r_key == 0) while (r_end < r_last) { v2 = BUNhloc(r, r_end); if (!simple_EQ(v1, v2, int)) break; r_end += r_next; } /* generate match-product as join result */ for (; l_start < l_end; l_start += l_next) for (r_cur = r_start; r_cur < r_end; r_cur += r_next) bunfastins(bn, BUNhead(l, l_start), BUNtail(r, r_cur)); } else if (nil_on_miss) { /* outerjoin inserts nils on a miss */ hasnils = 1; for (; l_start < l_end; l_start += l_next) bunfastins(bn, BUNhead(l, l_start), nil_on_miss); } else { l_start = l_end; /* no match found in equi-join */ } } }#line 283 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"; break;#endif#ifndef NOEXPAND_FLT case TYPE_flt: #line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx" if (((!BATtvoid(l)) || l->tseqbase != oid_nil) && ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) { assert(r->htype != TYPE_void); while (l_start < l_last) { ptr v2, v1 = BUNtloc(l, l_start); int neq = 1; /* lookup range in l */ l_end = l_start; if (l_key) { l_end += l_next; } else do { if ((l_end += l_next) >= l_last) break; v2 = BUNtloc(l, l_end); } while (simple_EQ(v1, v2, flt)); /* lookup value in r (if not nil, that is) */ if (!simple_EQ(v1, nil, flt)) { if (r_scan > 0) { /* first try scanning; but give up after a while */ for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -