📄 gdk_relop.mx
字号:
BBPreclaim(bn); BATmmap_unpin(r); return NULL;}@}@- Fetch-joinThe @`BATfetchjoin@5(l,r) does a join on the basis of positional lookup.It looks up index numbers from the second parameter in first parameter BAT.The right parameter may contain OIDs, in which case their base issubtracted.In a typical join(BAT[any::1,oid) L, BATvoid,any::2] R) : BAT[any::1,any::2]we expect each tuple of L to hit exactly once in R. Now if any::1=voidthis void column can be carried over to the result. We do that.However, it is possible that an tail-oid is out of range with respectto R; in that case some tuples will be missing and we cannot carry onproducing a void column. In that case, we have to switch backon-the-fly to the non-dense implementation.The aftermath -- property setting -- is relatively straightforward here.@{@c#define HLATOMput(bn, dst) ATOMput(bn->htype, bn->hheap, dst, BUNhloc(l,l_cur))#define HVATOMput(bn, dst) ATOMput(bn->htype, bn->hheap, dst, BUNhvar(l,l_cur))#define TLATOMput(bn, dst) ATOMput(bn->ttype, bn->theap, dst, BUNtloc(r,r_cur))#define TVATOMput(bn, dst) ATOMput(bn->ttype, bn->theap, dst, BUNtvar(r,r_cur))@= SIMPLEput#define H@1put(bn,dst) *(@1*) (dst) = *(@1*) (BUNhloc(l,l_cur))#define T@1put(bn,dst) *(@1*) (dst) = *(@1*) (BUNtloc(r,r_cur))@c@:SIMPLEput(chr)@@:SIMPLEput(bte)@@:SIMPLEput(sht)@@:SIMPLEput(int)@@:SIMPLEput(lng)@@= bunfastins_nocheck_ H@1put(bn, BUNhloc(bn, dst)); T@2put(bn, BUNtloc(bn, dst));@c@= fetchjoinstatic BAT *densefetchjoin_@1_@2(BAT *bn, BAT *l, BAT *r){ BUN l_cur, l_end, r_cur, dst; ssize_t offset; size_t base, xx; int yy, zz; BAT *ret = NULL; zz = BUNsize(bn); yy = BUNsize(r); dst = BUNfirst(bn); base = BUNindex(r, BUNfirst(r)); offset = (ssize_t) (base - r->hseqbase); r_cur = BUNptr(r, offset + *(oid *) BUNtail(l, BUNfirst(l))); ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: densefetchjoin(@1,@2);\n"); BATloopFast(l, l_cur, l_end, xx) { @:bunfastins_nocheck_(@1,@2)@ r_cur += yy; dst += zz; } ret = bn; goto bunins_failed; bunins_failed: bn->batBuns->free = dst - bn->batBuns->base; BATsetcount(bn,bn->batBuns->free/zz); if (!ret) BBPreclaim(bn); return ret;}static BAT *orderedfetchjoin_@1_@2(BAT *bn, BAT *l, BAT *r){ BUN l_cur, l_end, r_cur, dst; ssize_t offset; size_t base, xx, yy; int zz; BAT *ret = NULL; zz = BUNsize(bn); dst = BUNfirst(bn); base = BUNindex(r, BUNfirst(r)); offset = (ssize_t) (base - r->hseqbase); ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: orderedfetchjoin(@1,@2);\n"); BATloopFast(l, l_cur, l_end, xx) { yy = offset + *(oid *) BUNtail(l, l_cur); r_cur = BUNptr(r, yy); @:bunfastins_nocheck_(@1,@2)@ dst += zz; } ret = bn; goto bunins_failed; bunins_failed: bn->batBuns->free = dst - bn->batBuns->base; BATsetcount(bn,bn->batBuns->free/zz); if (!ret) BBPreclaim(bn); return ret;}static BAT *defaultfetchjoin_@1_@2(BAT *bn, BAT *l, BAT *r){ BUN l_cur, l_end, r_cur, dst; ssize_t offset; size_t xx, yy, base, end; int zz; BAT *ret = NULL; zz = BUNsize(bn); dst = BUNfirst(bn); base = BUNindex(r, BUNfirst(r)); offset = (ssize_t) (base - r->hseqbase); end = base + BATcount(r); BATmmap_pin(r); ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: defaultfetchjoin(@1,@2);\n"); BATloopFast(l, l_cur, l_end, xx) { yy = offset + *(oid *) BUNtail(l, l_cur); if (yy < base || yy >= end) { continue; } r_cur = BUNptr(r, yy); @:bunfastins_nocheck_(@1,@2)@ dst += zz; } BATmmap_unpin(r); ret = bn; goto bunins_failed; bunins_failed: BATmmap_unpin(r); bn->batBuns->free = dst - bn->batBuns->base; BATsetcount(bn,bn->batBuns->free/zz); if (!ret) BBPreclaim(bn); return ret;}@c@= fetchjoin2 @:fetchjoin(@1,chr)@ @:fetchjoin(@1,bte)@ @:fetchjoin(@1,sht)@ @:fetchjoin(@1,int)@ @:fetchjoin(@1,lng)@ @:fetchjoin(@1,VATOM)@ @:fetchjoin(@1,LATOM)@@c@:fetchjoin2(chr)@@:fetchjoin2(bte)@@:fetchjoin2(sht)@@:fetchjoin2(int)@@:fetchjoin2(lng)@@:fetchjoin2(VATOM)@@:fetchjoin2(LATOM)@@c@= fetchjoin_switch_rtt if (ATOMstorage(rtt) == TYPE_chr) { bn = @1fetchjoin_@2_chr(bn,l,r); } else if (ATOMstorage(rtt) == TYPE_bte) { bn = @1fetchjoin_@2_bte(bn,l,r); } else if (ATOMstorage(rtt) == TYPE_sht) { bn = @1fetchjoin_@2_sht(bn,l,r); } else if (rtt != TYPE_bat && (ATOMstorage(rtt) == TYPE_int || ATOMstorage(rtt) == TYPE_flt#if SIZEOF_OID == SIZEOF_INT || ATOMstorage(rtt) == TYPE_oid#endif )) { /* ensure use of ATOMput for TYPE_bat */ bn = @1fetchjoin_@2_int(bn,l,r); } else if (ATOMstorage(rtt) == TYPE_lng || ATOMstorage(rtt) == TYPE_dbl#if SIZEOF_OID == SIZEOF_LNG || ATOMstorage(lht) == TYPE_oid#endif ) { bn = @1fetchjoin_@2_lng(bn,l,r); } else if (r->tvarsized) { bn = @1fetchjoin_@2_VATOM(bn,l,r); } else { bn = @1fetchjoin_@2_LATOM(bn,l,r); }@c@= fetchjoin_switch_lht if (ATOMstorage(lht) == TYPE_chr) { @:fetchjoin_switch_rtt(@1,chr)@ } else if (ATOMstorage(lht) == TYPE_bte) { @:fetchjoin_switch_rtt(@1,bte)@ } else if (ATOMstorage(lht) == TYPE_sht) { @:fetchjoin_switch_rtt(@1,sht)@ } else if (lht != TYPE_bat && (ATOMstorage(lht) == TYPE_int || ATOMstorage(lht) == TYPE_flt#if SIZEOF_OID == SIZEOF_INT || ATOMstorage(lht) == TYPE_oid#endif )) { /* ensure use of ATOMput for TYPE_bat */ @:fetchjoin_switch_rtt(@1,int)@ } else if (ATOMstorage(lht) == TYPE_lng || ATOMstorage(lht) == TYPE_dbl#if SIZEOF_OID == SIZEOF_LNG || ATOMstorage(lht) == TYPE_oid#endif ) { @:fetchjoin_switch_rtt(@1,lng)@ } else if (l->hvarsized) { @:fetchjoin_switch_rtt(@1,VATOM)@ } else { @:fetchjoin_switch_rtt(@1,LATOM)@ }@c@= densevoidfetchjoin ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: densevoidfetchjoin(@1,@2,@3);\n"); r_cur = BUNptr(r, offset + *(oid*) BUNtloc(l,BUNfirst(l))); yy = BUNsize(r); BATloopFast(l, l_cur, l_end, xx) { @3put(@2, bn->theap, dst, BUN@1(r, r_cur)); r_cur += yy; dst += zz; }@c@= orderedvoidfetchjoin ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: orderedvoidfetchjoin(@1,@2,@3);\n"); BATloopFast(l, l_cur, l_end, xx) { size_t _yy = offset + *(oid*) BUNtloc(l,l_cur); r_cur = BUNptr(r, _yy); @3put(@2, bn->theap, dst, BUN@1(r, r_cur)); dst += zz; }@c@= defaultvoidfetchjoin ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: defaultvoidfetchjoin(@1,@2,@3);\n"); BATloopFast(l, l_cur, l_end, xx) { size_t _yy = offset + *(oid*) BUNtloc(l,l_cur); if (_yy < base || _yy >= end) { bn->batBuns->free = dst - bn->batBuns->base; BATsetcount(bn,bn->batBuns->free/zz); BBPreclaim(bn); nondense = 1; break; } r_cur = BUNptr(r, _yy); @3put(@2, bn->theap, dst, BUN@1(r, r_cur)); dst += zz; } if (nondense) { /* not (yet?) completely typ-optimized ! */ ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: defaultvoidfetchjoin(@1,@2,@3): discovered non-density, resuming with non-void head\n"); bn = BATnew(BAThtype(l), ATOMtype(tpe), BATcount(l)); if (bn == NULL) return bn; dst = BUNfirst(bn); zz = BUNsize(bn); BATloopFast(l, l_cur, l_end, xx) { size_t _yy = offset + *(oid*) BUNtail(l,l_cur); if (_yy < base || _yy >= end) { continue; } r_cur = BUNptr(r, _yy); ATOMput(bn->htype, bn->hheap, BUNhloc(bn, dst), BUNhead(l, l_cur)); @3put(@2, bn->theap, BUNtloc(bn, dst), BUN@1(r, r_cur)); dst += zz; } }@c@= voidfetchjoin if (BATtdense(l)) { /* dense => ordered, i.e., we did check the bounderies already above */ /* and we can do a "synchronized walk" through l & r */ ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: BAThvoid(l) && !BATtvoid(l) && BATtdense(l)\n"); @:densevoidfetchjoin(@1,@2,@3)@ } else if ((BATtordered(l)&1) || hitalways) { /* we did check the bounderies already above */ ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: BAThvoid(l) && !BATtvoid(l) && !BATtdense(l) && BATtordered(l)&1\n"); @:orderedvoidfetchjoin(@1,@2,@3)@ } else { ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: BAThvoid(l) && !BATtvoid(l) && !BATtdense(l) && !BATtordered(l)&1\n"); @:defaultvoidfetchjoin(@1,@2,@3)@ }@c#define SIMPLEput(tpe,hp,dst,src) *(tpe*) (dst) = *(tpe*) (src)static BAT *batfetchjoin(BAT *l, BAT *r, size_t estimate, bit swap, bit hitalways){ int xx, yy, zz, lht, rtt; size_t base, end; ssize_t offset; size_t lcount,rcount; BUN r_cur, l_cur, l_end, dst; oid seqbase; BAT *ret = NULL, *bn = NULL, *l_orig = l; int hitalways_check = 0; BATcheck(l, "BATfetchjoin: l"); BATcheck(r, "BATfetchjoin: r"); lcount= BATcount(l); rcount= BATcount(r); if (estimate == oid_nil || estimate < lcount) { /* upper bound to avoid size checks in the join loop */ estimate = lcount; } if (swap) { if (!BAThdense(r)) { ERRORcheck(!BATtdense(l), "BATfetchjoin: one join column must be dense"); ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: BATmirror(BATfetchjoin(BATmirror(r),BATmirror(l)));\n"); return BATmirror(batfetchjoin(BATmirror(r), BATmirror(l), estimate, FALSE, FALSE)); } } else { ERRORcheck(!BAThdense(r), "BATfetchjoin: head column of right input must be dense"); } /* not checking boundaries is very dangerous; use regression tests with debugmask=8 first */ PROPDEBUG { hitalways_check = hitalways; hitalways = 0; } if ( (lcount == 0) || (rcount == 0) ){ /* below range checking do not support empty bats. so treat them separately (easy) */ @:return_empty_join_result(l_orig,r, BATfetchjoin: |l|==0 or |r|==0)@@= return_empty_join_result ALGODEBUG THRprintf(GDKout, "#@3 => empty result\n"); bn = BATnew(@1->htype, @2->ttype, 0); bn->hsorted = bn->tsorted = GDK_SORTED; BATkey(bn, TRUE); BATkey(BATmirror(bn), TRUE); if (bn->htype == TYPE_void || bn->htype == TYPE_oid) { bn->hdense = TRUE; BATseqbase(bn, (@1->htype == TYPE_void)?@1->hseqbase:0); } if (bn->ttype == TYPE_void || bn->ttype == TYPE_oid) { bn->tdense = TRUE; BATseqbase(BATmirror(bn), (@2->ttype == TYPE_void)?@2->tseqbase:0); } return bn;@c } else if (hitalways && BAThdense(r) && BATtdense(r) && r->hseqbase == r->tseqbase) { /* idempotent join: always hit and substitute tail with the same value */ ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: VIEWcreate(l)\n"); return VIEWcreate(l); } else if ((BATtordered(l) & 1) && !hitalways) { /* optimization to be able to carry over more void head columns */ /* (only needed if neither operand is empty) */ oid r_lo = *(oid *) BUNhead(r, BUNfirst(r)); oid r_hi = *(oid *) BUNhead(r, BUNlast(r) - BUNsize(r)); oid l_lo = *(oid *) BUNtail(l, BUNfirst(l)); oid l_hi = *(oid *) BUNtail(l, BUNlast(l) - BUNsize(l)); ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: BATtordered(l)&1\n"); ALGODEBUG THRprintf(GDKout, "#r_lo=" OIDFMT ", r_hi=" OIDFMT ", l_lo=" OIDFMT ", l_hi=" OIDFMT ".\n", r_lo, r_hi, l_lo, l_hi); if (r_lo > l_hi || r_hi < l_lo) { @:return_empty_join_result(l_orig, r, BATfetchjoin: r_lo > l_hi || r_hi < l_lo)@ } if (r_lo > l_lo || r_hi < l_hi) { ALGODEBUG THRprintf(GDKout, "#shrinking!\n"); ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: l = BATselect(l, &r_lo, &r_hi);\n"); l = BATselect(l, &r_lo, &r_hi); /* sorted, so it will be a slice */ if (l == NULL) return NULL; lcount = BATcount(l); if (lcount == 0) { if (l != l_orig) { BBPreclaim(l); /* was created as a temporary (slice) select on l */ } @:return_empty_join_result(l_orig, r, BATfetchjoin: |l|==0)@ } } } ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: 1\n"); base = BUNindex(r, BUNfirst(r)); offset = (ssize_t) (base - r->hseqbase); end = base + rcount; /* only BUNhead crashes on empty bats with TYPE != virtual oid */ seqbase = (l->htype == TYPE_void) ? l->hseqbase : (lcount ? (l->htype == TYPE_int ? (oid) *(int *) BUNhead(l, BUNfirst(l)) : (l->htype == TYPE_oid ? *(oid *) BUNhead(l, BUNfirst(l)) : (l->htype == TYPE_lng ? (oid) *(lng *) BUNhead(l, BUNfirst(l)) : oid_nil))) : oid_nil); ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: 2\n"); if (!BAThvoid(l)) { ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: !BAThvoid(l)\n"); /* default case: no void column to propagate */ lht = l->htype; rtt = r->ttype; if (ATOMstorage(rtt) == TYPE_str /*&& GDK_ELIMDOUBLES(r->theap)*/) { if (!rcount || (lcount<<3) > rcount) rtt = TYPE_var; /* insert double-eliminated strings as ints */ } bn = BATnew(BAThtype(l), ATOMtype(rtt), estimate); if (bn == NULL) goto ready; ESTIDEBUG THRprintf(GDKout, "#BATfetchjoin: estimated resultsize: " SZFMT "\n", lcount); dst = BUNfirst(bn); zz = BUNsize(bn); /* TODO: apply the "string trick" (see below) here too */ if (BATtdense(l)) { /* dense => ordered, i.e., we did check the bounderies already above */ /* and we can do a "synchronized walk" through l & r */ ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: !BAThvoid(l) && BATtdense(l)\n"); @:fetchjoin_switch_lht(dense)@ } else if ((BATtordered(l) & 1) || hitalways) { /* we did check the boundaries already above */ ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: !BAThvoid(l) && !BATtdense(l) && BATtordered(l)&1\n"); @:fetchjoin_switch_lht(ordered)@ } else { ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: !BAThvoid(l) && !BATtdense(l) && !BATtordered(l)&1\n"); @:fetchjoin_switch_lht(default)@ } /* handle string trick */ if (rtt == TYPE_var && ATOMstorage(r->ttype) == TYPE_str) { BAT *bm = BATmirror(bn); bn->theap = (Heap*)GDKzalloc(sizeof(Heap)); if (bn->theap && r->theap->filename) { char *nme = BBP_physical(bn->batCacheid); bn->theap->filename = (str) GDKmalloc(strlen(nme) + 12); GDKfilepath(bn->theap->filename, NULL, nme, "theap"); } if (HEAPcopy(bn->theap, r->theap) < 0) { BBPreclaim(bn); goto ready; } bn->ttype = bm->htype = r->ttype; bn->tvarsized = bm->hvarsized = 1; } bn->hsorted = (BATtordered(l) & BAThordered(r) & 1) ? BAThordered(l) : 0; } else if (!BATtvoid(l)) { /* propagation of void columns in the result */ int nondense = 0; int tpe = r->ttype; ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: BAThvoid(l) && !BATtvoid(l)\n"); if (ATOMstorage(tpe) == TYPE_str /*&& GDK_ELIMDOUBLES(r->theap)*/) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -