📄 gdk_relop.mx
字号:
} BATmmap_unpin(l); /* propagate properties */ bn->hsorted = bn->tsorted = 0; if (BAThkey(r) && BATtkey(l) && BATcount(bn) == BATcount(r)) { ALIGNsetH(bn, r); } ESTIDEBUG THRprintf(GDKout, "#BATrevsemijoin: actual resultsize: " SZFMT "\n", BATcount(bn)); return bn; bunins_failed: BATmmap_unpin(l); BBPreclaim(bn); return NULL;}@}@-The positional semijoin performs a semijoin using positional lookup.This implementation is dirty as it also allows fetches withhard integer positions, rather than oid matching on a dense-oid column.@{@cstatic BAT *BATfetchsemijoin(BAT *l, BAT *r, BAT *cpy, int denselookup){ int xx; size_t base, end, yy; ssize_t offset; BUN l_cur, l_end, r_cur; BAT *bn; BATcheck(l, "BATfetchsemijoin: l"); BATcheck(r, "BATfetchsemijoin: r"); if (denselookup) { if (!BAThdense(r)) { GDKerror("BATfetchsemijoin: left column must be dense.\n"); return NULL; } else if (ATOMstorage(l->htype) != ATOMstorage(TYPE_oid)) { GDKerror("BATfetchsemijoin: illegal index type %s.\n", ATOMname(l->htype)); return NULL; } } if (BAThvoid(l)) { /* redirect semijoin on two dense regions to a select (and hence to BATslice) */ oid min = oid_nil, max = oid_nil; BAT *other = (cpy == l) ? r : l; if (BATcount(other)) { min = *(oid *) BUNhead(l, BUNfirst(other)); max = *(oid *) BUNhead(l, BUNlast(other) - BUNsize(other)); } if (denselookup) { min -= r->hseqbase; max -= r->hseqbase; } return BATslice(cpy, min, 1+max); } base = BUNindex(r, BUNfirst(r)); end = base + BATcount(r); bn = BATnew(BAThtype(cpy), BATttype(cpy), MIN(BATcount(r), BATcount(l))); if (bn == NULL) return bn; ESTIDEBUG THRprintf(GDKout, "#BATfetchsemijoin: estimated resultsize: " SZFMT "\n", MIN(BATcount(r), BATcount(l))); if (bn == NULL) { return NULL; } if (denselookup) { offset = (ssize_t) (base - r->hseqbase); /* translate oid to BUN position */ } else { offset = (ssize_t) base; /* fetch by hard BUNindex */ } /* iterate l; positional fetch in r */ if (!BAThordered(l)) /* StM: (!(BAThordered(l)&1)) ? */ BATmmap_pin(r); BATloopFast(l, l_cur, l_end, xx) { yy = offset + *(oid *) BUNhloc(l, l_cur); if (yy < base || yy >= end) { continue; } r_cur = BUNptr(r, yy); if (cpy == r) { bunfastins(bn, BUNhead(r, r_cur), BUNtail(r, r_cur)); } else { bunfastins(bn, BUNhead(r, r_cur), BUNtail(l, l_cur)); } } if (!BAThordered(l)) /* StM: (!(BAThordered(l)&1)) ? */ BATmmap_unpin(r); /* property propagation */ bn->hsorted = (BAThordered(l) & BAThordered(r) & 1 ? GDK_SORTED : FALSE); bn->tsorted = (bn->hsorted & BATtordered(cpy) & 1 ? GDK_SORTED : FALSE); if (denselookup && BATcount(bn) == BATcount(l)) { ALIGNsetH(bn, l); } else { BATkey(bn, BAThkey(l) && BAThkey(r)); } if (BAThkey(l)) { if (BATcount(bn) == BATcount(cpy) && (BAThordered(r) & BAThordered(l) & 1)) { ALIGNsetT(bn, cpy); } else { BATkey(BATmirror(bn), BATtkey(cpy)); } } ESTIDEBUG THRprintf(GDKout, "#BATfetchsemijoin: actual resultsize: " SZFMT "\n", BATcount(bn)); return bn; bunins_failed: if (!BAThordered(l)) /* StM: (!(BAThordered(l)&1)) ? */ BATmmap_unpin(r); BBPreclaim(bn); return NULL;}BAT *BATfetch(BAT *l, BAT *r){ return BATfetchsemijoin(r, l, l, FALSE);}@-The BATsemijoin chooses between various alternatives.TODO: this should be moved to MIL.@cBAT *BATsemijoin(BAT *l, BAT *r){ int reverse1, reverse2; size_t countr, countl, i; lng logr, logl; BAT *bn, *tmp = NULL; ERRORcheck(l == NULL, "BATsemijoin"); ERRORcheck(r == NULL, "BATsemijoin"); ERRORcheck(TYPEerror(l->htype, r->htype), "BATsemijoin: type conflict\n");@- algorithm selectionWe have 10 algorithms implementing semijoin. Their conditions are checked in orderof efficiency. Some algorithms reverse the semijoin (loop over r, lookup in l).To do that r should be unique. To that end, doubles may sometimes be eliminated from r.@c for (logr = 4, i = countr = BATcount(r); i > 0; logr++) i >>= 1; for (logl = 4, i = countl = BATcount(l); i > 0; logl++) i >>= 1; reverse1 = countr < countl && (BAThkey(r) || (lng) countr * 8 < (lng) countl); reverse2 = (lng) countr *logl < (lng) countl && (BAThkey(r) || (lng) countr * (logl + 8) < (lng) countl); if (ALIGNsynced(l, r)) { ALGODEBUG THRprintf(GDKout, "#BATsemijoin: BATcopy(l);\n"); bn = BATcopy(l, l->htype, l->ttype, FALSE); } else if ((BAThordered(l) & 1) && BAThdense(r)) { oid lo = r->hseqbase; oid hi = r->hseqbase + countr - 1; ALGODEBUG THRprintf(GDKout, "#BATsemijoin: BATmirror(BATselect(BATmirror(l), &lo, &hi));\n"); bn = BATmirror(BATselect(BATmirror(l), &lo, &hi)); } else if (BAThdense(r)) { ALGODEBUG THRprintf(GDKout, "#BATsemijoin: BATfetchsemijoin(l, r, l);\n"); bn = BATfetchsemijoin(l, r, l, TRUE); } else if (BAThdense(l) && reverse1) { if (!BAThkey(r)) { BAT *v = VIEWhead_(r, BAT_WRITE); tmp = r = BATkunique(v); BBPreclaim(v); } ALGODEBUG THRprintf(GDKout, "#BATsemijoin: BATfetchsemijoin(r, l, l);\n"); bn = BATfetchsemijoin(r, l, l, TRUE); } else if (l->hhash && reverse1) { ALGODEBUG THRprintf(GDKout, "#BATsemijoin: BATrevsemijoin(l,r);\n"); bn = BATrevsemijoin(l, r); } else if ((BAThordered(r) & 1) && countl * logr < countr) { ALGODEBUG THRprintf(GDKout, "#BATsemijoin: BATbinsemijoin(l, r, l);\n"); bn = BATbinsemijoin(l, r, l); } else if ((BAThordered(l) & 1) & reverse2) { if (!BAThkey(r)) { BAT *v = VIEWhead_(r, BAT_WRITE); tmp = r = BATkunique(v); BBPreclaim(v); } ALGODEBUG THRprintf(GDKout, "#BATsemijoin: BATbinsemijoin(r, l, l);\n"); bn = BATbinsemijoin(r, l, l); } else { ALGODEBUG THRprintf(GDKout, "#BATsemijoin: BATkintersect(l, r);\n"); bn = BATkintersect(l, r); /* merge-semijoin or nested hashlookup in r */ } if (tmp) { BBPreclaim(tmp); } return bn;}@}@+ AntiJoinThis operation computes the cross product of two BATs, returning only thehead-value from the 'left' operand and then tail-value from the 'right'provided the tail-head pair do not (!) match.@{@= antijoin2static BAT *antijoin_@1_@2(BAT *bn, BAT *l, BAT *r){ BUN l_cur, l_end, r_cur, r_end, dst; int l_sz, r_sz, bn_sz; int (*cmp) (ptr, ptr) = BATatoms[l->ttype].atomCmp; ptr nil = ATOMnilptr(l->ttype); bn_sz = BUNsize(bn); dst = BUNfirst(bn); ALGODEBUG THRprintf(GDKout, "#BATantijoin: antijoin_@1_@2();\n"); BATloopFast(l, l_cur, l_end, l_sz) { ptr v = (ptr) BUNtail(l, l_cur); BATloopFast(r, r_cur, r_end, r_sz) { ptr w = (ptr) BUNhead(r, r_cur); int c = (*cmp) (v, w); if ((*cmp) (v, nil) != 0 && (*cmp) (w, nil) != 0 && c != 0 ) { @:bunfastins_nocheck_(@1,@2)@ dst += bn_sz; } } } bn->batBuns->free = dst - bn->batBuns->base; BATsetcount(bn,bn->batBuns->free/bn_sz); /* Just to silence compilers (Intel's icc) that otherwise might * complain about "declared but never referenced" labels * (condition should never be true). * (A "dead" goto between the return and the label makes (other) * compilers (Sun) complain about never reached code...) */ if (!bn) goto bunins_failed; return bn;bunins_failed: BBPreclaim(bn); return NULL;}@= antijoin1 @:antijoin2(@1,chr)@ @:antijoin2(@1,bte)@ @:antijoin2(@1,sht)@ @:antijoin2(@1,int)@ @:antijoin2(@1,lng)@ @:antijoin2(@1,VATOM)@ @:antijoin2(@1,LATOM)@@c@:antijoin1(chr)@@:antijoin1(bte)@@:antijoin1(sht)@@:antijoin1(int)@@:antijoin1(lng)@@:antijoin1(VATOM)@@:antijoin1(LATOM)@@c@= antijoin_switch_rtt{ int rtt = r->ttype; int rts = ATOMstorage(rtt); if (rts == TYPE_chr) { bn = antijoin_@1_chr(bn,l,r); } else if (rts == TYPE_bte) { bn = antijoin_@1_bte(bn,l,r); } else if (rts == TYPE_sht) { bn = antijoin_@1_sht(bn,l,r); } else if (rtt != TYPE_bat && (rts == TYPE_int || rts == TYPE_flt#if SIZEOF_OID == SIZEOF_INT || rts == TYPE_oid#endif )) { /* ensure use of ATOMput for TYPE_bat */ bn = antijoin_@1_int(bn,l,r); } else if (rts == TYPE_lng || rts == TYPE_dbl#if SIZEOF_OID == SIZEOF_LNG || ATOMstorage(lht) == TYPE_oid#endif) { bn = antijoin_@1_lng(bn,l,r); } else if (r->tvarsized) { bn = antijoin_@1_VATOM(bn,l,r); } else { bn = antijoin_@1_LATOM(bn,l,r); }}@c@= antijoin_switch_lht{ int lht = l->htype; int lhs = ATOMstorage(lht); if (lhs == TYPE_chr) { @:antijoin_switch_rtt(chr)@ } else if (lhs == TYPE_bte) { @:antijoin_switch_rtt(bte)@ } else if (lhs == TYPE_sht) { @:antijoin_switch_rtt(sht)@ } else if (lht != TYPE_bat && (lhs == TYPE_int || lhs == TYPE_flt#if SIZEOF_OID == SIZEOF_INT || lhs == TYPE_oid#endif )) { /* ensure use of ATOMput for TYPE_bat */ @:antijoin_switch_rtt(int)@ } else if (lhs == TYPE_lng || lhs == TYPE_dbl#if SIZEOF_OID == SIZEOF_LNG || lhs == TYPE_oid#endif ) { @:antijoin_switch_rtt(lng)@ } else if (l->hvarsized) { @:antijoin_switch_rtt(VATOM)@ } else { @:antijoin_switch_rtt(LATOM)@ }}@cBAT *BATantijoin(BAT *l, BAT *r){ BAT *bn; size_t lc, rc, sz; ERRORcheck(l == NULL, "BATantijoin: invalid left operand"); ERRORcheck(r == NULL, "BATantijoin: invalid right operand"); lc = BATcount(l); rc = BATcount(r); sz = lc * rc; if (sz > 0) { /* try to keep void columns where possible */ if (rc == 1) return BATconst(l, BATttype(r), BUNtail(r, BUNfirst(r))); if (lc == 1) return BATmirror(BATconst(BATmirror(r), BAThtype(l), BUNhead(l, BUNfirst(l)))); } bn = BATnew(BAThtype(l), BATttype(r), sz); if (bn == NULL) { return bn; } if (sz == 0) return bn; BATmmap_pin(r); @:antijoin_switch_lht@ BATmmap_unpin(r); if (bn) { bn->hsorted = l->hsorted; bn->tsorted = (lc == 1 ? r->tsorted : FALSE); bn->hdense = (rc == 1 ? l->hdense : FALSE); bn->tdense = (lc == 1 ? r->tdense : FALSE); BATkey(bn, (rc == 1 ? l->hkey : FALSE)); BATkey(BATmirror(bn), (lc == 1 ? r->tkey : FALSE)); if (!bn->batDirty) bn->batDirty = TRUE; } return bn;}@+ Cross ProductThis operation computes the cross product of two BATs, returning only thehead-value from the 'left' operand and then tail-value from the 'right'operand.@{@= cross2static BAT *cross_@1_@2(BAT *bn, BAT *l, BAT *r){ BUN l_cur, l_end, r_cur, r_end, dst; int l_sz, r_sz, bn_sz; bn_sz = BUNsize(bn); dst = BUNfirst(bn); ALGODEBUG THRprintf(GDKout, "#BATcross: cross_@1_@2();\n"); BATloopFast(l, l_cur, l_end, l_sz) { BATloopFast(r, r_cur, r_end, r_sz) { @:bunfastins_nocheck_(@1,@2)@ dst += bn_sz; } } bn->batBuns->free = dst - bn->batBuns->base; BATsetcount(bn,bn->batBuns->free/bn_sz); /* Just to silence compilers (Intel's icc) that otherwise might * complain about "declared but never referenced" labels * (condition should never be true). * (A "dead" goto between the return and the label makes (other) * compilers (Sun) complain about never reached code...) */ if (!bn) goto bunins_failed; return bn; bunins_failed: BBPreclaim(bn); return NULL;}@= cross1 @:cross2(@1,chr)@ @:cross2(@1,bte)@ @:cross2(@1,sht)@ @:cross2(@1,int)@ @:cross2(@1,lng)@ @:cross2(@1,VATOM)@ @:cross2(@1,LATOM)@@c@:cross1(chr)@@:cross1(bte)@@:cross1(sht)@@:cross1(int)@@:cross1(lng)@@:cross1(VATOM)@@:cross1(LATOM)@@c@= cross_switch_rtt{ int rtt = r->ttype; int rts = ATOMstorage(rtt); if (rts == TYPE_chr) { bn = cross_@1_chr(bn,l,r); } else if (rts == TYPE_bte) { bn = cross_@1_bte(bn,l,r); } else if (rts == TYPE_sht) { bn = cross_@1_sht(bn,l,r); } else if (rtt != TYPE_bat && (rts == TYPE_int || rts == TYPE_flt#if SIZEOF_OID == SIZEOF_INT || rts == TYPE_oid#endif )) { /* ensure use of ATOMput for TYPE_bat */ bn = cross_@1_int(bn,l,r); } else if (rts == TYPE_lng || rts == TYPE_dbl#if SIZEOF_OID == SIZEOF_LNG || AT
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -