📄 radix.c
字号:
} else { /* ATOMstorage(l->htype) == TYPE_int && ATOMstorage(r->ttype) == TYPE_int hence the result BAT is [int,int], so this works */ int *_dst = (int *) (bn->batBuns->base + bn->batBuns->free); int *_end = (int *) (bn->batBuns->base + bn->batBuns->size); size_t mask = h->mask << rd; if (r->htype == TYPE_oid) { if (cutoff) { #line 2628 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" ALGODEBUG THRprintf(GDKout, "#phash_join: hash_join[oid,hloc,tloc,simple,int,hloc,tloc,break;]\n"); /* build phase */ for (xx=BUNsize(r); rlo<rhi; rlo+=xx) { ptr p = BUNhloc(r,rlo); if (!simple_EQ(p, nil, oid)) { hash_t v = hash_simple(oid_RADIX(p,mask)>>rd); h->link[yy] = h->hash[v]; h->hash[v] = yy++; } } /* probe phase */ for (xx=BUNsize(l); llo<lhi; llo+=xx) { ptr v = BUNtloc(l,llo); for(yy=h->hash[hash_simple(oid_RADIX(v,mask)>>rd)]; yy!=~(hash_t)0; yy=h->link[yy]) { rlo = BUNptr(r,yy+zz); if (simple_EQ(BUNhloc(r,rlo),v,oid)) { intfastINSERT(bn, BUNhloc(l,llo), BUNtloc(r,rlo), l, llo, r, rlo); break; } } }#line 2605 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } else { #line 2628 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" ALGODEBUG THRprintf(GDKout, "#phash_join: hash_join[oid,hloc,tloc,simple,int,hloc,tloc,]\n"); /* build phase */ for (xx=BUNsize(r); rlo<rhi; rlo+=xx) { ptr p = BUNhloc(r,rlo); if (!simple_EQ(p, nil, oid)) { hash_t v = hash_simple(oid_RADIX(p,mask)>>rd); h->link[yy] = h->hash[v]; h->hash[v] = yy++; } } /* probe phase */ for (xx=BUNsize(l); llo<lhi; llo+=xx) { ptr v = BUNtloc(l,llo); for(yy=h->hash[hash_simple(oid_RADIX(v,mask)>>rd)]; yy!=~(hash_t)0; yy=h->link[yy]) { rlo = BUNptr(r,yy+zz); if (simple_EQ(BUNhloc(r,rlo),v,oid)) { intfastINSERT(bn, BUNhloc(l,llo), BUNtloc(r,rlo), l, llo, r, rlo); } } }#line 2607 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } } else if (ATOMstorage(r->htype) == TYPE_int || ATOMstorage(r->htype) == TYPE_flt) { if (cutoff) { #line 2628 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" ALGODEBUG THRprintf(GDKout, "#phash_join: hash_join[int,hloc,tloc,simple,int,hloc,tloc,break;]\n"); /* build phase */ for (xx=BUNsize(r); rlo<rhi; rlo+=xx) { ptr p = BUNhloc(r,rlo); if (!simple_EQ(p, nil, int)) { hash_t v = hash_simple(int_RADIX(p,mask)>>rd); h->link[yy] = h->hash[v]; h->hash[v] = yy++; } } /* probe phase */ for (xx=BUNsize(l); llo<lhi; llo+=xx) { ptr v = BUNtloc(l,llo); for(yy=h->hash[hash_simple(int_RADIX(v,mask)>>rd)]; yy!=~(hash_t)0; yy=h->link[yy]) { rlo = BUNptr(r,yy+zz); if (simple_EQ(BUNhloc(r,rlo),v,int)) { intfastINSERT(bn, BUNhloc(l,llo), BUNtloc(r,rlo), l, llo, r, rlo); break; } } }#line 2611 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } else { #line 2628 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" ALGODEBUG THRprintf(GDKout, "#phash_join: hash_join[int,hloc,tloc,simple,int,hloc,tloc,]\n"); /* build phase */ for (xx=BUNsize(r); rlo<rhi; rlo+=xx) { ptr p = BUNhloc(r,rlo); if (!simple_EQ(p, nil, int)) { hash_t v = hash_simple(int_RADIX(p,mask)>>rd); h->link[yy] = h->hash[v]; h->hash[v] = yy++; } } /* probe phase */ for (xx=BUNsize(l); llo<lhi; llo+=xx) { ptr v = BUNtloc(l,llo); for(yy=h->hash[hash_simple(int_RADIX(v,mask)>>rd)]; yy!=~(hash_t)0; yy=h->link[yy]) { rlo = BUNptr(r,yy+zz); if (simple_EQ(BUNhloc(r,rlo),v,int)) { intfastINSERT(bn, BUNhloc(l,llo), BUNtloc(r,rlo), l, llo, r, rlo); } } }#line 2613 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } } else { /* if (ATOMstorage(r->htype) == TYPE_lng || ATOMstorage(r->htype) == TYPE_dbl) */ if (cutoff) { #line 2628 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" ALGODEBUG THRprintf(GDKout, "#phash_join: hash_join[lng,hloc,tloc,simple,int,hloc,tloc,break;]\n"); /* build phase */ for (xx=BUNsize(r); rlo<rhi; rlo+=xx) { ptr p = BUNhloc(r,rlo); if (!simple_EQ(p, nil, lng)) { hash_t v = hash_simple(lng_RADIX(p,mask)>>rd); h->link[yy] = h->hash[v]; h->hash[v] = yy++; } } /* probe phase */ for (xx=BUNsize(l); llo<lhi; llo+=xx) { ptr v = BUNtloc(l,llo); for(yy=h->hash[hash_simple(lng_RADIX(v,mask)>>rd)]; yy!=~(hash_t)0; yy=h->link[yy]) { rlo = BUNptr(r,yy+zz); if (simple_EQ(BUNhloc(r,rlo),v,lng)) { intfastINSERT(bn, BUNhloc(l,llo), BUNtloc(r,rlo), l, llo, r, rlo); break; } } }#line 2618 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } else { #line 2628 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" ALGODEBUG THRprintf(GDKout, "#phash_join: hash_join[lng,hloc,tloc,simple,int,hloc,tloc,]\n"); /* build phase */ for (xx=BUNsize(r); rlo<rhi; rlo+=xx) { ptr p = BUNhloc(r,rlo); if (!simple_EQ(p, nil, lng)) { hash_t v = hash_simple(lng_RADIX(p,mask)>>rd); h->link[yy] = h->hash[v]; h->hash[v] = yy++; } } /* probe phase */ for (xx=BUNsize(l); llo<lhi; llo+=xx) { ptr v = BUNtloc(l,llo); for(yy=h->hash[hash_simple(lng_RADIX(v,mask)>>rd)]; yy!=~(hash_t)0; yy=h->link[yy]) { rlo = BUNptr(r,yy+zz); if (simple_EQ(BUNhloc(r,rlo),v,lng)) { intfastINSERT(bn, BUNhloc(l,llo), BUNtloc(r,rlo), l, llo, r, rlo); } } }#line 2620 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } } bn->batBuns->free = ((BUN) _dst) - bn->batBuns->base; BATsetcount(bn, bn->batBuns->free/BUNsize(bn)); } return 0; bunins_failed: return -1;}#line 2649 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx"intM4_RDX_phash_join(BAT **res, BAT *l, BAT *r, int *radix, int *hitrate, bit *cutoff){ ALGODEBUG THRprintf(GDKout, "#phash_join(%s(%d,%d),%s(%d,%d),%d,%d,%d) -> ", BATgetId(l), (int) l->batCacheid, BATtordered(l), BATgetId(r), (int) r->batCacheid, BAThordered(r), *radix, *hitrate, *cutoff); if (*radix != (BATtordered(l) >> 1)) { GDKerror("phash_join: tail of %s is radix clustered on %d bits.\n", BATgetId(l), BATtordered(l) >> 1); return GDK_FAIL; } if (*radix != (BAThordered(r) >> 1)) { GDKerror("phash_join: head of %s is radix clustered on %d bits.\n", BATgetId(r), BAThordered(r) >> 1); return GDK_FAIL; } { size_t k, m, rd = (1 << *radix) - 1; size_t estimate = MIN(BATcount(r), BATcount(l)) * (*hitrate); BAT *bn = BATnew(HTYPE(l), TTYPE(r), estimate); BUN r_end, l_cur, l_last; BUN l_end, r_cur, r_last; int xx, yy; int rcut = r->hkey || (*cutoff && (*hitrate == 1)); Hash h; if (bn == NULL) return GDK_FAIL; l_cur = BUNfirst(l); l_last = BUNlast(l); r_cur = BUNfirst(r); r_last = BUNlast(r); xx = BUNsize(l); yy = BUNsize(r); /* alloc hash table */ h.lim = BATcount(r) >> *radix; /* mean cluster size */ k = h.lim / (*hitrate); /* mean number of different elements per cluster */ for (m = 1; m < k; m <<= 1) ; /* perfect hashing */ h.lim <<= 2; /* make lim four times as big for handling some skew */ h.hash = (hash_t *) GDKmalloc((m + h.lim) * sizeof(hash_t)); h.link = h.hash + m; h.type = ATOMtype(r->htype); h.mask = m - 1; h.lim *= BUNsize(r); /* lim is used as a byte offset */ /* set properties on result bat */ bn->hsorted = BAThordered(l); bn->tsorted = 0; if (l->hkey && r->hkey) BATkey(bn, TRUE); if (r->tkey && l->tkey) BATkey(BATmirror(bn), TRUE); if (BATcount(r)) { if (r->htype == TYPE_oid) { #line 2792 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" if (rd == 0) { /* with zero bits, it is fairer not to perform a radix merge */ hash_join(&h, bn, l, l_cur, l_last, r, r_cur, r_last, *radix, rcut); } else { hash_t y = oid_RADIX(BUNhloc(r,r_cur),rd); /* hack alert */ if (!h.mask) h.mask = 1; /* merge join on phash number */ while(l_cur < l_last) { /* find l range */ hash_t x = oid_RADIX(BUNtloc(l,l_cur),rd); for(l_end=l_cur+xx; l_end < l_last; l_end += xx) { ptr v = BUNtloc(l,l_end); if (oid_RADIX(v,rd) != x) break; } /* find matching r */ while (y < x) { ptr v; if ((r_cur += yy) >= r_last) goto xit; v = BUNhloc(r,r_cur); y = oid_RADIX(v,rd); } if (x == y) { /* phash hits found */ /* find r range */ for (r_end=r_cur+yy; r_end < r_last; r_end += yy) { ptr v = BUNhloc(r,r_end); y = oid_RADIX(v,rd); if (y != x) break; } if (hash_join(&h, bn, l, l_cur, l_end, r, r_cur, r_end, *radix, rcut) < 0) { BBPreclaim(bn); bn = NULL; goto xit; } r_cur = r_end; } l_cur = l_end; } }#line 2706 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } else if (ATOMstorage(r->htype) == TYPE_int || ATOMstorage(r->htype) == TYPE_flt) { #line 2792 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" if (rd == 0) { /* with zero bits, it is fairer not to perform a radix merge */ hash_join(&h, bn, l, l_cur, l_last, r, r_cur, r_last, *radix, rcut); } else { hash_t y = int_RADIX(BUNhloc(r,r_cur),rd); /* hack alert */ if (!h.mask) h.mask = 1; /* merge join on phash number */ while(l_cur < l_last) { /* find l range */ hash_t x = int_RADIX(BUNtloc(l,l_cur),rd); for(l_end=l_cur+xx; l_end < l_last; l_end += xx) { ptr v = BUNtloc(l,l_end); if (int_RADIX(v,rd) != x) break; } /* find matching r */ while (y < x) { ptr v; if ((r_cur += yy) >= r_last) goto xit; v = BUNhloc(r,r_cur); y = int_RADIX(v,rd); } if (x == y) { /* phash hits found */ /* find r range */ for (r_end=r_cur+yy; r_end < r_last; r_end += yy) { ptr v = BUNhloc(r,r_end); y = int_RADIX(v,rd); if (y != x) break; } if (hash_join(&h, bn, l, l_cur, l_end, r, r_cur, r_end, *radix, rcut) < 0) { BBPreclaim(bn); bn = NULL; goto xit; } r_cur = r_end; } l_cur = l_end; } }#line 2708 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } else if (ATOMstorage(r->htype) == TYPE_lng || ATOMstorage(r->htype) == TYPE_dbl) { #line 2792 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" if (rd == 0) { /* with zero bits, it is fairer not to perform a radix merge */ hash_join(&h, bn, l, l_cur, l_last, r, r_cur, r_last, *radix, rcut); } else { hash_t y = lng_RADIX(BUNhloc(r,r_cur),rd); /* hack alert */ if (!h.mask) h.mask = 1; /* merge join on phash number */ while(l_cur < l_last) { /* find l range */ hash_t x = lng_RADIX(BUNtloc(l,l_cur),rd); for(l_end=l_cur+xx; l_end < l_last; l_end += xx) { ptr v = BUNtloc(l,l_end); if (lng_RADIX(v,rd) != x) break; } /* find matching r */ while (y < x) { ptr v; if ((r_cur += yy) >= r_last) goto xit; v = BUNhloc(r,r_cur); y = lng_RADIX(v,rd); } if (x == y) { /* phash hits found */ /* find r range */ for (r_end=r_cur+yy; r_end < r_last; r_end += yy) { ptr v = BUNhloc(r,r_end); y = lng_RADIX(v,rd); if (y != x) break; } if (hash_join(&h, bn, l, l_cur, l_end, r, r_cur, r_end, *radix, rcut) < 0) { BBPreclaim(bn); bn = NULL; goto xit; } r_cur = r_end; } l_cur = l_end; } }#line 2710 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } else { int any = r->htype; #line 2792 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" if (rd == 0) { /* with zero bits, it is fairer not to perform a radix merge */ hash_join(&h, bn, l, l_cur, l_last, r, r_cur, r_last, *radix, rcut); } else { hash_t y = any_RADIX(BUNhead(r,r_cur),rd); /* hack alert */ if (!h.mask) h.mask = 1; /* merge join on phash number */ while(l_cur < l_last) { /* find l range */ hash_t x = any_RADIX(BUNtail(l,l_cur),rd); for(l_end=l_cur+xx; l_end < l_last; l_end += xx) { ptr v = BUNtail(l,l_end); if (any_RADIX(v,rd) != x) break; } /* find matching r */ while (y < x) { ptr v; if ((r_cur += yy) >= r_last) goto xit; v = BUNhead(r,r_cur); y = any_RADIX(v,rd); } if (x == y) { /* phash hits found */ /* find r range */ for (r_end=r_cur+yy; r_end < r_last; r_end += yy) { ptr v = BUNhead(r,r_end); y = any_RADIX(v,rd); if (y != x) break; } if (hash_join(&h, bn, l, l_cur, l_end, r, r_cur, r_end, *radix, rcut) < 0) { BBPreclaim(bn); bn = NULL; goto xit; } r_cur = r_end; } l_cur = l_end; } }#line 2713 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } } xit: GDKfree(h.hash); ALGODEBUG THRprintf(GDKout, "%s(%d);\n", BATgetId(bn), (int) bn->batCacheid); *res = bn; return bn ? GDK_SUCCEED : GDK_FAIL; }}intM4_RDX_phash_join_c(BAT **res, BAT *l, BAT *r, int *radix, int *hitrate){ if (l && r) { bit cutoff = r->hkey; return M4_RDX_phash_join(res, l, r, radix, hitrate, &cutoff); } else { return GDK_FAIL; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -