📄 radix.c
字号:
} if (limits &&#if SIZEOF_OID == SIZEOF_INT (limits->htype != TYPE_int || limits->ttype != TYPE_int)#else (limits->htype != TYPE_lng || limits->ttype != TYPE_lng)#endif ) { GDKerror("radix_cluster: limits %s is not of right type.\n", BATgetId(limits)); limits = NULL; } if (limits && (BATcount(limits) == 0 && limits->batRestricted == BAT_READ)) { GDKerror("radix_cluster: limits %s is not empty and appendable.\n", BATgetId(limits)); limits = NULL; } ALGODEBUG THRprintf(GDKout, "#radix_cluster(%s(%d,%d), %s,%g, %d", BATgetId(b), (int) b->batCacheid, BATtordered(b), nme, *perc, *nbits); radix[0] = *nbits; /*va_start(ap, nbits);*/ while ((p = va_arg(ap, int *)) != NULL) { ALGODEBUG THRprintf(GDKout, ",%d", *p); radix[argc] = *p; shift += ABS(radix[argc]); argc++; } /*va_end(ap);*/ total_shift = shift; ALGODEBUG THRprintf(GDKout, ") -> "); if (limits && BATcount(limits)) { /* we can now resume partial clustering by passing in of a radix_count2 bat */ size_t tot = 0; ssize_t prev = -1; int xx; BUN r, q; lim = (size_t *) GDKmalloc(BATcount(limits) * sizeof(size_t)); /* find the destination byte-offsets using the radix-count bat */ n = 0; BATloopFast(limits, r, q, xx) { size_t cnt = *(size_t *) BUNtloc(limits, r); ssize_t cur = *(ssize_t *) BUNhloc(limits, r); if (cur <= prev) { GDKerror("radix_cluster: non ascending bits %d,%d in limits bat %s!\n", prev, cur, BBPname(limitid)); BBPunfix(limitid); GDKfree(lim); return GDK_FAIL; } tot += cnt; lim[n++] = BUNsize(b) * tot; prev = cur; } if (n == 0 || tot != BATcount(b)) { GDKerror("radix_cluster: total number size %d of all limits %s does not end up to size %d of %s!\n", tot, nme, BATcount(b), BATgetId(b)); BBPunfix(limitid); GDKfree(lim); return GDK_FAIL; } if (limits->batRestricted == BAT_WRITE) { BATclear(limits); /* overwrite with resulting bounds */ } else { limits = NULL; } total_shift = 30 + MIN(0, radix[0]); /* just assume the new cluster bits are consecutive.. */ } else { /* just start with one cluster: the entire bat */ lim = (size_t *) GDKmalloc(sizeof(size_t)); lim[0] = BUNlast(b) - BUNfirst(b); n = 1; } while (--argc >= 0 && radix[argc] > 0) { size_t i, h = (size_t) 1 << radix[argc], j = h * sizeof(size_t), *k, *l = lim; size_t *dst = (size_t *) GDKmalloc(sizeof(size_t) * h); size_t *cnt = (size_t *) GDKmalloc(sizeof(size_t) * h); BAT *prev = bn, *lims = NULL; BUN p, q; if (limits && (argc == 0 || radix[argc - 1] <= 0)) { lims = limits; /* only pass limits on last iteration */ } bn = quickbat(prev, cap); bn->batBuns->free = cap * BUNsize(bn); BATsetcount(bn, cap); bn->hdense = b->hdense; bn->tdense = b->tdense; p = BUNfirst(prev); q = BUNfirst(bn); if (argc == 0) { h = 0; /* last radix: we can use the same lim for each i */ } else { j *= n; /* get n lims; next radix reuses them as l[] boundary */ } k = lim = (size_t *) GDKmalloc(j); shift -= radix[argc]; for (j = i = 0; i < n; j = l[i], i++) { BUN r = p + j + (size_t) (prc * (l[i] - j)); size_t *kk = k + h; if (b->ttype == TYPE_oid) { res = radix_chunk_oid(bn, prev, lims, p + j, r, p + l[i], q + j, k, radix[argc], shift, dst, cnt, &maxbits); } else if (ATOMstorage(b->ttype) == TYPE_int || ATOMstorage(b->ttype) == TYPE_flt) { res = radix_chunk_int(bn, prev, lims, p + j, r, p + l[i], q + j, k, radix[argc], shift, dst, cnt, &maxbits); } else if (ATOMstorage(b->ttype) == TYPE_dbl || ATOMstorage(b->ttype) == TYPE_lng) { res = radix_chunk_lng(bn, prev, lims, p + j, r, p + l[i], q + j, k, radix[argc], shift, dst, cnt, &maxbits); } else { res = radix_chunk_any(bn, prev, lims, p + j, r, p + l[i], q + j, k, radix[argc], shift, dst, cnt, &maxbits); } if (res < 0) break; while (k < kk) *(k++) += j; /* make relative limits absolute ones */ } if (prev != b) BBPreclaim(prev); prc = 1.0; n *= h; GDKfree(l); GDKfree(dst); GDKfree(cnt); if (res < 0) break; } GDKfree(lim); ALGODEBUG THRprintf(GDKout, "#(argc=%d,total_shift=%d) ", argc, total_shift); if (argc >= 0) { bn->tsorted = 0; /* partial radix cluster */ } else { if (bn->ttype == TYPE_oid) { /* now check whether we saw any higher bits; if not, its sorted! */ bn->tsorted = (total_shift >= 30) || (maxbits & 0x3FFFFFFF) < ((oid) 1 << total_shift); } bn->tsorted |= total_shift << 1; /* set radix bits */ } if (bn == b) { BBPfix(b->batCacheid); if (limits) { ssize_t zero = 0; size_t cnt = BATcount(b); BUNins(limits, &zero, &cnt, FALSE); } } if (limitid) { BBPunfix(limitid); } ALGODEBUG THRprintf(GDKout, "#%s(%d,%d) = %d;\n", BATgetId(bn), (int) bn->batCacheid, BATtordered(bn), res); if (res < 0) { BBPreclaim(bn); return GDK_FAIL; } *ret = bn; return GDK_SUCCEED;}intM4_RDX_radix_cluster(BAT **ret, BAT *b, str nme, flt *perc, int *nbits, ...){ int rtrn = 0; va_list ap; va_start(ap, nbits); rtrn = radix_cluster_va(ret, b, nme, perc, nbits, ap); va_end(ap); return rtrn;}intM4_RDX_radix_cluster_l(BAT **ret, BAT *b, flt *perc, int *nbits, ...){ str nme = "tmp_0"; int rtrn = 0; va_list ap; va_start(ap, nbits); rtrn = radix_cluster_va(ret, b, nme, perc, nbits, ap); va_end(ap); return rtrn;}intM4_RDX_radix_cluster_p(BAT **ret, BAT *b, str nme, int *nbits, ...){ flt perc = 1.0; int rtrn = 0; va_list ap; va_start(ap, nbits); rtrn = radix_cluster_va(ret, b, nme, &perc, nbits, ap); va_end(ap); return rtrn;}intM4_RDX_radix_cluster_lp(BAT **ret, BAT *b, int *nbits, ...){ str nme = "tmp_0"; flt perc = 1.0; int rtrn = 0; va_list ap; va_start(ap, nbits); rtrn = radix_cluster_va(ret, b, nme, &perc, nbits, ap); va_end(ap); return rtrn;}intM4_RDX_radix_cluster2(BAT **ret, BAT *b, int *p, int *r, int *i){ int npasses = *p, nbits = *r, nignore = abs(*i); int maxpasses = MIN(4, nbits), quota, rest; if (npasses < 1) { GDKerror("radix_cluster2: number of passes must be > 0"); return GDK_FAIL; } if (nbits < 1) { GDKerror("radix_cluster2: number of radix bits must be > 0"); return GDK_FAIL; } npasses = MIN(maxpasses, *p); quota = nbits / npasses; rest = nbits - (quota * (npasses - 1)); if (nignore == 0) { nignore = rest; rest = quota; } else { nignore = -1 * nignore; npasses += 1; } switch (npasses) { case 5: return M4_RDX_radix_cluster_lp(ret, b, &nignore, &rest, "a, "a, "a, NULL); case 4: return M4_RDX_radix_cluster_lp(ret, b, &nignore, &rest, "a, "a, NULL); case 3: return M4_RDX_radix_cluster_lp(ret, b, &nignore, &rest, "a, NULL); case 2: return M4_RDX_radix_cluster_lp(ret, b, &nignore, &rest, NULL); case 1: return M4_RDX_radix_cluster_lp(ret, b, &nignore, NULL); default: assert(1 <= npasses && npasses <= 5); } return GDK_FAIL;}#line 2393 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx"intM4_RDX_radix_bits(int *nbits, BAT *b){ int radix_bits = (BAThordered(b) >> 1); if (BAThordered(b) & 1) if (b->htype > TYPE_void && ATOMstorage(b->htype) <= TYPE_int) { *nbits = ATOMsize(b->htype) << 3; if (*nbits != radix_bits) { b->hsorted = (*nbits << 1) | 1; b->batDirtydesc = TRUE; } return GDK_SUCCEED; } *nbits = radix_bits; return GDK_SUCCEED;}#line 2415 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx"#define RADIX_COUNT_MASK 65535intM4_RDX_radix_count(BAT **res, BAT *b, int *shift, int *radix){ size_t mask = (size_t) 1 << *radix; int off = *shift; BAT *bn = BATnew(TYPE_int, TYPE_int, mask); int bunsize; if (bn == NULL) return GDK_FAIL; *res = bn; bunsize = BUNsize(b); /* assert(0 <= *radix && *radix < 32); */ bn->tsorted = 0; mask = (mask - 1) << off; if (BATcount(b)) { BUN p = BUNfirst(b); BUN q = BUNlast(b); int tcur, tbak = (int) (oid_RADIX(BUNtail(b, p), mask) >> off); oid hcur, hbak = *(oid *) BUNhead(b, p); int cnt = 1; while ((p += bunsize) < q) { tcur = (int) (oid_RADIX(BUNtail(b, p), mask) >> off); hcur = *(oid *) BUNhead(b, p); if (tcur != tbak || hcur < hbak) { BUNfastins(bn, &tbak, &cnt); if (tcur < tbak) bn->hsorted = 0; tbak = tcur; cnt = 1; } else { cnt++; } hbak = hcur; } BUNfastins(bn, &tbak, &cnt); } if (b->halign == 0) { b->halign = OIDnew(1); b->batDirtydesc = TRUE; } /* sign the tail column of the radix_count so we can check later */ bn->talign = (*radix << 24) ^ (*shift << 16) ^ b->halign; return GDK_SUCCEED;}intM4_RDX_radix_count2(BAT **res, BAT *b, int *shift, int *radix){ size_t mask = (size_t) 1 << *radix; int off = *shift; BAT *bn = *res = BATnew(TYPE_int, TYPE_int, mask); size_t xx; int *cnt; BUN p, q; if (bn == NULL) return GDK_FAIL; cnt = (int *) BUNfirst(bn); /* 64bit: assert(0 <= *radix && *radix < 32); */ /* initialize the result BAT with ascending head and zero counts */ for (xx = 0; xx < mask; xx++) { cnt[xx + xx] = (int) xx; cnt[xx + xx + 1] = 0; } bn->batBuns->free = BUNsize(bn) * mask; BATsetcount(bn, mask); mask = (mask - 1) << off; /* compute the result: a histogram of bit patterns */ BATloopFast(b, p, q, xx) { oid idx = oid_RADIX(BUNtail(b, p), mask) >> off; cnt[idx + idx + 1]++; } BATkey(bn, TRUE); bn->hsorted = GDK_SORTED; bn->tsorted = 0; return GDK_SUCCEED;}#line 2516 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx"#define hash_atom(x) ((hash_t)((x)%h->mask))#define hash_simple(x)((hash_t)(x))#define bunfastINSERT(b,h,t,dummy1,dummy2,dummy3,dummy4) bunfastins(b,h,t)#define intfastINSERT(b,h,t,dummy1,dummy2,dummy3,dummy4) {\ if (_dst >= _end) {\ ALGODEBUG THRprintf(GDKout, "#phash_join: intfastINSERT: BATextend!\n");\ b->batBuns->free = ((BUN) _dst) - b->batBuns->base;\ if (BATextend((b), BATgrows(b)) == NULL) goto bunins_failed;\ _dst = (int*) (b->batBuns->base + b->batBuns->free);\ _end = (int*) (b->batBuns->base + b->batBuns->size);\ }\ _dst[0] = *(int*) h;\ _dst[1] = *(int*) t;\ _dst += 2;\}#define NOTSOfastINSERT(b, dummy1, dummy2, hb, hp, tb, tp) { \ REGISTER BUN _p = BUNlast(b);\ REGISTER int _bunsize = BUNsize(b);\ if ((b)->batBuns->free + _bunsize > (b)->batBuns->size) {\ if (BATextend((b), BATgrows(b)) == NULL) goto bunins_failed;\ _p = BUNlast(b);\ }\ (b)->batBuns->free += _bunsize;\ (b)->batCount ++;\ integerCopy((int*) BUNhloc(b,_p), hb, hp, width1, distance1);\ integerCopy((int*) BUNtloc(b,_p), BATmirror(tb), tp, width2, distance2);\ }static inthash_join(Hash *h, BAT *bn, BAT *l, BUN llo, BUN lhi, BAT *r, BUN rlo, BUN rhi, int rd, int cutoff){ int xx; size_t zz = BUNindex(r, rlo); hash_t yy = 0; ptr nil = ATOMnilptr(l->ttype); if ((size_t) (rhi - rlo) > h->lim) { /* simplistic skew handling by holding on to initial hash mask size */ h->lim = rhi - rlo; h->hash = (hash_t *) GDKrealloc(h->hash, (h->mask + 1) * sizeof(hash_t) + h->lim); h->link = h->hash + h->mask + 1; } memset(h->hash, ~0, (h->mask + 1) * sizeof(hash_t));#line 2585 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" if (ATOMstorage(r->htype) < TYPE_int || ATOMstorage(r->htype) > TYPE_dbl || ATOMstorage(l->htype) != TYPE_int || ATOMstorage(r->ttype) != TYPE_int) { int any = ATOMstorage(r->htype); size_t mask = ~(size_t) 0; if (cutoff) { #line 2628 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" ALGODEBUG THRprintf(GDKout, "#phash_join: hash_join[any,head,tail,atom,bun,head,tail,break;]\n"); /* build phase */ for (xx=BUNsize(r); rlo<rhi; rlo+=xx) { ptr p = BUNhead(r,rlo); if (!atom_EQ(p, nil, any)) { hash_t v = hash_atom(any_RADIX(p,mask)>>rd); h->link[yy] = h->hash[v]; h->hash[v] = yy++; } } /* probe phase */ for (xx=BUNsize(l); llo<lhi; llo+=xx) { ptr v = BUNtail(l,llo); for(yy=h->hash[hash_atom(any_RADIX(v,mask)>>rd)]; yy!=~(hash_t)0; yy=h->link[yy]) { rlo = BUNptr(r,yy+zz); if (atom_EQ(BUNhead(r,rlo),v,any)) { bunfastINSERT(bn, BUNhead(l,llo), BUNtail(r,rlo), l, llo, r, rlo); break; } } }#line 2592 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" } else { #line 2628 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" ALGODEBUG THRprintf(GDKout, "#phash_join: hash_join[any,head,tail,atom,bun,head,tail,]\n"); /* build phase */ for (xx=BUNsize(r); rlo<rhi; rlo+=xx) { ptr p = BUNhead(r,rlo); if (!atom_EQ(p, nil, any)) { hash_t v = hash_atom(any_RADIX(p,mask)>>rd); h->link[yy] = h->hash[v]; h->hash[v] = yy++; } } /* probe phase */ for (xx=BUNsize(l); llo<lhi; llo+=xx) { ptr v = BUNtail(l,llo); for(yy=h->hash[hash_atom(any_RADIX(v,mask)>>rd)]; yy!=~(hash_t)0; yy=h->link[yy]) { rlo = BUNptr(r,yy+zz); if (atom_EQ(BUNhead(r,rlo),v,any)) { bunfastINSERT(bn, BUNhead(l,llo), BUNtail(r,rlo), l, llo, r, rlo); } } }#line 2594 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB5/src/modules/mal/radix.mx" }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -