📄 group.mx
字号:
}#endif bn->batBuns->free = ((BUN) dst) - bn->batBuns->base; BATsetcount(bn, bn->batBuns->free/BUNsize(bn)); bn->tsorted = 0; if (!(bn->batDirty&2)) bn = BATsetaccess(bn, BAT_READ); ALIGNsetH(bn,b); if (hash && !m) GDKfree(hash); return m ? NULL : map2histo(map);bunins_failed: BBPreclaim(bn); if (hash && !m) GDKfree(hash); return NULL;}@cinttailtype(BAT *b, int str_trick){ /* tailtype: pick the type used to select a grouping implementation for the tail of b. Starts from ATOMstorage(b->ttype); TYPE_flt is remapped to TYPE_int, TYPE_dbl to TYPE_lng, and TYPE_str to TYPE_var when str_trick is nonzero and GDK_ELIMDOUBLES holds for b->theap. Any other storage type is returned unchanged. */ int tpe = ATOMstorage(b->ttype); /* standard type remappings */ /* more daring remappings possible under simple equality */ if (tpe == TYPE_flt) { return TYPE_int; } else if (tpe == TYPE_dbl) { return TYPE_lng; } else if (tpe == TYPE_str && str_trick && GDK_ELIMDOUBLES((b->theap))) { return TYPE_var; /* string offsets are identifying integers */ } return tpe;}/* Generate both 'normal' CTgroup and clustered CTgroups */@= wrappedgroupinner@:groupAll(chr,tloc,simple,@1,@2)@@:groupAll(sht,tloc,simple,@1,@2)@@:groupAll(int,tloc,simple,@1,@2)@@:groupAll(lng,tloc,simple,@1,@2)@@:groupAll(any,tail,atom,@1,@2)@/* Generate both 'normal' CTgroup and parameterized CTgroups */@= wrappedgroupouter@:wrappedgroupinner(unclustered,@1)@@:wrappedgroupinner(clustered,@1)@@c@:wrappedgroupouter(STANDARD)@@:wrappedgroupouter(CUSTOM)@@= returnvalue @1 =@c#define declare_mask_STANDARD /* fixed */#define declare_mask_CUSTOM hash_t mask = (1 << *N) - 1;intCTgroup(BAT **retval, /* put pointer to BAT[oid,oid] record here. */ BAT **hbat, /* put histogram BAT here */ BAT *b /* pointer to BAT[oid,oid] record. */){ /* CTgroup: the whole body is the CTgroupbody macro instantiated with STANDARD; the results are stored through retval and hbat. */ @:CTgroupbody(STANDARD)@}intCTgroup_custom(BAT **retval, /* put pointer to BAT[oid,oid] record here. */ BAT **hbat, /* put histogram BAT here */ BAT *b, /* pointer to BAT[oid,oid] record. 
*/ int *N, /* number of bits for hashmask */ int *rng /* expected number of entries in map */){ /* CTgroup_custom: same CTgroupbody macro, instantiated with CUSTOM, so declare_mask_CUSTOM defines mask as (1 << *N) - 1 and the CUSTOM group routines receive mask and *rng. NOTE(review): 1 << *N is evaluated in int; *N at or above the bit width of int would make the shift undefined - confirm callers bound N. */ @:CTgroupbody(CUSTOM)@}static int bits(size_t i){ /* bits: count how many right shifts it takes to reduce i to zero, i.e. the 1-based position of the highest set bit. Asserts i > 0. */ int sh; assert(i>0); for (sh = 0; i != 0; sh++) { i >>= 1; } return sh;}@= CTgroupbody BAT *histo = NULL, *bn = NULL; declare_mask_@1 /* CTgroupbody (macro; argument 1 is STANDARD or CUSTOM): builds the grouping BAT bn and the histogram BAT histo for b, stores them in *retval and *hbat, and returns grp_new(bn, histo). Returns GDK_FAIL when bn cannot be allocated or the group routine yields a NULL histo. */ /* b->tkey, simply return mirror(0), and hist = project(reverse(bn),1) */ if (b->tkey) { int one = 1; BAT *v = VIEWcombine(b); bn = v; if (b->batRestricted == BAT_WRITE) { bn = BATcopy(v, v->htype, v->ttype, FALSE); BBPreclaim(v); } histo = BATconst(BATmirror(bn), TYPE_int, &one); /* NOTE(review): the results of VIEWcombine, BATcopy and BATconst are used without a NULL check here - confirm whether they can fail. */ } else { bn = BATnew(b->htype, TYPE_oid, BATcount(b)); if (bn == NULL) { return GDK_FAIL; } /* Poor man's clustered test: sorted & !keyed => clustered */ /* NOTE(review): this is the else branch of if (b->tkey), so !(b->tkey) is always true at this point. */ if ( ((b->tsorted)&1) && !(b->tkey) ) { @:choosegroup@1(tailtype(b,TRUE),bn,NULL,clustered,histo)@ } else { @:choosegroup@1(tailtype(b,TRUE),bn,NULL,unclustered,histo)@ } if (histo == NULL) { BBPreclaim(bn); return GDK_FAIL; } bn->tsorted = 0; } if (BATcount(histo) == BATcount(bn)) { BATkey(BATmirror(bn),TRUE); BATkey(BATmirror(histo),TRUE); } ALIGNsetH(bn, b); if (!(bn->batDirty&2)) bn = BATsetaccess(bn, BAT_READ); *retval = bn; *hbat = histo; return grp_new(bn, histo);@= choosegroupSTANDARD /* Choose appropriate @4 CTgroup implementation */ switch(@1) { case TYPE_chr: @?@5:returnvalue(@5)@ CTgroup_chr_@4_STANDARD(b,@2,@3); break; case TYPE_sht: @?@5:returnvalue(@5)@ CTgroup_sht_@4_STANDARD(b,@2,@3); break; case TYPE_int: @?@5:returnvalue(@5)@ CTgroup_int_@4_STANDARD(b,@2,@3); break; case TYPE_lng: @?@5:returnvalue(@5)@ CTgroup_lng_@4_STANDARD(b,@2,@3); break; default: @?@5:returnvalue(@5)@ CTgroup_any_@4_STANDARD(b,@2,@3); break; }@= choosegroupCUSTOM /* Choose appropriate @4 CTgroup implementation */ switch(@1) { case TYPE_chr: @?@5:returnvalue(@5)@ CTgroup_chr_@4_CUSTOM(mask,*rng,b,@2,@3); break; case TYPE_sht: @?@5:returnvalue(@5)@ CTgroup_sht_@4_CUSTOM(mask,*rng,b,@2,@3); break; case TYPE_int: @?@5:returnvalue(@5)@ CTgroup_int_@4_CUSTOM(mask,*rng,b,@2,@3); 
break; case TYPE_lng: @?@5:returnvalue(@5)@ CTgroup_lng_@4_CUSTOM(mask,*rng,b,@2,@3); break; default: @?@5:returnvalue(@5)@ CTgroup_any_@4_CUSTOM(mask,*rng,b,@2,@3); break; }@= deriveBAT *CTderive_@1_@2_@5(BAT* ct_histo, BAT *ct_map, BAT *b, BAT *bn, map_T *m){ /* CTderive template. Going by the wrappedderive instantiations, macro argument 1 is the ct_map lookup method (sync or hash), 2 the tail type, 3 the tail accessor suffix (tloc or tail), 4 simple or atom, 5 clustered or unclustered. For every BUN of b it locates the matching ct_map entry, hashes the pair [hcur, tcur], and either finds that pair in map or appends it; e->gid (or the map position zz when m is non-NULL) is then written straight into bn. Returns map2histo(map) when m is NULL and NULL otherwise; on bunins_failed bn is reclaimed and NULL is returned. */ oid *dst = (oid*) BUNfirst(bn); size_t yy = BUNsize(ct_map), zz, mapsize; hash_t xx, *hash; BUN p, q, r, cp = BUNfirst(ct_map) - yy; mapentry_t entry, *e; BAT *map; int n = bits(BATcount(ct_histo)),*N = &n; declare_@4 declare_mask_CUSTOM int custom_rng = BATcount(ct_histo); /* expected number of groups */ hash_t custom_MASK = mask; map_init_CUSTOM(map,hash,mask,entry,mapsize); if (map == NULL) return NULL; /* NOTE(review): this early return leaves bn alone, while the bunins_failed exit reclaims it; the caller cannot tell the two NULL results apart - confirm who owns bn on failure. Also, bits() asserts on 0, so an empty ct_histo would trip it in debug builds - confirm it cannot be empty. */ /* core hash grouping algorithm */ BATloopFast(b, p, q, xx) { ptr tcur = BUN@3(b,p); hash_t c; oid hcur; declare_@5 /* find corresponding value in 'ct_map' */ match_@1(ct_map, BUNhead(b,p), cp); hcur = *(oid*) BUNtloc(ct_map,cp); /* hash-lookup of [hcur,tcur] in map */ c = (((hash_t) hcur) ^ HASH_@2(tcur)); c = mix_int(c) & mask; chain_@5 { r = BUNptr(map,zz); e = (mapentry_t*) BUNhloc(map,r); if (tst_derive_@5(@4_EQ, BUN@3(map,r), @2)) { if (m == NULL) e->cnt++; goto found; } } /* not found-> insert new element in map (and hash) */ if (m) { zz = mapsize; } else { entry.gid = *(oid*) BUNhead(b,p); } entry.hcur = hcur; entry.link = hash[c]; hash[c] = mapsize++; bunfastins(map, &entry, tcur); e = &entry;found: /* ultra-fast 'insert' of [oid,gid] into result ct */ if (bn->htype) *dst++ = *(oid*) BUNhead(b,p); *dst++ = m?zz:e->gid; } bn->batBuns->free = ((BUN) dst) - bn->batBuns->base; BATsetcount(bn, bn->batBuns->free/BUNsize(bn)); if (hash && !m) GDKfree(hash); if (!(bn->batDirty&2)) bn = BATsetaccess(bn, BAT_READ); return m?NULL:map2histo(map);bunins_failed: if (hash && !m) GDKfree(hash); BBPreclaim(bn); return NULL;}@c/* Generate both 'normal' CTderive and clustered CTderive */@= 
wrappedderive@:derive(sync,chr,tloc,simple,@1)@@:derive(sync,sht,tloc,simple,@1)@@:derive(sync,int,tloc,simple,@1)@@:derive(sync,lng,tloc,simple,@1)@@:derive(sync,any,tail,atom,@1)@@:derive(hash,chr,tloc,simple,@1)@@:derive(hash,sht,tloc,simple,@1)@@:derive(hash,int,tloc,simple,@1)@@:derive(hash,lng,tloc,simple,@1)@@:derive(hash,any,tail,atom,@1)@@c@:wrappedderive(unclustered)@@:wrappedderive(clustered)@@= choosederive /* Choose appropriate (@1 && @2) CTderive implementation */ switch(tt) { case TYPE_chr: histo = CTderive_@1_chr_@2(ct_histo,ct_map,b,bn,m); break; case TYPE_sht: histo = CTderive_@1_sht_@2(ct_histo,ct_map,b,bn,m); break; case TYPE_int: histo = CTderive_@1_int_@2(ct_histo,ct_map,b,bn,m); break; case TYPE_lng: histo = CTderive_@1_lng_@2(ct_histo,ct_map,b,bn,m); break; default: histo = CTderive_@1_any_@2(ct_histo,ct_map,b,bn,m); break; }@cintderive(BAT **H, BAT **M, BAT *ct_histo, BAT *ct_map, BAT *b, int tt, map_T *m){ /* derive: refine the grouping (ct_map, ct_histo) with the tail of b. Stores the resulting grouping in *M and its histogram in *H and returns grp_new(bn, histo); returns GDK_FAIL if the result BAT cannot be allocated. tt selects the typed CTderive variant; m is passed through to it. */ BAT *histo = NULL, *bn = NULL; int synced = ALIGNsynced(ct_map, b); /* create the result bat 'bn' */ int ht = (synced && BAThdense(b)) ? 
TYPE_void : TYPE_oid; if (!ct_map->tkey) { /* cannot derive more groups */ /* NOTE(review): the remark above reads as if it belongs to the else branch below (ct_map->tkey set), where ct_map and ct_histo are reused; this branch is the one that derives - confirm. */ bn = BATnew(ht, TYPE_oid, BATcount(b)); if (bn == NULL) { return GDK_FAIL; } /* CTderive with correct lookup method (hash,synced) and type */ if (synced) { if (((ct_map->tsorted) & 1)) { @:choosederive(sync,clustered)@ } else { @:choosederive(sync,unclustered)@ } } else { if (((ct_map->tsorted) & 1)) { @:choosederive(hash,clustered)@ } else { @:choosederive(hash,unclustered)@ } } if (histo == NULL) { assert(histo); BBPunfix(bn->batCacheid); } /* NOTE(review): with assert compiled out, control falls through after the BBPunfix and bn is still dereferenced below and handed to grp_new with a NULL histo; CTgroupbody returns GDK_FAIL in the equivalent case. The CTderive routines also reclaim bn themselves on bunins_failed, so this may release it a second time - confirm intended ownership. */ /* postprocess the result bat 'bn' */ bn->tsorted = 0; if (BATcount(bn) == BATcount(b)) { ALIGNsetH(bn, b); } else { bn->hsorted = BAThordered(b); if (b->hkey) BATkey(bn, TRUE); } } else { bn = ct_map; histo = ct_histo; if (!synced) { bn = BATsemijoin(ct_map, b); histo = BATsemijoin(ct_histo, BATmirror(bn)); /* NOTE(review): neither BATsemijoin result is checked for NULL - confirm whether it can fail. */ } else { BBPfix(ct_map->batCacheid); BBPfix(ct_histo->batCacheid); } } *M = bn; *H = histo; return grp_new(bn, histo);}intCTderive(BAT **M, BAT **H, BAT *ct_histo, BAT *ct_map, BAT *b){ /* CTderive: wrapper around derive() with tt = tailtype(b, TRUE) and m = NULL. Note the swapped order: this function takes (M, H) while derive() takes (H, M). */ int ret; ret = derive(H, M, ct_histo, ct_map, b, tailtype(b, TRUE), NULL); return ret;}@-The routine CThistosum takes an grouping and a histogram and producesa new histogram by summing the old values within the same group.@cintCThistosum(BAT **retval, /* put pointer to BAT[oid,int] record here. */ BAT *b, /* pointer to BAT[oid,oid] record. */ BAT *c /* pointer to BAT[oid,int] record. 
*/){ /* CThistosum: builds res as a BAT[oid,int] and returns it through retval. Returns GDK_FAIL only when res cannot be allocated; otherwise GDK_SUCCEED, even after a GDKerror inside the loop. NOTE(review): c is explicitly unused (see the void cast below); the loop iterates b, reads the tail of b as an int, and does the first lookup in b itself, although the parameter comments describe b as BAT[oid,oid] and c as the BAT[oid,int] histogram. Some of the b references presumably should be c - confirm against the intended algorithm. */ BAT *res = BATnew(TYPE_oid, TYPE_int, BATcount(b)); BUN p, q, qb; int xx, i, *z; oid *ot, *oh; (void) c; if (res == NULL) { return GDK_FAIL; } BATloopFast(b, p, q, xx) { oh = (oid *) BUNhloc(b, p); i = *(int *) BUNtloc(b, p); BUNfndOID(qb, b, oh); if (qb == NULL) { GDKerror("CThistosum: Matching count entry not found\n"); continue; } ot = (oid *) BUNtloc(b, qb); BUNfndOID(qb, res, ot); if (qb == NULL) { BUNins(res, ot, &i, FALSE); } else { z = (int *) BUNtloc(res, qb); *z += i; } } res->hsorted = res->tsorted = 0; if (!(res->batDirty&2)) res = BATsetaccess(res, BAT_READ); *retval = res; return GDK_SUCCEED;}intCTsubhisto(BAT **ret, BAT *sel, BAT *grp, BAT *dom){ /* CTsubhisto: for every head oid of dom, creates an entry in bn with count 0 and links it into a chained hash table (a power-of-two number of buckets, at least 256); then walks grp and, for each BUN whose corresponding sel tail value is TRUE, increments the count of the entry matching the grp tail oid. The head of bn is allocated as TYPE_idxentry and relabelled TYPE_oid after filling. The result is returned through ret; GDK_FAIL is returned if bn or the hash table cannot be allocated. NOTE(review): filter advances by BUNsize(sel) once per grp BUN, so sel is assumed to have at least as many BUNs as grp, in the same order - confirm. */ bit *filter = (bit *) BUNtloc(sel, BUNfirst(sel)); size_t size = BATcount(dom); int xx, zz; hash_t yy, mask, *hash = NULL; BUN r, p, q; BAT *bn = BATnew(TYPE_idxentry, TYPE_int, size); if (bn == NULL) return GDK_FAIL; /* we know the domain; go for perfect hashing */ for (mask = 1; mask < size; mask <<= 1) ; if (mask < 256) mask = 256; hash = (hash_t *) GDKmalloc(sizeof(hash_t) * mask); if (hash == NULL) { BBPreclaim(bn); return GDK_FAIL; } for (yy = 0; yy < mask; yy++) { hash[yy] = HASH_MAX; } mask--; /* insert all values in the hash table, and in bn with count zero */ r = BUNfirst(bn); yy = 0; BATloopFast(dom, p, q, xx) { oid v = *(oid *) BUNhloc(dom, p); hash_t c = v & mask; ((idxentry_t *) BUNhloc(bn, r))->hcur = v; ((idxentry_t *) BUNhloc(bn, r))->link = hash[c]; *(int *) BUNtloc(bn, r) = 0; r = BUNnext(bn, r); hash[c] = yy; yy++; } bn->batBuns->free = (char *) r - (char *) Bunbase(bn); BATsetcount(bn, bn->batBuns->free/BUNsize(bn)); bn->tsorted = 0; bn->htype = BATmirror(bn)->ttype = TYPE_oid; /* assert(offsetof(idxentry_t,hcur) == 0); ALIGNsetH(bn, dom); */ /* add the counts for this selection using the hash table */ zz = BUNsize(sel); BATloopFast(grp, p, q, xx) { if (*filter == TRUE) { oid v = *(oid *) BUNtloc(grp, p); hash_t c = v & mask; for (yy = hash[c]; yy != HASH_MAX; yy = ((idxentry_t *) 
BUNhloc(bn, r))->link) { r = BUNptr(bn, yy); if (((idxentry_t *) BUNhloc(bn, r))->hcur == v) { *(int *) BUNtloc(bn, r) += 1; break; } } } filter += zz; } GDKfree(hash); if (!(bn->batDirty&2)) bn = BATsetaccess(bn, BAT_READ); *ret = bn; return GDK_SUCCEED;}@+ Support for Order-by@c#define DEFAULT_SIZE 10000struct refine { var_t off; oid o;};static INLINE oid *sort_flush(struct refine *buf, size_t size, int tpe, BUN base, oid *dst, oid *idp, int reverse){ int (*cmp) (ptr, ptr) = BATatoms[tpe].atomCmp; struct refine *end = buf + size; oid id = *idp + 1; ptr cur, val; /* StM: we don't need to sort voids, do we??? */ if (tpe != TYPE_void) { /* qsort works fine for small amount of tuples; with few duplicates */ if (reverse) { GDKqsort_rev(buf, base, size, (int) sizeof(struct refine), tpe, offsetof(struct refine, off)); } else { GDKqsort(buf, base, size, (int) sizeof(struct refine), tpe, offsetof(struct refine, off)); } } cur = base + buf->off; while (buf < end) { val = base + buf->off; if ((*cmp) (cur, val)) { cur = val;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -