⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gdk_batop.mx

📁 这个是内存数据库中的一个管理工具
💻 MX
📖 第 1 页 / 共 4 页
字号:
#endif#ifndef NOEXPAND_LNG	case TYPE_lng:		@:restrict1(simple,lng,loc)@		break;#endif	default:		if (b->hvarsized) {			@:restrict1(atom,t,var)@		} else {			@:restrict1(atom,t,loc)@		}		break;	}	/* second phase */	if (b->hvarsized) {		switch (ATOMstorage(t = b->ttype)) {#ifndef NOEXPAND_CHR		case TYPE_chr:			@:restrict2(simple,chr,var,loc)@			break;#endif#ifndef NOEXPAND_BTE		case TYPE_bte:			@:restrict2(simple,bte,var,loc)@			break;#endif#ifndef NOEXPAND_SHT		case TYPE_sht:			@:restrict2(simple,sht,var,loc)@			break;#endif#ifndef NOEXPAND_INT		case TYPE_int:			@:restrict2(simple,int,var,loc)@			break;#endif#ifndef NOEXPAND_FLT		case TYPE_flt:			@:restrict2(simple,flt,var,loc)@			break;#endif#ifndef NOEXPAND_DBL		case TYPE_dbl:			@:restrict2(simple,dbl,var,loc)@			break;#endif#ifndef NOEXPAND_LNG		case TYPE_lng:			@:restrict2(simple,lng,var,loc)@			break;#endif		default:			if (b->tvarsized) {				@:restrict2(atom,t,var,var)@			} else {				@:restrict2(atom,t,var,loc)@			}			break;		}	} else {		switch (ATOMstorage(t = b->ttype)) {#ifndef NOEXPAND_CHR		case TYPE_chr:			@:restrict2(simple,chr,loc,loc)@			break;#endif#ifndef NOEXPAND_BTE		case TYPE_bte:			@:restrict2(simple,bte,loc,loc)@			break;#endif#ifndef NOEXPAND_SHT		case TYPE_sht:			@:restrict2(simple,sht,loc,loc)@			break;#endif#ifndef NOEXPAND_INT		case TYPE_int:			@:restrict2(simple,int,loc,loc)@			break;#endif#ifndef NOEXPAND_FLT		case TYPE_flt:			@:restrict2(simple,flt,loc,loc)@			break;#endif#if !defined(NOEXPAND_LNG) || !defined(NOEXPAND_DBL)		case TYPE_dbl:		case TYPE_lng:			@:restrict2(simple,lng,loc,loc)@			break;#endif		default:			if (b->tvarsized) {				@:restrict2(atom,t,loc,var)@			} else {				@:restrict2(atom,t,loc,loc)@			}			break;		}	}	GDKfree(mark);	/* propagate alignment info */	if (BATcount(bn) == BATcount(b))		ALIGNset(bn, b);	ESTIDEBUG THRprintf(GDKout, "#BATrestrict: actual resultsize: " SZFMT "\n", BATcount(bn));	return bn;      bunins_failed:	GDKfree(mark);	BBPreclaim(bn);	return NULL;}@}@+ BAT Sorting@%BATsort@ returns a sorted copy. @%BATorder@ sorts the BAT itself.@{@c#ifdef HAVE_RESTRICT#define __r     restrict#else#ifdef HAVE___RESTRICT__#define __r     __restrict__#else#define __r#endif#endif@:sort(1,,,_rev,>)@@:sort(GDK_SORTED_REV,_REV,_rev,,<)@@= sort_tpestatic intchk_order@3_@1(@1*__r col){	size_t i, r = 1;	for (i = 0; i < 128; i += 4) {		r &= (col[i+1] @2= col[i+0]) &		     (col[i+2] @2= col[i+1]) &		     (col[i+3] @2= col[i+2]) &		     (col[i+4] @2= col[i+3]);	}	return r != 0;}@= sort@:sort_tpe(chr,@5,@3)@@:sort_tpe(bte,@5,@3)@@:sort_tpe(sht,@5,@3)@@:sort_tpe(int,@5,@3)@@:sort_tpe(lng,@5,@3)@@:sort_tpe(flt,@5,@3)@@:sort_tpe(dbl,@5,@3)@static intchk_order@3_oid_oid(oid*__r col){	size_t i, r = 1;	for (i = 0; i < 256; i += 8) {		r &= (col[i+2] @5= col[i+0]) &		     (col[i+4] @5= col[i+2]) &		     (col[i+6] @5= col[i+4]) &		     (col[i+8] @5= col[i+6]);	}	return r != 0;}intBATordered@3(BAT* b){	size_t cnt = BATcount(b);	if ((BAThordered(b) & @1) == 0 && cnt > 0) {		int (*cmp) (ptr, ptr) = BATatoms[b->htype].atomCmp;		char *cur = (char *) BUNhloc(b, BUNfirst(b));		char *end = (char *) BUNhloc(b, BUNlast(b));		int xx = BUNsize(b);		/* we may have negative information already; this saves a scan */		if (b->H->nosorted@3 > BUNindex(b, cur) && b->H->nosorted@3 < BUNindex(b, end) && cmp(BUNhead(b, BUNptr(b, b->H->nosorted@3 - 1)), BUNhead(b, BUNptr(b, b->H->nosorted@3))) @5 0) {			return FALSE;		}		/* for [tpe,void] and [OID,OID] bats, we have fast 128-at-a-time routines */		if (ATOMstorage(b->htype) == ATOMstorage(TYPE_oid) && BUNsize(b) == 2 * ATOMsize(TYPE_oid)) {			while (cur + 256 * sizeof(oid) < end) {				if (!chk_order@3_oid_oid((oid *) cur))					break;				cur += 256 * sizeof(oid);			}		} else if (ATOMstorage(b->htype) == TYPE_chr && BUNsize(b) == sizeof(chr)) {			while (cur + 128 * sizeof(chr) < end) {				if (!chk_order@3_chr((chr *) cur))					break;				cur += 128 * sizeof(chr);			}		} else if (ATOMstorage(b->htype) == TYPE_bte && BUNsize(b) == sizeof(bte)) {			while (cur + 128 * sizeof(bte) < end) {				if (!chk_order@3_bte((bte *) cur))					break;				cur += 128 * sizeof(bte);			}		} else if (ATOMstorage(b->htype) == TYPE_sht && BUNsize(b) == sizeof(sht)) {			while (cur + 128 * sizeof(sht) < end) {				if (!chk_order@3_sht((sht *) cur))					break;				cur += 128 * sizeof(sht);			}		} else if (ATOMstorage(b->htype) == TYPE_int && BUNsize(b) == sizeof(int)) {			while (cur + 128 * sizeof(int) < end) {				if (!chk_order@3_int((int *) cur))					break;				cur += 128 * sizeof(int);			}		} else if (ATOMstorage(b->htype) == TYPE_lng && BUNsize(b) == sizeof(lng)) {			while (cur + 128 * sizeof(lng) < end) {				if (!chk_order@3_lng((lng *) cur))					break;				cur += 128 * sizeof(lng);			}		} else if (ATOMstorage(b->htype) == TYPE_flt && BUNsize(b) == sizeof(flt)) {			while (cur + 128 * sizeof(flt) < end) {				if (!chk_order@3_flt((flt *) cur))					break;				cur += 128 * sizeof(flt);			}		} else if (ATOMstorage(b->htype) == TYPE_dbl && BUNsize(b) == sizeof(dbl)) {			while (cur + 128 * sizeof(dbl) < end) {				if (!chk_order@3_dbl((dbl *) cur))					break;				cur += 128 * sizeof(dbl);			}		}		/* check sortedness tuple-by-tuple */		if (b->hheap) {			BUN base = b->hheap->base;			char *prv = base + *(var_t *) cur;			cur += xx;			while (cur < end) {				char *val = base + *(var_t *) cur;				if (cmp(prv, val) @5 0) {					/* record negative position info */					b->H->nosorted@3 = BUNindex(b, cur);					return FALSE;				}				prv = val;				cur += xx;			}		} else {			char *prv = cur;			cur += xx;			while (cur < end) {				if (cmp(prv, cur) @5 0) {					/* record negative position info */					b->H->nosorted@3 = BUNindex(b, cur);					return FALSE;				}				prv = cur;				cur += xx;			}		}	}	/* it is sorted@3! set the properties */	if ((b->hsorted & (bit) GDK_SORTED@2) == 0) {		b->batDirtydesc = TRUE;	}	b->hsorted |= (bit) GDK_SORTED@2;	return TRUE;}@:sort1(@1,@2,@3,@4,@5,,q)@@:sort1(@1,@2,@3,@4,@5,s,s)@@= sort1BAT *BAT@6sort@3(BAT *b){	BAT *bn;	int tt = b->ttype;	BATcheck(b, "BATsort@3: BAT");	if (b->htype == TYPE_void && b->hseqbase == oid_nil) {		/* b's head is void-nil, hence we return a read-only copy/view of b */		return BATcopy(b, b->htype, b->ttype, FALSE);	}	if ((GDK_SORTED@2 == GDK_SORTED && b->htype == TYPE_void) || (b->htype != TYPE_void && BATordered@3(b))) {		/* b is already ordered as desired, hence we return a read-only copy/view of b */		return BATcopy(b, b->htype, b->ttype, FALSE);	}	if (BATcount(b) < 2) {		/* with fewer than 2 BUNs, b is ordered, hence we return a read-only copy/view of b */		b->hsorted = (bit) GDK_SORTED@2;		if (b->htype == TYPE_oid) {			oid h = * (oid *) BUNhloc(b, BUNfirst(b));			if (h != oid_nil) {				b->hdense = 1;				b->hseqbase = h;			}		}		return BATcopy(b, b->htype, b->ttype, FALSE);	}	/* a void tail column 0,1,2,3,... must be materialized to oid before sorting */	if (tt == TYPE_void && b->tseqbase != oid_nil) {		tt = TYPE_oid;	}	if ((GDK_SORTED@2 == GDK_SORTED_REV && b->htype == TYPE_void) || 	    (b->htype != TYPE_void && (b->hkey || '@7' != 's') && BATordered@4(b))) {		/* b is ordered in the opposite direction, hence we return a reverted copy of b */		/* a void head column must be materialized to oid before reverting */		int ht = b->htype;		if (ht == TYPE_void && b->hseqbase != oid_nil) {			ht = TYPE_oid;		}		bn = BATrevert(BATcopy(b, ht, tt, TRUE));		if (bn == NULL)			return bn;		return bn;	}	bn = BATcopy(b, b->htype, tt, TRUE);	if (bn == NULL)		return bn;	return BAT@6order@3(bn);}BAT *BAT@6order@3(BAT *b){	BATcheck(b, "BATorder@3: BAT");	if (b->htype == TYPE_void && b->hseqbase == oid_nil) {		/* b's head is void-nil, hence we return b as is */		return BATcopy(b, b->htype, b->ttype, FALSE);	}	if ((GDK_SORTED@2 == GDK_SORTED && b->htype == TYPE_void) || (b->htype != TYPE_void && BATordered@3(b))) {		/* b is already ordered as desired, hence we return b as is */		return b;	}	if (BATcount(b) < 2) {		/* with less than 2 BUNs, b is ordered, hence we return b as is */		b->hsorted = (bit) GDK_SORTED@2;		return b;	}	if (b->ttype == TYPE_void && b->tseqbase != oid_nil) {		/* materialize void-tail in-place */		b = BATmaterializet(b, BATcount(b));	}	if ((GDK_SORTED@2 == GDK_SORTED_REV && b->htype == TYPE_void) || 	    (b->htype != TYPE_void && (b->hkey || '@7' != 's') && BATordered@4(b))) {		/* b is ordered in the opposite direction, hence we revert b */		b = BATrevert(b);		return b;	}	GDK@7sort@3(BUNfirst(b), (b->hheap)?b->hheap->base:NULL, BATcount(b), BUNsize(b), b->htype, b->hloc);	HASHdestroy(b);	ALIGNdel(b, "BATorder@3", FALSE);	b->hsorted = (bit) GDK_SORTED@2;	b->tsorted = FALSE;	b->hdense = FALSE;	b->batDirtydesc = b->batDirtybuns = TRUE;	return b;}@}@+ Reverse a BAT@%BATrevert@ rearranges a BAT in reverse order on head.@{@cBAT *BATrevert(BAT *b){	size_t xx;	char *buf;	BUN p, q;	BATcheck(b, "BATrevert");	if ((b->htype == TYPE_void && b->hseqbase != oid_nil) || (b->ttype == TYPE_void && b->tseqbase != oid_nil)) {		/* materialize void columns in-place */		b = BATmaterialize(b, BATcount(b));	}	ALIGNdel(b, "BATrevert", FALSE);	xx = (size_t) BUNsize(b);	buf = (char *) GDKmalloc(xx);	for (p = BUNlast(b) - xx, q = BUNfirst(b); p > q; p -= xx, q += xx) {		memcpy(buf, p, xx);		memcpy(p, q, xx);		memcpy(q, buf, xx);	}	HASHdestroy(b);	if (b->hsorted&GDK_SORTED) 		b->hsorted = GDK_SORTED_REV;	else if (b->hsorted&GDK_SORTED_REV) 		b->hsorted = GDK_SORTED;	else		b->hsorted = FALSE;	if (b->tsorted&GDK_SORTED) 		b->tsorted = GDK_SORTED_REV;	else if (b->tsorted&GDK_SORTED_REV) 		b->tsorted = GDK_SORTED;	else		b->tsorted = FALSE;	GDKfree(buf);	return b;}@@}@+ BAT partitioningFor distributed processing we support hash and rangepartitioning operators: @%BATsplithash@ and @%BATsplitrange@.@{@-The @%part_bat@ function creates a partition BAT.@cstatic BAT *part_bat(BAT *b, int ht, int tt, size_t expected_size, int respect_order){	BAT *bn = BATnew(ht, tt, (size_t) ((double) expected_size * BATMARGIN));	if (bn) {		BATkey(bn, b->hkey);		BATkey(BATmirror(bn), b->tkey);		bn->hsorted = (respect_order && (BAThordered(b) & 1)) ? GDK_SORTED : 0;		bn->tsorted = (respect_order && (BATtordered(b) & 1)) ? GDK_SORTED : 0;	}	return bn;}@- hash partitioning@c#define BUNhash(bx,hx,tx)\	/* assert(n <= 0x40000000); */\	i = (int)(HASHprobe(&h,tx)%n); \	if ((r = BUNfnd(bx, &i)) != NULL){\		bat bid = *(bat*)BUNtloc(bx,r); \		bunfastins(BBPdescriptor(bid), hx, tx);\	}BAT *BAThashsplit(BAT *b, size_t n, int unary){	BAT *metabat, *bn, *bf;	BUN r;	Hash h;	size_t cnt;	int i = 0;	/* assert(n <= 0x40000000); */	BATcheck(b, "BAThashsplit");	if (n > BATcount(b)) {		GDKwarning("BAThashsplit: reduced number of ranges (" SZFMT ") to number of tuples (" SZFMT ").", n, BATcount(b));		n = BATcount(b);	}	if (n < 1) {		GDKerror("BAThashsplit: number of ranges must not be less than 1!\n");		return 0;	}	metabat = BATnew(TYPE_int, TYPE_bat, n);	if (metabat == NULL)		return NULL;	bn = unary ? VIEWhead_(b, b->batRestricted) : b;	if (n <= 1) {		if (BUNins(metabat, &i, &bn->batCacheid, FALSE) == NULL)			goto bunins_failed;	} else {		BUN p, q;		for (i = 2; i < (int) n; i *= 2)			;		h.mask = i - 1;		h.type = BATttype(b);		cnt = (size_t) (BATMARGIN * (double) BATbuncount(b) / (double) n);		for (i = 0; i < (int) n; i++) {			bf = part_bat(bn, BAThtype(bn), BATttype(bn), cnt, TRUE);			if (bf == NULL) {				BBPreclaim(metabat);				return NULL;			}			if (BUNins(metabat, &i, &bf->batCacheid, FALSE) == NULL)				goto bunins_failed;		}		@:updateloop(metabat,b,BUNhash)@		BATloop(metabat, p, q) {			bat bt = *(bat *) BUNtail(metabat, p);			BBPunfix(bt);		}	}	return metabat;      bunins_failed:	BBPreclaim(metabat);	return NULL;}@}@-Range partitioning ensures that identical values appear in onepartition only. The routine also tries to deliver partitions ofuniform size.@{@cBAT *BATrangesplit(BAT *b, size_t n, int unary){	BAT *metabat, *slice, *histo, *bf = NULL, *bn, *m;	int target, tpe, *sizes;	int xx, zz = 0;	size_t yy = 0;	ptr *seps, nilval;	BUN r, s;	dbl scale;	size_t thorough = (n <= 1 || BATtvoid(b) || (BATtordered(b) & 1)) ? 1 : 10;	BATcheck(b, "BATrangesplit");	if (n > BATcount(b)) {		GDKwarning("BATrangesplit: reduced number of ranges (" SZFMT ") to number of tuples (" SZFMT ").", n, BATcount(b));		n = BATcount(b);	}	if (n < 1) {		GDKerror("BAThashsplit: number of ranges must not be less than 1!\n");		return 0;	}	/* assert(BATcount(b)/n <= 0x7fffffff); */	bn = unary ? VIEWhead_(b, b->batRestricted) : b;	m = BATmirror(b);	metabat = BATnew(BATttype(b), TYPE_bat, n);	BATcheck(metabat, "BATrangesplit 2");	nilval = ATOMnilptr(BATttype(b));@-@}We use sampling to determine bucket sizes.Uniform bucket sizes are the ideal to be achieved.If necessary though, we deliver less than n buckets.@{@c	slice = BATsample(b, MIN(MAX(30 * n * thorough, 100 * thorough), BATcount(b)));	histo = BAThistogram(slice);	target = (int) (BATcount(b) / n);	/* see assert above */	scale = ((dbl) BATcount(b)) / ((dbl) BATcount(slice));	BBPreclaim(slice);	sizes = (int *) GDKmalloc(2 * n * sizeof(int));	seps = (ptr *) GDKmalloc(2 * n * sizeof(ptr));@-Use the histogram to determine good split boundaries on b.@c	BATorder(histo);	BATloopFast(histo, r, s, xx) {		int cnt = *(int *) BUNtloc(histo, r);		int add = (int) (scale * cnt);		if (zz + add > target) {			if ((zz + add - target) < (target - zz)) {				sizes[yy] = zz + add;				seps[yy] = ATOMdup(histo->htype, BUNhead(histo, r));				add = 0;			} else {				sizes[yy] = zz;				seps[yy] = ATOMdup(histo->htype, BUNhead(histo, (r - xx)));			}			zz = 0;			yy++;		}		zz += add;	}	if (yy) {		if ((sizes[yy - 1] + zz - target) > (target - zz)) {			sizes[yy] = zz;		} else {			yy--;		/* join with the last */		}	}	seps[yy] = nilval;	BBPreclaim(histo);	if (n > 1 && n != yy + 1) {		GDKwarning("rangesplit: delivering %lu instead of %lu fragments\n", yy + 1, n);		n = yy + 1;	}@-CASE 1: just one bucket.This is done without copying b.@c	if (n <= 1) {		if (BUNins(metabat, nilval, &bn->batCacheid, FALSE) == NULL)			goto bunins_failed;@-CASE 2: sorted on fragmentation column.We can again avoid copying, by giving slices (views) on the source BAT.Virtual oids (void) is a special subcase with positional lookup insteadof binary search.@c

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -