⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gdk_relop.mx

📁 这个是内存数据库中的一个管理工具
💻 MX
📖 第 1 页 / 共 5 页
字号:
			if (!rcount || (lcount<<3) > rcount) 				tpe = TYPE_var;	/* insert double-eliminated strings as ints */		}		bn = BATnew(TYPE_void, ATOMtype(tpe), estimate);		if (bn == NULL)			goto ready;		ESTIDEBUG THRprintf(GDKout, "#BATfetchjoin: estimated resultsize: " SZFMT "\n", lcount);		dst = BUNfirst(bn);		zz = BUNsize(bn);		if (ATOMstorage(tpe) == TYPE_chr) {			@:voidfetchjoin(tloc,chr,SIMPLE)@		} else if (ATOMstorage(tpe) == TYPE_bte) {			@:voidfetchjoin(tloc,bte,SIMPLE)@		} else if (ATOMstorage(tpe) == TYPE_sht) {			@:voidfetchjoin(tloc,sht,SIMPLE)@		} else if (tpe != TYPE_bat && (ATOMstorage(tpe) == TYPE_int || ATOMstorage(tpe) == TYPE_flt)) {			/* ensure use of ATOMput for TYPE_bat */			@:voidfetchjoin(tloc,int,SIMPLE)@		} else if (ATOMstorage(tpe) == TYPE_lng || ATOMstorage(tpe) == TYPE_dbl) {			@:voidfetchjoin(tloc,lng,SIMPLE)@		} else if (r->tvarsized) {			@:voidfetchjoin(tvar,bn->ttype,ATOM)@		} else {			@:voidfetchjoin(tloc,bn->ttype,ATOM)@		}		ret = bn;      	      goto bunins_failed;	      bunins_failed:		bn->batBuns->free = dst - bn->batBuns->base;		BATsetcount(bn,bn->batBuns->free/zz);		if (ret == NULL) {			BBPreclaim(bn);			goto ready;		}		/* handle string trick */		if (tpe == TYPE_var && ATOMstorage(r->ttype) == TYPE_str) {			BAT *bm = BATmirror(bn);			bn->theap = (Heap*)GDKzalloc(sizeof(Heap));			if (bn->theap && r->theap->filename) {				char *nme = BBP_physical(bn->batCacheid);								bn->theap->filename = (str) GDKmalloc(strlen(nme) + 12);				GDKfilepath(bn->theap->filename, NULL, nme, "theap");			}			if (HEAPcopy(bn->theap, r->theap) < 0) {				BBPreclaim(bn);				ret = NULL;				goto ready;			}			bn->ttype = bm->htype = r->ttype;			bn->tvarsized = bm->hvarsized = 1;		}		if (nondense) {			bn->hsorted = (BATtordered(l) & BAThordered(r) & 1) ? BAThordered(l) : 0;		} else {			BATseqbase(bn, seqbase);			if (seqbase != oid_nil)				BATkey(bn, TRUE);			bn->hsorted = GDK_SORTED;		}	} else if (l->tseqbase != oid_nil) {		/* execute using slice */		BAT *v = BATmirror(VIEWhead(BATmirror(r)));		oid lo_val = MAX(l->tseqbase, r->hseqbase);		oid hi_val = MIN(l->tseqbase + lcount, r->hseqbase + rcount);		size_t lo_pos = lo_val - r->hseqbase;		size_t hi_pos = hi_val;		if (hi_pos > r->hseqbase)			hi_pos -= r->hseqbase;		else			hi_pos = 0;		ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: BAThvoid(l) && BATtvoid(l) && l->tseqbase != oid_nil  =>  bn = BATslice(BATmirror(VIEWhead(BATmirror(r))), lo_pos=" SZFMT ", hi_pos=" SZFMT ");\n", lo_pos, hi_pos);		bn = BATslice(v, lo_pos, hi_pos);		if (seqbase != oid_nil)			seqbase += lo_val - l->tseqbase;		BATseqbase(bn, seqbase);		BBPunfix(v->batCacheid);	} else {		/* nil join column => empty result */		ALGODEBUG THRprintf(GDKout, "#BATfetchjoin: BAThvoid(l) && BATtvoid(l) && l->tseqbase == oid_nil\n");		bn = BATnew(ATOMtype(l->htype), ATOMtype(r->ttype), 10);		if (bn == NULL)			goto ready;		ESTIDEBUG THRprintf(GDKout, "#BATfetchjoin: estimated resultsize: %d\n", 10);	}	/* property propagation */	if ( BATcount(bn) == lcount) {		ALIGNsetH(bn, l);	/* BAThkey(r), remember? */	} else {		if (hitalways_check) {			GDKerror("BATfetchjoin(%s,%s) does not hit always (|bn|="SZFMT" != "SZFMT"=|l|) => can't use fetchjoin.\n", BATgetId(l), BATgetId(r), BATcount(bn), lcount);		}		bn->hsorted = l->hsorted;	}	bn->tsorted = (BATtordered(l) & BATtordered(r) & 1) ? GDK_SORTED : 0;	if (BATtkey(l)) {		/* if BATtkey(l) elements of r match at most once */		if ((BATtordered(l) & 1) && BATcount(bn) == rcount) {			ALIGNsetT(bn, r);		} else {			BATkey(BATmirror(bn), BATtkey(r));		}	}	ret = bn;      ready:	if (l != l_orig) {		BBPreclaim(l);	/* was created as a temporary (slice) select on l */	}	ESTIDEBUG THRprintf(GDKout, "#BATfetchjoin: actual resultsize: " SZFMT "\n", BATcount(bn));	return ret;}BAT *BATfetchjoin(BAT *l, BAT *r, size_t estimate){	/* fetchjoin now implies that you assure no fetch misses (hitalways) */	/* allows swapping of left and right input for faster processing */	return batfetchjoin(l, r, estimate, TRUE, TRUE);}BAT *BATleftfetchjoin(BAT *l, BAT *r, size_t estimate){	/* fetchjoin now implies that you assure no fetch misses (hitalways) */	/* do not swap left and right input,	   and hence maintain order of left head in result */	return batfetchjoin(l, r, estimate, FALSE, TRUE);}@-This routine does the join optimization. TODO: it should be expressed in MIL.@cstatic BAT *batjoin(BAT *l, BAT *r, size_t estimate, bit swap){	size_t i, lsize, rsize, lcount, rcount;	int lfetch, rfetch, must_hash;	lng logr, logl;	ERRORcheck(l == NULL, "BATjoin: invalid left operand");	ERRORcheck(r == NULL, "BATjoin: invalid right operand");	ERRORcheck(TYPEerror(l->ttype, r->htype), "BATjoin: type conflict\n");	lcount= BATcount(l);	rcount= BATcount(r);	if ( (lcount == 0) || (rcount == 0) ){                BAT *bn;		@:return_empty_join_result(l,r, BATjoin: |l|==0 or |r|==0)@	}@-collect statistics that help us decide what to do@c	lsize = l->batBuns->size + (l->hheap ? l->hheap->size : 0) + (l->theap ? l->theap->size : 0);	rsize = r->batBuns->size + (r->hheap ? r->hheap->size : 0) + (r->theap ? r->theap->size : 0);	for (logr = 4, i = rcount; i > 0; logr++)		i >>= 1;	for (logl = 4, i = lcount; i > 0; logl++)		i >>= 1;	rfetch = BAThdense(r);	lfetch = BATtdense(l);	/* in case of fetchjoin, make sure we propagate a non-join void column */	if (lfetch && rfetch) {		if (BAThvoid(l) && !BATtvoid(r))			lfetch = 0;		if (swap && BATtvoid(r) && !BAThvoid(l))			rfetch = 0;	}	must_hash = (swap && (rsize < lsize)) ? l->thash != NULL : r->hhash != NULL;@-In special cases (equal join columns, void join columns, or orderedjoin columns), we take special action.@c	if (swap && lfetch && !(rfetch && lcount < rcount)) {		ALGODEBUG THRprintf(GDKout, "#BATjoin: BATmirror(BATfetchjoin(BATmirror(r), BATmirror(l), " SZFMT "));\n", estimate);		return BATmirror(batfetchjoin(BATmirror(r), BATmirror(l), estimate, TRUE, FALSE));	} else if (rfetch) {		ALGODEBUG THRprintf(GDKout, "#BATjoin: BATfetchjoin(l, r, " SZFMT ");\n", estimate);		return batfetchjoin(l, r, estimate, swap, FALSE);	}@-If both are ordered we do merge-join, or if hash-join is not possible rightaway and one input is ordered and the other is much smaller, we do nestedloop binary search (both implemented by BATmergejoin).@c	if ((BATtordered(l) & BAThordered(r) & 1) || (must_hash && (((BATtordered(l) & 1) && ((lng) lcount > logl * (lng) rcount) && swap) || ((BAThordered(r) & 1) && ((lng) rcount > logr * (lng) lcount))))) {		ALGODEBUG THRprintf(GDKout, "#BATjoin: BATmergejoin(l,r," SZFMT ");\n", estimate);		return batmergejoin(l, r, estimate, swap);	}@-hash join: the bread&butter join of monet@c	if (swap && (rsize < lsize)) {	/* assume largest fits memory */		ALGODEBUG THRprintf(GDKout, "#BATjoin: BATmirror(BAThashjoin(BATmirror(r), BATmirror(l)," SZFMT "));\n", estimate);		return BATmirror(BAThashjoin(BATmirror(r), BATmirror(l), estimate));	}	ALGODEBUG THRprintf(GDKout, "#BATjoin: BAThashjoin(l,r," SZFMT ");\n", estimate);	return BAThashjoin(l, r, estimate);}BAT *BATjoin(BAT *l, BAT *r, size_t estimate){	/* allows swapping of left and right input for faster processing */	BAT *b = batjoin(l, r, estimate, TRUE);	/* invest in property check, since we cannot easily derive the result properties,	 * but later operations might benefit from / depend on them	 * Disable at runtime via `Mserver --debug=16777216` or	 * `debugmask(or(debugmask(),16777216));` in MIL.	 */	JOINPROPCHK if (b) BATpropcheck(b, BATPROPS_ALL);	return b;}BAT *BATleftjoin(BAT *l, BAT *r, size_t estimate){	/* do not swap left and right input,	   and hence maintain order of left head in result */	BAT *b = batjoin(l, r, estimate, FALSE);	/* invest in property check, since we cannot easily derive the result properties,	 * but later operations might benefit from / depend on them	 * Disable at runtime via `Mserver --debug=16777216` or	 * `debugmask(or(debugmask(),16777216));` in MIL.	 */	JOINPROPCHK if (b) BATpropcheck(b, BATPROPS_ALL);	return b;}@@}@+  OuterjoinThe left outerjoin between two BAT is also supported. The code isidentical to the hashjoin algorithm with the extension to insert a BUNif no match can be found.@{@= outerjoinloop{	ptr v, nilh = ATOMnilptr(r->htype), nilt = ATOMnilptr(r->ttype);	hash_t xx;	int yy;	BUN p, q, w;	BATloopFast(l, p, q, yy) {		size_t i = 0;		v = (ptr) BUNtail(l, p);		if (!@1_EQ(v, nilh, @2))			HASHloop_@2(r, r->hhash, xx, v, w) {				bunfastins(bn, BUNhead(l, p), BUNtail(r, w));				i++;			}		if (i == 0) {			bunfastins(bn, BUNhead(l, p), nilt);		}	}}break;@@-The baseline join algorithm creates a hash on the smallest element andprobes it using the larger one. [TODO]@cBAT *BATouterjoin(BAT *l, BAT *r, size_t estimate){	BAT *bn = NULL;	@:joincheck(BATouterjoin,l->ttype,r->htype)@	if (BAThdense(l) && BAThkey(r)) {		if (estimate == oid_nil)			estimate = BATcount(l);		bn = BATnew(TYPE_void, ATOMtype(r->ttype), estimate);		if (bn == NULL)			return bn;		ESTIDEBUG THRprintf(GDKout, "#BATouterjoin: estimated resultsize: " SZFMT "\n", estimate);		BATseqbase(bn, l->hseqbase);	}	if (BAThdense(r) == FALSE && BAThordered(r) & 1) {		/* use the merge-join; it takes care of the rest */		ALGODEBUG THRprintf(GDKout, "#BATouterjoin: mergejoin(l, r, bn, ATOMnilptr(r->ttype), estimate);\n");		bn = mergejoin(l, r, bn, ATOMnilptr(r->ttype), estimate);		ESTIDEBUG THRprintf(GDKout, "#BATouterjoin: actual resultsize: " SZFMT "\n", BATcount(bn));		/* invest in property check, since we cannot easily derive the result properties,		 * but later operations might benefit from / depend on them		 * Disable at runtime via `Mserver --debug=16777216` or		 * `debugmask(or(debugmask(),16777216));` in MIL.		 */		JOINPROPCHK if (bn) BATpropcheck(bn, BATPROPS_ALL);		return bn;	} else if (bn == NULL) {		@:joinbat(JOIN_EQ,BATouterjoin(l,r,oid_nil),estimate)@	}	BATmmap_pin(r);	if (BAThdense(r)) {		/* positional algorithm: hash on void column would give error and is stupid */		ptr nilt = ATOMnilptr(r->ttype);		bit nonil = TRUE;		BUN p, q, w, s = BUNfirst(bn);		int yy, xx = BUNsize(bn);		BATloopFast(l, p, q, yy) {			oid v = *(oid *) BUNtail(l, p);			ptr t = nilt;			if (v != oid_nil) {				BUNfndVOID(w, r, &v);				if (w)					t = BUNtail(r, w);			}			nonil &= (t != nilt);			bunfastins_nocheck(bn, s, BUNhead(l, p), t, xx);			s += xx;		}		bn->tsorted = ((BATtordered(l) & BATtordered(r) & 1) && nonil) ? GDK_SORTED : 0;	} else {		/* hash based algorithm (default) */		int any = ATOMstorage(r->htype);		if (BATprepareHash(r)) {			BBPreclaim(bn);			return NULL;		}		switch (any) {#ifndef NOEXPAND_CHR		case TYPE_chr:			@:outerjoinloop(simple,chr)@#endif#ifndef NOEXPAND_BTE		case TYPE_bte:			@:outerjoinloop(simple,bte)@#endif#ifndef NOEXPAND_SHT		case TYPE_sht:			@:outerjoinloop(simple,sht)@#endif#if !defined(NOEXPAND_INT) || !defined(NOEXPAND_FLT)		case TYPE_int:		case TYPE_flt:			@:outerjoinloop(simple,int)@#endif#if !defined(NOEXPAND_DBL) || !defined(NOEXPAND_LNG)		case TYPE_dbl:		case TYPE_lng:			@:outerjoinloop(simple,lng)@#endif		default:			@:outerjoinloop(atom,any)@		}		bn->tsorted = 0;	}	BATmmap_unpin(r);	/* set sorted flags by hand, because we used BUNfastins() */	if (r->hkey) {		ALIGNsetH(bn, l);	/* always 1 hit, so columns are equal */	} else {		bn->hsorted = BAThordered(l);	}	ESTIDEBUG THRprintf(GDKout, "#BATouterjoin: actual resultsize: " SZFMT "\n", BATcount(bn));	/* Just to silence compilers (Intel's icc) that otherwise might	 * complain about "declared but never referenced" labels	 * (condition should never be true).	 * (A "dead" goto between the return and the label makes (other)	 * compilers (Sun) complain about never reached code...)	 */	if (!bn)		goto bunins_failed;	/* invest in property check, since we cannot easily derive the result properties,	 * but later operations might benefit from / depend on them	 * Disable at runtime via `Mserver --debug=16777216` or	 * `debugmask(or(debugmask(),16777216));` in MIL.	 */	JOINPROPCHK if (bn) BATpropcheck(bn, BATPROPS_ALL);	return bn;      bunins_failed:	BATmmap_unpin(r);	BBPreclaim(bn);	return NULL;}@@}@+ ThetaJoinCurrent predicates supported are: JOIN_EQ, JOIN_LT,JOIN_GE, JOIN_LE and JOIN_GT. The JOIN_EQ will pass the control to thenormal @:BATjoin@ equijoin. The is and index-based join: if an indexis not present, it will be created on the smallest relation.We do lots of code-inlining: first of all on join type (4), andfurthermore on left-tail (equal right-head) type (5), which are thejoin columns.  We factor out more by splitting on storage strategy(variable-sized/fixed-size) of both the left-head, and right tailcolumns (2*2).In the end, this results in 4*5*2*2 = 80 different inner loops.@{@cBAT *BATthetajoin(BAT *l, BAT *r, int op, size_t estimate){	size_t _lcount = BATcount(l);	size_t _rcount = BATcount(r);	size_t _estimate = _lcount * _rcount;	@:joincheck(BATthetajoin,l->ttype,r->htype)@	if (estimate < _estimate)		_estimate = estimate;	if (op == JOIN_EQ) {		/* exploit all equi-join optimizations */		ALGODEBUG THRprintf(GDKout, "#BATthetajoin(l,r,JOIN_EQ): BATjoin(l, r);\n");		return BATjoin(l, r, _estimate);	}	return BATnlthetajoin(l, r, op, _estimate);}BAT *BATleftthetajoin(BAT *l, BAT *r, int op, size_t estimate){	size_t _lcount = BATcount(l);	size_t _rcount = BATcount(r);	size_t _estimate = _lcount * _rcount;	@:joincheck(BATleftthetajoin,l->ttype,r->htype)@	if (estimate < _estimate)		_estimate = estimate;	if (op == JOIN_EQ) {		/* exploit all equi-join optimizations */		ALGODEBUG THRprintf(GDKout, "#BATleftthetajoin(l,r,JOIN_EQ): BATleftjoin(l, r);\n");		return BATleftjoin(l, r, _estimate);	}	return BATnlthetajoin(l, r, op, _estimate);}/* nested loop join; finally MonetDB can enjoy the virtues of this algorithm as well! */@= nlthetajoin_unroll8	@:nlthetajoin_@2(@1)@ ri += rsz; @:nlthetajoin_@2(@1)@ ri += rsz;	@:nlthetajoin_@2(@1)@ ri += rsz; @:nlthetajoin_@2(@1)@ ri += rsz;	@:nlthetajoin_@2(@1)@ ri += rsz; @:nlthetajoin_@2(@1)@ ri += rsz;	@:nlthetajoin_@2(@1)@ ri += rsz; @:nlthetajoin_@2(@1)@ ri += rsz;@= nlthetajoin_void	{ cur->r = off++; cur += (v @1 ra[ri]); } @= nlthetajoin_oid	{ cur->r = *(oid*) (((BUN)(ra+ri))+off); cur += (v @1 ra[ri]); }@= nlthetajoin_implstatic int nlthetajoin_@2_@1(BAT *bn, BAT *l, BAT *r) {	dst_t *cur = (dst_t*) (bn->batBuns->base + bn->batBuns->free);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -