⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gdk_relop.c

📁 这个是内存数据库中的一个管理工具
💻 C
📖 第 1 页 / 共 5 页
字号:
#line 26 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#include "monetdb_config.h"#include "gdk.h"#define SAMPLE_TRESHOLD_LOG 17#define SAMPLE_SLICE_SIZE 1000#line 32 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"#line 223 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"/* serves both normal equi-join (nil_on_miss==NULL) and outerjoin (nil_on_miss=nil) */static BAT *mergejoin(BAT *l, BAT *r, BAT *bn, ptr nil_on_miss, size_t estimate){	ptr nil = ATOMnilptr(r->htype);	int r_scan = -1;	/* no scanning in r */	BAT *rr = BATmirror(r);	BUN l_last, r_last;	/* last BUN of the BAT */	BUN l_start, r_start;	/* start of current chunk  */	BUN l_end, r_end;	/* end of current chunk */	int r_next = BUNsize(r);	int l_next = BUNsize(l);	int l_key = l->tkey;	int r_key = r->hkey;	BUN r_cur, r_lim;	int loc, var, hasnils = 0;	if (BATtordered(l) & 1) {		size_t i;		int logr = 4;		/* 4*log2(r.count) = estimation of the cost of binary search in units of scan comparisons */		for (i = BATcount(r); i > 0; logr++)			i >>= 1;		r_scan = logr * BUNsize(r);	/* opportunistic scan window in r */	}	if (!(BAThordered(r) & 1)) {		GDKerror("mergejoin: right input is not sorted.\n");		return NULL;	}	if (bn == NULL) {		#line 135 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"	{		size_t _estimate = estimate;		#line 55 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"	if ( _estimate == (size_t) oid_nil) {		size_t _lcount = BATcount(l);		size_t _rcount = BATcount(r);		size_t _slices = 0;		/* limit estimate with simple bounds first; only spend effort if the join result might be big */		if (JOIN_EQ == JOIN_EQ) {			if (l->tkey)				 _estimate = r->hkey ? MIN(_rcount, _lcount) : _rcount;			else if (r->hkey)				 _estimate = _lcount;		}		if ( _estimate == oid_nil) {			size_t _heuristic = 3 * MIN(_lcount, _rcount);			if (_heuristic <= (1 << SAMPLE_TRESHOLD_LOG))				 _estimate = _heuristic;		}		if ( _estimate == oid_nil) {			size_t _idx;			for (_idx = _lcount; _idx > 0; _idx >>= 1)				_slices++;		}		if (_slices > SAMPLE_TRESHOLD_LOG) {			/* use cheapo sampling by taking a number of slices and joining those with the algo */			size_t _idx = 0, _tot = 0, _step, _lo, _avg, _sample, *_cnt;			BAT *_tmp1 = l, *_tmp2, *_tmp3 = NULL;			_step = _lcount / (_slices -= SAMPLE_TRESHOLD_LOG);			_sample = _slices * SAMPLE_SLICE_SIZE;			_cnt = (size_t *) alloca(_slices * sizeof(size_t));			for (_lo = 0; _idx < _slices; _lo += _step) {				size_t _size = 0, _hi = _lo + SAMPLE_SLICE_SIZE;				l = BATslice(_tmp1, _lo, _hi);	/* slice keeps all parent properties */				if (l == NULL)					return NULL;				_tmp2 =  mergejoin(l,r,NULL,nil_on_miss,oid_nil);	/*  mergejoin(l,r,NULL,nil_on_miss,oid_nil) = e.g. BATXjoin(l,r) */				if (_tmp2) {					_size = BATcount(_tmp2);					BBPreclaim(_tmp2);				}				_tot += (_cnt[_idx++] = _size);				BBPreclaim(l);			}			/* do outlier detection on sampling results; this guards against skew */			if (JOIN_EQ == JOIN_EQ) {				for (_avg = _tot / _slices, _idx = 0; _idx < _slices; _idx++) {					size_t _diff = _cnt[_idx] - _avg;					if (_avg > _cnt[_idx])						_diff = _avg - _cnt[_idx];					if (_diff > MAX(SAMPLE_SLICE_SIZE, _avg))						break;				}				if (_idx < _slices) {					/* outliers detected, compute a real sample on at most 1% of the data */					_sample = MIN(_lcount / 100, (1 << SAMPLE_TRESHOLD_LOG) / 3);					_tmp2 = BATsample(_tmp1, _sample);					if (_tmp2) {						_tmp3 = BATjoin(_tmp2, r, oid_nil);	/* might be expensive */						if (_tmp3) {							_tot = BATcount(_tmp3);							BBPreclaim(_tmp3);						}						BBPreclaim(_tmp2);					}					if (_tmp3 == NULL)						return NULL;				}			}			/* overestimate always by 5% */			 _estimate = (size_t) ((double) (((lng) _tot) * ((lng) _lcount)) / (0.95 * (double) _sample));			l = _tmp1;		} else {			 _estimate = MAX(_lcount,_rcount);		}	}#line 138 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"		bn = BATnew(BAThtype(l), BATttype(r), _estimate);		if (bn == NULL) {			return bn;		}	}#line 254 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"	}	/* the algorithm */	loc = ATOMstorage(l->ttype);	l_last = BUNlast(l);	r_last = BUNlast(r);	l_start = l_end = BUNfirst(l);	r_start = r_end = BUNfirst(r);	switch (loc) {#ifndef NOEXPAND_CHR	case TYPE_chr:		#line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"	if (((!BATtvoid(l)) || l->tseqbase != oid_nil) &&	    ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) {		assert(r->htype != TYPE_void);		while (l_start < l_last) {			ptr v2, v1 = BUNtloc(l, l_start);			int neq = 1;			/* lookup range in l */			l_end = l_start;			if (l_key) {				l_end += l_next;			} else				do {					if ((l_end += l_next) >= l_last)						break;					v2 = BUNtloc(l, l_end);				} while (simple_EQ(v1, v2, chr));			/* lookup value in r (if not nil, that is) */			if (!simple_EQ(v1, nil, chr)) {				if (r_scan > 0) {					/* first try scanning; but give up after a while */					for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) {						v2 = BUNhloc(r, r_end);						neq = simple_CMP(v1, v2, chr);						if (neq <= 0)							break;					}					r_start = r_end;				}				if (neq == 1) {					/* use binary search after failed scan or if scanning is impossible (l not sorted) */					if (r_scan < 0 || r_start < r_last) {						/* if merge not ended (or if no merge at all) */						r_start = (BUN) SORTfndfirst_chr(rr, v1);					}					if (r_start < r_last) {						v2 = BUNhloc(r, r_start);						neq = !simple_EQ(v1, v2, chr);					} else if (r_scan >= 0) {						/* r is already at end => break off merge join */						break;					}				}			}			if (neq == 0) {				/* lookup range in r */				r_end = r_start + r_next;				if (r_key == 0)					while (r_end < r_last) {						v2 = BUNhloc(r, r_end);						if (!simple_EQ(v1, v2, chr))							break;						r_end += r_next;					}				/* generate match-product as join result */				for (; l_start < l_end; l_start += l_next)					for (r_cur = r_start; r_cur < r_end; r_cur += r_next)						bunfastins(bn, BUNhead(l, l_start), BUNtail(r, r_cur));			} else if (nil_on_miss) {				/* outerjoin inserts nils on a miss */				hasnils = 1;				for (; l_start < l_end; l_start += l_next)					bunfastins(bn, BUNhead(l, l_start), nil_on_miss);			} else {				l_start = l_end;	/* no match found in equi-join */			}		}	}#line 268 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx";		break;#endif#ifndef NOEXPAND_BTE	case TYPE_bte:		#line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"	if (((!BATtvoid(l)) || l->tseqbase != oid_nil) &&	    ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) {		assert(r->htype != TYPE_void);		while (l_start < l_last) {			ptr v2, v1 = BUNtloc(l, l_start);			int neq = 1;			/* lookup range in l */			l_end = l_start;			if (l_key) {				l_end += l_next;			} else				do {					if ((l_end += l_next) >= l_last)						break;					v2 = BUNtloc(l, l_end);				} while (simple_EQ(v1, v2, bte));			/* lookup value in r (if not nil, that is) */			if (!simple_EQ(v1, nil, bte)) {				if (r_scan > 0) {					/* first try scanning; but give up after a while */					for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) {						v2 = BUNhloc(r, r_end);						neq = simple_CMP(v1, v2, bte);						if (neq <= 0)							break;					}					r_start = r_end;				}				if (neq == 1) {					/* use binary search after failed scan or if scanning is impossible (l not sorted) */					if (r_scan < 0 || r_start < r_last) {						/* if merge not ended (or if no merge at all) */						r_start = (BUN) SORTfndfirst_bte(rr, v1);					}					if (r_start < r_last) {						v2 = BUNhloc(r, r_start);						neq = !simple_EQ(v1, v2, bte);					} else if (r_scan >= 0) {						/* r is already at end => break off merge join */						break;					}				}			}			if (neq == 0) {				/* lookup range in r */				r_end = r_start + r_next;				if (r_key == 0)					while (r_end < r_last) {						v2 = BUNhloc(r, r_end);						if (!simple_EQ(v1, v2, bte))							break;						r_end += r_next;					}				/* generate match-product as join result */				for (; l_start < l_end; l_start += l_next)					for (r_cur = r_start; r_cur < r_end; r_cur += r_next)						bunfastins(bn, BUNhead(l, l_start), BUNtail(r, r_cur));			} else if (nil_on_miss) {				/* outerjoin inserts nils on a miss */				hasnils = 1;				for (; l_start < l_end; l_start += l_next)					bunfastins(bn, BUNhead(l, l_start), nil_on_miss);			} else {				l_start = l_end;	/* no match found in equi-join */			}		}	}#line 273 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx";		break;#endif#ifndef NOEXPAND_SHT	case TYPE_sht:		#line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"	if (((!BATtvoid(l)) || l->tseqbase != oid_nil) &&	    ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) {		assert(r->htype != TYPE_void);		while (l_start < l_last) {			ptr v2, v1 = BUNtloc(l, l_start);			int neq = 1;			/* lookup range in l */			l_end = l_start;			if (l_key) {				l_end += l_next;			} else				do {					if ((l_end += l_next) >= l_last)						break;					v2 = BUNtloc(l, l_end);				} while (simple_EQ(v1, v2, sht));			/* lookup value in r (if not nil, that is) */			if (!simple_EQ(v1, nil, sht)) {				if (r_scan > 0) {					/* first try scanning; but give up after a while */					for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) {						v2 = BUNhloc(r, r_end);						neq = simple_CMP(v1, v2, sht);						if (neq <= 0)							break;					}					r_start = r_end;				}				if (neq == 1) {					/* use binary search after failed scan or if scanning is impossible (l not sorted) */					if (r_scan < 0 || r_start < r_last) {						/* if merge not ended (or if no merge at all) */						r_start = (BUN) SORTfndfirst_sht(rr, v1);					}					if (r_start < r_last) {						v2 = BUNhloc(r, r_start);						neq = !simple_EQ(v1, v2, sht);					} else if (r_scan >= 0) {						/* r is already at end => break off merge join */						break;					}				}			}			if (neq == 0) {				/* lookup range in r */				r_end = r_start + r_next;				if (r_key == 0)					while (r_end < r_last) {						v2 = BUNhloc(r, r_end);						if (!simple_EQ(v1, v2, sht))							break;						r_end += r_next;					}				/* generate match-product as join result */				for (; l_start < l_end; l_start += l_next)					for (r_cur = r_start; r_cur < r_end; r_cur += r_next)						bunfastins(bn, BUNhead(l, l_start), BUNtail(r, r_cur));			} else if (nil_on_miss) {				/* outerjoin inserts nils on a miss */				hasnils = 1;				for (; l_start < l_end; l_start += l_next)					bunfastins(bn, BUNhead(l, l_start), nil_on_miss);			} else {				l_start = l_end;	/* no match found in equi-join */			}		}	}#line 278 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx";		break;#endif#ifndef NOEXPAND_INT	case TYPE_int:		#line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"	if (((!BATtvoid(l)) || l->tseqbase != oid_nil) &&	    ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) {		assert(r->htype != TYPE_void);		while (l_start < l_last) {			ptr v2, v1 = BUNtloc(l, l_start);			int neq = 1;			/* lookup range in l */			l_end = l_start;			if (l_key) {				l_end += l_next;			} else				do {					if ((l_end += l_next) >= l_last)						break;					v2 = BUNtloc(l, l_end);				} while (simple_EQ(v1, v2, int));			/* lookup value in r (if not nil, that is) */			if (!simple_EQ(v1, nil, int)) {				if (r_scan > 0) {					/* first try scanning; but give up after a while */					for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) {						v2 = BUNhloc(r, r_end);						neq = simple_CMP(v1, v2, int);						if (neq <= 0)							break;					}					r_start = r_end;				}				if (neq == 1) {					/* use binary search after failed scan or if scanning is impossible (l not sorted) */					if (r_scan < 0 || r_start < r_last) {						/* if merge not ended (or if no merge at all) */						r_start = (BUN) SORTfndfirst_int(rr, v1);					}					if (r_start < r_last) {						v2 = BUNhloc(r, r_start);						neq = !simple_EQ(v1, v2, int);					} else if (r_scan >= 0) {						/* r is already at end => break off merge join */						break;					}				}			}			if (neq == 0) {				/* lookup range in r */				r_end = r_start + r_next;				if (r_key == 0)					while (r_end < r_last) {						v2 = BUNhloc(r, r_end);						if (!simple_EQ(v1, v2, int))							break;						r_end += r_next;					}				/* generate match-product as join result */				for (; l_start < l_end; l_start += l_next)					for (r_cur = r_start; r_cur < r_end; r_cur += r_next)						bunfastins(bn, BUNhead(l, l_start), BUNtail(r, r_cur));			} else if (nil_on_miss) {				/* outerjoin inserts nils on a miss */				hasnils = 1;				for (; l_start < l_end; l_start += l_next)					bunfastins(bn, BUNhead(l, l_start), nil_on_miss);			} else {				l_start = l_end;	/* no match found in equi-join */			}		}	}#line 283 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx";		break;#endif#ifndef NOEXPAND_FLT	case TYPE_flt:		#line 153 "/export/scratch0/monet/monet.GNU.64.64.d.14791/MonetDB/src/gdk/gdk_relop.mx"	if (((!BATtvoid(l)) || l->tseqbase != oid_nil) &&	    ((!BAThvoid(r)) || r->hseqbase != oid_nil || nil_on_miss)) {		assert(r->htype != TYPE_void);		while (l_start < l_last) {			ptr v2, v1 = BUNtloc(l, l_start);			int neq = 1;			/* lookup range in l */			l_end = l_start;			if (l_key) {				l_end += l_next;			} else				do {					if ((l_end += l_next) >= l_last)						break;					v2 = BUNtloc(l, l_end);				} while (simple_EQ(v1, v2, flt));			/* lookup value in r (if not nil, that is) */			if (!simple_EQ(v1, nil, flt)) {				if (r_scan > 0) {					/* first try scanning; but give up after a while */					for (r_lim = MIN(r_last, r_end + r_scan); r_end < r_lim; r_end += r_next) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -