opt_costmodel.mx

来自「一个内存数据库的源代码这是服务器端还有客户端」· MX 代码 · 共 272 行
272 行
@' The contents of this file are subject to the MonetDB Public License@' Version 1.1 (the "License"); you may not use this file except in@' compliance with the License. You may obtain a copy of the License at@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html@'@' Software distributed under the License is distributed on an "AS IS"@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the@' License for the specific language governing rights and limitations@' under the License.@'@' The Original Code is the MonetDB Database System.@'@' The Initial Developer of the Original Code is CWI.@' Portions created by CWI are Copyright (C) 1997-2007 CWI.@' All Rights Reserved.@f opt_costModel@a M. L. Kersten@- Costmodel ApproachCost models form the basis for many optimization decisions.The cost parameters are typically the size of the (intermediate)results and response time. Alternatively, they are runningaggregates, e.g. max memory and total execution time,obtained from a simulated run.The current implementation contains a framework and an example forbuilding your own cost-based optimized.The @sc{optimizer.costModel()} works its way through a MAL program in search forrelational operators and estimates their result size. The estimated size is left behind as the property @sc{rows}.@verbatim    r{rows=100} := bat.new(:oid,:int);    s{rows=1000}:= bat.new(:oid,:int);    rs:= algebra.select(s,1,1);    rr:= bat.reverse(r);    j:= algebra.join(rs,rr);    optimizer.costModel();@end verbatimchanges the properties of the instructions as follows:@verbatim    r{rows=100} := bat.new(:oid,:int);    s{rows=1000} := bat.new(:oid,:int);    rs{rows=501} := algebra.select(s,1,1);    rr{rows=100} := bat.reverse(r);    j{rows=100} := algebra.join(rs,rr);@end verbatimThe cost estimation does not use any statistics on the actualdata distribution yet. It relies on the @sc{rows}property provided by the front-end or other optimizers.It just applies a few heuristic cost estimators.However, it ensures that empty results are only taggedwith @sc{rows=0} if the estimate is accurate, otherwiseit assumes at least one result row. This property makesit possible to safely pass the result of the cost estimationto the @sc{emptySet} optimizer for code squeezing.@{@+ Implementation sectionTo work in conjunction with the emptySet optimizer, we shouldassure that we don't estimate empty sets. The row count mayonly become zero if you are absolutely sure.@malpattern optimizer.costModel():straddress OPTcostModel;pattern optimizer.costModel(mod:str, fcn:str):straddress OPTcostModelcomment "Estimate the cost of a relational expression";@h#ifndef _OPT_COSTMODEL_H_#define _OPT_COSTMODEL_H_/* #define DEBUG_OPT_COSTMODEL*/#include "mal.h"#include <math.h>#include "mal_interpreter.h"#include "mal_properties.h"#include "opt_support.h"#include "opt_prelude.h"#endif /* _OPT_COSTMODEL_H_ */@-@c#include "mal_config.h"#include "opt_costModel.h"static voidfixPropertySet(MalBlkPtr mb, int dst){	if (mb->var[dst]->props == 0)		mb->var[dst]->props = newPropertySet();}#define getVarRows(Y) (int*)getPropertyValue(mb->var[Y]->props,"rows")#define getVarSize(Y) (int*)getPropertyValue(mb->var[Y]->props,"size")@-The cost formula are repetative@= newRows{		c1 = getVarRows(getArg(p,@1));		c2 = getVarRows(getArg(p,@2));		if( c1 == NULL) continue;		if( c2 == NULL) continue;		k= (@3);		fixPropertySet(mb, getArg(p,@4));		setProperty(getProps(mb,getArg(p,@4)),"rows","=", TYPE_int, &k);#ifdef DEBUG_OPT_COSTMODEL		stream_printf(GDKout,"COST of @1 @2 into @4: %d\n",k);		printInstruction(GDKout,mb,p,0);#endif}@= newRows1		@:newRows(@1,@1,@2,0)@@= newRows2		@:newRows(@1,@2,@3,0)@@-SQL specific back propagation of table size may be needed to avoidthe empty-set optimizer to through away a BAT we need.@cstatic voidOPTbackpropagate(MalBlkPtr mb, int i, int idx){	int *rows;	InstrPtr p;	for( ; i > 0; i--){		p= getInstrPtr(mb,i);		if( getFunctionId(p) == setWriteModeRef){			rows= getVarRows(getArg(p,1));			if( rows == NULL  || *rows) continue;			*rows = *getVarRows(idx);		} 	}}@-The cost will be used in many places to make decisions.Access should be fast.The SQL front-end also makes the BAT index available as theproperty bid. This can be used to access the BAT and involvemore properties into the decision procedure.[to be done]Also make sure you don't re-use variables, because then therow count becomes non-deterministic.@cstatic intOPTcostModelImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){	int i, *c1, *c2, k;	InstrPtr p;	str sortrevRef= putName("sortReverse",11);	str sortrevTailRef= putName("sortReverseTail",15);	str projectRef= putName("project",7);	(void) stk;	(void) pci;	for (i = 0; i < mb->stop; i++) {		p = getInstrPtr(mb, i);		if (getModuleId(p)==algebraRef) {			if (getFunctionId(p) == markTRef  || 				getFunctionId(p) == markHRef  ||				getFunctionId(p) == selectNotNilRef  ||				getFunctionId(p) == sortRef  ||				getFunctionId(p) == sortTailRef  ||				getFunctionId(p) == sortrevRef  ||				getFunctionId(p) == sortrevTailRef  ||				getFunctionId(p) == projectRef  ){				@:newRows1(1, *c1)@			} else			if(	getFunctionId(p) == unionRef ||				getFunctionId(p) == kunionRef) {				@:newRows2(1,2, *c1+*c2,1)@			} else if (getFunctionId(p)== kdifferenceRef) {				@:newRows2(1, 2, *c1==0? 0: *c2==0? *c1: *c1 - *c2 < 0 ? 1 : *c1 - *c2+1)@			} else if (getFunctionId(p) == joinRef ) {				/* assume 1-1 joins */				@:newRows2(1, 2, *c1 < *c2 ? *c1 : *c2)@			} else if (getFunctionId(p) == semijoinRef ) {				/* assume 1-1 semijoins */				@:newRows2(1, 2, *c1 < *c2 ? *c1 : *c2)@			} else if (getFunctionId(p) == selectRef ||				   getFunctionId(p) == uselectRef) {				@:newRows1(1, *c1 > 100 ? *c1 / 2 +1: *c1)@			} else if (getFunctionId(p) == crossRef) {				@:newRows2(1, 2, (log((double) *c1) + log((double) *c2) > log(INT_MAX) ? INT_MAX : *c1 * *c2 +1))@			} else if (getFunctionId(p) == tuniqueRef ) {				@:newRows1(1, *c1 < 50 ? *c1 : *c1 / 10+1)@			}		} else if (getModuleId(p) == batcalcRef) {			if( isaBatType(getArgType(mb,p,1)) )				@:newRows1(1, *c1)@			else				@:newRows1(2, *c2)@		} else if (getModuleId(p) == batRef) {			if (getFunctionId(p) == reverseRef ||			    getFunctionId(p) == setWriteModeRef  ||			    getFunctionId(p) == hashRef  ||			    getFunctionId(p) == mirrorRef) {				@:newRows1(1, *c1)@			} else if (getFunctionId(p) == appendRef ||				   getFunctionId(p) == insertRef ){@-Updates are a little more complicated, because you have topropagate changes in the expected size up the expression tree.For example, the SQL snippet:    _49:bat[:oid,:oid]{rows=0,bid=622}  := sql.bind_dbat("sys","example",3);    _54 := bat.setWriteMode(_49);    bat.append(_54,_47,true);shows what is produced when it encounters a deletion. If a non-emptyappend is not properly passed back to _49, the emptySet optimizer might remove the complete deletion code.The same holds for replacement operations, which add information toan initially empty insertion BAT.@c				if( isaBatType(getArgType(mb,p,2)) ){					/* insert BAT */					@:newRows(1,2, *c1 + *c2+1,1)@ 					OPTbackpropagate(mb,i,getArg(p,1));				} else {					/* insert scalars */					@:newRows(1,1, *c1 +1,1)@ 					OPTbackpropagate(mb,i,getArg(p,1));				}			} else if (getFunctionId(p) == deleteRef){				if( isaBatType(getArgType(mb,p,2)) ){					/* delete BAT */					@:newRows(1,2, (*c1 - *c2 ==0? 1: *c1-*c2),1)@ 					OPTbackpropagate(mb,i,getArg(p,1));				} else {					/* insert scalars */					@:newRows(1,1, *c1==1?1: *c1-1,1)@ 					OPTbackpropagate(mb,i,getArg(p,1));				}				OPTbackpropagate(mb,i,getArg(p,1));			} else if (	getFunctionId(p) == insertRef){				@:newRows1(1, *c1 + 1)@ /* faked */				OPTbackpropagate(mb,i,getArg(p,1));			}		} else if (getModuleId(p)==groupRef) {			if (getFunctionId(p) ==newRef) {				@:newRows1(1, *c1 / 10+1)@			} else {				@:newRows1(1, *c1)@			}		} else if (getModuleId(p)== aggrRef) {			if (getFunctionId(p) == sumRef || 				getFunctionId(p) == minRef || 				getFunctionId(p) == maxRef || 				getFunctionId(p) == avgRef) {				@:newRows1(1, *c1?*c1:*c1+1)@			} else	if (getFunctionId(p) == countRef){				@:newRows1(1, 1)@			}		} else if( p->token == ASSIGNsymbol && p->argc== 2){				/* copy the rows property */				c1 = getVarRows(getArg(p,1));				if( c1 != NULL){					fixPropertySet(mb, getArg(p,0));					setProperty(getProps(mb,getArg(p,0)),"rows","=", TYPE_int,c1);				}		}	}	return 1;}@include optimizerWrapper.mx@h@:exportOptimizer(costModel)@@-The costModel does not change the program properties,which means we can skip the expensive defensive tests.@c@:wrapOptimizer(costModel,0)@@}
opt_costmodel.mx - 源码说明

本页面展示了「一个内存数据库的源代码这是服务器端还有客户端」中的 opt_costmodel.mx 源码文件，采用 MX 编程语言编写，共 272 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与内存数据库相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?