📄 opt_costmodel.mx
字号:
@' The contents of this file are subject to the MonetDB Public License@' Version 1.1 (the "License"); you may not use this file except in@' compliance with the License. You may obtain a copy of the License at@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html@'@' Software distributed under the License is distributed on an "AS IS"@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the@' License for the specific language governing rights and limitations@' under the License.@'@' The Original Code is the MonetDB Database System.@'@' The Initial Developer of the Original Code is CWI.@' Portions created by CWI are Copyright (C) 1997-2007 CWI.@' All Rights Reserved.@f opt_costModel@a M. L. Kersten@- Costmodel ApproachCost models form the basis for many optimization decisions.The cost parameters are typically the size of the (intermediate)results and response time. Alternatively, they are runningaggregates, e.g. max memory and total execution time,obtained from a simulated run.The current implementation contains a framework and an example forbuilding your own cost-based optimized.The @sc{optimizer.costModel()} works its way through a MAL program in search forrelational operators and estimates their result size. The estimated size is left behind as the property @sc{rows}.@verbatim r{rows=100} := bat.new(:oid,:int); s{rows=1000}:= bat.new(:oid,:int); rs:= algebra.select(s,1,1); rr:= bat.reverse(r); j:= algebra.join(rs,rr); optimizer.costModel();@end verbatimchanges the properties of the instructions as follows:@verbatim r{rows=100} := bat.new(:oid,:int); s{rows=1000} := bat.new(:oid,:int); rs{rows=501} := algebra.select(s,1,1); rr{rows=100} := bat.reverse(r); j{rows=100} := algebra.join(rs,rr);@end verbatimThe cost estimation does not use any statistics on the actualdata distribution yet. It relies on the @sc{rows}property provided by the front-end or other optimizers.It just applies a few heuristic cost estimators.However, it ensures that empty results are only taggedwith @sc{rows=0} if the estimate is accurate, otherwiseit assumes at least one result row. This property makesit possible to safely pass the result of the cost estimationto the @sc{emptySet} optimizer for code squeezing.@{@+ Implementation sectionTo work in conjunction with the emptySet optimizer, we shouldassure that we don't estimate empty sets. The row count mayonly become zero if you are absolutely sure.@malpattern optimizer.costModel():straddress OPTcostModel;pattern optimizer.costModel(mod:str, fcn:str):straddress OPTcostModelcomment "Estimate the cost of a relational expression";@h#ifndef _OPT_COSTMODEL_H_#define _OPT_COSTMODEL_H_/* #define DEBUG_OPT_COSTMODEL*/#include "mal.h"#include <math.h>#include "mal_interpreter.h"#include "mal_properties.h"#include "opt_support.h"#include "opt_prelude.h"#endif /* _OPT_COSTMODEL_H_ */@-@c#include "mal_config.h"#include "opt_costModel.h"static voidfixPropertySet(MalBlkPtr mb, int dst){ if (mb->var[dst]->props == 0) mb->var[dst]->props = newPropertySet();}#define getVarRows(Y) (int*)getPropertyValue(mb->var[Y]->props,"rows")#define getVarSize(Y) (int*)getPropertyValue(mb->var[Y]->props,"size")@-The cost formula are repetative@= newRows{ c1 = getVarRows(getArg(p,@1)); c2 = getVarRows(getArg(p,@2)); if( c1 == NULL) continue; if( c2 == NULL) continue; k= (@3); fixPropertySet(mb, getArg(p,@4)); setProperty(getProps(mb,getArg(p,@4)),"rows","=", TYPE_int, &k);#ifdef DEBUG_OPT_COSTMODEL stream_printf(GDKout,"COST of @1 @2 into @4: %d\n",k); printInstruction(GDKout,mb,p,0);#endif}@= newRows1 @:newRows(@1,@1,@2,0)@@= newRows2 @:newRows(@1,@2,@3,0)@@-SQL specific back propagation of table size may be needed to avoidthe empty-set optimizer to through away a BAT we need.@cstatic voidOPTbackpropagate(MalBlkPtr mb, int i, int idx){ int *rows; InstrPtr p; for( ; i > 0; i--){ p= getInstrPtr(mb,i); if( getFunctionId(p) == setWriteModeRef){ rows= getVarRows(getArg(p,1)); if( rows == NULL || *rows) continue; *rows = *getVarRows(idx); } }}@-The cost will be used in many places to make decisions.Access should be fast.The SQL front-end also makes the BAT index available as theproperty bid. This can be used to access the BAT and involvemore properties into the decision procedure.[to be done]Also make sure you don't re-use variables, because then therow count becomes non-deterministic.@cstatic intOPTcostModelImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ int i, *c1, *c2, k; InstrPtr p; str sortrevRef= putName("sortReverse",11); str sortrevTailRef= putName("sortReverseTail",15); str projectRef= putName("project",7); (void) stk; (void) pci; for (i = 0; i < mb->stop; i++) { p = getInstrPtr(mb, i); if (getModuleId(p)==algebraRef) { if (getFunctionId(p) == markTRef || getFunctionId(p) == markHRef || getFunctionId(p) == selectNotNilRef || getFunctionId(p) == sortRef || getFunctionId(p) == sortTailRef || getFunctionId(p) == sortrevRef || getFunctionId(p) == sortrevTailRef || getFunctionId(p) == projectRef ){ @:newRows1(1, *c1)@ } else if( getFunctionId(p) == unionRef || getFunctionId(p) == kunionRef) { @:newRows2(1,2, *c1+*c2,1)@ } else if (getFunctionId(p)== kdifferenceRef) { @:newRows2(1, 2, *c1==0? 0: *c2==0? *c1: *c1 - *c2 < 0 ? 1 : *c1 - *c2+1)@ } else if (getFunctionId(p) == joinRef ) { /* assume 1-1 joins */ @:newRows2(1, 2, *c1 < *c2 ? *c1 : *c2)@ } else if (getFunctionId(p) == semijoinRef ) { /* assume 1-1 semijoins */ @:newRows2(1, 2, *c1 < *c2 ? *c1 : *c2)@ } else if (getFunctionId(p) == selectRef || getFunctionId(p) == uselectRef) { @:newRows1(1, *c1 > 100 ? *c1 / 2 +1: *c1)@ } else if (getFunctionId(p) == crossRef) { @:newRows2(1, 2, (log((double) *c1) + log((double) *c2) > log(INT_MAX) ? INT_MAX : *c1 * *c2 +1))@ } else if (getFunctionId(p) == tuniqueRef ) { @:newRows1(1, *c1 < 50 ? *c1 : *c1 / 10+1)@ } } else if (getModuleId(p) == batcalcRef) { if( isaBatType(getArgType(mb,p,1)) ) @:newRows1(1, *c1)@ else @:newRows1(2, *c2)@ } else if (getModuleId(p) == batRef) { if (getFunctionId(p) == reverseRef || getFunctionId(p) == setWriteModeRef || getFunctionId(p) == hashRef || getFunctionId(p) == mirrorRef) { @:newRows1(1, *c1)@ } else if (getFunctionId(p) == appendRef || getFunctionId(p) == insertRef ){@-Updates are a little more complicated, because you have topropagate changes in the expected size up the expression tree.For example, the SQL snippet: _49:bat[:oid,:oid]{rows=0,bid=622} := sql.bind_dbat("sys","example",3); _54 := bat.setWriteMode(_49); bat.append(_54,_47,true);shows what is produced when it encounters a deletion. If a non-emptyappend is not properly passed back to _49, the emptySet optimizer might remove the complete deletion code.The same holds for replacement operations, which add information toan initially empty insertion BAT.@c if( isaBatType(getArgType(mb,p,2)) ){ /* insert BAT */ @:newRows(1,2, *c1 + *c2+1,1)@ OPTbackpropagate(mb,i,getArg(p,1)); } else { /* insert scalars */ @:newRows(1,1, *c1 +1,1)@ OPTbackpropagate(mb,i,getArg(p,1)); } } else if (getFunctionId(p) == deleteRef){ if( isaBatType(getArgType(mb,p,2)) ){ /* delete BAT */ @:newRows(1,2, (*c1 - *c2 ==0? 1: *c1-*c2),1)@ OPTbackpropagate(mb,i,getArg(p,1)); } else { /* insert scalars */ @:newRows(1,1, *c1==1?1: *c1-1,1)@ OPTbackpropagate(mb,i,getArg(p,1)); } OPTbackpropagate(mb,i,getArg(p,1)); } else if ( getFunctionId(p) == insertRef){ @:newRows1(1, *c1 + 1)@ /* faked */ OPTbackpropagate(mb,i,getArg(p,1)); } } else if (getModuleId(p)==groupRef) { if (getFunctionId(p) ==newRef) { @:newRows1(1, *c1 / 10+1)@ } else { @:newRows1(1, *c1)@ } } else if (getModuleId(p)== aggrRef) { if (getFunctionId(p) == sumRef || getFunctionId(p) == minRef || getFunctionId(p) == maxRef || getFunctionId(p) == avgRef) { @:newRows1(1, *c1?*c1:*c1+1)@ } else if (getFunctionId(p) == countRef){ @:newRows1(1, 1)@ } } else if( p->token == ASSIGNsymbol && p->argc== 2){ /* copy the rows property */ c1 = getVarRows(getArg(p,1)); if( c1 != NULL){ fixPropertySet(mb, getArg(p,0)); setProperty(getProps(mb,getArg(p,0)),"rows","=", TYPE_int,c1); } } } return 1;}@include optimizerWrapper.mx@h@:exportOptimizer(costModel)@@-The costModel does not change the program properties,which means we can skip the expensive defensive tests.@c@:wrapOptimizer(costModel,0)@@}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -