📄 opt_emptyset.mx
字号:
@' The contents of this file are subject to the MonetDB Public License@' Version 1.1 (the "License"); you may not use this file except in@' compliance with the License. You may obtain a copy of the License at@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html@'@' Software distributed under the License is distributed on an "AS IS"@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the@' License for the specific language governing rights and limitations@' under the License.@'@' The Original Code is the MonetDB Database System.@'@' The Initial Developer of the Original Code is CWI.@' Portions created by CWI are Copyright (C) 1997-2007 CWI.@' All Rights Reserved.@f opt_emptySet@a M. Kersten@- Emptyset ReductionOne of the key decisions during MAL optimization is to estimatethe size of the BATs produced and consumed. Two cases are of interestfor symbolic processing. Namely when a BAT is known to contain notuples and those that have precisely one element.Such information may come from application domain knowledge or as aside effect from symbolic evaluation. It is associated with theprogram under inspection as properties.The empty set property is used by the reduction algorithmpresented here. Any empty set is propagated through the programto arrive at a smaller and therefore faster evaluation.For example, consider the following MAL test @example@verbatim V1 := bat.new(:oid,:int); V7 := bat.new(:oid,:int); V10@verb{ { }rows=0@verb{ } } := bat.new(:int,:oid); V11 := bat.reverse(V10); V12 := algebra.kdifference(V7,V11); V16 := algebra.markH(V12); V17 := algebra.join(V16,V7); bat.append(V1,V17); optimizer.costModel(); optimizer.emptySet();@end verbatim@end exampleCalling the optimizers replaces this programby the following code snippet. @example V1 := bat.new(:oid,:int); V7 := bat.new(:oid,:int); V10@verb{ { }rows=0@verb{ } } := bat.new(:int,:oid); V11@verb{ { }rows=0@verb{ } } := bat.new(:oid,:int); V12 := V7; V16 := algebra.markH(V12); V17 := algebra.join(V16,V7); bat.append(V1,V17);@end exampleThis block can be further optimized using alias propagationand dead code removal. The final block becomes:@example V1 := bat.new(:oid,:int); V7 := bat.new(:oid,:int); V16 := algebra.markH(V7); V17 := algebra.join(V16,V7); bat.append(V1,V17);@end exampleDuring empty set propagation, new candidates may appear. For example,taking the intersection with an empty set creates a target variablethat is empty too. It becomes an immediate target for optimization.The current implementation is conservative. A limited set ofinstructions is considered, geared at SQL.Any addition to the MonetDB instructionset would call for assessment on their effect.@{The implementation differs slightly from other optimizers in thatwe need first to collect the BAT variables that are designated as being empty@malpattern optimizer.emptySet():straddress OPTemptySet;pattern optimizer.emptySet(mod:str, fcn:str):straddress OPTemptySetcomment "Symbolic evaluation of empty BAT expressions";@h#ifndef _MAL_EMPTYSET_#define _MAL_EMPTYSET_#include "opt_prelude.h"#include "opt_support.h"/* #define DEBUG_OPT_EMPTYSET show partial result */#endif@c#include "mal_config.h"#include "opt_emptySet.h"#include "opt_aliases.h"#include "opt_deadcode.h"#include "mal_interpreter.h" /* for showErrors() */#include "mal_builder.h"#define propagate(X) {p->token= ASSIGNsymbol; setVarUsed(mb,getArg(p,1),TRUE); \ getArg(p,1)= getArg(p,X); p->argc = 2; p->fcn = 0; actions++;\ p->typechk=TYPE_UNKNOWN; p->fcn= NULL; \ if(getArgType(mb,p,0)== getArgType(mb,p,X)){\ setModuleId(p,NULL); setFunctionId(p,NULL); } \ else {actions++;setModuleId(p,NULL); setFunctionId(p,NULL); alias[getArg(p,0)]= getArg(p,1); } }@- else {actions++;setModuleId(p,algebraRef); setFunctionId(p,identityRef); } }Be aware to handle alias mapping.@cstatic intESevaluate(MalBlkPtr mb, char *empty){ int i, j, actions = 0; InstrPtr p; str likeRef = getName("like", 4); str existRef = getName("exist", 5); str uniqueRef = getName("unique", 6); str suniqueRef = getName("sunique", 7); str kuniqueRef = getName("kunique", 7); str intersectRef = getName("intersect", 9); str sintersectRef = getName("sintersect", 10); str kintersectRef = getName("kintersect", 10); str fragmentRef = getName("fragment", 8); int *alias; int limit = mb->stop, ctop=0; InstrPtr *old = mb->stmt, *constraints; newMalBlkStmt(mb, mb->stop); /* a new statement stack */ constraints= (InstrPtr *) alloca(sizeof(InstrPtr)*limit); alias = (int*) alloca(sizeof(int)*mb->vtop); for(i=0;i<mb->vtop; i++) alias[i]=i; /* Symbolic evaluation of the empty BAT variables */ /* by looking at empty BAT arguments */ for (i = 0; i < limit; i++) { char *f; p = old[i]; pushInstruction(mb,p); if( p->token == ENDsymbol){ for(i++; i<limit; i++) pushInstruction(mb,old[i]); break; } for(j=0; j<p->argc; j++) p->argv[j]= alias[getArg(p,j)];@-The bulk of the intelligence lies in inspecting callingsequences to filter and replace calls with empty arguments.@c f = getFunctionId(p); if (getModuleId(p) == sqlRef && empty[getArg(p,0)] && (f == bindRef || f == bindidxRef || f == binddbatRef)){ InstrPtr q;@-The emptyset assertion is only needed once for relational insertions.We assume here that string constants have been matched already.@c if( f == bindRef) { for( j=ctop-1; j>=0; j--){ q= constraints[j]; if( strcmp(getVarConstant(mb,getArg(q,1)).val.sval, getVarConstant(mb,getArg(p,1)).val.sval)==0 && strcmp(getVarConstant(mb,getArg(q,2)).val.sval, getVarConstant(mb,getArg(p,2)).val.sval)==0 && getVarConstant(mb,getArg(p,4)).val.ival<=2 && /* no updates etc */ getVarConstant(mb,getArg(q,4)).val.ival == getVarConstant(mb,getArg(p,4)).val.ival ) /* don't generate the assertion */ goto ignoreConstraint; } q = newStmt1(mb, constraintsRef, "emptySet"); q = pushArgument(mb, q, getArg(p,0) ); constraints[ctop++]= p; } ignoreConstraint: continue; } for (j = p->retc; j < p->argc; j++) { if (empty[getArg(p, j)]) { /* decode operations */ if (getModuleId(p)== algebraRef) { if (f == existRef) { /* always false */ setModuleId(p, NULL); setFunctionId(p, NULL); p->argc = 1; p->token = ASSIGNsymbol; pushBit(mb, p, FALSE); actions++; break; } if ( f == selectRef || f == tuniqueRef || f == likeRef || f == sortRef || f == sortTailRef || f == sortHTRef || f == sortTHRef || f == uniqueRef || f == suniqueRef || f == kuniqueRef || f == intersectRef || f == semijoinRef || f == sintersectRef || f == kintersectRef || f == fragmentRef ){ /* result is empty */ propagate(1); break; } if ( f == differenceRef || f == kdifferenceRef ) { propagate(1); break; } if ( f == sunionRef || f == kunionRef || f == unionRef) { /* copy non-empty argument */ if( j == 1) { propagate(2); } else { propagate(1); } break; } } } } } GDKfree(old); if( actions) { clrAllTypes(mb); /* force a complete resolve */ }#ifdef DEBUG_OPT_EMPTYSET printf("FINAL STAGE errors=%d\n", mb->errors); printFunction(GDKout, mb, LIST_MAL_ALL);#endif return actions;}@-We first have to find all candidates for empty set removal.They are recognized by an estimated zero row count and theyare not the target of an update.@cintOPTemptySetImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr p){ char *empty; int i; int *rows; PropertySet ps; empty = (char *) alloca(mb->vsize * sizeof(char)); memset(empty,0,mb->vsize); (void) stk; (void) p;#ifdef DEBUG_OPT_EMPTYSET printf("ESoptimizer called, collect empty sets and process them\n"); printFunction(GDKout, mb, LIST_MAL_ALL);#endif for (i = 0; i < mb->vtop; i++) { ps = getVarProperties(mb, i); if ( (rows= (int*) getPropertyValue(ps, "rows")) && *rows == 0) {#ifdef DEBUG_OPT_EMPTYSET stream_printf(GDKout, "START emptyset optimizer %d", i);#endif empty[i] = 1; } }#ifdef DEBUG_OPT_EMPTYSET stream_printf(GDKout, "\n");#endif return ESevaluate(mb, empty);}@include optimizerWrapper.mx@h@:exportOptimizer(emptySet)@@c@:wrapOptimizer(emptySet,OPT_CHECK_ALL)@@}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -