📄 opt_joinpath.mx
字号:
@' The contents of this file are subject to the MonetDB Public License@' Version 1.1 (the "License"); you may not use this file except in@' compliance with the License. You may obtain a copy of the License at@' http://monetdb.cwi.nl/Legal/MonetDBLicense-1.1.html@'@' Software distributed under the License is distributed on an "AS IS"@' basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the@' License for the specific language governing rights and limitations@' under the License.@'@' The Original Code is the MonetDB Database System.@'@' The Initial Developer of the Original Code is CWI.@' Portions created by CWI are Copyright (C) 1997-2007 CWI.@' All Rights Reserved.@f opt_joinpath@- Join PathsThe routine @sc{optimizer.joinPath()}walks through the program looking for join operationsand cascades them into multiple join paths.@verbatim a:= bat.new(:oid,:oid); b:= bat.new(:oid,:oid); c:= bat.new(:oid,:str); j1:= algebra.join(a,b); j2:= algebra.join(j1,c); j3:= algebra.join(b,b); j4:= algebra.join(b,j3);@end verbatimThe result includes the expanded join expressions.The deadcode optimizer should take care of superflouspaths.@verbatim a:= bat.new(:oid,:oid); j1:= algebra.join(a,b); j2:= algebra.joinPath(a,b,c); j3:= algebra.join(b,b); j4:= algebra.joinPath(b,b,b);@end verbatim@{@malpattern optimizer.joinPath():straddress OPTjoinPath;pattern optimizer.joinPath(mod:str, fcn:str):straddress OPTjoinPathcomment "Join path constructor";pattern algebra.joinPath(l:bat[:any,:any]...):bat[:any,:any]address ALGjoinPathcomment "internal routine to handle join paths. The type analysis is rather tricky.";@h#ifndef _OPT_JOINPATH_#define _OPT_JOINPATH_#include "opt_prelude.h"#include "opt_support.h"#include "mal_interpreter.h"/* #define DEBUG_OPT_JOINPATH */#endif@-@c#include "mal_config.h"#include "opt_joinpath.h"static intOPTjoinPathImplementation(MalBlkPtr mb, MalStkPtr stk, InstrPtr p){ int i,j,k, actions=0; int *pc, *used; str joinPathRef = putName("joinPath",8); InstrPtr q,r; (void) stk; pc= (int*) alloca(sizeof(int)* mb->vtop); /* to find last assignment */ memset((char*) pc, 0, sizeof(int)* mb->vtop); used= (int*) alloca(sizeof(int)* mb->vtop); /* to find last assignment */ memset((char*) used, 0, sizeof(int)* mb->vtop); for (i = 1; i < mb->stop; i++){ p= getInstrPtr(mb,i); for(j=0; j< p->retc; j++) pc[getArg(p,j)]= i; if( getModuleId(p)== algebraRef && getFunctionId(p)== joinRef ){@-Try to expand each of its argument list@c q= copyInstruction(p); q->argc=1; for(j=p->retc; j<p->argc; j++) if( pc[getArg(p,j)] && used[getArg(p,j)]==0 ){ r= getInstrPtr(mb,pc[getArg(p,j)]);#ifdef DEBUG_OPT_JOINPATH stream_printf(GDKout,"expand with \n"); printInstruction(GDKout,mb, r, LIST_MAL_ALL);#endif if( getModuleId(r)== algebraRef && ( getFunctionId(r)== joinRef || getFunctionId(r)== joinPathRef) ){ for(k= r->retc; k<r->argc; k++) q= pushArgument(mb,q,getArg(r,k)); } else q= pushArgument(mb,q,getArg(p,j)); }#ifdef DEBUG_OPT_JOINPATH stream_printf(GDKout,"new joinPath instruction\n"); printInstruction(GDKout,mb, q, LIST_MAL_ALL);#endif if(q->argc<= p->argc){ /* no change */ nochange: freeInstruction(q); } else {@-Final type check.@c for(j=1; j<q->argc-1; j++) if( getTailType(getArgType(mb,q,j)) != getHeadType(getArgType(mb,q,j+1)) && !( getTailType(getArgType(mb,q,j))== TYPE_oid && getHeadType(getArgType(mb,q,j))== TYPE_void) && !( getTailType(getArgType(mb,q,j))== TYPE_void && getHeadType(getArgType(mb,q,j))== TYPE_oid) ) goto nochange; setFunctionId(q,joinPathRef); getInstrPtr(mb,i)=q; freeInstruction(p); p=q; actions++; } } /* remember its latest use for(j=p->retc; j<p->argc; j++) used[getArg(p,j)]= i; */ } return actions;}@include optimizerWrapper.mx@h@:exportOptimizer(joinPath)@opt_export str ALGjoinPath(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);@c@:wrapOptimizer(joinPath,OPT_CHECK_ALL)@@-The join path optimizer takes a join sequence andattempts to minimize the intermediate result.The choice depends on a good estimate of intermediateresults.For the time being, we use a simplistic model, basedon the assumption that most joins are foreign key joins anyway.@csize_tALGjoinCost(BAT *l, BAT *r){ size_t lc, rc; lc = BATcount(l); rc = BATcount(r); if( l->ttype== TYPE_oid || r->htype== TYPE_oid ) return MIN(lc,rc); return lc * rc;}@-The join path type analysis should also be done at run time,because the expressive power of MAL is insufficient toenforce a proper join type list.@cstrALGjoinPath(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ BAT *joins[MAXARG], *tmp[MAXARG], *b = 0; size_t estimate, e; int fnd, top, ttop, i, j, ret = 0, *bid; (void) mb; (void) ret; ttop= top = 0;#ifdef DEBUG_OPT_JOINPATH printInstruction(GDKout,mb,pci,LIST_MAL_ALL);#endif for (i = pci->retc; i < pci->argc; i++) { bid = (int *) getArgReference(stk, pci, i);#ifdef DEBUG_OPT_JOINPATH printf("bid %d\n", *bid);#endif if ((b = BATdescriptor(*bid)) == NULL) { for( --top; top>=0; top--) BBPreleaseref(joins[top]->batCacheid); throw(MAL, "algebra.joinPath", "Cannot access descriptor"); } joins[top++] = b; } /* solve the join by pairing the smallest first */ while (top > 2) { j = 0; estimate = ALGjoinCost(joins[0],joins[1]);#ifdef DEBUG_OPT_JOINPATH printf("estimate join(%d,%d) %d\n", joins[0]->batCacheid, joins[1]->batCacheid,(int)estimate);#endif for (i = 1; i < top - 1; i++) { e = ALGjoinCost(joins[i], joins[i + 1]);#ifdef DEBUG_OPT_JOINPATH printf("estimate join(%d,%d) %d\n", joins[i]->batCacheid, joins[i+1]->batCacheid,(int)e);#endif if (e < estimate) { estimate = e; j = i; } }@-BEWARE. you may not use a size estimation, because itmay fire a BATproperty check in a few cases.@c b = BATjoin(joins[j], joins[j + 1], oid_nil);#ifdef DEBUG_OPT_JOINPATH printf("%d:= join(%d,%d)\n", b->batCacheid, joins[j]->batCacheid, joins[j + 1]->batCacheid);#endif assert(b);@-The new BAT is not part of the safe/restore list and wemay have to remove it when it is not the final result@= releaseTmp fnd=0; for(i=0; i< ttop; i++) if(tmp[i] && tmp[i] == joins[@1]){ /* stream_printf(GDKout,"release %s\n",BBP_logical(tmp[i]->batCacheid));*/ BBPdecref(tmp[i]->batCacheid, FALSE); tmp[i]=0; fnd++; } if( fnd == 0) BBPreleaseref(joins[@1]->batCacheid);@c @:releaseTmp(j)@ @:releaseTmp(j+1)@ joins[j] = b; tmp[ttop++]= b; top--; for (i = j + 1; i < top; i++) joins[i] = joins[i + 1]; }#ifdef DEBUG_OPT_JOINPATH printf("final join %d and %d\n", joins[0]->batCacheid, joins[1]->batCacheid);#endif b = BATjoin(joins[0], joins[1], oid_nil); @:releaseTmp(0)@ @:releaseTmp(1)@ assert(b); BBPkeepref(b->batCacheid); *(int *) getArgReference(stk, pci, 0) = b->batCacheid; return MAL_SUCCEED;}@}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -