📄 setoperatornode.java
字号:
/*
Derby - Class org.apache.derby.impl.sql.compile.SetOperatorNode
Copyright 2004 The Apache Software Foundation or its licensors, as applicable.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package org.apache.derby.impl.sql.compile;
import org.apache.derby.iapi.services.sanity.SanityManager;
import org.apache.derby.iapi.error.StandardException;
import org.apache.derby.iapi.sql.compile.AccessPath;
import org.apache.derby.iapi.sql.compile.C_NodeTypes;
import org.apache.derby.iapi.sql.compile.CostEstimate;
import org.apache.derby.iapi.sql.compile.Optimizable;
import org.apache.derby.iapi.sql.compile.OptimizablePredicate;
import org.apache.derby.iapi.sql.compile.OptimizablePredicateList;
import org.apache.derby.iapi.sql.dictionary.DataDictionary;
import org.apache.derby.iapi.sql.dictionary.TableDescriptor;
import org.apache.derby.iapi.reference.SQLState;
import org.apache.derby.iapi.types.DataTypeDescriptor;
import org.apache.derby.iapi.util.JBitSet;
import java.util.HashMap;
/**
* A SetOperatorNode represents a UNION, INTERSECT, or EXCEPT in a DML statement. Binding and optimization
* preprocessing is the same for all of these operations, so they share bind methods in this abstract class.
*
* The class contains a boolean telling whether the operation should eliminate
* duplicate rows.
*
* @author Jeff Lichtman
*/
public abstract class SetOperatorNode extends TableOperatorNode
{
/**
** Tells whether to eliminate duplicate rows. all == TRUE means do
** not eliminate duplicates, all == FALSE means eliminate duplicates.
*/
boolean all;
OrderByList orderByList;
// List of scoped predicates for pushing during optimization.
PredicateList leftOptPredicates;
PredicateList rightOptPredicates;
// List of original (unscoped) predicates that we tried to push
// during the most recent phase of optimization.
PredicateList pushedPredicates;
// Mapping of original predicates to scoped predicates, used to
// avoid re-scoping predicates unnecessarily.
HashMap leftScopedPreds;
HashMap rightScopedPreds;
/**
* Initializer for a SetOperatorNode.
*
* @param leftResult The ResultSetNode on the left side of this union
* @param rightResult The ResultSetNode on the right side of this union
* @param all Whether or not this is an ALL.
* @param tableProperties Properties list associated with the table
*
* @exception StandardException Thrown on error
*/
public void init(
Object leftResult,
Object rightResult,
Object all,
Object tableProperties)
throws StandardException
{
super.init(leftResult, rightResult, tableProperties);
this.all = ((Boolean) all).booleanValue();
/* resultColumns cannot be null, so we make a copy of the left RCL
* for now. At bind() time, we need to recopy the list because there
* may have been a "*" in the list. (We will set the names and
* column types at that time, as expected.)
*/
resultColumns = leftResultSet.getResultColumns().copyListAndObjects();
}
/**
* @see Optimizable#modifyAccessPath
*
* @exception StandardException Thrown on error
*/
public Optimizable modifyAccessPath(JBitSet outerTables,
PredicateList predList) throws StandardException
{
// When we optimized this node we attempted to push predicates down to
// the children, which means the best access path for the children
// might depend on those predicates. So now that we're preparing
// to generate the best paths, we have to push those same predicates
// down again (this is the last time) so that the children can use
// them as appropriate. NOTE: If our final choice for join strategy
// is a hash join, then we do not push the predicates because we'll
// need them to be at this level in order to find out which of them
// is the equijoin predicate that is required by hash join.
if ((predList != null) &&
!getTrulyTheBestAccessPath().getJoinStrategy().isHashJoin())
{
for (int i = predList.size() - 1; i >= 0; i--)
if (pushOptPredicate(predList.getOptPredicate(i)))
predList.removeOptPredicate(i);
}
/*
* It's possible that we tried to push a predicate down to this node's
* children but failed to do so. This can happen if this node's
* children both match the criteria for pushing a predicate (namely,
* they reference base tables) but the children's children do not.
* Ex.
* select * from
* (select i,j from t2 UNION
* values (1,1),(2,2),(3,3),(4,4) UNION
* select i,j from t1
* ) x0 (i,j),
* t5 where x0.i = t5.i;
*
* This will yield a tree resembling the following:
*
* UNION
* / \
* UNION SELECT (T1)
* / \
* SELECT (T2) VALUES
*
* In this case the top UNION ("this") will push the predicate down,
* but the second UNION will _not_ push the predicate because
* it can't be pushed to the VALUES clause. This means that
* after we're done modifying the paths for "this" node (the top
* UNION), the predicate will still be sitting in our leftOptPredicates
* list. If that's the case, then we have to make sure the predicate,
* which was _not_ enforced in the left child, is enforced at this
* level. We do that by generating a ProjectRestrictNode above this
* node. Yes, this means the predicate will actually be applied
* twice to the right child (in this case), but that's okay as it
* won't affect the results.
*/
// Get the cost estimate for this node so that we can put it in
// the new ProjectRestrictNode, if one is needed.
CostEstimate ce = getFinalCostEstimate();
// Modify this node's access paths.
ResultSetNode topNode = (ResultSetNode)modifyAccessPath(outerTables);
// Now see if there are any left over predicates; if so, then we
// have to generate a ProjectRestrictNode.
if (((leftOptPredicates != null) && (leftOptPredicates.size() > 0)) ||
((rightOptPredicates != null) && (rightOptPredicates.size() > 0)))
{
// When we generate the project restrict node, we pass in the
// "pushedPredicates" list because that has the predicates in
// _unscoped_ form, which means they are intended for _this_
// node instead of this node's children. That's exactly what
// we want.
ResultSetNode prnRSN = (ResultSetNode) getNodeFactory().getNode(
C_NodeTypes.PROJECT_RESTRICT_NODE,
topNode, // Child ResultSet
topNode.getResultColumns(), // Projection
null, // Restriction
pushedPredicates, // Restriction as PredicateList
null, // Subquerys in Projection
null, // Subquerys in Restriction
null, // Table properties
getContextManager());
prnRSN.costEstimate = ce.cloneMe();
prnRSN.setReferencedTableMap(topNode.getReferencedTableMap());
topNode = prnRSN;
}
return (Optimizable)topNode;
}
/**
* @see org.apache.derby.iapi.sql.compile.Optimizable#pushOptPredicate
*
* Take a predicate and push it down to both the left AND right result
* sets. Return "true" if we successfully pushed it to both sides,
* and "false" otherwise. The assumption is that if we return "true",
* the caller will take the predicate and remove it from its own list
* of predicates to evaluate; if we return false, then the predicate
* will be evaluated at the level of the caller. So returning "false"
* means that the left and right result sets for this node will be fully
* returned, and then the predicate will be evaluated against the
* <set-operator> of those result sets (as of DERBY-805, the only set
* operator calling this method is UnionNode). If we can push the
* predicate down to both children, though, we can evaluate it closer
* to store, which means that each child result set returns only the
* correctly qualified rows, and thus the calling set operator will
* have a smaller result set on which to operate, which can boost
* performance.
*
* That said, if we can't push the predicate to _both_ sides, we don't
* push it at all. The reason is that if we push to one side but not
* to the other, we would have to ask the question of whether we should
* return "true" (meaning that the predicate would be removed from the
* caller's list and thus would _not_ be evaluated at the <set-operator>
* level) or "false" (meaning that the caller would keep the predicate
* and evaluate it at the <set-operator> level). Depending on the query
* in question, both answers could end up returning incorrect results.
*
* For example, if we push it to the right but not to the left, then
* leave it in the caller's list, the optimizer for the caller might
* decide to use the predicate to do a hash join with some outer result
* set (if the predicate is an equijoin predicate). That would result
* in materialization of the calling node and of its children--but since
* we pushed a predicate that depends on the outer table down into the
* right child, materialization of the right child will only return the
* rows that join with the _first_ row of the outer result set, which
* is wrong.
*
* If, on the other hand, we push the predicate to one side and then tell
* the caller to remove it from its list, the side to which we did _not_
* push the predicate could return rows that aren't qualified. Then,
* since the caller removed the predicate from its list, it (the caller)
* will not evaluate the predicate on its own result set--and thus we
* can end up returning rows that we weren't supposed to return.
*
* So all of that said, only push (and return "true") if we think we
* can push the predicate to both sides.
*
* @exception StandardException Thrown on error
*/
public boolean pushOptPredicate(OptimizablePredicate optimizablePredicate)
throws StandardException
{
// This method was added to SetOperatorNode as part of DERBY-805,
// which was only targeted for UnionNodes. So for now, we don't
// do anything if "this" isn't a Union. This check can be removed
// when support for other SetOperators is added.
if (!(this instanceof UnionNode))
return false;
// We only handle certain types of predicates here; if the received
// predicate doesn't qualify, then don't push it.
Predicate pred = (Predicate)optimizablePredicate;
if (!pred.pushableToSubqueries())
return false;
// Check to see if the child nodes reference any base tables; if either
// child does not reference at least one base table, then we don't try
// to push the predicate.
boolean canPush = false;
JBitSet tableNums = new JBitSet(getReferencedTableMap().size());
BaseTableNumbersVisitor btnVis =
new BaseTableNumbersVisitor(tableNums);
// Check the left child.
leftResultSet.accept(btnVis);
canPush = (tableNums.getFirstSetBit() != -1);
/* If we can't push it to _both_ children, then we don't push at all.
* RESOLVE: We can add the ability to push a predicate to one side
* only by putting a ProjectRestrictNode between the union node and
* the child as a place to park the predicate. To make things simple,
* we might want to always put ProjectRestrictNodes under both sides
* of the union during preprocessing (i.e. after binding but before
* optimization). In some cases the extra nodes won't be needed, but
* PRNs (and the corresponding ProjectRestrictResultSets) are cheap.
* Also, we could eliminate unnecessary ProjectRestrictNodes at the
* end of optimization (possibly in modifyAccessPaths()). Until all
* of that is implemented, though, we only push if we can push to
* both sides...
*/
if (!canPush)
return false;
// Check the right child.
tableNums.clearAll();
rightResultSet.accept(btnVis);
canPush = (tableNums.getFirstSetBit() != -1);
if (!canPush)
return false;
BinaryRelationalOperatorNode opNode =
(BinaryRelationalOperatorNode)pred.getAndNode().getLeftOperand();
// Note: we assume we only get here for predicates with col refs on
// both sides; if that ever changes, the following cast will need
// to be updated accordingly.
boolean opWasRemapped =
((ColumnReference)opNode.getLeftOperand()).hasBeenRemapped();
/* If there is a ProjectRestrictNode directly above this node,
* then the predicate in question may have been remapped to this
* SetOperatorNode before we got here (see pushOptPredicate() in
* ProjectRestrictNode). If we leave it mapped when we try to
* get scoped predicates for the left and right result sets, the
* underlying column references of the scoped predicates will
* effectively be doubly-mapped (i.e. mapped more than once), which
* can cause problems at code generation time. So we 1) un-remap
* the predicate here, 2) get the scoped predicates, and then
* 3) remap the predicate again at the end of this method.
*/
RemapCRsVisitor rcrv = null;
if (opWasRemapped)
{
rcrv = new RemapCRsVisitor(false);
pred.getAndNode().accept(rcrv);
}
// Get a list of all of the underlying base tables that this node
// references. We pass this down when scoping so that we can tell
// if the operands are actually supposed to be scoped to _this_
// node's children. Note that in order for the predicate to
// have been pushed this far, at least one of its operands must
// apply to this node--we don't know which one it is, though,
// so we use this tableNums info to figure that out.
tableNums.clearAll();
this.accept(btnVis);
/* What we want to do here is push the predicate to the left/right
* child. That means that we need to find the equivalent column(s)
* in each child.
* Ex:
*
* select * from
* (select i,j from t1 union select i,j from t2) X1,
* (select a,b from t3 union select a,b from t4) X2
* where X1.j = X2.b;
*
* In this example, X1.j maps to "t1" for the left side of the
* union (X1) and "t2" for the right side of the union. So we have
* to get versions of the predicate that are appropriate to each
* side. That's what the call to getPredScopedForResultSet()
* in the following code does.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -