📄 nodehashjoin.c
字号:
hjstate->hj_CurTuple = NULL; /* * Deconstruct the hash clauses into outer and inner argument values, so * that we can evaluate those subexpressions separately. Also make a list * of the hash operator OIDs, in preparation for looking up the hash * functions to use. */ lclauses = NIL; rclauses = NIL; hoperators = NIL; foreach(l, hjstate->hashclauses) { FuncExprState *fstate = (FuncExprState *) lfirst(l); OpExpr *hclause; Assert(IsA(fstate, FuncExprState)); hclause = (OpExpr *) fstate->xprstate.expr; Assert(IsA(hclause, OpExpr)); lclauses = lappend(lclauses, linitial(fstate->args)); rclauses = lappend(rclauses, lsecond(fstate->args)); hoperators = lappend_oid(hoperators, hclause->opno); } hjstate->hj_OuterHashKeys = lclauses; hjstate->hj_InnerHashKeys = rclauses; hjstate->hj_HashOperators = hoperators; /* child Hash node needs to evaluate inner hash keys, too */ ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses; hjstate->js.ps.ps_OuterTupleSlot = NULL; hjstate->js.ps.ps_TupFromTlist = false; hjstate->hj_NeedNewOuter = true; hjstate->hj_MatchedOuter = false; hjstate->hj_OuterNotEmpty = false; return hjstate;}intExecCountSlotsHashJoin(HashJoin *node){ return ExecCountSlotsNode(outerPlan(node)) + ExecCountSlotsNode(innerPlan(node)) + HASHJOIN_NSLOTS;}/* ---------------------------------------------------------------- * ExecEndHashJoin * * clean up routine for HashJoin node * ---------------------------------------------------------------- */voidExecEndHashJoin(HashJoinState *node){ /* * Free hash table */ if (node->hj_HashTable) { ExecHashTableDestroy(node->hj_HashTable); node->hj_HashTable = NULL; } /* * Free the exprcontext */ ExecFreeExprContext(&node->js.ps); /* * clean out the tuple table */ ExecClearTuple(node->js.ps.ps_ResultTupleSlot); ExecClearTuple(node->hj_OuterTupleSlot); ExecClearTuple(node->hj_HashTupleSlot); /* * clean up subtrees */ ExecEndNode(outerPlanState(node)); ExecEndNode(innerPlanState(node));}/* * ExecHashJoinOuterGetTuple * * get the next outer tuple for hashjoin: either by * executing a plan node in the first pass, or from * the temp files for the hashjoin batches. * * Returns a null slot if no more outer tuples. On success, the tuple's * hash value is stored at *hashvalue --- this is either originally computed, * or re-read from the temp file. */static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode, HashJoinState *hjstate, uint32 *hashvalue){ HashJoinTable hashtable = hjstate->hj_HashTable; int curbatch = hashtable->curbatch; TupleTableSlot *slot; if (curbatch == 0) { /* if it is the first pass */ /* * Check to see if first outer tuple was already fetched by * ExecHashJoin() and not used yet. */ slot = hjstate->hj_FirstOuterTupleSlot; if (!TupIsNull(slot)) hjstate->hj_FirstOuterTupleSlot = NULL; else slot = ExecProcNode(outerNode); if (!TupIsNull(slot)) { /* * We have to compute the tuple's hash value. */ ExprContext *econtext = hjstate->js.ps.ps_ExprContext; econtext->ecxt_outertuple = slot; *hashvalue = ExecHashGetHashValue(hashtable, econtext, hjstate->hj_OuterHashKeys); /* remember outer relation is not empty for possible rescan */ hjstate->hj_OuterNotEmpty = true; return slot; } /* * We have just reached the end of the first pass. Try to switch to a * saved batch. */ curbatch = ExecHashJoinNewBatch(hjstate); } /* * Try to read from a temp file. Loop allows us to advance to new batches * as needed. NOTE: nbatch could increase inside ExecHashJoinNewBatch, so * don't try to optimize this loop. */ while (curbatch < hashtable->nbatch) { slot = ExecHashJoinGetSavedTuple(hjstate, hashtable->outerBatchFile[curbatch], hashvalue, hjstate->hj_OuterTupleSlot); if (!TupIsNull(slot)) return slot; curbatch = ExecHashJoinNewBatch(hjstate); } /* Out of batches... */ return NULL;}/* * ExecHashJoinNewBatch * switch to a new hashjoin batch * * Returns the number of the new batch (1..nbatch-1), or nbatch if no more. * We will never return a batch number that has an empty outer batch file. */static intExecHashJoinNewBatch(HashJoinState *hjstate){ HashJoinTable hashtable = hjstate->hj_HashTable; int nbatch; int curbatch; BufFile *innerFile; TupleTableSlot *slot; uint32 hashvalue;start_over: nbatch = hashtable->nbatch; curbatch = hashtable->curbatch; if (curbatch > 0) { /* * We no longer need the previous outer batch file; close it right * away to free disk space. */ if (hashtable->outerBatchFile[curbatch]) BufFileClose(hashtable->outerBatchFile[curbatch]); hashtable->outerBatchFile[curbatch] = NULL; } /* * We can always skip over any batches that are completely empty on both * sides. We can sometimes skip over batches that are empty on only one * side, but there are exceptions: * * 1. In a LEFT JOIN, we have to process outer batches even if the inner * batch is empty. * * 2. If we have increased nbatch since the initial estimate, we have to * scan inner batches since they might contain tuples that need to be * reassigned to later inner batches. * * 3. Similarly, if we have increased nbatch since starting the outer * scan, we have to rescan outer batches in case they contain tuples that * need to be reassigned. */ curbatch++; while (curbatch < nbatch && (hashtable->outerBatchFile[curbatch] == NULL || hashtable->innerBatchFile[curbatch] == NULL)) { if (hashtable->outerBatchFile[curbatch] && hjstate->js.jointype == JOIN_LEFT) break; /* must process due to rule 1 */ if (hashtable->innerBatchFile[curbatch] && nbatch != hashtable->nbatch_original) break; /* must process due to rule 2 */ if (hashtable->outerBatchFile[curbatch] && nbatch != hashtable->nbatch_outstart) break; /* must process due to rule 3 */ /* We can ignore this batch. */ /* Release associated temp files right away. */ if (hashtable->innerBatchFile[curbatch]) BufFileClose(hashtable->innerBatchFile[curbatch]); hashtable->innerBatchFile[curbatch] = NULL; if (hashtable->outerBatchFile[curbatch]) BufFileClose(hashtable->outerBatchFile[curbatch]); hashtable->outerBatchFile[curbatch] = NULL; curbatch++; } if (curbatch >= nbatch) return curbatch; /* no more batches */ hashtable->curbatch = curbatch; /* * Reload the hash table with the new inner batch (which could be empty) */ ExecHashTableReset(hashtable); innerFile = hashtable->innerBatchFile[curbatch]; if (innerFile != NULL) { if (BufFileSeek(innerFile, 0, 0L, SEEK_SET)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not rewind hash-join temporary file: %m"))); while ((slot = ExecHashJoinGetSavedTuple(hjstate, innerFile, &hashvalue, hjstate->hj_HashTupleSlot))) { /* * NOTE: some tuples may be sent to future batches. Also, it is * possible for hashtable->nbatch to be increased here! */ ExecHashTableInsert(hashtable, ExecFetchSlotTuple(slot), hashvalue); } /* * after we build the hash table, the inner batch file is no longer * needed */ BufFileClose(innerFile); hashtable->innerBatchFile[curbatch] = NULL; } /* * If there's no outer batch file, advance to next batch. */ if (hashtable->outerBatchFile[curbatch] == NULL) goto start_over; /* * Rewind outer batch file, so that we can start reading it. */ if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not rewind hash-join temporary file: %m"))); return curbatch;}/* * ExecHashJoinSaveTuple * save a tuple to a batch file. * * The data recorded in the file for each tuple is its hash value, * then an image of its HeapTupleData (with meaningless t_data pointer) * followed by the HeapTupleHeader and tuple data. * * Note: it is important always to call this in the regular executor * context, not in a shorter-lived context; else the temp file buffers * will get messed up. */voidExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue, BufFile **fileptr){ BufFile *file = *fileptr; size_t written; if (file == NULL) { /* First write to this batch file, so open it. */ file = BufFileCreateTemp(false); *fileptr = file; } written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32)); if (written != sizeof(uint32)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to hash-join temporary file: %m"))); written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData)); if (written != sizeof(HeapTupleData)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to hash-join temporary file: %m"))); written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len); if (written != (size_t) heapTuple->t_len) ereport(ERROR, (errcode_for_file_access(), errmsg("could not write to hash-join temporary file: %m")));}/* * ExecHashJoinGetSavedTuple * read the next tuple from a batch file. Return NULL if no more. * * On success, *hashvalue is set to the tuple's hash value, and the tuple * itself is stored in the given slot. */static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate, BufFile *file, uint32 *hashvalue, TupleTableSlot *tupleSlot){ HeapTupleData htup; size_t nread; HeapTuple heapTuple; nread = BufFileRead(file, (void *) hashvalue, sizeof(uint32)); if (nread == 0) return NULL; /* end of file */ if (nread != sizeof(uint32)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from hash-join temporary file: %m"))); nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData)); if (nread != sizeof(HeapTupleData)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from hash-join temporary file: %m"))); heapTuple = palloc(HEAPTUPLESIZE + htup.t_len); memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData)); heapTuple->t_datamcxt = CurrentMemoryContext; heapTuple->t_data = (HeapTupleHeader) ((char *) heapTuple + HEAPTUPLESIZE); nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len); if (nread != (size_t) htup.t_len) ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from hash-join temporary file: %m"))); return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);}voidExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt){ /* * In a multi-batch join, we currently have to do rescans the hard way, * primarily because batch temp files may have already been released. But * if it's a single-batch join, and there is no parameter change for the * inner subnode, then we can just re-use the existing hash table without * rebuilding it. */ if (node->hj_HashTable != NULL) { if (node->hj_HashTable->nbatch == 1 && ((PlanState *) node)->righttree->chgParam == NULL) { /* * okay to reuse the hash table; needn't rescan inner, either. * * What we do need to do is reset our state about the emptiness * of the outer relation, so that the new scan of the outer will * update it correctly if it turns out to be empty this time. * (There's no harm in clearing it now because ExecHashJoin won't * need the info. In the other cases, where the hash table * doesn't exist or we are destroying it, we leave this state * alone because ExecHashJoin will need it the first time * through.) */ node->hj_OuterNotEmpty = false; } else { /* must destroy and rebuild hash table */ ExecHashTableDestroy(node->hj_HashTable); node->hj_HashTable = NULL; /* * if chgParam of subnode is not null then plan will be re-scanned * by first ExecProcNode. */ if (((PlanState *) node)->righttree->chgParam == NULL) ExecReScan(((PlanState *) node)->righttree, exprCtxt); } } /* Always reset intra-tuple state */ node->hj_CurHashValue = 0; node->hj_CurBucketNo = 0; node->hj_CurTuple = NULL; node->js.ps.ps_OuterTupleSlot = NULL; node->js.ps.ps_TupFromTlist = false; node->hj_NeedNewOuter = true; node->hj_MatchedOuter = false; node->hj_FirstOuterTupleSlot = NULL; /* * if chgParam of subnode is not null then plan will be re-scanned by * first ExecProcNode. */ if (((PlanState *) node)->lefttree->chgParam == NULL) ExecReScan(((PlanState *) node)->lefttree, exprCtxt);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -