📄 nodehash.c
字号:
/*-------------------------------------------------------------------------
 *
 * nodeHash.c
 *	  Routines to hash relations for hashjoin
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.79 2003/08/04 02:39:59 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *		ExecHash		- generate an in-memory hash table of the relation
 *		ExecInitHash	- initialize node and subnodes
 *		ExecEndHash		- shutdown node and subnodes
 */
#include "postgres.h"

#include "executor/execdebug.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
#include "miscadmin.h"
#include "parser/parse_expr.h"
#include "utils/memutils.h"
#include "utils/lsyscache.h"

/* ----------------------------------------------------------------
 *		ExecHash
 *
 *		Build the hash table for a hashjoin, and do partitioning
 *		(spilling tuples to per-batch temp files) if more than one
 *		batch is required.
 *
 *		Reads the entire outer subplan (which is the hashjoin's inner
 *		relation) and inserts each tuple into the hash table or the
 *		appropriate batch file.  Returns the (empty) last slot so the
 *		caller has access to the tuple descriptor.
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecHash(HashState *node)
{
	EState	   *estate;
	PlanState  *outerNode;
	List	   *hashkeys;
	HashJoinTable hashtable;
	TupleTableSlot *slot;
	ExprContext *econtext;
	int			nbatch;
	int			i;

	/*
	 * get state info from node
	 */
	estate = node->ps.state;
	outerNode = outerPlanState(node);

	hashtable = node->hashtable;
	nbatch = hashtable->nbatch;

	if (nbatch > 0)
	{
		/*
		 * Open temp files for inner batches, if needed.  Note that file
		 * buffers are palloc'd in regular executor context.
		 */
		for (i = 0; i < nbatch; i++)
			hashtable->innerBatchFile[i] = BufFileCreateTemp(false);
	}

	/*
	 * set expression context
	 */
	hashkeys = node->hashkeys;
	econtext = node->ps.ps_ExprContext;

	/*
	 * Get all inner tuples and insert into the hash table (or temp
	 * files).  The loop ends when the outer subplan returns an empty
	 * slot.
	 */
	for (;;)
	{
		slot = ExecProcNode(outerNode);
		if (TupIsNull(slot))
			break;
		econtext->ecxt_innertuple = slot;
		ExecHashTableInsert(hashtable, econtext, hashkeys);
		ExecClearTuple(slot);
	}

	/*
	 * Return the slot so that we have the tuple descriptor when we need
	 * to save/restore them.  -Jeff 11 July 1991
	 */
	return slot;
}

/* ----------------------------------------------------------------
 *		ExecInitHash
 *
 *		Init routine for Hash node: builds the HashState, its
 *		expression context, result slot, child expressions, and the
 *		outer subplan.  The hash table itself is created later, by
 *		ExecHashTableCreate.
 * ----------------------------------------------------------------
 */
HashState *
ExecInitHash(Hash *node, EState *estate)
{
	HashState  *hashstate;

	SO_printf("ExecInitHash: initializing hash node\n");

	/*
	 * create state structure
	 */
	hashstate = makeNode(HashState);
	hashstate->ps.plan = (Plan *) node;
	hashstate->ps.state = estate;
	hashstate->hashtable = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &hashstate->ps);

#define HASH_NSLOTS 1

	/*
	 * initialize our result slot
	 */
	ExecInitResultTupleSlot(estate, &hashstate->ps);

	/*
	 * initialize child expressions
	 */
	hashstate->ps.targetlist = (List *)
		ExecInitExpr((Expr *) node->plan.targetlist,
					 (PlanState *) hashstate);
	hashstate->ps.qual = (List *)
		ExecInitExpr((Expr *) node->plan.qual,
					 (PlanState *) hashstate);

	/*
	 * initialize child nodes
	 */
	outerPlanState(hashstate) = ExecInitNode(outerPlan(node), estate);

	/*
	 * Initialize tuple type.  No need to initialize projection info
	 * because this node doesn't do projections.
	 */
	ExecAssignResultTypeFromOuterPlan(&hashstate->ps);
	hashstate->ps.ps_ProjInfo = NULL;

	return hashstate;
}

/*
 * ExecCountSlotsHash
 *		Count the tuple-table slots needed by this node and its children.
 */
int
ExecCountSlotsHash(Hash *node)
{
	return ExecCountSlotsNode(outerPlan(node)) +
		ExecCountSlotsNode(innerPlan(node)) +
		HASH_NSLOTS;
}

/* ---------------------------------------------------------------
 *		ExecEndHash
 *
 *		clean up routine for Hash node
 * ----------------------------------------------------------------
 */
void
ExecEndHash(HashState *node)
{
	PlanState  *outerPlan;

	/*
	 * free exprcontext
	 */
	ExecFreeExprContext(&node->ps);

	/*
	 * shut down the subplan
	 */
	outerPlan = outerPlanState(node);
	ExecEndNode(outerPlan);
}

/* ----------------------------------------------------------------
 *		ExecHashTableCreate
 *
 *		Create an empty hashtable data structure for hashjoin.
 *		(Despite older comments, the table lives in private memory
 *		contexts created below, not in shared memory.)
 * ----------------------------------------------------------------
 */
HashJoinTable
ExecHashTableCreate(Hash *node, List *hashOperators)
{
	HashJoinTable hashtable;
	Plan	   *outerNode;
	int			totalbuckets;
	int			nbuckets;
	int			nbatch;
	int			nkeys;
	int			i;
	List	   *ho;
	MemoryContext oldcxt;

	/*
	 * Get information about the size of the relation to be hashed (it's
	 * the "outer" subtree of this node, but the inner relation of the
	 * hashjoin).  Compute the appropriate size of the hash table.
	 */
	outerNode = outerPlan(node);

	ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
							&totalbuckets, &nbuckets, &nbatch);

#ifdef HJDEBUG
	printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n",
		   nbatch, totalbuckets, nbuckets);
#endif

	/*
	 * Initialize the hash table control block.
	 *
	 * The hashtable control block is just palloc'd from the executor's
	 * per-query memory context.
	 */
	hashtable = (HashJoinTable) palloc(sizeof(HashTableData));
	hashtable->nbuckets = nbuckets;
	hashtable->totalbuckets = totalbuckets;
	hashtable->buckets = NULL;
	hashtable->nbatch = nbatch;
	hashtable->curbatch = 0;
	hashtable->innerBatchFile = NULL;
	hashtable->outerBatchFile = NULL;
	hashtable->innerBatchSize = NULL;
	hashtable->outerBatchSize = NULL;

	/*
	 * Get info about the hash functions to be used for each hash key.
	 */
	nkeys = length(hashOperators);
	hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
	i = 0;
	foreach(ho, hashOperators)
	{
		Oid			hashfn;

		hashfn = get_op_hash_function(lfirsto(ho));
		if (!OidIsValid(hashfn))
			elog(ERROR, "could not find hash function for hash operator %u",
				 lfirsto(ho));
		fmgr_info(hashfn, &hashtable->hashfunctions[i]);
		i++;
	}

	/*
	 * Create temporary memory contexts in which to keep the hashtable
	 * working storage.  See notes in executor/hashjoin.h.
	 */
	hashtable->hashCxt = AllocSetContextCreate(CurrentMemoryContext,
											   "HashTableContext",
											   ALLOCSET_DEFAULT_MINSIZE,
											   ALLOCSET_DEFAULT_INITSIZE,
											   ALLOCSET_DEFAULT_MAXSIZE);

	hashtable->batchCxt = AllocSetContextCreate(hashtable->hashCxt,
												"HashBatchContext",
												ALLOCSET_DEFAULT_MINSIZE,
												ALLOCSET_DEFAULT_INITSIZE,
												ALLOCSET_DEFAULT_MAXSIZE);

	/* Allocate data that will live for the life of the hashjoin */
	oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);

	if (nbatch > 0)
	{
		/*
		 * Allocate and initialize (zeroed, via palloc0) the file arrays
		 * in hashCxt.
		 */
		hashtable->innerBatchFile = (BufFile **)
			palloc0(nbatch * sizeof(BufFile *));
		hashtable->outerBatchFile = (BufFile **)
			palloc0(nbatch * sizeof(BufFile *));
		hashtable->innerBatchSize = (long *)
			palloc0(nbatch * sizeof(long));
		hashtable->outerBatchSize = (long *)
			palloc0(nbatch * sizeof(long));
		/* The files will not be opened until later... */
	}

	/*
	 * Prepare context for the first-scan space allocations; allocate the
	 * hashbucket array therein, and set each bucket "empty" (palloc0
	 * zeroes the pointers).
	 */
	MemoryContextSwitchTo(hashtable->batchCxt);

	hashtable->buckets = (HashJoinTuple *)
		palloc0(nbuckets * sizeof(HashJoinTuple));

	MemoryContextSwitchTo(oldcxt);

	return hashtable;
}

/*
 * Compute appropriate size for hashtable given the estimated size of the
 * relation to be hashed (number of rows and average row width).
 *
 * Caution: the input is only the planner's estimates, and so can't be
 * trusted too far.  Apply a healthy fudge factor.
 *
 * This is exported so that the planner's costsize.c can use it.
 */

/* Target bucket loading (tuples per bucket) */
#define NTUP_PER_BUCKET			10

/* Fudge factor to allow for inaccuracy of input estimates */
#define FUDGE_FAC				2.0

void
ExecChooseHashTableSize(double ntuples, int tupwidth,
						int *virtualbuckets,
						int *physicalbuckets,
						int *numbatches)
{
	int			tupsize;
	double		inner_rel_bytes;
	long		hash_table_bytes;
	double		dtmp;
	int			nbatch;
	int			nbuckets;
	int			totalbuckets;
	int			bucketsize;

	/* Force a plausible relation size if no info */
	if (ntuples <= 0.0)
		ntuples = 1000.0;

	/*
	 * Estimate tupsize based on footprint of tuple in hashtable... but
	 * what about palloc overhead?
	 */
	tupsize = MAXALIGN(tupwidth) + MAXALIGN(sizeof(HashJoinTupleData));
	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;

	/*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -