📄 tuplestore.c
字号:
/*-------------------------------------------------------------------------
 *
 * tuplestore.c
 *	  Generalized routines for temporary tuple storage.
 *
 * This module handles temporary storage of tuples for purposes such
 * as Materialize nodes, hashjoin batch files, etc.  It is essentially
 * a dumbed-down version of tuplesort.c; it does no sorting of tuples
 * but can only store and regurgitate a sequence of tuples.  However,
 * because no sort is required, it is allowed to start reading the sequence
 * before it has all been written.  This is particularly useful for cursors,
 * because it allows random access within the already-scanned portion of
 * a query without having to process the underlying scan to completion.
 * A temporary file is used to handle the data if it exceeds the
 * space limit specified by the caller.
 *
 * The (approximate) amount of memory allowed to the tuplestore is specified
 * in kilobytes by the caller.  We absorb tuples and simply store them in an
 * in-memory array as long as we haven't exceeded maxKBytes.  If we do exceed
 * maxKBytes, we dump all the tuples into a temp file and then read from that
 * when needed.
 *
 * When the caller requests random access to the data, we write the temp file
 * in a format that allows either forward or backward scan.  Otherwise, only
 * forward scan is allowed.  But rewind and markpos/restorepos are allowed
 * in any case.
 *
 * Because we allow reading before writing is complete, there are two
 * interesting positions in the temp file: the current read position and
 * the current write position.  At any given instant, the temp file's seek
 * position corresponds to one of these, and the other one is remembered in
 * the Tuplestore's state.
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/sort/tuplestore.c,v 1.23.2.1 2005/11/22 18:23:25 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/heapam.h"
#include "storage/buffile.h"
#include "utils/memutils.h"
#include "utils/tuplestore.h"

/*
 * Possible states of a Tuplestore object.  These denote the states that
 * persist between calls of Tuplestore routines.
 */
typedef enum
{
	TSS_INMEM,					/* Tuples still fit in memory */
	TSS_WRITEFILE,				/* Writing to temp file */
	TSS_READFILE				/* Reading from temp file */
} TupStoreStatus;

/*
 * Private state of a Tuplestore operation.
 */
struct Tuplestorestate
{
	TupStoreStatus status;		/* enumerated value as shown above */
	bool		randomAccess;	/* did caller request random access? */
	bool		interXact;		/* keep open through transactions? */
	long		availMem;		/* remaining memory available, in bytes;
								 * negative once over budget (see LACKMEM) */
	BufFile    *myfile;			/* underlying file, or NULL if none */

	/*
	 * These function pointers decouple the routines that must know what kind
	 * of tuple we are handling from the routines that don't need to know it.
	 * They are set up by the tuplestore_begin_xxx routines.
	 *
	 * (Although tuplestore.c currently only supports heap tuples, I've copied
	 * this part of tuplesort.c so that extension to other kinds of objects
	 * will be easy if it's ever needed.)
	 *
	 * Function to copy a supplied input tuple into palloc'd space. (NB: we
	 * assume that a single pfree() is enough to release the tuple later, so
	 * the representation must be "flat" in one palloc chunk.) state->availMem
	 * must be decreased by the amount of space used.
	 */
	void	   *(*copytup) (Tuplestorestate *state, void *tup);

	/*
	 * Function to write a stored tuple onto tape.  The representation of the
	 * tuple on tape need not be the same as it is in memory; requirements on
	 * the tape representation are given below.  After writing the tuple,
	 * pfree() it, and increase state->availMem by the amount of memory space
	 * thereby released.
	 */
	void		(*writetup) (Tuplestorestate *state, void *tup);

	/*
	 * Function to read a stored tuple from tape back into memory. 'len' is
	 * the already-read length of the stored tuple.  Create and return a
	 * palloc'd copy, and decrease state->availMem by the amount of memory
	 * space consumed.
	 */
	void	   *(*readtup) (Tuplestorestate *state, unsigned int len);

	/*
	 * This array holds pointers to tuples in memory if we are in state INMEM.
	 * In states WRITEFILE and READFILE it's not used.
	 */
	void	  **memtuples;		/* array of pointers to palloc'd tuples */
	int			memtupcount;	/* number of tuples currently present */
	int			memtupsize;		/* allocated length of memtuples array */

	/*
	 * These variables are used to keep track of the current position.
	 *
	 * In state WRITEFILE, the current file seek position is the write point,
	 * and the read position is remembered in readpos_xxx; in state READFILE,
	 * the current file seek position is the read point, and the write
	 * position is remembered in writepos_xxx.  (The write position is the
	 * same as EOF, but since BufFileSeek doesn't currently implement
	 * SEEK_END, we have to remember it explicitly.)
	 *
	 * Special case: if we are in WRITEFILE state and eof_reached is true,
	 * then the read position is implicitly equal to the write position (and
	 * hence to the file seek position); this way we need not update the
	 * readpos_xxx variables on each write.
	 */
	bool		eof_reached;	/* read reached EOF (always valid) */
	int			current;		/* next array index (valid if INMEM) */
	int			readpos_file;	/* file# (valid if WRITEFILE and not eof) */
	long		readpos_offset; /* offset (valid if WRITEFILE and not eof) */
	int			writepos_file;	/* file# (valid if READFILE) */
	long		writepos_offset;	/* offset (valid if READFILE) */

	/* markpos_xxx holds marked position for mark and restore */
	int			markpos_current;	/* saved "current" */
	int			markpos_file;	/* saved "readpos_file" */
	long		markpos_offset; /* saved "readpos_offset" */
};

/*
 * Convenience macros.
 *
 * COPYTUP, WRITETUP and READTUP dispatch through the per-store function
 * pointers (copytup, writetup, readtup) held in the Tuplestorestate.
 *
 * LACKMEM is true once availMem has gone negative; USEMEM subtracts 'amt'
 * from availMem and FREEMEM adds it back.
 */
#define COPYTUP(state,tup)	((*(state)->copytup) (state, tup))
#define WRITETUP(state,tup) ((*(state)->writetup) (state, tup))
#define READTUP(state,len)	((*(state)->readtup) (state, len))
#define LACKMEM(state)		((state)->availMem < 0)
#define USEMEM(state,amt)	((state)->availMem -= (amt))
#define FREEMEM(state,amt)	((state)->availMem += (amt))

/*--------------------
 *
 * NOTES about on-tape representation of tuples:
 *
 * We require the first "unsigned int" of a stored tuple to be the total size
 * on-tape of the tuple, including itself (so it is never zero).
 * The remainder of the stored tuple
 * may or may not match the in-memory representation of the tuple ---
 * any conversion needed is the job of the writetup and readtup routines.
 *
 * If state->randomAccess is true, then the stored representation of the
 * tuple must be followed by another "unsigned int" that is a copy of the
 * length --- so the total tape space used is actually sizeof(unsigned int)
 * more than the stored length value.  This allows read-backwards.  When
 * randomAccess is not true, the write/read routines may omit the extra
 * length word.
 *
 * writetup is expected to write both length words as well as the tuple
 * data.  When readtup is called, the tape is positioned just after the
 * front length word; readtup must read the tuple data and advance past
 * the back length word (if present).
 *
 * The write/read routines can make use of the tuple description data
 * stored in the Tuplestorestate record, if needed.  They are also expected
 * to adjust state->availMem by the amount of memory space (not tape space!)
 * released or consumed.  There is no error return from either writetup
 * or readtup; they should ereport() on failure.
 *
 *
 * NOTES about memory consumption calculations:
 *
 * We count space allocated for tuples against the maxKBytes limit,
 * plus the space used by the variable-size array memtuples.
 * Fixed-size space (primarily the BufFile I/O buffer) is not counted.
 *
 * Note that we count actual space used (as shown by GetMemoryChunkSpace)
 * rather than the originally-requested size.  This is important since
 * palloc can add substantial overhead.  It's not a complete answer since
 * we won't count any wasted space in palloc allocation blocks, but it's
 * a lot better than what we were doing before 7.3.
 *
 *--------------------
 */

/* Forward declarations of the file-local (static) routines. */
static Tuplestorestate *tuplestore_begin_common(bool randomAccess,
						bool interXact,
						int maxKBytes);
static void dumptuples(Tuplestorestate *state);
static unsigned int getlen(Tuplestorestate *state, bool eofOK);
static void *copytup_heap(Tuplestorestate *state, void *tup);
static void writetup_heap(Tuplestorestate *state, void *tup);
static void *readtup_heap(Tuplestorestate *state, unsigned int len);

/*
 *		tuplestore_begin_xxx
 *
 * Initialize for a tuple store operation.
*/static Tuplestorestate *tuplestore_begin_common(bool randomAccess, bool interXact, int maxKBytes){ Tuplestorestate *state; state = (Tuplestorestate *) palloc0(sizeof(Tuplestorestate)); state->status = TSS_INMEM; state->randomAccess = randomAccess; state->interXact = interXact; state->availMem = maxKBytes * 1024L; state->myfile = NULL; state->memtupcount = 0; state->memtupsize = 1024; /* initial guess */ state->memtuples = (void **) palloc(state->memtupsize * sizeof(void *)); USEMEM(state, GetMemoryChunkSpace(state->memtuples)); state->eof_reached = false; state->current = 0; return state;}/* * tuplestore_begin_heap * * Create a new tuplestore; other types of tuple stores (other than * "heap" tuple stores, for heap tuples) are possible, but not presently * implemented. * * randomAccess: if true, both forward and backward accesses to the * tuple store are allowed. * * interXact: if true, the files used for on-disk storage persist beyond the * end of the current transaction. NOTE: It's the caller's responsibility to * create such a tuplestore in a memory context that will also survive * transaction boundaries, and to ensure the tuplestore is closed when it's * no longer wanted. * * maxKBytes: how much data to store in memory (any data beyond this * amount is paged to disk). When in doubt, use work_mem. */Tuplestorestate *tuplestore_begin_heap(bool randomAccess, bool interXact, int maxKBytes){ Tuplestorestate *state = tuplestore_begin_common(randomAccess, interXact, maxKBytes); state->copytup = copytup_heap; state->writetup = writetup_heap; state->readtup = readtup_heap; return state;}/* * tuplestore_end * * Release resources and clean up. */voidtuplestore_end(Tuplestorestate *state){ int i; if (state->myfile) BufFileClose(state->myfile); if (state->memtuples) { for (i = 0; i < state->memtupcount; i++) pfree(state->memtuples[i]); pfree(state->memtuples); }}/* * tuplestore_ateof * * Returns the current eof_reached state. 
*/booltuplestore_ateof(Tuplestorestate *state){ return state->eof_reached;}/* * Accept one tuple and append it to the tuplestore. * * Note that the input tuple is always copied; the caller need not save it. * * If the read status is currently "AT EOF" then it remains so (the read * pointer advances along with the write pointer); otherwise the read * pointer is unchanged. This is for the convenience of nodeMaterial.c. */voidtuplestore_puttuple(Tuplestorestate *state, void *tuple){ /* * Copy the tuple. (Must do this even in WRITEFILE case.) */ tuple = COPYTUP(state, tuple); switch (state->status) { case TSS_INMEM: /* Grow the array as needed */ if (state->memtupcount >= state->memtupsize) { FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); state->memtupsize *= 2; state->memtuples = (void **) repalloc(state->memtuples, state->memtupsize * sizeof(void *)); USEMEM(state, GetMemoryChunkSpace(state->memtuples)); } /* Stash the tuple in the in-memory array */ state->memtuples[state->memtupcount++] = tuple; /* If eof_reached, keep read position in sync */ if (state->eof_reached) state->current = state->memtupcount; /* * Done if we still fit in available memory. */ if (!LACKMEM(state)) return; /* * Nope; time to switch to tape-based operation. */ state->myfile = BufFileCreateTemp(state->interXact); state->status = TSS_WRITEFILE; dumptuples(state); break; case TSS_WRITEFILE: WRITETUP(state, tuple); break; case TSS_READFILE: /* * Switch from reading to writing. */ if (!state->eof_reached) BufFileTell(state->myfile, &state->readpos_file, &state->readpos_offset); if (BufFileSeek(state->myfile,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -