/*
 * tuplesort.c
 *	  Generalized tuple sorting routines.
 *
 *	  (Excerpt from the PostgreSQL 8.3.4 open-source database sources;
 *	  C code, 1,982 lines total in the original file, page 1 of 5.)
 */
* or readtup; they should ereport() on failure. * * * NOTES about memory consumption calculations: * * We count space allocated for tuples against the workMem limit, plus * the space used by the variable-size memtuples array. Fixed-size space * is not counted; it's small enough to not be interesting. * * Note that we count actual space used (as shown by GetMemoryChunkSpace) * rather than the originally-requested size. This is important since * palloc can add substantial overhead. It's not a complete answer since * we won't count any wasted space in palloc allocation blocks, but it's * a lot better than what we were doing before 7.3. */static Tuplesortstate *tuplesort_begin_common(int workMem, bool randomAccess);static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);static void inittapes(Tuplesortstate *state);static void selectnewtape(Tuplesortstate *state);static void mergeruns(Tuplesortstate *state);static void mergeonerun(Tuplesortstate *state);static void beginmerge(Tuplesortstate *state);static void mergepreread(Tuplesortstate *state);static void mergeprereadone(Tuplesortstate *state, int srcTape);static void dumptuples(Tuplesortstate *state, bool alltuples);static void make_bounded_heap(Tuplesortstate *state);static void sort_bounded_heap(Tuplesortstate *state);static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple, int tupleindex, bool checkIndex);static void tuplesort_heap_siftup(Tuplesortstate *state, bool checkIndex);static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK);static void markrunend(Tuplesortstate *state, int tapenum);static int comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state);static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup);static void writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup);static void readtup_heap(Tuplesortstate *state, SortTuple *stup, int tapenum, unsigned int len);static void 
reversedirection_heap(Tuplesortstate *state);static int comparetup_index(const SortTuple *a, const SortTuple *b, Tuplesortstate *state);static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup);static void writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup);static void readtup_index(Tuplesortstate *state, SortTuple *stup, int tapenum, unsigned int len);static void reversedirection_index(Tuplesortstate *state);static int comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state);static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup);static void writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup);static void readtup_datum(Tuplesortstate *state, SortTuple *stup, int tapenum, unsigned int len);static void reversedirection_datum(Tuplesortstate *state);static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup);/* * tuplesort_begin_xxx * * Initialize for a tuple sort operation. * * After calling tuplesort_begin, the caller should call tuplesort_putXXX * zero or more times, then call tuplesort_performsort when all the tuples * have been supplied. After performsort, retrieve the tuples in sorted * order by calling tuplesort_getXXX until it returns false/NULL. (If random * access was requested, rescan, markpos, and restorepos can also be called.) * Call tuplesort_end to terminate the operation and release memory/disk space. * * Each variant of tuplesort_begin has a workMem parameter specifying the * maximum number of kilobytes of RAM to use before spilling data to disk. * (The normal value of this parameter is work_mem, but some callers use * other values.) Each variant also has a randomAccess parameter specifying * whether the caller needs non-sequential access to the sort result. 
*/static Tuplesortstate *tuplesort_begin_common(int workMem, bool randomAccess){ Tuplesortstate *state; MemoryContext sortcontext; MemoryContext oldcontext; /* * Create a working memory context for this sort operation. All data * needed by the sort will live inside this context. */ sortcontext = AllocSetContextCreate(CurrentMemoryContext, "TupleSort", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); /* * Make the Tuplesortstate within the per-sort context. This way, we * don't need a separate pfree() operation for it at shutdown. */ oldcontext = MemoryContextSwitchTo(sortcontext); state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate));#ifdef TRACE_SORT if (trace_sort) pg_rusage_init(&state->ru_start);#endif state->status = TSS_INITIAL; state->randomAccess = randomAccess; state->bounded = false; state->boundUsed = false; state->allowedMem = workMem * 1024L; state->availMem = state->allowedMem; state->sortcontext = sortcontext; state->tapeset = NULL; state->memtupcount = 0; state->memtupsize = 1024; /* initial guess */ state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple)); USEMEM(state, GetMemoryChunkSpace(state->memtuples)); /* workMem must be large enough for the minimal memtuples array */ if (LACKMEM(state)) elog(ERROR, "insufficient memory allowed for sort"); state->currentRun = 0; /* * maxTapes, tapeRange, and Algorithm D variables will be initialized by * inittapes(), if needed */ state->result_tape = -1; /* flag that result tape has not been formed */ MemoryContextSwitchTo(oldcontext); return state;}Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc, int nkeys, AttrNumber *attNums, Oid *sortOperators, bool *nullsFirstFlags, int workMem, bool randomAccess){ Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); MemoryContext oldcontext; int i; oldcontext = MemoryContextSwitchTo(state->sortcontext); AssertArg(nkeys > 0);#ifdef TRACE_SORT if (trace_sort) elog(LOG, "begin tuple sort: 
nkeys = %d, workMem = %d, randomAccess = %c", nkeys, workMem, randomAccess ? 't' : 'f');#endif state->nKeys = nkeys; state->comparetup = comparetup_heap; state->copytup = copytup_heap; state->writetup = writetup_heap; state->readtup = readtup_heap; state->reversedirection = reversedirection_heap; state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ state->scanKeys = (ScanKey) palloc0(nkeys * sizeof(ScanKeyData)); for (i = 0; i < nkeys; i++) { Oid sortFunction; bool reverse; AssertArg(attNums[i] != 0); AssertArg(sortOperators[i] != 0); if (!get_compare_function_for_ordering_op(sortOperators[i], &sortFunction, &reverse)) elog(ERROR, "operator %u is not a valid ordering operator", sortOperators[i]); /* * We needn't fill in sk_strategy or sk_subtype since these scankeys * will never be passed to an index. */ ScanKeyInit(&state->scanKeys[i], attNums[i], InvalidStrategy, sortFunction, (Datum) 0); /* However, we use btree's conventions for encoding directionality */ if (reverse) state->scanKeys[i].sk_flags |= SK_BT_DESC; if (nullsFirstFlags[i]) state->scanKeys[i].sk_flags |= SK_BT_NULLS_FIRST; } MemoryContextSwitchTo(oldcontext); return state;}Tuplesortstate *tuplesort_begin_index(Relation indexRel, bool enforceUnique, int workMem, bool randomAccess){ Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(state->sortcontext);#ifdef TRACE_SORT if (trace_sort) elog(LOG, "begin index sort: unique = %c, workMem = %d, randomAccess = %c", enforceUnique ? 't' : 'f', workMem, randomAccess ? 't' : 'f');#endif state->nKeys = RelationGetNumberOfAttributes(indexRel); state->comparetup = comparetup_index; state->copytup = copytup_index; state->writetup = writetup_index; state->readtup = readtup_index; state->reversedirection = reversedirection_index; state->indexRel = indexRel; /* see comments below about btree dependence of this code... 
*/ state->indexScanKey = _bt_mkscankey_nodata(indexRel); state->enforceUnique = enforceUnique; MemoryContextSwitchTo(oldcontext); return state;}Tuplesortstate *tuplesort_begin_datum(Oid datumType, Oid sortOperator, bool nullsFirstFlag, int workMem, bool randomAccess){ Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); MemoryContext oldcontext; Oid sortFunction; bool reverse; int16 typlen; bool typbyval; oldcontext = MemoryContextSwitchTo(state->sortcontext);#ifdef TRACE_SORT if (trace_sort) elog(LOG, "begin datum sort: workMem = %d, randomAccess = %c", workMem, randomAccess ? 't' : 'f');#endif state->nKeys = 1; /* always a one-column sort */ state->comparetup = comparetup_datum; state->copytup = copytup_datum; state->writetup = writetup_datum; state->readtup = readtup_datum; state->reversedirection = reversedirection_datum; state->datumType = datumType; /* lookup the ordering function */ if (!get_compare_function_for_ordering_op(sortOperator, &sortFunction, &reverse)) elog(ERROR, "operator %u is not a valid ordering operator", sortOperator); fmgr_info(sortFunction, &state->sortOpFn); /* set ordering flags */ state->sortFnFlags = reverse ? SK_BT_DESC : 0; if (nullsFirstFlag) state->sortFnFlags |= SK_BT_NULLS_FIRST; /* lookup necessary attributes of the datum type */ get_typlenbyval(datumType, &typlen, &typbyval); state->datumTypeLen = typlen; state->datumTypeByVal = typbyval; MemoryContextSwitchTo(oldcontext); return state;}/* * tuplesort_set_bound * * Advise tuplesort that at most the first N result tuples are required. * * Must be called before inserting any tuples. (Actually, we could allow it * as long as the sort hasn't spilled to disk, but there seems no need for * delayed calls at the moment.) * * This is a hint only. The tuplesort may still return more tuples than * requested. 
*/voidtuplesort_set_bound(Tuplesortstate *state, int64 bound){ /* Assert we're called before loading any tuples */ Assert(state->status == TSS_INITIAL); Assert(state->memtupcount == 0); Assert(!state->bounded);#ifdef DEBUG_BOUNDED_SORT /* Honor GUC setting that disables the feature (for easy testing) */ if (!optimize_bounded_sort) return;#endif /* We want to be able to compute bound * 2, so limit the setting */ if (bound > (int64) (INT_MAX / 2)) return; state->bounded = true; state->bound = (int) bound;}/* * tuplesort_end * * Release resources and clean up. * * NOTE: after calling this, any pointers returned by tuplesort_getXXX are * pointing to garbage. Be careful not to attempt to use or free such * pointers afterwards! */voidtuplesort_end(Tuplesortstate *state){ /* context swap probably not needed, but let's be safe */ MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);#ifdef TRACE_SORT long spaceUsed; if (state->tapeset) spaceUsed = LogicalTapeSetBlocks(state->tapeset); else spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;#endif /* * Delete temporary "tape" files, if any. * * Note: want to include this in reported total cost of sort, hence need * for two #ifdef TRACE_SORT sections. */ if (state->tapeset) LogicalTapeSetClose(state->tapeset);#ifdef TRACE_SORT if (trace_sort) { if (state->tapeset) elog(LOG, "external sort ended, %ld disk blocks used: %s", spaceUsed, pg_rusage_show(&state->ru_start)); else elog(LOG, "internal sort ended, %ld KB used: %s", spaceUsed, pg_rusage_show(&state->ru_start)); }#endif MemoryContextSwitchTo(oldcontext); /* * Free the per-sort memory context, thereby releasing all working memory, * including the Tuplesortstate struct itself. */ MemoryContextDelete(state->sortcontext);}/* * Grow the memtuples[] array, if possible within our memory constraint. * Return TRUE if able to enlarge the array, FALSE if not. * * At each increment we double the size of the array. 
When we are short * on memory we could consider smaller increases, but because availMem * moves around with tuple addition/removal, this might result in thrashing. * Small increases in the array size are likely to be pretty inefficient. */static bool
/* (end of excerpt, page 1 of 5 — code-viewer keyboard-shortcut help text removed) */