📄 nbtsort.c
字号:
Datum d = index_getattr(&(tmpbti->bti_itup), 1, index->rd_att, &isnull); printf("_bt_buildadd: moved <%x> to offset %d at level %d\n", d, n, state->btps_level); }#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */#endif } /* * this loop is backward because PageIndexTupleDelete shuffles the * tuples to fill holes in the page -- by starting at the end and * working back, we won't create holes (and thereby avoid * shuffling). */ for (o = last_off; o > first_off; o = OffsetNumberPrev(o)) PageIndexTupleDelete(opage, o); hii = PageGetItemId(opage, P_HIKEY); ii = PageGetItemId(opage, first_off); *hii = *ii; ii->lp_flags &= ~LP_USED; ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData); first_off = P_FIRSTKEY; last_off = PageGetMaxOffsetNumber(npage); last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, last_off)); /* * set the page (side link) pointers. */ { BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage); BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage); oopaque->btpo_next = BufferGetBlockNumber(nbuf); nopaque->btpo_prev = BufferGetBlockNumber(obuf); nopaque->btpo_next = P_NONE; if (_bt_itemcmp(index, _bt_nattr, (BTItem) PageGetItem(opage, PageGetItemId(opage, P_HIKEY)), (BTItem) PageGetItem(opage, PageGetItemId(opage, P_FIRSTKEY)), BTEqualStrategyNumber)) oopaque->btpo_flags |= BTP_CHAIN; } /* * copy the old buffer's minimum key to its parent. if we don't * have a parent, we have to create one; this adds a new btree * level. */ if (state->btps_doupper) { BTItem nbti; if (state->btps_next == (BTPageState *) NULL) { state->btps_next = _bt_pagestate(index, 0, state->btps_level + 1, true); } nbti = _bt_minitem(opage, BufferGetBlockNumber(obuf), 0); _bt_buildadd(index, state->btps_next, nbti, 0); pfree((void *) nbti); } /* * write out the old stuff. we never want to see it again, so we * can give up our lock (if we had one; BuildingBtree is set, so * we aren't locking). */ _bt_wrtbuf(index, obuf); } /* * if this item is different from the last item added, we start a new * chain of duplicates. */ off = OffsetNumberNext(last_off); if (PageAddItem(npage, (Item) bti, btisz, off, LP_USED) == InvalidOffsetNumber) elog(FATAL, "btree: failed to add item to the page in _bt_sort (2)");#ifdef NOT_USED#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) { bool isnull; Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull); printf("_bt_buildadd: inserted <%x> at offset %d at level %d\n", d, off, state->btps_level); }#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */#endif if (last_bti == (BTItem) NULL) first_off = P_FIRSTKEY; else if (!_bt_itemcmp(index, _bt_nattr, bti, last_bti, BTEqualStrategyNumber)) first_off = off; last_off = off; last_bti = (BTItem) PageGetItem(npage, PageGetItemId(npage, off)); state->btps_buf = nbuf; state->btps_page = npage; state->btps_lastbti = last_bti; state->btps_lastoff = last_off; state->btps_firstoff = first_off; return last_bti;}static void_bt_uppershutdown(Relation index, BTPageState *state){ BTPageState *s; BlockNumber blkno; BTPageOpaque opaque; BTItem bti; for (s = state; s != (BTPageState *) NULL; s = s->btps_next) { blkno = BufferGetBlockNumber(s->btps_buf); opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page); /* * if this is the root, attach it to the metapage. otherwise, * stick the minimum key of the last page on this level (which has * not been split, or else it wouldn't be the last page) into its * parent. this may cause the last page of upper levels to split, * but that's not a problem -- we haven't gotten to them yet. */ if (s->btps_doupper) { if (s->btps_next == (BTPageState *) NULL) { opaque->btpo_flags |= BTP_ROOT; _bt_metaproot(index, blkno, s->btps_level + 1); } else { bti = _bt_minitem(s->btps_page, blkno, 0); _bt_buildadd(index, s->btps_next, bti, 0); pfree((void *) bti); } } /* * this is the rightmost page, so the ItemId array needs to be * slid back one slot. */ _bt_slideleft(index, s->btps_buf, s->btps_page); _bt_wrtbuf(index, s->btps_buf); }}/* * take the input tapes stored by 'btspool' and perform successive * merging passes until at most one run is left in each tape. at that * point, merge the final tape runs into a set of btree leaves. * * XXX three nested loops? gross. cut me up into smaller routines. */static void_bt_merge(Relation index, BTSpool *btspool){ BTPageState *state; BTPriQueue q; BTPriQueueElem e; BTSortKey btsk; BTItem bti; BTTapeBlock *itape; BTTapeBlock *otape; char *tapepos[MAXTAPES]; int tapedone[MAXTAPES]; int t; int goodtapes; int npass; int nruns; Size btisz; bool doleaf = false; /* * initialize state needed for the merge into the btree leaf pages. */ state = (BTPageState *) _bt_pagestate(index, BTP_LEAF, 0, true); npass = 0; do { /* pass */ /* * each pass starts by flushing the previous outputs and swapping * inputs and outputs. flushing sets End-of-Run for any dirty * output tapes. swapping clears the new output tapes and rewinds * the new input tapes. */ btspool->bts_tape = btspool->bts_ntapes - 1; _bt_spoolflush(btspool); _bt_spoolswap(btspool); ++npass; nruns = 0; for (;;) { /* run */ /* * each run starts by selecting a new output tape. the merged * results of a given run are always sent to this one tape. */ btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes; otape = btspool->bts_otape[btspool->bts_tape]; /* * initialize the priority queue by loading it with the first * element of the given run in each tape. since we are * starting a new run, we reset the tape (clearing the * End-Of-Run marker) before reading it. this means that * _bt_taperead will return 0 only if the tape is actually at * EOF. */ MemSet((char *) &q, 0, sizeof(BTPriQueue)); goodtapes = 0; for (t = 0; t < btspool->bts_ntapes; ++t) { itape = btspool->bts_itape[t]; tapepos[t] = itape->bttb_data; tapedone[t] = 0; _bt_tapereset(itape); do { if (_bt_taperead(itape) == 0) tapedone[t] = 1; } while (!tapedone[t] && EMPTYTAPE(itape)); if (!tapedone[t]) { ++goodtapes; e.btpqe_tape = t; _bt_setsortkey(index, _bt_tapenext(itape, &tapepos[t]), &(e.btpqe_item)); if (e.btpqe_item.btsk_item != (BTItem) NULL) _bt_pqadd(&q, &e); } } /* * if we don't have any tapes with any input (i.e., they are * all at EOF), there is no work to do in this run -- we must * be done with this pass. */ if (goodtapes == 0) { break; /* for */ } ++nruns; /* * output the smallest element from the queue until there are * no more. */ while (_bt_pqnext(&q, &e) >= 0) { /* item */ /* * replace the element taken from priority queue, fetching * a new block if needed. a tape can run out if it hits * either End-Of-Run or EOF. */ t = e.btpqe_tape; btsk = e.btpqe_item; bti = btsk.btsk_item; if (bti != (BTItem) NULL) { btisz = BTITEMSZ(bti); btisz = MAXALIGN(btisz); if (doleaf) { _bt_buildadd(index, state, bti, BTP_LEAF);#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) { bool isnull; Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull); printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into block %d\n", npass, nruns, d, t, BufferGetBlockNumber(state->btps_buf)); }#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ } else { if (SPCLEFT(otape) < btisz) { /* * if it's full, write it out and add the item * to the next block. (since we will be * adding another tuple immediately after * this, we can be sure that there will be at * least one more block in this run and so we * know we do *not* want to set End-Of-Run * here.) */ _bt_tapewrite(otape, 0); } _bt_tapeadd(otape, bti, btisz);#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) { bool isnull; Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull); printf("_bt_merge: [pass %d run %d] inserted <%x> from tape %d into output tape %d\n", npass, nruns, d, t, btspool->bts_tape); }#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ } if (btsk.btsk_datum != (Datum *) NULL) pfree((void *) (btsk.btsk_datum)); if (btsk.btsk_nulls != (char *) NULL) pfree((void *) (btsk.btsk_nulls)); } itape = btspool->bts_itape[t]; if (!tapedone[t]) { BTItem newbti = _bt_tapenext(itape, &tapepos[t]); if (newbti == (BTItem) NULL) { do { if (_bt_taperead(itape) == 0) tapedone[t] = 1; } while (!tapedone[t] && EMPTYTAPE(itape)); if (!tapedone[t]) { tapepos[t] = itape->bttb_data; newbti = _bt_tapenext(itape, &tapepos[t]); } } if (newbti != (BTItem) NULL) { BTPriQueueElem nexte; nexte.btpqe_tape = t; _bt_setsortkey(index, newbti, &(nexte.btpqe_item)); _bt_pqadd(&q, &nexte); } } } /* item */ /* * that's it for this run. flush the output tape, marking * End-of-Run. */ _bt_tapewrite(otape, 1); } /* run */ /* * we are here because we ran out of input on all of the input * tapes. * * if this pass did not generate more actual output runs than we have * tapes, we know we have at most one run in each tape. this * means that we are ready to merge into the final btree leaf * pages instead of merging into a tape file. */ if (nruns <= btspool->bts_ntapes) doleaf = true; } while (nruns > 0); /* pass */ _bt_uppershutdown(index, state);}/* * given the (appropriately side-linked) leaf pages of a btree, * construct the corresponding upper levels. we do this by inserting * minimum keys from each page into parent pages as needed. the * format of the internal pages is otherwise the same as for leaf * pages. * * this routine is not called during conventional bulk-loading (in * which case we can just build the upper levels as we create the * sorted bottom level). it is only used for index recycling. */#ifdef NOT_USEDvoid_bt_upperbuild(Relation index){ Buffer rbuf; BlockNumber blk; Page rpage; BTPageOpaque ropaque; BTPageState *state; BTItem nbti; /* * find the first leaf block. while we're at it, clear the BTP_ROOT * flag that we set while building it (so we could find it later). */ rbuf = _bt_getroot(index, BT_WRITE); blk = BufferGetBlockNumber(rbuf); rpage = BufferGetPage(rbuf); ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage); ropaque->btpo_flags &= ~BTP_ROOT; _bt_wrtbuf(index, rbuf); state = (BTPageState *) _bt_pagestate(index, 0, 0, true); /* for each page... */ do {#ifdef NOT_USED printf("\t\tblk=%d\n", blk);#endif rbuf = _bt_getbuf(index, blk, BT_READ); rpage = BufferGetPage(rbuf); ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage); /* for each item... */ if (!PageIsEmpty(rpage)) { /* * form a new index tuple corresponding to the minimum key of * the lower page and insert it into a page at this level. */ nbti = _bt_minitem(rpage, blk, P_RIGHTMOST(ropaque));#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) { bool isnull; Datum d = index_getattr(&(nbti->bti_itup), 1, index->rd_att, &isnull); printf("_bt_upperbuild: inserting <%x> at %d\n", d, state->btps_level); }#endif /* FASTBUILD_DEBUG && FASTBUILD_MERGE */ _bt_buildadd(index, state, nbti, 0); pfree((void *) nbti); } blk = ropaque->btpo_next; _bt_relbuf(index, rbuf, BT_READ); } while (blk != P_NONE); _bt_uppershutdown(index, state);}#endif/* * given a spool loading by successive calls to _bt_spool, create an * entire btree. */void_bt_leafbuild(Relation index, void *spool){ _bt_isortcmpinit(index, (BTSpool *) spool);#ifdef BTREE_BUILD_STATS if (ShowExecutorStats) { fprintf(stderr, "! BtreeBuild (Spool) Stats:\n"); ShowUsage(); ResetUsage(); }#endif _bt_merge(index, (BTSpool *) spool);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -