📄 nbtsort.c
字号:
return 1;}/* * get the next BTItem from a tape block. * * returns: * - NULL if we have run out of BTItems * - a pointer to the BTItemData in the block otherwise * * side effects: * - sets 'pos' to the current position within the block. */static BTItem_bt_tapenext(BTTapeBlock *tape, char **pos){ Size itemsz; BTItem bti; if (*pos >= tape->bttb_data + tape->bttb_top) return (BTItem) NULL; bti = (BTItem) *pos; itemsz = BTITEMSZ(bti); *pos += MAXALIGN(itemsz); return bti;}/* * copy a BTItem into a tape block. * * assumes that we have already checked to see if the block has enough * space for the item. * * side effects: * * - advances the 'top' pointer in the tape block header to point to * the beginning of free space. */static void_bt_tapeadd(BTTapeBlock *tape, BTItem item, int itemsz){ memcpy(tape->bttb_data + tape->bttb_top, item, itemsz); ++tape->bttb_ntup; tape->bttb_top += MAXALIGN(itemsz);}/*------------------------------------------------------------------------- * spool methods *------------------------------------------------------------------------- *//* * create and initialize a spool structure, including the underlying * files. */void *_bt_spoolinit(Relation index, int ntapes, bool isunique){ BTSpool *btspool = (BTSpool *) palloc(sizeof(BTSpool)); int i; if (btspool == (BTSpool *) NULL) elog(ERROR, "_bt_spoolinit: out of memory"); MemSet((char *) btspool, 0, sizeof(BTSpool)); btspool->bts_ntapes = ntapes; btspool->bts_tape = 0; btspool->isunique = isunique; btspool->bts_itape = (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes); btspool->bts_otape = (BTTapeBlock **) palloc(sizeof(BTTapeBlock *) * ntapes); if (btspool->bts_itape == (BTTapeBlock **) NULL || btspool->bts_otape == (BTTapeBlock **) NULL) elog(ERROR, "_bt_spoolinit: out of memory"); for (i = 0; i < ntapes; ++i) { btspool->bts_itape[i] = _bt_tapecreate(); btspool->bts_otape[i] = _bt_tapecreate(); } _bt_isortcmpinit(index, btspool); return (void *) btspool;}/* * clean up a spool structure and its substructures. */void_bt_spooldestroy(void *spool){ BTSpool *btspool = (BTSpool *) spool; int i; for (i = 0; i < btspool->bts_ntapes; ++i) { _bt_tapedestroy(btspool->bts_otape[i]); _bt_tapedestroy(btspool->bts_itape[i]); } pfree((void *) btspool);}/* * flush out any dirty output tape blocks */static void_bt_spoolflush(BTSpool *btspool){ int i; for (i = 0; i < btspool->bts_ntapes; ++i) { if (!EMPTYTAPE(btspool->bts_otape[i])) _bt_tapewrite(btspool->bts_otape[i], 1); }}/* * swap input tapes and output tapes by swapping their file * descriptors. additional preparation for the next merge pass * includes rewinding the new input tapes and clearing out the new * output tapes. */static void_bt_spoolswap(BTSpool *btspool){ File tmpfd; BTTapeBlock *itape; BTTapeBlock *otape; int i; for (i = 0; i < btspool->bts_ntapes; ++i) { itape = btspool->bts_itape[i]; otape = btspool->bts_otape[i]; /* * swap the input and output VFDs. */ tmpfd = itape->bttb_fd; itape->bttb_fd = otape->bttb_fd; otape->bttb_fd = tmpfd; /* * rewind the new input tape. */ _bt_taperewind(itape); _bt_tapereset(itape); /* * clear the new output tape -- it's ok to throw away the old * inputs. */ _bt_tapeclear(otape); }}/*------------------------------------------------------------------------- * sorting routines *------------------------------------------------------------------------- *//* * spool 'btitem' into an initial run. as tape blocks are filled, the * block BTItems are qsorted and written into some output tape (it * doesn't matter which; we go round-robin for simplicity). the * initial runs are therefore always just one block. */void_bt_spool(Relation index, BTItem btitem, void *spool){ BTSpool *btspool = (BTSpool *) spool; BTTapeBlock *itape; Size itemsz; _bt_isortcmpinit(index, btspool); itape = btspool->bts_itape[btspool->bts_tape]; itemsz = BTITEMSZ(btitem); itemsz = MAXALIGN(itemsz); /* * if this buffer is too full for this BTItemData, or if we have run * out of BTItems, we need to sort the buffer and write it out. in * this case, the BTItemData will go into the next tape's buffer. */ if (btitem == (BTItem) NULL || SPCLEFT(itape) < itemsz) { BTSortKey *parray = (BTSortKey *) NULL; BTTapeBlock *otape; BTItem bti; char *pos; int btisz; int it_ntup = itape->bttb_ntup; int i; /* * build an array of pointers to the BTItemDatas on the input * block. */ if (it_ntup > 0) { parray = (BTSortKey *) palloc(it_ntup * sizeof(BTSortKey)); pos = itape->bttb_data; for (i = 0; i < it_ntup; ++i) _bt_setsortkey(index, _bt_tapenext(itape, &pos), &(parray[i])); /* * qsort the pointer array. */ qsort((void *) parray, it_ntup, sizeof(BTSortKey), (int (*) (const void *, const void *)) _bt_isortcmp); } /* * write the spooled run into the output tape. we copy the * BTItemDatas in the order dictated by the sorted array of * BTItems, not the original order. * * (since everything was MAXALIGN'd and is all on a single tape * block, everything had *better* still fit on one tape block..) */ otape = btspool->bts_otape[btspool->bts_tape]; for (i = 0; i < it_ntup; ++i) { bti = parray[i].btsk_item; btisz = BTITEMSZ(bti); btisz = MAXALIGN(btisz); _bt_tapeadd(otape, bti, btisz);#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_SPOOL) { bool isnull; Datum d = index_getattr(&(bti->bti_itup), 1, index->rd_att, &isnull); printf("_bt_spool: inserted <%x> into output tape %d\n", d, btspool->bts_tape); }#endif /* FASTBUILD_DEBUG && FASTBUILD_SPOOL */ } /* * the initial runs are always single tape blocks. flush the * output block, marking End-Of-Run. */ _bt_tapewrite(otape, 1); /* * reset the input buffer for the next run. we don't have to * write it out or anything -- we only use it to hold the unsorted * BTItemDatas, the output tape contains all the sorted stuff. * * changing bts_tape changes the output tape and input tape; we * change itape for the code below. */ _bt_tapereset(itape); btspool->bts_tape = (btspool->bts_tape + 1) % btspool->bts_ntapes; itape = btspool->bts_itape[btspool->bts_tape]; /* * destroy the pointer array. */ if (parray != (BTSortKey *) NULL) { for (i = 0; i < it_ntup; i++) { if (parray[i].btsk_datum != (Datum *) NULL) pfree((void *) (parray[i].btsk_datum)); if (parray[i].btsk_nulls != (char *) NULL) pfree((void *) (parray[i].btsk_nulls)); } pfree((void *) parray); } } /* insert this item into the current buffer */ if (btitem != (BTItem) NULL) _bt_tapeadd(itape, btitem, itemsz);}/* * allocate a new, clean btree page, not linked to any siblings. */static void_bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags){ BTPageOpaque opaque; *buf = _bt_getbuf(index, P_NEW, BT_WRITE);#ifdef NOT_USED printf("\tblk=%d\n", BufferGetBlockNumber(*buf));#endif *page = BufferGetPage(*buf); _bt_pageinit(*page, BufferGetPageSize(*buf)); opaque = (BTPageOpaque) PageGetSpecialPointer(*page); opaque->btpo_prev = opaque->btpo_next = P_NONE; opaque->btpo_flags = flags;}/* * slide an array of ItemIds back one slot (from P_FIRSTKEY to * P_HIKEY, overwriting P_HIKEY). we need to do this when we discover * that we have built an ItemId array in what has turned out to be a * P_RIGHTMOST page. */static void_bt_slideleft(Relation index, Buffer buf, Page page){ OffsetNumber off; OffsetNumber maxoff; ItemId previi; ItemId thisii; if (!PageIsEmpty(page)) { maxoff = PageGetMaxOffsetNumber(page); previi = PageGetItemId(page, P_HIKEY); for (off = P_FIRSTKEY; off <= maxoff; off = OffsetNumberNext(off)) { thisii = PageGetItemId(page, off); *previi = *thisii; previi = thisii; } ((PageHeader) page)->pd_lower -= sizeof(ItemIdData); }}/* * allocate and initialize a new BTPageState. the returned structure * is suitable for immediate use by _bt_buildadd. */static void *_bt_pagestate(Relation index, int flags, int level, bool doupper){ BTPageState *state = (BTPageState *) palloc(sizeof(BTPageState)); MemSet((char *) state, 0, sizeof(BTPageState)); _bt_blnewpage(index, &(state->btps_buf), &(state->btps_page), flags); state->btps_firstoff = InvalidOffsetNumber; state->btps_lastoff = P_HIKEY; state->btps_lastbti = (BTItem) NULL; state->btps_next = (BTPageState *) NULL; state->btps_level = level; state->btps_doupper = doupper; return (void *) state;}/* * return a copy of the minimum (P_HIKEY or P_FIRSTKEY) item on * 'opage'. the copy is modified to point to 'opage' (as opposed to * the page to which the item used to point, e.g., a heap page if * 'opage' is a leaf page). */static BTItem_bt_minitem(Page opage, BlockNumber oblkno, int atend){ OffsetNumber off; BTItem obti; BTItem nbti; off = atend ? P_HIKEY : P_FIRSTKEY; obti = (BTItem) PageGetItem(opage, PageGetItemId(opage, off)); nbti = _bt_formitem(&(obti->bti_itup)); ItemPointerSet(&(nbti->bti_itup.t_tid), oblkno, P_HIKEY); return nbti;}/* * add an item to a disk page from a merge tape block. * * we must be careful to observe the following restrictions, placed * upon us by the conventions in nbtsearch.c: * - rightmost pages start data items at P_HIKEY instead of at * P_FIRSTKEY. * - duplicates cannot be split among pages unless the chain of * duplicates starts at the first data item. * * a leaf page being built looks like: * * +----------------+---------------------------------+ * | PageHeaderData | linp0 linp1 linp2 ... | * +-----------+----+---------------------------------+ * | ... linpN | ^ first | * +-----------+--------------------------------------+ * | ^ last | * | | * | v last | * +-------------+------------------------------------+ * | | itemN ... | * +-------------+------------------+-----------------+ * | ... item3 item2 item1 | "special space" | * +--------------------------------+-----------------+ * ^ first * * contrast this with the diagram in bufpage.h; note the mismatch * between linps and items. this is because we reserve linp0 as a * placeholder for the pointer to the "high key" item; when we have * filled up the page, we will set linp0 to point to itemN and clear * linpN. * * 'last' pointers indicate the last offset/item added to the page. * 'first' pointers indicate the first offset/item that is part of a * chain of duplicates extending from 'first' to 'last'. * * if all keys are unique, 'first' will always be the same as 'last'. */static BTItem_bt_buildadd(Relation index, void *pstate, BTItem bti, int flags){ BTPageState *state = (BTPageState *) pstate; Buffer nbuf; Page npage; BTItem last_bti; OffsetNumber first_off; OffsetNumber last_off; OffsetNumber off; Size pgspc; Size btisz; nbuf = state->btps_buf; npage = state->btps_page; first_off = state->btps_firstoff; last_off = state->btps_lastoff; last_bti = state->btps_lastbti; pgspc = PageGetFreeSpace(npage); btisz = BTITEMSZ(bti); btisz = MAXALIGN(btisz); if (pgspc < btisz) { Buffer obuf = nbuf; Page opage = npage; OffsetNumber o, n; ItemId ii; ItemId hii; _bt_blnewpage(index, &nbuf, &npage, flags); /* * if 'last' is part of a chain of duplicates that does not start * at the beginning of the old page, the entire chain is copied to * the new page; we delete all of the duplicates from the old page * except the first, which becomes the high key item of the old * page. * * if the chain starts at the beginning of the page or there is no * chain ('first' == 'last'), we need only copy 'last' to the new * page. again, 'first' (== 'last') becomes the high key of the * old page. * * note that in either case, we copy at least one item to the new * page, so 'last_bti' will always be valid. 'bti' will never be * the first data item on the new page. */ if (first_off == P_FIRSTKEY) { Assert(last_off != P_FIRSTKEY); first_off = last_off; } for (o = first_off, n = P_FIRSTKEY; o <= last_off; o = OffsetNumberNext(o), n = OffsetNumberNext(n)) { ii = PageGetItemId(opage, o); if (PageAddItem(npage, PageGetItem(opage, ii), ii->lp_len, n, LP_USED) == InvalidOffsetNumber) elog(FATAL, "btree: failed to add item to the page in _bt_sort (1)");#ifdef NOT_USED#if defined(FASTBUILD_DEBUG) && defined(FASTBUILD_MERGE) { bool isnull; BTItem tmpbti = (BTItem) PageGetItem(npage, PageGetItemId(npage, n));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -