📄 nbtsort.c
字号:
* A leaf page being built looks like: * * +----------------+---------------------------------+ * | PageHeaderData | linp0 linp1 linp2 ... | * +-----------+----+---------------------------------+ * | ... linpN | | * +-----------+--------------------------------------+ * | ^ last | * | | * +-------------+------------------------------------+ * | | itemN ... | * +-------------+------------------+-----------------+ * | ... item3 item2 item1 | "special space" | * +--------------------------------+-----------------+ * * Contrast this with the diagram in bufpage.h; note the mismatch * between linps and items. This is because we reserve linp0 as a * placeholder for the pointer to the "high key" item; when we have * filled up the page, we will set linp0 to point to itemN and clear * linpN. On the other hand, if we find this is the last (rightmost) * page, we leave the items alone and slide the linp array over. * * 'last' pointer indicates the last offset added to the page. *---------- */static void_bt_buildadd(Relation index, BTPageState *state, BTItem bti){ Buffer nbuf; Page npage; OffsetNumber last_off; Size pgspc; Size btisz; nbuf = state->btps_buf; npage = state->btps_page; last_off = state->btps_lastoff; pgspc = PageGetFreeSpace(npage); btisz = BTITEMSZ(bti); btisz = MAXALIGN(btisz); /* * Check whether the item can fit on a btree page at all. (Eventually, * we ought to try to apply TOAST methods if not.) We actually need to * be able to fit three items on every page, so restrict any one item * to 1/3 the per-page available space. Note that at this point, btisz * doesn't include the ItemId. * * NOTE: similar code appears in _bt_insertonpg() to defend against * oversize items being inserted into an already-existing index. But * during creation of an index, we don't go through there. */ if (btisz > BTMaxItemSize(npage)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row size %lu exceeds btree maximum, %lu", (unsigned long) btisz, (unsigned long) BTMaxItemSize(npage)))); if (pgspc < btisz || pgspc < state->btps_full) { /* * Item won't fit on this page, or we feel the page is full enough * already. Finish off the page and write it out. */ Buffer obuf = nbuf; Page opage = npage; ItemId ii; ItemId hii; BTItem obti; /* Create new page on same level */ _bt_blnewpage(index, &nbuf, &npage, state->btps_level); /* * We copy the last item on the page into the new page, and then * rearrange the old page so that the 'last item' becomes its high * key rather than a true data item. There had better be at least * two items on the page already, else the page would be empty of * useful data. (Hence, we must allow pages to be packed at least * 2/3rds full; the 70% figure used above is close to minimum.) */ Assert(last_off > P_FIRSTKEY); ii = PageGetItemId(opage, last_off); obti = (BTItem) PageGetItem(opage, ii); _bt_sortaddtup(npage, ItemIdGetLength(ii), obti, P_FIRSTKEY); /* * Move 'last' into the high key position on opage */ hii = PageGetItemId(opage, P_HIKEY); *hii = *ii; ii->lp_flags &= ~LP_USED; ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData); /* * Link the old buffer into its parent, using its minimum key. If * we don't have a parent, we have to create one; this adds a new * btree level. */ if (state->btps_next == (BTPageState *) NULL) state->btps_next = _bt_pagestate(index, state->btps_level + 1); Assert(state->btps_minkey != NULL); ItemPointerSet(&(state->btps_minkey->bti_itup.t_tid), BufferGetBlockNumber(obuf), P_HIKEY); _bt_buildadd(index, state->btps_next, state->btps_minkey); pfree((void *) state->btps_minkey); /* * Save a copy of the minimum key for the new page. We have to * copy it off the old page, not the new one, in case we are not * at leaf level. */ state->btps_minkey = _bt_formitem(&(obti->bti_itup)); /* * Set the sibling links for both pages. */ { BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage); BTPageOpaque nopaque = (BTPageOpaque) PageGetSpecialPointer(npage); oopaque->btpo_next = BufferGetBlockNumber(nbuf); nopaque->btpo_prev = BufferGetBlockNumber(obuf); nopaque->btpo_next = P_NONE; /* redundant */ } /* * Write out the old page. We never want to see it again, so we * can give up our lock. */ _bt_blwritepage(index, obuf); /* * Reset last_off to point to new page */ last_off = P_FIRSTKEY; } /* * If the new item is the first for its page, stash a copy for later. * Note this will only happen for the first item on a level; on later * pages, the first item for a page is copied from the prior page in * the code above. */ if (last_off == P_HIKEY) { Assert(state->btps_minkey == NULL); state->btps_minkey = _bt_formitem(&(bti->bti_itup)); } /* * Add the new item into the current page. */ last_off = OffsetNumberNext(last_off); _bt_sortaddtup(npage, btisz, bti, last_off); state->btps_buf = nbuf; state->btps_page = npage; state->btps_lastoff = last_off;}/* * Finish writing out the completed btree. */static void_bt_uppershutdown(Relation index, BTPageState *state){ BTPageState *s; BlockNumber rootblkno = P_NONE; uint32 rootlevel = 0; /* * Each iteration of this loop completes one more level of the tree. */ for (s = state; s != (BTPageState *) NULL; s = s->btps_next) { BlockNumber blkno; BTPageOpaque opaque; blkno = BufferGetBlockNumber(s->btps_buf); opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page); /* * We have to link the last page on this level to somewhere. * * If we're at the top, it's the root, so attach it to the metapage. * Otherwise, add an entry for it to its parent using its minimum * key. This may cause the last page of the parent level to * split, but that's not a problem -- we haven't gotten to it yet. */ if (s->btps_next == (BTPageState *) NULL) { opaque->btpo_flags |= BTP_ROOT; rootblkno = blkno; rootlevel = s->btps_level; } else { Assert(s->btps_minkey != NULL); ItemPointerSet(&(s->btps_minkey->bti_itup.t_tid), blkno, P_HIKEY); _bt_buildadd(index, s->btps_next, s->btps_minkey); pfree((void *) s->btps_minkey); s->btps_minkey = NULL; } /* * This is the rightmost page, so the ItemId array needs to be * slid back one slot. Then we can dump out the page. */ _bt_slideleft(index, s->btps_buf, s->btps_page); _bt_blwritepage(index, s->btps_buf); } /* * As the last step in the process, update the metapage to point to * the new root (unless we had no data at all, in which case it's * left pointing to "P_NONE"). This changes the index to the "valid" * state by updating its magic number. */ _bt_metaproot(index, rootblkno, rootlevel);}/* * Read tuples in correct sort order from tuplesort, and load them into * btree leaves. */static void_bt_load(Relation index, BTSpool *btspool, BTSpool *btspool2){ BTPageState *state = NULL; bool merge = (btspool2 != NULL); BTItem bti, bti2 = NULL; bool should_free, should_free2, load1; TupleDesc tupdes = RelationGetDescr(index); int i, keysz = RelationGetNumberOfAttributes(index); ScanKey indexScanKey = NULL; if (merge) { /* * Another BTSpool for dead tuples exists. Now we have to merge * btspool and btspool2. */ ScanKey entry; Datum attrDatum1, attrDatum2; bool isFirstNull, isSecondNull; int32 compare; /* the preparation of merge */ bti = (BTItem) tuplesort_getindextuple(btspool->sortstate, true, &should_free); bti2 = (BTItem) tuplesort_getindextuple(btspool2->sortstate, true, &should_free2); indexScanKey = _bt_mkscankey_nodata(index); for (;;) { load1 = true; /* load BTSpool next ? */ if (NULL == bti2) { if (NULL == bti) break; } else if (NULL != bti) { for (i = 1; i <= keysz; i++) { entry = indexScanKey + i - 1; attrDatum1 = index_getattr((IndexTuple) bti, i, tupdes, &isFirstNull); attrDatum2 = index_getattr((IndexTuple) bti2, i, tupdes, &isSecondNull); if (isFirstNull) { if (!isSecondNull) { load1 = false; break; } } else if (isSecondNull) break; else { compare = DatumGetInt32(FunctionCall2(&entry->sk_func, attrDatum1, attrDatum2)); if (compare > 0) { load1 = false; break; } else if (compare < 0) break; } } } else load1 = false; /* When we see first tuple, create first index page */ if (state == NULL) state = _bt_pagestate(index, 0); if (load1) { _bt_buildadd(index, state, bti); if (should_free) pfree((void *) bti); bti = (BTItem) tuplesort_getindextuple(btspool->sortstate, true, &should_free); } else { _bt_buildadd(index, state, bti2); if (should_free2) pfree((void *) bti2); bti2 = (BTItem) tuplesort_getindextuple(btspool2->sortstate, true, &should_free2); } } _bt_freeskey(indexScanKey); } else { /* merge is unnecessary */ while (bti = (BTItem) tuplesort_getindextuple(btspool->sortstate, true, &should_free), bti != (BTItem) NULL) { /* When we see first tuple, create first index page */ if (state == NULL) state = _bt_pagestate(index, 0); _bt_buildadd(index, state, bti); if (should_free) pfree((void *) bti); } } /* Close down final pages and rewrite the metapage */ _bt_uppershutdown(index, state);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -