hashovfl.c
/*
 *	_hash_freeovflpage() -
 *
 *	Remove this overflow page from its bucket's chain, and mark the page as
 *	free.  On entry, ovflbuf is write-locked; it is released before exiting.
 *
 *	Returns the block number of the page that followed the given page
 *	in the bucket, or InvalidBlockNumber if no following page.
 *
 *	NB: caller must not hold lock on metapage, nor on either page that's
 *	adjacent in the bucket chain.  The caller had better hold exclusive lock
 *	on the bucket, too.
 */
BlockNumber
_hash_freeovflpage(Relation rel, Buffer ovflbuf)
{
    HashMetaPage metap;
    Buffer      metabuf;
    Buffer      mapbuf;
    BlockNumber ovflblkno;
    BlockNumber prevblkno;
    BlockNumber blkno;
    BlockNumber nextblkno;
    HashPageOpaque ovflopaque;
    Page        ovflpage;
    Page        mappage;
    uint32     *freep;
    uint32      ovflbitno;
    int32       bitmappage,
                bitmapbit;
    Bucket      bucket;

    /* Get information from the doomed page */
    ovflblkno = BufferGetBlockNumber(ovflbuf);
    ovflpage = BufferGetPage(ovflbuf);
    _hash_checkpage(rel, ovflpage, LH_OVERFLOW_PAGE);
    ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
    nextblkno = ovflopaque->hasho_nextblkno;
    prevblkno = ovflopaque->hasho_prevblkno;
    bucket = ovflopaque->hasho_bucket;

    /* Zero the page for debugging's sake; then write and release it */
    MemSet(ovflpage, 0, BufferGetPageSize(ovflbuf));
    _hash_wrtbuf(rel, ovflbuf);

    /*
     * Fix up the bucket chain.  this is a doubly-linked list, so we must fix
     * up the bucket chain members behind and ahead of the overflow page being
     * deleted.  No concurrency issues since we hold exclusive lock on the
     * entire bucket.
     */
    if (BlockNumberIsValid(prevblkno))
    {
        Buffer      prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE);
        Page        prevpage = BufferGetPage(prevbuf);
        HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);

        _hash_checkpage(rel, prevpage, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
        Assert(prevopaque->hasho_bucket == bucket);
        prevopaque->hasho_nextblkno = nextblkno;
        _hash_wrtbuf(rel, prevbuf);
    }
    if (BlockNumberIsValid(nextblkno))
    {
        Buffer      nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE);
        Page        nextpage = BufferGetPage(nextbuf);
        HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);

        _hash_checkpage(rel, nextpage, LH_OVERFLOW_PAGE);
        Assert(nextopaque->hasho_bucket == bucket);
        nextopaque->hasho_prevblkno = prevblkno;
        _hash_wrtbuf(rel, nextbuf);
    }

    /* Read the metapage so we can determine which bitmap page to use */
    metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
    metap = (HashMetaPage) BufferGetPage(metabuf);
    _hash_checkpage(rel, (Page) metap, LH_META_PAGE);

    /* Identify which bit to set */
    ovflbitno = blkno_to_bitno(metap, ovflblkno);

    bitmappage = ovflbitno >> BMPG_SHIFT(metap);
    bitmapbit = ovflbitno & BMPG_MASK(metap);

    if (bitmappage >= metap->hashm_nmaps)
        elog(ERROR, "invalid overflow bit number %u", ovflbitno);
    blkno = metap->hashm_mapp[bitmappage];

    /* Release metapage lock while we access the bitmap page */
    _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

    /* Clear the bitmap bit to indicate that this overflow page is free */
    mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
    mappage = BufferGetPage(mapbuf);
    _hash_checkpage(rel, mappage, LH_BITMAP_PAGE);
    freep = HashPageGetBitmap(mappage);
    Assert(ISSET(freep, bitmapbit));
    CLRBIT(freep, bitmapbit);
    _hash_wrtbuf(rel, mapbuf);

    /* Get write-lock on metapage to update firstfree */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    /* if this is now the first free page, update hashm_firstfree */
    if (ovflbitno < metap->hashm_firstfree)
    {
        metap->hashm_firstfree = ovflbitno;
        _hash_wrtbuf(rel, metabuf);
    }
    else
    {
        /* no need to change metapage */
        _hash_relbuf(rel, metabuf);
    }

    return nextblkno;
}

/*
 *	_hash_initbitmap()
 *
 *	Initialize a new bitmap page.  The metapage has a write-lock upon
 *	entering the function, and must be written by caller after return.
 *
 *	'blkno' is the block number of the new bitmap page.
 *
 *	All bits in the new bitmap page are set to "1", indicating "in use".
 */
void
_hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
{
    Buffer      buf;
    Page        pg;
    HashPageOpaque op;
    uint32     *freep;

    /*
     * It is okay to write-lock the new bitmap page while holding metapage
     * write lock, because no one else could be contending for the new page.
     *
     * There is some loss of concurrency in possibly doing I/O for the new
     * page while holding the metapage lock, but this path is taken so seldom
     * that it's not worth worrying about.
     */
    buf = _hash_getbuf(rel, blkno, HASH_WRITE);
    pg = BufferGetPage(buf);

    /* initialize the page */
    _hash_pageinit(pg, BufferGetPageSize(buf));
    op = (HashPageOpaque) PageGetSpecialPointer(pg);
    op->hasho_prevblkno = InvalidBlockNumber;
    op->hasho_nextblkno = InvalidBlockNumber;
    op->hasho_bucket = -1;
    op->hasho_flag = LH_BITMAP_PAGE;
    op->hasho_filler = HASHO_FILL;

    /* set all of the bits to 1 */
    freep = HashPageGetBitmap(pg);
    MemSet(freep, 0xFF, BMPGSZ_BYTE(metap));

    /* write out the new bitmap page (releasing write lock and pin) */
    _hash_wrtbuf(rel, buf);

    /* add the new bitmap page to the metapage's list of bitmaps */
    /* metapage already has a write lock */
    if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("out of overflow pages in hash index \"%s\"",
                        RelationGetRelationName(rel))));

    metap->hashm_mapp[metap->hashm_nmaps] = blkno;
    metap->hashm_nmaps++;
}

/*
 *	_hash_squeezebucket(rel, bucket)
 *
 *	Try to squeeze the tuples onto pages occurring earlier in the
 *	bucket chain in an attempt to free overflow pages.  When we start
 *	the "squeezing", the page from which we start taking tuples (the
 *	"read" page) is the last bucket in the bucket chain and the page
 *	onto which we start squeezing tuples (the "write" page) is the
 *	first page in the bucket chain.  The read page works backward and
 *	the write page works forward; the procedure terminates when the
 *	read page and write page are the same page.
 *
 *	At completion of this procedure, it is guaranteed that all pages in
 *	the bucket are nonempty, unless the bucket is totally empty (in
 *	which case all overflow pages will be freed).  The original implementation
 *	required that to be true on entry as well, but it's a lot easier for
 *	callers to leave empty overflow pages and let this guy clean it up.
 *
 *	Caller must hold exclusive lock on the target bucket.  This allows
 *	us to safely lock multiple pages in the bucket.
 */
void
_hash_squeezebucket(Relation rel,
                    Bucket bucket,
                    BlockNumber bucket_blkno)
{
    Buffer      wbuf;
    Buffer      rbuf = 0;
    BlockNumber wblkno;
    BlockNumber rblkno;
    Page        wpage;
    Page        rpage;
    HashPageOpaque wopaque;
    HashPageOpaque ropaque;
    OffsetNumber woffnum;
    OffsetNumber roffnum;
    HashItem    hitem;
    Size        itemsz;

    /*
     * start squeezing into the base bucket page.
     */
    wblkno = bucket_blkno;
    wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
    wpage = BufferGetPage(wbuf);
    _hash_checkpage(rel, wpage, LH_BUCKET_PAGE);
    wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);

    /*
     * if there aren't any overflow pages, there's nothing to squeeze.
     */
    if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
    {
        _hash_relbuf(rel, wbuf);
        return;
    }

    /*
     * find the last page in the bucket chain by starting at the base bucket
     * page and working forward.
     */
    ropaque = wopaque;
    do
    {
        rblkno = ropaque->hasho_nextblkno;
        if (ropaque != wopaque)
            _hash_relbuf(rel, rbuf);
        rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
        rpage = BufferGetPage(rbuf);
        _hash_checkpage(rel, rpage, LH_OVERFLOW_PAGE);
        ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
        Assert(ropaque->hasho_bucket == bucket);
    } while (BlockNumberIsValid(ropaque->hasho_nextblkno));

    /*
     * squeeze the tuples.
     */
    roffnum = FirstOffsetNumber;
    for (;;)
    {
        /* this test is needed in case page is empty on entry */
        if (roffnum <= PageGetMaxOffsetNumber(rpage))
        {
            hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum));
            itemsz = IndexTupleDSize(hitem->hash_itup)
                + (sizeof(HashItemData) - sizeof(IndexTupleData));
            itemsz = MAXALIGN(itemsz);

            /*
             * Walk up the bucket chain, looking for a page big enough for
             * this item.  Exit if we reach the read page.
             */
            while (PageGetFreeSpace(wpage) < itemsz)
            {
                Assert(!PageIsEmpty(wpage));

                wblkno = wopaque->hasho_nextblkno;
                Assert(BlockNumberIsValid(wblkno));

                _hash_wrtbuf(rel, wbuf);

                if (rblkno == wblkno)
                {
                    /* wbuf is already released */
                    _hash_wrtbuf(rel, rbuf);
                    return;
                }

                wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
                wpage = BufferGetPage(wbuf);
                _hash_checkpage(rel, wpage, LH_OVERFLOW_PAGE);
                wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
                Assert(wopaque->hasho_bucket == bucket);
            }

            /*
             * we have found room so insert on the "write" page.
             */
            woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage));
            if (PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED)
                == InvalidOffsetNumber)
                elog(ERROR, "failed to add index item to \"%s\"",
                     RelationGetRelationName(rel));

            /*
             * delete the tuple from the "read" page.  PageIndexTupleDelete
             * repacks the ItemId array, so 'roffnum' will be "advanced" to
             * the "next" ItemId.
             */
            PageIndexTupleDelete(rpage, roffnum);
        }

        /*
         * if the "read" page is now empty because of the deletion (or because
         * it was empty when we got to it), free it.
         *
         * Tricky point here: if our read and write pages are adjacent in the
         * bucket chain, our write lock on wbuf will conflict with
         * _hash_freeovflpage's attempt to update the sibling links of the
         * removed page.  However, in that case we are done anyway, so we can
         * simply drop the write lock before calling _hash_freeovflpage.
         */
        if (PageIsEmpty(rpage))
        {
            rblkno = ropaque->hasho_prevblkno;
            Assert(BlockNumberIsValid(rblkno));

            /* are we freeing the page adjacent to wbuf? */
            if (rblkno == wblkno)
            {
                /* yes, so release wbuf lock first */
                _hash_wrtbuf(rel, wbuf);
                /* free this overflow page (releases rbuf) */
                _hash_freeovflpage(rel, rbuf);
                /* done */
                return;
            }

            /* free this overflow page, then get the previous one */
            _hash_freeovflpage(rel, rbuf);

            rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
            rpage = BufferGetPage(rbuf);
            _hash_checkpage(rel, rpage, LH_OVERFLOW_PAGE);
            ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
            Assert(ropaque->hasho_bucket == bucket);

            roffnum = FirstOffsetNumber;
        }
    }

    /* NOTREACHED */
}
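/*
 * Illustrative sketch (not from the original file): a minimal, self-contained
 * demonstration of the shift/mask arithmetic _hash_freeovflpage() uses to
 * locate an overflow page's bit.  MAP_PAGE_BITS, MAP_PAGE_SHIFT and
 * MAP_PAGE_MASK below are assumed stand-ins for the values the real code
 * derives from the metapage via BMPG_SHIFT(metap) and BMPG_MASK(metap); the
 * high bits of the overflow bit number select the bitmap page, the low bits
 * select the bit within that page, and clearing the bit marks the page free.
 */
#include <stdint.h>
#include <stdio.h>

#define MAP_PAGE_BITS   32768                   /* assumed bits tracked per bitmap page */
#define MAP_PAGE_SHIFT  15                      /* log2(MAP_PAGE_BITS) */
#define MAP_PAGE_MASK   (MAP_PAGE_BITS - 1)

/* Clear bit 'bitno' in a bitmap stored as an array of 32-bit words. */
static void
clear_map_bit(uint32_t *map, uint32_t bitno)
{
    map[bitno >> 5] &= ~((uint32_t) 1 << (bitno & 31));
}

int
main(void)
{
    static uint32_t map[MAP_PAGE_BITS / 32];    /* one bitmap page's worth of bits */
    uint32_t    ovflbitno = 40000;              /* example overflow bit number */
    uint32_t    bitmappage = ovflbitno >> MAP_PAGE_SHIFT;   /* which bitmap page */
    uint32_t    bitmapbit = ovflbitno & MAP_PAGE_MASK;      /* which bit on that page */

    /* set the bit (page "in use"), then clear it as _hash_freeovflpage would */
    map[bitmapbit >> 5] |= (uint32_t) 1 << (bitmapbit & 31);
    clear_map_bit(map, bitmapbit);

    printf("overflow bit %u lives on bitmap page %u, bit %u\n",
           ovflbitno, bitmappage, bitmapbit);
    return 0;
}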