📄 hashpage.c
字号:
* The caller should hold no locks on the hash index. * * The caller must hold a pin, but no lock, on the metapage buffer. * The buffer is returned in the same state. */void_hash_expandtable(Relation rel, Buffer metabuf){ HashMetaPage metap; Bucket old_bucket; Bucket new_bucket; uint32 spare_ndx; BlockNumber start_oblkno; BlockNumber start_nblkno; uint32 maxbucket; uint32 highmask; uint32 lowmask; /* * Obtain the page-zero lock to assert the right to begin a split (see * README). * * Note: deadlock should be impossible here. Our own backend could only be * holding bucket sharelocks due to stopped indexscans; those will not * block other holders of the page-zero lock, who are only interested in * acquiring bucket sharelocks themselves. Exclusive bucket locks are * only taken here and in hashbulkdelete, and neither of these operations * needs any additional locks to complete. (If, due to some flaw in this * reasoning, we manage to deadlock anyway, it's okay to error out; the * index will be left in a consistent state.) */ _hash_getlock(rel, 0, HASH_EXCLUSIVE); /* Write-lock the meta page */ _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE); metap = (HashMetaPage) BufferGetPage(metabuf); _hash_checkpage(rel, (Page) metap, LH_META_PAGE); /* * Check to see if split is still needed; someone else might have already * done one while we waited for the lock. * * Make sure this stays in sync with _hash_doinsert() */ if (metap->hashm_ntuples <= (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1)) goto fail; /* * Determine which bucket is to be split, and attempt to lock the old * bucket. If we can't get the lock, give up. * * The lock protects us against other backends, but not against our own * backend. Must check for active scans separately. * * Ideally we would lock the new bucket too before proceeding, but if we * are about to cross a splitpoint then the BUCKET_TO_BLKNO mapping isn't * correct yet. For simplicity we update the metapage first and then * lock. This should be okay because no one else should be trying to lock * the new bucket yet... */ new_bucket = metap->hashm_maxbucket + 1; old_bucket = (new_bucket & metap->hashm_lowmask); start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket); if (_hash_has_active_scan(rel, old_bucket)) goto fail; if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE)) goto fail; /* * Okay to proceed with split. Update the metapage bucket mapping info. * * Since we are scribbling on the metapage data right in the shared * buffer, any failure in this next little bit leaves us with a big * problem: the metapage is effectively corrupt but could get written back * to disk. We don't really expect any failure, but just to be sure, * establish a critical section. */ START_CRIT_SECTION(); metap->hashm_maxbucket = new_bucket; if (new_bucket > metap->hashm_highmask) { /* Starting a new doubling */ metap->hashm_lowmask = metap->hashm_highmask; metap->hashm_highmask = new_bucket | metap->hashm_lowmask; } /* * If the split point is increasing (hashm_maxbucket's log base 2 * increases), we need to adjust the hashm_spares[] array and * hashm_ovflpoint so that future overflow pages will be created beyond * this new batch of bucket pages. * * XXX should initialize new bucket pages to prevent out-of-order page * creation? Don't wanna do it right here though. */ spare_ndx = _hash_log2(metap->hashm_maxbucket + 1); if (spare_ndx > metap->hashm_ovflpoint) { Assert(spare_ndx == metap->hashm_ovflpoint + 1); metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint]; metap->hashm_ovflpoint = spare_ndx; } /* now we can compute the new bucket's primary block number */ start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket); Assert(!_hash_has_active_scan(rel, new_bucket)); if (!_hash_try_getlock(rel, start_nblkno, HASH_EXCLUSIVE)) elog(PANIC, "could not get lock on supposedly new bucket"); /* Done mucking with metapage */ END_CRIT_SECTION(); /* * Copy bucket mapping info now; this saves re-accessing the meta page * inside _hash_splitbucket's inner loop. Note that once we drop the * split lock, other splits could begin, so these values might be out of * date before _hash_splitbucket finishes. That's okay, since all it * needs is to tell which of these two buckets to map hashkeys into. */ maxbucket = metap->hashm_maxbucket; highmask = metap->hashm_highmask; lowmask = metap->hashm_lowmask; /* Write out the metapage and drop lock, but keep pin */ _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK); /* Release split lock; okay for other splits to occur now */ _hash_droplock(rel, 0, HASH_EXCLUSIVE); /* Relocate records to the new bucket */ _hash_splitbucket(rel, metabuf, old_bucket, new_bucket, start_oblkno, start_nblkno, maxbucket, highmask, lowmask); /* Release bucket locks, allowing others to access them */ _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE); _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE); return; /* Here if decide not to split or fail to acquire old bucket lock */fail: /* We didn't write the metapage, so just drop lock */ _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK); /* Release split lock */ _hash_droplock(rel, 0, HASH_EXCLUSIVE);}/* * _hash_splitbucket -- split 'obucket' into 'obucket' and 'nbucket' * * We are splitting a bucket that consists of a base bucket page and zero * or more overflow (bucket chain) pages. We must relocate tuples that * belong in the new bucket, and compress out any free space in the old * bucket. * * The caller must hold exclusive locks on both buckets to ensure that * no one else is trying to access them (see README). * * The caller must hold a pin, but no lock, on the metapage buffer. * The buffer is returned in the same state. (The metapage is only * touched if it becomes necessary to add or remove overflow pages.) */static void_hash_splitbucket(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket, BlockNumber start_oblkno, BlockNumber start_nblkno, uint32 maxbucket, uint32 highmask, uint32 lowmask){ Bucket bucket; Buffer obuf; Buffer nbuf; BlockNumber oblkno; BlockNumber nblkno; bool null; Datum datum; HashItem hitem; HashPageOpaque oopaque; HashPageOpaque nopaque; IndexTuple itup; Size itemsz; OffsetNumber ooffnum; OffsetNumber noffnum; OffsetNumber omaxoffnum; Page opage; Page npage; TupleDesc itupdesc = RelationGetDescr(rel); /* * It should be okay to simultaneously write-lock pages from each bucket, * since no one else can be trying to acquire buffer lock on pages of * either bucket. */ oblkno = start_oblkno; nblkno = start_nblkno; obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE); opage = BufferGetPage(obuf); npage = BufferGetPage(nbuf); _hash_checkpage(rel, opage, LH_BUCKET_PAGE); oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); /* initialize the new bucket's primary page */ _hash_pageinit(npage, BufferGetPageSize(nbuf)); nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); nopaque->hasho_prevblkno = InvalidBlockNumber; nopaque->hasho_nextblkno = InvalidBlockNumber; nopaque->hasho_bucket = nbucket; nopaque->hasho_flag = LH_BUCKET_PAGE; nopaque->hasho_filler = HASHO_FILL; /* * Partition the tuples in the old bucket between the old bucket and the * new bucket, advancing along the old bucket's overflow bucket chain and * adding overflow pages to the new bucket as needed. */ ooffnum = FirstOffsetNumber; omaxoffnum = PageGetMaxOffsetNumber(opage); for (;;) { /* * at each iteration through this loop, each of these variables should * be up-to-date: obuf opage oopaque ooffnum omaxoffnum */ /* check if we're at the end of the page */ if (ooffnum > omaxoffnum) { /* at end of page, but check for an(other) overflow page */ oblkno = oopaque->hasho_nextblkno; if (!BlockNumberIsValid(oblkno)) break; /* * we ran out of tuples on this particular page, but we have more * overflow pages; advance to next page. */ _hash_wrtbuf(rel, obuf); obuf = _hash_getbuf(rel, oblkno, HASH_WRITE); opage = BufferGetPage(obuf); _hash_checkpage(rel, opage, LH_OVERFLOW_PAGE); oopaque = (HashPageOpaque) PageGetSpecialPointer(opage); ooffnum = FirstOffsetNumber; omaxoffnum = PageGetMaxOffsetNumber(opage); continue; } /* * Re-hash the tuple to determine which bucket it now belongs in. * * It is annoying to call the hash function while holding locks, but * releasing and relocking the page for each tuple is unappealing too. */ hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum)); itup = &(hitem->hash_itup); datum = index_getattr(itup, 1, itupdesc, &null); Assert(!null); bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum), maxbucket, highmask, lowmask); if (bucket == nbucket) { /* * insert the tuple into the new bucket. if it doesn't fit on the * current page in the new bucket, we must allocate a new overflow * page and place the tuple on that page instead. */ itemsz = IndexTupleDSize(hitem->hash_itup) + (sizeof(HashItemData) - sizeof(IndexTupleData)); itemsz = MAXALIGN(itemsz); if (PageGetFreeSpace(npage) < itemsz) { /* write out nbuf and drop lock, but keep pin */ _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK); /* chain to a new overflow page */ nbuf = _hash_addovflpage(rel, metabuf, nbuf); npage = BufferGetPage(nbuf); _hash_checkpage(rel, npage, LH_OVERFLOW_PAGE); /* we don't need nopaque within the loop */ } noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage)); if (PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED) == InvalidOffsetNumber) elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(rel)); /* * now delete the tuple from the old bucket. after this section * of code, 'ooffnum' will actually point to the ItemId to which * we would point if we had advanced it before the deletion * (PageIndexTupleDelete repacks the ItemId array). this also * means that 'omaxoffnum' is exactly one less than it used to be, * so we really can just decrement it instead of calling * PageGetMaxOffsetNumber. */ PageIndexTupleDelete(opage, ooffnum); omaxoffnum = OffsetNumberPrev(omaxoffnum); } else { /* * the tuple stays on this page. we didn't move anything, so we * didn't delete anything and therefore we don't have to change * 'omaxoffnum'. */ Assert(bucket == obucket); ooffnum = OffsetNumberNext(ooffnum); } } /* * We're at the end of the old bucket chain, so we're done partitioning * the tuples. Before quitting, call _hash_squeezebucket to ensure the * tuples remaining in the old bucket (including the overflow pages) are * packed as tightly as possible. The new bucket is already tight. */ _hash_wrtbuf(rel, obuf); _hash_wrtbuf(rel, nbuf); _hash_squeezebucket(rel, obucket, start_oblkno);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -