📄 hashpage.c

📁 PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
 * The caller should hold no locks on the hash index. * * The caller must hold a pin, but no lock, on the metapage buffer. * The buffer is returned in the same state. */void_hash_expandtable(Relation rel, Buffer metabuf){	HashMetaPage metap;	Bucket		old_bucket;	Bucket		new_bucket;	uint32		spare_ndx;	BlockNumber start_oblkno;	BlockNumber start_nblkno;	uint32		maxbucket;	uint32		highmask;	uint32		lowmask;	/*	 * Obtain the page-zero lock to assert the right to begin a split (see	 * README).	 *	 * Note: deadlock should be impossible here. Our own backend could only be	 * holding bucket sharelocks due to stopped indexscans; those will not	 * block other holders of the page-zero lock, who are only interested in	 * acquiring bucket sharelocks themselves.	Exclusive bucket locks are	 * only taken here and in hashbulkdelete, and neither of these operations	 * needs any additional locks to complete.	(If, due to some flaw in this	 * reasoning, we manage to deadlock anyway, it's okay to error out; the	 * index will be left in a consistent state.)	 */	_hash_getlock(rel, 0, HASH_EXCLUSIVE);	/* Write-lock the meta page */	_hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);	metap = (HashMetaPage) BufferGetPage(metabuf);	_hash_checkpage(rel, (Page) metap, LH_META_PAGE);	/*	 * Check to see if split is still needed; someone else might have already	 * done one while we waited for the lock.	 *	 * Make sure this stays in sync with _hash_doinsert()	 */	if (metap->hashm_ntuples <=		(double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))		goto fail;	/*	 * Determine which bucket is to be split, and attempt to lock the old	 * bucket.	If we can't get the lock, give up.	 *	 * The lock protects us against other backends, but not against our own	 * backend.  Must check for active scans separately.	 *	 * Ideally we would lock the new bucket too before proceeding, but if we	 * are about to cross a splitpoint then the BUCKET_TO_BLKNO mapping isn't	 * correct yet.  For simplicity we update the metapage first and then	 * lock.  This should be okay because no one else should be trying to lock	 * the new bucket yet...	 */	new_bucket = metap->hashm_maxbucket + 1;	old_bucket = (new_bucket & metap->hashm_lowmask);	start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);	if (_hash_has_active_scan(rel, old_bucket))		goto fail;	if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE))		goto fail;	/*	 * Okay to proceed with split.	Update the metapage bucket mapping info.	 *	 * Since we are scribbling on the metapage data right in the shared	 * buffer, any failure in this next little bit leaves us with a big	 * problem: the metapage is effectively corrupt but could get written back	 * to disk.  We don't really expect any failure, but just to be sure,	 * establish a critical section.	 */	START_CRIT_SECTION();	metap->hashm_maxbucket = new_bucket;	if (new_bucket > metap->hashm_highmask)	{		/* Starting a new doubling */		metap->hashm_lowmask = metap->hashm_highmask;		metap->hashm_highmask = new_bucket | metap->hashm_lowmask;	}	/*	 * If the split point is increasing (hashm_maxbucket's log base 2	 * increases), we need to adjust the hashm_spares[] array and	 * hashm_ovflpoint so that future overflow pages will be created beyond	 * this new batch of bucket pages.	 *	 * XXX should initialize new bucket pages to prevent out-of-order page	 * creation?  Don't wanna do it right here though.	 */	spare_ndx = _hash_log2(metap->hashm_maxbucket + 1);	if (spare_ndx > metap->hashm_ovflpoint)	{		Assert(spare_ndx == metap->hashm_ovflpoint + 1);		metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];		metap->hashm_ovflpoint = spare_ndx;	}	/* now we can compute the new bucket's primary block number */	start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket);	Assert(!_hash_has_active_scan(rel, new_bucket));	if (!_hash_try_getlock(rel, start_nblkno, HASH_EXCLUSIVE))		elog(PANIC, "could not get lock on supposedly new bucket");	/* Done mucking with metapage */	END_CRIT_SECTION();	/*	 * Copy bucket mapping info now; this saves re-accessing the meta page	 * inside _hash_splitbucket's inner loop.  Note that once we drop the	 * split lock, other splits could begin, so these values might be out of	 * date before _hash_splitbucket finishes.	That's okay, since all it	 * needs is to tell which of these two buckets to map hashkeys into.	 */	maxbucket = metap->hashm_maxbucket;	highmask = metap->hashm_highmask;	lowmask = metap->hashm_lowmask;	/* Write out the metapage and drop lock, but keep pin */	_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);	/* Release split lock; okay for other splits to occur now */	_hash_droplock(rel, 0, HASH_EXCLUSIVE);	/* Relocate records to the new bucket */	_hash_splitbucket(rel, metabuf, old_bucket, new_bucket,					  start_oblkno, start_nblkno,					  maxbucket, highmask, lowmask);	/* Release bucket locks, allowing others to access them */	_hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);	_hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);	return;	/* Here if decide not to split or fail to acquire old bucket lock */fail:	/* We didn't write the metapage, so just drop lock */	_hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);	/* Release split lock */	_hash_droplock(rel, 0, HASH_EXCLUSIVE);}/* * _hash_splitbucket -- split 'obucket' into 'obucket' and 'nbucket' * * We are splitting a bucket that consists of a base bucket page and zero * or more overflow (bucket chain) pages.  We must relocate tuples that * belong in the new bucket, and compress out any free space in the old * bucket. * * The caller must hold exclusive locks on both buckets to ensure that * no one else is trying to access them (see README). * * The caller must hold a pin, but no lock, on the metapage buffer. * The buffer is returned in the same state.  (The metapage is only * touched if it becomes necessary to add or remove overflow pages.) */static void_hash_splitbucket(Relation rel,				  Buffer metabuf,				  Bucket obucket,				  Bucket nbucket,				  BlockNumber start_oblkno,				  BlockNumber start_nblkno,				  uint32 maxbucket,				  uint32 highmask,				  uint32 lowmask){	Bucket		bucket;	Buffer		obuf;	Buffer		nbuf;	BlockNumber oblkno;	BlockNumber nblkno;	bool		null;	Datum		datum;	HashItem	hitem;	HashPageOpaque oopaque;	HashPageOpaque nopaque;	IndexTuple	itup;	Size		itemsz;	OffsetNumber ooffnum;	OffsetNumber noffnum;	OffsetNumber omaxoffnum;	Page		opage;	Page		npage;	TupleDesc	itupdesc = RelationGetDescr(rel);	/*	 * It should be okay to simultaneously write-lock pages from each bucket,	 * since no one else can be trying to acquire buffer lock on pages of	 * either bucket.	 */	oblkno = start_oblkno;	nblkno = start_nblkno;	obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);	nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);	opage = BufferGetPage(obuf);	npage = BufferGetPage(nbuf);	_hash_checkpage(rel, opage, LH_BUCKET_PAGE);	oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);	/* initialize the new bucket's primary page */	_hash_pageinit(npage, BufferGetPageSize(nbuf));	nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);	nopaque->hasho_prevblkno = InvalidBlockNumber;	nopaque->hasho_nextblkno = InvalidBlockNumber;	nopaque->hasho_bucket = nbucket;	nopaque->hasho_flag = LH_BUCKET_PAGE;	nopaque->hasho_filler = HASHO_FILL;	/*	 * Partition the tuples in the old bucket between the old bucket and the	 * new bucket, advancing along the old bucket's overflow bucket chain and	 * adding overflow pages to the new bucket as needed.	 */	ooffnum = FirstOffsetNumber;	omaxoffnum = PageGetMaxOffsetNumber(opage);	for (;;)	{		/*		 * at each iteration through this loop, each of these variables should		 * be up-to-date: obuf opage oopaque ooffnum omaxoffnum		 */		/* check if we're at the end of the page */		if (ooffnum > omaxoffnum)		{			/* at end of page, but check for an(other) overflow page */			oblkno = oopaque->hasho_nextblkno;			if (!BlockNumberIsValid(oblkno))				break;			/*			 * we ran out of tuples on this particular page, but we have more			 * overflow pages; advance to next page.			 */			_hash_wrtbuf(rel, obuf);			obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);			opage = BufferGetPage(obuf);			_hash_checkpage(rel, opage, LH_OVERFLOW_PAGE);			oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);			ooffnum = FirstOffsetNumber;			omaxoffnum = PageGetMaxOffsetNumber(opage);			continue;		}		/*		 * Re-hash the tuple to determine which bucket it now belongs in.		 *		 * It is annoying to call the hash function while holding locks, but		 * releasing and relocking the page for each tuple is unappealing too.		 */		hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));		itup = &(hitem->hash_itup);		datum = index_getattr(itup, 1, itupdesc, &null);		Assert(!null);		bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),									  maxbucket, highmask, lowmask);		if (bucket == nbucket)		{			/*			 * insert the tuple into the new bucket.  if it doesn't fit on the			 * current page in the new bucket, we must allocate a new overflow			 * page and place the tuple on that page instead.			 */			itemsz = IndexTupleDSize(hitem->hash_itup)				+ (sizeof(HashItemData) - sizeof(IndexTupleData));			itemsz = MAXALIGN(itemsz);			if (PageGetFreeSpace(npage) < itemsz)			{				/* write out nbuf and drop lock, but keep pin */				_hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);				/* chain to a new overflow page */				nbuf = _hash_addovflpage(rel, metabuf, nbuf);				npage = BufferGetPage(nbuf);				_hash_checkpage(rel, npage, LH_OVERFLOW_PAGE);				/* we don't need nopaque within the loop */			}			noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));			if (PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED)				== InvalidOffsetNumber)				elog(ERROR, "failed to add index item to \"%s\"",					 RelationGetRelationName(rel));			/*			 * now delete the tuple from the old bucket.  after this section			 * of code, 'ooffnum' will actually point to the ItemId to which			 * we would point if we had advanced it before the deletion			 * (PageIndexTupleDelete repacks the ItemId array).  this also			 * means that 'omaxoffnum' is exactly one less than it used to be,			 * so we really can just decrement it instead of calling			 * PageGetMaxOffsetNumber.			 */			PageIndexTupleDelete(opage, ooffnum);			omaxoffnum = OffsetNumberPrev(omaxoffnum);		}		else		{			/*			 * the tuple stays on this page.  we didn't move anything, so we			 * didn't delete anything and therefore we don't have to change			 * 'omaxoffnum'.			 */			Assert(bucket == obucket);			ooffnum = OffsetNumberNext(ooffnum);		}	}	/*	 * We're at the end of the old bucket chain, so we're done partitioning	 * the tuples.	Before quitting, call _hash_squeezebucket to ensure the	 * tuples remaining in the old bucket (including the overflow pages) are	 * packed as tightly as possible.  The new bucket is already tight.	 */	_hash_wrtbuf(rel, obuf);	_hash_wrtbuf(rel, nbuf);	_hash_squeezebucket(rel, obucket, start_oblkno);}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -