📄 hashpage.c
/*
 * _hash_pageinit() -- Initialize a new hash index page.
 */
void
_hash_pageinit(Page page, Size size)
{
    Assert(PageIsNew(page));
    PageInit(page, size, sizeof(HashPageOpaqueData));
}
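/*
 * Illustrative sketch: the typical pattern for preparing a brand-new hash
 * page.  _hash_pageinit() lays out the page header and reserves the special
 * space; the caller then fills in the hash-specific HashPageOpaque fields,
 * just as _hash_splitbucket() does below for the new bucket's primary page.
 * The helper name and the assumption that the buffer already holds a zeroed
 * page are illustrative only, not part of the index code proper.
 */
#ifdef NOT_USED
static void
example_init_bucket_page(Buffer buf, Bucket bucket)
{
    Page page = BufferGetPage(buf);
    HashPageOpaque pageopaque;

    /* page must be brand new (all zeroes); _hash_pageinit() asserts this */
    _hash_pageinit(page, BufferGetPageSize(buf));

    /* fill in the hash-specific fields kept in the page's special space */
    pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
    pageopaque->hasho_prevblkno = InvalidBlockNumber;
    pageopaque->hasho_nextblkno = InvalidBlockNumber;
    pageopaque->hasho_bucket = bucket;
    pageopaque->hasho_flag = LH_BUCKET_PAGE;
    pageopaque->hasho_page_id = HASHO_PAGE_ID;
}
#endif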
/*
 * Attempt to expand the hash table by creating one new bucket.
 *
 * This will silently do nothing if it cannot get the needed locks.
 *
 * The caller should hold no locks on the hash index.
 *
 * The caller must hold a pin, but no lock, on the metapage buffer.
 * The buffer is returned in the same state.
 */
void
_hash_expandtable(Relation rel, Buffer metabuf)
{
    HashMetaPage metap;
    Bucket old_bucket;
    Bucket new_bucket;
    uint32 spare_ndx;
    BlockNumber start_oblkno;
    BlockNumber start_nblkno;
    uint32 maxbucket;
    uint32 highmask;
    uint32 lowmask;

    /*
     * Obtain the page-zero lock to assert the right to begin a split (see
     * README).
     *
     * Note: deadlock should be impossible here. Our own backend could only be
     * holding bucket sharelocks due to stopped indexscans; those will not
     * block other holders of the page-zero lock, who are only interested in
     * acquiring bucket sharelocks themselves.  Exclusive bucket locks are
     * only taken here and in hashbulkdelete, and neither of these operations
     * needs any additional locks to complete.  (If, due to some flaw in this
     * reasoning, we manage to deadlock anyway, it's okay to error out; the
     * index will be left in a consistent state.)
     */
    _hash_getlock(rel, 0, HASH_EXCLUSIVE);

    /* Write-lock the meta page */
    _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);

    _hash_checkpage(rel, metabuf, LH_META_PAGE);
    metap = (HashMetaPage) BufferGetPage(metabuf);

    /*
     * Check to see if split is still needed; someone else might have already
     * done one while we waited for the lock.
     *
     * Make sure this stays in sync with _hash_doinsert()
     */
    if (metap->hashm_ntuples <=
        (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))
        goto fail;

    /*
     * Can't split anymore if maxbucket has reached its maximum possible
     * value.
     *
     * Ideally we'd allow bucket numbers up to UINT_MAX-1 (no higher because
     * the calculation maxbucket+1 mustn't overflow).  Currently we restrict
     * to half that because of overflow looping in _hash_log2() and
     * insufficient space in hashm_spares[].  It's moot anyway because an
     * index with 2^32 buckets would certainly overflow BlockNumber and hence
     * _hash_alloc_buckets() would fail, but if we supported buckets smaller
     * than a disk block then this would be an independent constraint.
     */
    if (metap->hashm_maxbucket >= (uint32) 0x7FFFFFFE)
        goto fail;

    /*
     * Determine which bucket is to be split, and attempt to lock the old
     * bucket.  If we can't get the lock, give up.
     *
     * The lock protects us against other backends, but not against our own
     * backend.  Must check for active scans separately.
     */
    new_bucket = metap->hashm_maxbucket + 1;

    old_bucket = (new_bucket & metap->hashm_lowmask);

    start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);

    if (_hash_has_active_scan(rel, old_bucket))
        goto fail;

    if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE))
        goto fail;

    /*
     * Likewise lock the new bucket (should never fail).
     *
     * Note: it is safe to compute the new bucket's blkno here, even though we
     * may still need to update the BUCKET_TO_BLKNO mapping.  This is because
     * the current value of hashm_spares[hashm_ovflpoint] correctly shows
     * where we are going to put a new splitpoint's worth of buckets.
     */
    start_nblkno = BUCKET_TO_BLKNO(metap, new_bucket);

    if (_hash_has_active_scan(rel, new_bucket))
        elog(ERROR, "scan in progress on supposedly new bucket");
    if (!_hash_try_getlock(rel, start_nblkno, HASH_EXCLUSIVE))
        elog(ERROR, "could not get lock on supposedly new bucket");

    /*
     * If the split point is increasing (hashm_maxbucket's log base 2
     * increases), we need to allocate a new batch of bucket pages.
     */
    spare_ndx = _hash_log2(new_bucket + 1);
    if (spare_ndx > metap->hashm_ovflpoint)
    {
        Assert(spare_ndx == metap->hashm_ovflpoint + 1);

        /*
         * The number of buckets in the new splitpoint is equal to the total
         * number already in existence, i.e. new_bucket.  Currently this maps
         * one-to-one to blocks required, but someday we may need a more
         * complicated calculation here.
         */
        if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket))
        {
            /* can't split due to BlockNumber overflow */
            _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
            _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
            goto fail;
        }
    }

    /*
     * Okay to proceed with split.  Update the metapage bucket mapping info.
     *
     * Since we are scribbling on the metapage data right in the shared
     * buffer, any failure in this next little bit leaves us with a big
     * problem: the metapage is effectively corrupt but could get written back
     * to disk.  We don't really expect any failure, but just to be sure,
     * establish a critical section.
     */
    START_CRIT_SECTION();

    metap->hashm_maxbucket = new_bucket;

    if (new_bucket > metap->hashm_highmask)
    {
        /* Starting a new doubling */
        metap->hashm_lowmask = metap->hashm_highmask;
        metap->hashm_highmask = new_bucket | metap->hashm_lowmask;
    }

    /*
     * If the split point is increasing (hashm_maxbucket's log base 2
     * increases), we need to adjust the hashm_spares[] array and
     * hashm_ovflpoint so that future overflow pages will be created beyond
     * this new batch of bucket pages.
     */
    if (spare_ndx > metap->hashm_ovflpoint)
    {
        metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint];
        metap->hashm_ovflpoint = spare_ndx;
    }

    /* Done mucking with metapage */
    END_CRIT_SECTION();

    /*
     * Copy bucket mapping info now; this saves re-accessing the meta page
     * inside _hash_splitbucket's inner loop.  Note that once we drop the
     * split lock, other splits could begin, so these values might be out of
     * date before _hash_splitbucket finishes.  That's okay, since all it
     * needs is to tell which of these two buckets to map hashkeys into.
     */
    maxbucket = metap->hashm_maxbucket;
    highmask = metap->hashm_highmask;
    lowmask = metap->hashm_lowmask;

    /* Write out the metapage and drop lock, but keep pin */
    _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);

    /* Release split lock; okay for other splits to occur now */
    _hash_droplock(rel, 0, HASH_EXCLUSIVE);

    /* Relocate records to the new bucket */
    _hash_splitbucket(rel, metabuf, old_bucket, new_bucket,
                      start_oblkno, start_nblkno,
                      maxbucket, highmask, lowmask);

    /* Release bucket locks, allowing others to access them */
    _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
    _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);

    return;

    /* Here if decide not to split or fail to acquire old bucket lock */
fail:

    /* We didn't write the metapage, so just drop lock */
    _hash_chgbufaccess(rel, metabuf, HASH_READ, HASH_NOLOCK);

    /* Release split lock */
    _hash_droplock(rel, 0, HASH_EXCLUSIVE);
}
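/*
 * Illustrative sketch: how the metapage bookkeeping above evolves over a run
 * of consecutive splits.  The update rules are exactly the ones used in
 * _hash_expandtable(); the starting values (a table that has already grown to
 * four buckets: maxbucket = 3, lowmask = 1, highmask = 3) are an assumption
 * for illustration.  With those values, creating bucket 4 starts a new
 * doubling (lowmask becomes 3, highmask becomes 7), after which buckets 4..7
 * are split off from buckets 0..3 in turn.
 */
#ifdef NOT_USED
static void
example_trace_split_bookkeeping(int nsplits)
{
    uint32 maxbucket = 3;       /* assumed starting point: buckets 0..3 exist */
    uint32 lowmask = 1;
    uint32 highmask = 3;
    int i;

    for (i = 0; i < nsplits; i++)
    {
        uint32 new_bucket = maxbucket + 1;
        uint32 old_bucket = new_bucket & lowmask;   /* bucket being split */

        maxbucket = new_bucket;
        if (new_bucket > highmask)
        {
            /* starting a new doubling, as in _hash_expandtable() */
            lowmask = highmask;
            highmask = new_bucket | lowmask;
        }

        elog(DEBUG1, "split bucket %u into %u and %u: lowmask=%X highmask=%X",
             old_bucket, old_bucket, new_bucket, lowmask, highmask);
    }
}
#endif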
/*
 * _hash_alloc_buckets -- allocate a new splitpoint's worth of bucket pages
 *
 * This does not need to initialize the new bucket pages; we'll do that as
 * each one is used by _hash_expandtable().  But we have to extend the logical
 * EOF to the end of the splitpoint; this keeps smgr's idea of the EOF in
 * sync with ours, so that we don't get complaints from smgr.
 *
 * We do this by writing a page of zeroes at the end of the splitpoint range.
 * We expect that the filesystem will ensure that the intervening pages read
 * as zeroes too.  On many filesystems this "hole" will not be allocated
 * immediately, which means that the index file may end up more fragmented
 * than if we forced it all to be allocated now; but since we don't scan
 * hash indexes sequentially anyway, that probably doesn't matter.
 *
 * XXX It's annoying that this code is executed with the metapage lock held.
 * We need to interlock against _hash_getovflpage() adding a new overflow page
 * concurrently, but it'd likely be better to use LockRelationForExtension
 * for the purpose.  OTOH, adding a splitpoint is a very infrequent operation,
 * so it may not be worth worrying about.
 *
 * Returns TRUE if successful, or FALSE if allocation failed due to
 * BlockNumber overflow.
 */
static bool
_hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
{
    BlockNumber lastblock;
    char zerobuf[BLCKSZ];

    lastblock = firstblock + nblocks - 1;

    /*
     * Check for overflow in block number calculation; if so, we cannot extend
     * the index anymore.
     */
    if (lastblock < firstblock || lastblock == InvalidBlockNumber)
        return false;

    MemSet(zerobuf, 0, sizeof(zerobuf));

    RelationOpenSmgr(rel);
    smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp);

    return true;
}
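/*
 * Illustrative sketch: how a bucket number is translated into a physical
 * block number under this layout.  Bucket B belongs to splitpoint
 * _hash_log2(B+1); the buckets of a splitpoint are laid out consecutively,
 * after whatever overflow pages were allocated while earlier splitpoints were
 * current, and block 0 holds the metapage.  hashm_spares[] records those
 * cumulative overflow-page counts.  The helper below restates the arithmetic
 * of the BUCKET_TO_BLKNO() macro used above; the function name is
 * illustrative only.
 */
#ifdef NOT_USED
static BlockNumber
example_bucket_to_blkno(HashMetaPage metap, Bucket bucket)
{
    uint32 splitpoint = _hash_log2(bucket + 1);
    uint32 spares_before;

    /* overflow pages allocated before this bucket's splitpoint began */
    spares_before = (bucket == 0) ? 0 : metap->hashm_spares[splitpoint - 1];

    /* skip the metapage (block 0), earlier buckets, and those overflow pages */
    return (BlockNumber) (bucket + spares_before) + 1;
}
#endif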
/*
 * _hash_splitbucket -- split 'obucket' into 'obucket' and 'nbucket'
 *
 * We are splitting a bucket that consists of a base bucket page and zero
 * or more overflow (bucket chain) pages.  We must relocate tuples that
 * belong in the new bucket, and compress out any free space in the old
 * bucket.
 *
 * The caller must hold exclusive locks on both buckets to ensure that
 * no one else is trying to access them (see README).
 *
 * The caller must hold a pin, but no lock, on the metapage buffer.
 * The buffer is returned in the same state.  (The metapage is only
 * touched if it becomes necessary to add or remove overflow pages.)
 */
static void
_hash_splitbucket(Relation rel,
                  Buffer metabuf,
                  Bucket obucket,
                  Bucket nbucket,
                  BlockNumber start_oblkno,
                  BlockNumber start_nblkno,
                  uint32 maxbucket,
                  uint32 highmask,
                  uint32 lowmask)
{
    Bucket bucket;
    Buffer obuf;
    Buffer nbuf;
    BlockNumber oblkno;
    BlockNumber nblkno;
    bool null;
    Datum datum;
    HashPageOpaque oopaque;
    HashPageOpaque nopaque;
    IndexTuple itup;
    Size itemsz;
    OffsetNumber ooffnum;
    OffsetNumber noffnum;
    OffsetNumber omaxoffnum;
    Page opage;
    Page npage;
    TupleDesc itupdesc = RelationGetDescr(rel);

    /*
     * It should be okay to simultaneously write-lock pages from each bucket,
     * since no one else can be trying to acquire buffer lock on pages of
     * either bucket.
     */
    oblkno = start_oblkno;
    obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_BUCKET_PAGE);
    opage = BufferGetPage(obuf);
    oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);

    nblkno = start_nblkno;
    nbuf = _hash_getnewbuf(rel, nblkno);
    npage = BufferGetPage(nbuf);

    /* initialize the new bucket's primary page */
    nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
    nopaque->hasho_prevblkno = InvalidBlockNumber;
    nopaque->hasho_nextblkno = InvalidBlockNumber;
    nopaque->hasho_bucket = nbucket;
    nopaque->hasho_flag = LH_BUCKET_PAGE;
    nopaque->hasho_page_id = HASHO_PAGE_ID;

    /*
     * Partition the tuples in the old bucket between the old bucket and the
     * new bucket, advancing along the old bucket's overflow bucket chain and
     * adding overflow pages to the new bucket as needed.
     */
    ooffnum = FirstOffsetNumber;
    omaxoffnum = PageGetMaxOffsetNumber(opage);
    for (;;)
    {
        /*
         * at each iteration through this loop, each of these variables should
         * be up-to-date: obuf opage oopaque ooffnum omaxoffnum
         */

        /* check if we're at the end of the page */
        if (ooffnum > omaxoffnum)
        {
            /* at end of page, but check for an(other) overflow page */
            oblkno = oopaque->hasho_nextblkno;
            if (!BlockNumberIsValid(oblkno))
                break;

            /*
             * we ran out of tuples on this particular page, but we have more
             * overflow pages; advance to next page.
             */
            _hash_wrtbuf(rel, obuf);

            obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
            opage = BufferGetPage(obuf);
            oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
            ooffnum = FirstOffsetNumber;
            omaxoffnum = PageGetMaxOffsetNumber(opage);
            continue;
        }

        /*
         * Re-hash the tuple to determine which bucket it now belongs in.
         *
         * It is annoying to call the hash function while holding locks, but
         * releasing and relocking the page for each tuple is unappealing too.
         */
        itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
        datum = index_getattr(itup, 1, itupdesc, &null);
        Assert(!null);

        bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),
                                      maxbucket, highmask, lowmask);

        if (bucket == nbucket)
        {
            /*
             * insert the tuple into the new bucket.  if it doesn't fit on the
             * current page in the new bucket, we must allocate a new overflow
             * page and place the tuple on that page instead.
             */
            itemsz = IndexTupleDSize(*itup);
            itemsz = MAXALIGN(itemsz);

            if (PageGetFreeSpace(npage) < itemsz)
            {
                /* write out nbuf and drop lock, but keep pin */
                _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
                /* chain to a new overflow page */
                nbuf = _hash_addovflpage(rel, metabuf, nbuf);
                npage = BufferGetPage(nbuf);
                /* we don't need nopaque within the loop */
            }

            noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
            if (PageAddItem(npage, (Item) itup, itemsz, noffnum,
                            false, false) == InvalidOffsetNumber)
                elog(ERROR, "failed to add index item to \"%s\"",
                     RelationGetRelationName(rel));

            /*
             * now delete the tuple from the old bucket.  after this section
             * of code, 'ooffnum' will actually point to the ItemId to which
             * we would point if we had advanced it before the deletion
             * (PageIndexTupleDelete repacks the ItemId array).  this also
             * means that 'omaxoffnum' is exactly one less than it used to be,
             * so we really can just decrement it instead of calling
             * PageGetMaxOffsetNumber.
             */
            PageIndexTupleDelete(opage, ooffnum);
            omaxoffnum = OffsetNumberPrev(omaxoffnum);
        }
        else
        {
            /*
             * the tuple stays on this page.  we didn't move anything, so we
             * didn't delete anything and therefore we don't have to change
             * 'omaxoffnum'.
             */
            Assert(bucket == obucket);
            ooffnum = OffsetNumberNext(ooffnum);
        }
    }

    /*
     * We're at the end of the old bucket chain, so we're done partitioning
     * the tuples.  Before quitting, call _hash_squeezebucket to ensure the
     * tuples remaining in the old bucket (including the overflow pages) are
     * packed as tightly as possible.  The new bucket is already tight.
     */
    _hash_wrtbuf(rel, obuf);
    _hash_wrtbuf(rel, nbuf);

    _hash_squeezebucket(rel, obucket, start_oblkno, NULL);
}
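/*
 * Illustrative sketch: the re-hash step inside the partition loop above calls
 * _hash_hashkey2bucket() to decide whether a tuple stays in 'obucket' or
 * moves to 'nbucket'.  The mapping amounts to the following (a restatement of
 * the logic of _hash_hashkey2bucket() in hashutil.c, renamed here so the
 * sketch stands alone): mask the hash value with highmask, and if that names
 * a bucket that has not been created yet, fall back to lowmask.  For tuples
 * taken from the old bucket, the result is always either obucket or nbucket.
 */
#ifdef NOT_USED
static Bucket
example_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
                       uint32 highmask, uint32 lowmask)
{
    Bucket bucket;

    /* use the bucket bits of the current doubling */
    bucket = hashkey & highmask;

    /* if that bucket doesn't exist yet, it hasn't been split off; use the previous doubling */
    if (bucket > maxbucket)
        bucket = bucket & lowmask;

    return bucket;
}
#endif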