📄 hashpage.c
字号:
/*------------------------------------------------------------------------- * * hashpage.c * Hash table page management code for the Postgres hash access method * * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.72 2008/01/01 19:45:46 momjian Exp $ * * NOTES * Postgres hash pages look like ordinary relation pages. The opaque * data at high addresses includes information about the page including * whether a page is an overflow page or a true bucket, the bucket * number, and the block numbers of the preceding and following pages * in the same bucket. * * The first page in a hash relation, page zero, is special -- it stores * information describing the hash table; it is referred to as the * "meta page." Pages one and higher store the actual data. * * There are also bitmap pages, which are not manipulated here; * see hashovfl.c. * *------------------------------------------------------------------------- */#include "postgres.h"#include "access/genam.h"#include "access/hash.h"#include "miscadmin.h"#include "storage/lmgr.h"#include "storage/smgr.h"#include "utils/lsyscache.h"static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks);static void _hash_splitbucket(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket, BlockNumber start_oblkno, BlockNumber start_nblkno, uint32 maxbucket, uint32 highmask, uint32 lowmask);/* * We use high-concurrency locking on hash indexes (see README for an overview * of the locking rules). However, we can skip taking lmgr locks when the * index is local to the current backend (ie, either temp or new in the * current transaction). No one else can see it, so there's no reason to * take locks. We still take buffer-level locks, but not lmgr locks. */#define USELOCKING(rel) (!RELATION_IS_LOCAL(rel))/* * _hash_getlock() -- Acquire an lmgr lock. * * 'whichlock' should be zero to acquire the split-control lock, or the * block number of a bucket's primary bucket page to acquire the per-bucket * lock. (See README for details of the use of these locks.) * * 'access' must be HASH_SHARE or HASH_EXCLUSIVE. */void_hash_getlock(Relation rel, BlockNumber whichlock, int access){ if (USELOCKING(rel)) LockPage(rel, whichlock, access);}/* * _hash_try_getlock() -- Acquire an lmgr lock, but only if it's free. * * Same as above except we return FALSE without blocking if lock isn't free. */bool_hash_try_getlock(Relation rel, BlockNumber whichlock, int access){ if (USELOCKING(rel)) return ConditionalLockPage(rel, whichlock, access); else return true;}/* * _hash_droplock() -- Release an lmgr lock. */void_hash_droplock(Relation rel, BlockNumber whichlock, int access){ if (USELOCKING(rel)) UnlockPage(rel, whichlock, access);}/* * _hash_getbuf() -- Get a buffer by block number for read or write. * * 'access' must be HASH_READ, HASH_WRITE, or HASH_NOLOCK. * 'flags' is a bitwise OR of the allowed page types. * * This must be used only to fetch pages that are expected to be valid * already. _hash_checkpage() is applied using the given flags. * * When this routine returns, the appropriate lock is set on the * requested buffer and its reference count has been incremented * (ie, the buffer is "locked and pinned"). * * P_NEW is disallowed because this routine can only be used * to access pages that are known to be before the filesystem EOF. * Extending the index should be done with _hash_getnewbuf. */Buffer_hash_getbuf(Relation rel, BlockNumber blkno, int access, int flags){ Buffer buf; if (blkno == P_NEW) elog(ERROR, "hash AM does not use P_NEW"); buf = ReadBuffer(rel, blkno); if (access != HASH_NOLOCK) LockBuffer(buf, access); /* ref count and lock type are correct */ _hash_checkpage(rel, buf, flags); return buf;}/* * _hash_getinitbuf() -- Get and initialize a buffer by block number. * * This must be used only to fetch pages that are known to be before * the index's filesystem EOF, but are to be filled from scratch. * _hash_pageinit() is applied automatically. Otherwise it has * effects similar to _hash_getbuf() with access = HASH_WRITE. * * When this routine returns, a write lock is set on the * requested buffer and its reference count has been incremented * (ie, the buffer is "locked and pinned"). * * P_NEW is disallowed because this routine can only be used * to access pages that are known to be before the filesystem EOF. * Extending the index should be done with _hash_getnewbuf. */Buffer_hash_getinitbuf(Relation rel, BlockNumber blkno){ Buffer buf; if (blkno == P_NEW) elog(ERROR, "hash AM does not use P_NEW"); buf = ReadOrZeroBuffer(rel, blkno); LockBuffer(buf, HASH_WRITE); /* ref count and lock type are correct */ /* initialize the page */ _hash_pageinit(BufferGetPage(buf), BufferGetPageSize(buf)); return buf;}/* * _hash_getnewbuf() -- Get a new page at the end of the index. * * This has the same API as _hash_getinitbuf, except that we are adding * a page to the index, and hence expect the page to be past the * logical EOF. (However, we have to support the case where it isn't, * since a prior try might have crashed after extending the filesystem * EOF but before updating the metapage to reflect the added page.) * * It is caller's responsibility to ensure that only one process can * extend the index at a time. */Buffer_hash_getnewbuf(Relation rel, BlockNumber blkno){ BlockNumber nblocks = RelationGetNumberOfBlocks(rel); Buffer buf; if (blkno == P_NEW) elog(ERROR, "hash AM does not use P_NEW"); if (blkno > nblocks) elog(ERROR, "access to noncontiguous page in hash index \"%s\"", RelationGetRelationName(rel)); /* smgr insists we use P_NEW to extend the relation */ if (blkno == nblocks) { buf = ReadBuffer(rel, P_NEW); if (BufferGetBlockNumber(buf) != blkno) elog(ERROR, "unexpected hash relation size: %u, should be %u", BufferGetBlockNumber(buf), blkno); } else buf = ReadOrZeroBuffer(rel, blkno); LockBuffer(buf, HASH_WRITE); /* ref count and lock type are correct */ /* initialize the page */ _hash_pageinit(BufferGetPage(buf), BufferGetPageSize(buf)); return buf;}/* * _hash_getbuf_with_strategy() -- Get a buffer with nondefault strategy. * * This is identical to _hash_getbuf() but also allows a buffer access * strategy to be specified. We use this for VACUUM operations. */Buffer_hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, int access, int flags, BufferAccessStrategy bstrategy){ Buffer buf; if (blkno == P_NEW) elog(ERROR, "hash AM does not use P_NEW"); buf = ReadBufferWithStrategy(rel, blkno, bstrategy); if (access != HASH_NOLOCK) LockBuffer(buf, access); /* ref count and lock type are correct */ _hash_checkpage(rel, buf, flags); return buf;}/* * _hash_relbuf() -- release a locked buffer. * * Lock and pin (refcount) are both dropped. */void_hash_relbuf(Relation rel, Buffer buf){ UnlockReleaseBuffer(buf);}/* * _hash_dropbuf() -- release an unlocked buffer. * * This is used to unpin a buffer on which we hold no lock. */void_hash_dropbuf(Relation rel, Buffer buf){ ReleaseBuffer(buf);}/* * _hash_wrtbuf() -- write a hash page to disk. * * This routine releases the lock held on the buffer and our refcount * for it. It is an error to call _hash_wrtbuf() without a write lock * and a pin on the buffer. * * NOTE: this routine should go away when/if hash indexes are WAL-ified. * The correct sequence of operations is to mark the buffer dirty, then * write the WAL record, then release the lock and pin; so marking dirty * can't be combined with releasing. */void_hash_wrtbuf(Relation rel, Buffer buf){ MarkBufferDirty(buf); UnlockReleaseBuffer(buf);}/* * _hash_chgbufaccess() -- Change the lock type on a buffer, without * dropping our pin on it. * * from_access and to_access may be HASH_READ, HASH_WRITE, or HASH_NOLOCK, * the last indicating that no buffer-level lock is held or wanted. * * When from_access == HASH_WRITE, we assume the buffer is dirty and tell * bufmgr it must be written out. If the caller wants to release a write * lock on a page that's not been modified, it's okay to pass from_access * as HASH_READ (a bit ugly, but handy in some places). */void_hash_chgbufaccess(Relation rel, Buffer buf, int from_access, int to_access){ if (from_access == HASH_WRITE) MarkBufferDirty(buf); if (from_access != HASH_NOLOCK) LockBuffer(buf, BUFFER_LOCK_UNLOCK); if (to_access != HASH_NOLOCK) LockBuffer(buf, to_access);}/* * _hash_metapinit() -- Initialize the metadata page of a hash index, * the two buckets that we begin with and the initial * bitmap page. * * We are fairly cavalier about locking here, since we know that no one else * could be accessing this index. In particular the rule about not holding * multiple buffer locks is ignored. */void_hash_metapinit(Relation rel){ HashMetaPage metap; HashPageOpaque pageopaque; Buffer metabuf; Buffer buf; Page pg; int32 data_width; int32 item_width; int32 ffactor; uint16 i; /* safety check */ if (RelationGetNumberOfBlocks(rel) != 0) elog(ERROR, "cannot initialize non-empty hash index \"%s\"", RelationGetRelationName(rel)); /* * Determine the target fill factor (in tuples per bucket) for this index. * The idea is to make the fill factor correspond to pages about as full * as the user-settable fillfactor parameter says. We can compute it * exactly if the index datatype is fixed-width, but for var-width there's * some guessing involved. */ data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid, RelationGetDescr(rel)->attrs[0]->atttypmod); item_width = MAXALIGN(sizeof(IndexTupleData)) + MAXALIGN(data_width) + sizeof(ItemIdData); /* include the line pointer */ ffactor = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) / item_width; /* keep to a sane range */ if (ffactor < 10) ffactor = 10; /* * We initialize the metapage, the first two bucket pages, and the first * bitmap page in sequence, using _hash_getnewbuf to cause smgrextend() * calls to occur. This ensures that the smgr level has the right idea of * the physical index length. */ metabuf = _hash_getnewbuf(rel, HASH_METAPAGE); pg = BufferGetPage(metabuf); pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); pageopaque->hasho_prevblkno = InvalidBlockNumber; pageopaque->hasho_nextblkno = InvalidBlockNumber; pageopaque->hasho_bucket = -1; pageopaque->hasho_flag = LH_META_PAGE; pageopaque->hasho_page_id = HASHO_PAGE_ID; metap = (HashMetaPage) pg; metap->hashm_magic = HASH_MAGIC; metap->hashm_version = HASH_VERSION; metap->hashm_ntuples = 0; metap->hashm_nmaps = 0; metap->hashm_ffactor = ffactor; metap->hashm_bsize = BufferGetPageSize(metabuf); /* find largest bitmap array size that will fit in page size */ for (i = _hash_log2(metap->hashm_bsize); i > 0; --i) { if ((1 << i) <= (metap->hashm_bsize - (MAXALIGN(sizeof(PageHeaderData)) + MAXALIGN(sizeof(HashPageOpaqueData))))) break; } Assert(i > 0); metap->hashm_bmsize = 1 << i; metap->hashm_bmshift = i + BYTE_TO_BIT; Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1)); /* * Label the index with its primary hash support function's OID. This is * pretty useless for normal operation (in fact, hashm_procid is not used * anywhere), but it might be handy for forensic purposes so we keep it. */ metap->hashm_procid = index_getprocid(rel, 1, HASHPROC); /* * We initialize the index with two buckets, 0 and 1, occupying physical * blocks 1 and 2. The first freespace bitmap page is in block 3. */ metap->hashm_maxbucket = metap->hashm_lowmask = 1; /* nbuckets - 1 */ metap->hashm_highmask = 3; /* (nbuckets << 1) - 1 */ MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares)); MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp)); metap->hashm_spares[1] = 1; /* the first bitmap page is only spare */ metap->hashm_ovflpoint = 1; metap->hashm_firstfree = 0; /* * Initialize the first two buckets */ for (i = 0; i <= 1; i++) { buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i)); pg = BufferGetPage(buf); pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg); pageopaque->hasho_prevblkno = InvalidBlockNumber; pageopaque->hasho_nextblkno = InvalidBlockNumber; pageopaque->hasho_bucket = i; pageopaque->hasho_flag = LH_BUCKET_PAGE; pageopaque->hasho_page_id = HASHO_PAGE_ID; _hash_wrtbuf(rel, buf); } /* * Initialize first bitmap page */ _hash_initbitmap(rel, metap, 3); /* all done */ _hash_wrtbuf(rel, metabuf);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -