hashpage.c
/*-------------------------------------------------------------------------
 *
 * hashpage.c
 *	  Hash table page management code for the Postgres hash access method
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.52.2.1 2005/11/22 18:23:03 momjian Exp $
 *
 * NOTES
 *	  Postgres hash pages look like ordinary relation pages.  The opaque
 *	  data at high addresses includes information about the page including
 *	  whether a page is an overflow page or a true bucket, the bucket
 *	  number, and the block numbers of the preceding and following pages
 *	  in the same bucket.
 *
 *	  The first page in a hash relation, page zero, is special -- it stores
 *	  information describing the hash table; it is referred to as the
 *	  "meta page."  Pages one and higher store the actual data.
 *
 *	  There are also bitmap pages, which are not manipulated here;
 *	  see hashovfl.c.
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/hash.h"
#include "miscadmin.h"
#include "storage/lmgr.h"
#include "utils/lsyscache.h"


static void _hash_splitbucket(Relation rel, Buffer metabuf,
				  Bucket obucket, Bucket nbucket,
				  BlockNumber start_oblkno,
				  BlockNumber start_nblkno,
				  uint32 maxbucket,
				  uint32 highmask, uint32 lowmask);


/*
 * We use high-concurrency locking on hash indexes (see README for an overview
 * of the locking rules).  However, we can skip taking lmgr locks when the
 * index is local to the current backend (ie, either temp or new in the
 * current transaction).  No one else can see it, so there's no reason to
 * take locks.  We still take buffer-level locks, but not lmgr locks.
 */
#define USELOCKING(rel)		(!RELATION_IS_LOCAL(rel))


/*
 * _hash_getlock() -- Acquire an lmgr lock.
 *
 * 'whichlock' should be zero to acquire the split-control lock, or the
 * block number of a bucket's primary bucket page to acquire the per-bucket
 * lock.  (See README for details of the use of these locks.)
 *
 * 'access' must be HASH_SHARE or HASH_EXCLUSIVE.
 */
void
_hash_getlock(Relation rel, BlockNumber whichlock, int access)
{
	if (USELOCKING(rel))
		LockPage(rel, whichlock, access);
}

/*
 * _hash_try_getlock() -- Acquire an lmgr lock, but only if it's free.
 *
 * Same as above except we return FALSE without blocking if lock isn't free.
 */
bool
_hash_try_getlock(Relation rel, BlockNumber whichlock, int access)
{
	if (USELOCKING(rel))
		return ConditionalLockPage(rel, whichlock, access);
	else
		return true;
}

/*
 * _hash_droplock() -- Release an lmgr lock.
 */
void
_hash_droplock(Relation rel, BlockNumber whichlock, int access)
{
	if (USELOCKING(rel))
		UnlockPage(rel, whichlock, access);
}
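/*
 * Illustrative sketch (an addition for exposition, not one of the original
 * functions): the usual pattern for the lmgr wrappers above.  Per the
 * README, a scan takes the per-bucket lock, keyed by the block number of
 * the bucket's primary page, in share mode; a bucket split would take it
 * in exclusive mode.  "bucket_blkno" is assumed to have been computed
 * already, e.g. with BUCKET_TO_BLKNO().
 */
#ifdef NOT_USED
static void
example_scan_bucket(Relation rel, BlockNumber bucket_blkno)
{
	/* keep concurrent splits of this bucket out while we scan it */
	_hash_getlock(rel, bucket_blkno, HASH_SHARE);

	/* ... visit the bucket's primary and overflow pages here ... */

	_hash_droplock(rel, bucket_blkno, HASH_SHARE);
}
#endif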
/*
 *	_hash_getbuf() -- Get a buffer by block number for read or write.
 *
 *		'access' must be HASH_READ, HASH_WRITE, or HASH_NOLOCK.
 *
 *		When this routine returns, the appropriate lock is set on the
 *		requested buffer and its reference count has been incremented
 *		(ie, the buffer is "locked and pinned").
 *
 *		XXX P_NEW is not used because, unlike the tree structures, we
 *		need the bucket blocks to be at certain block numbers.  we must
 *		depend on the caller to call _hash_pageinit on the block if it
 *		knows that this is a new block.
 */
Buffer
_hash_getbuf(Relation rel, BlockNumber blkno, int access)
{
	Buffer		buf;

	if (blkno == P_NEW)
		elog(ERROR, "hash AM does not use P_NEW");

	buf = ReadBuffer(rel, blkno);

	if (access != HASH_NOLOCK)
		LockBuffer(buf, access);

	/* ref count and lock type are correct */
	return buf;
}

/*
 *	_hash_relbuf() -- release a locked buffer.
 *
 * Lock and pin (refcount) are both dropped.  Note that either read or
 * write lock can be dropped this way, but if we modified the buffer,
 * this is NOT the right way to release a write lock.
 */
void
_hash_relbuf(Relation rel, Buffer buf)
{
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buf);
}

/*
 *	_hash_dropbuf() -- release an unlocked buffer.
 *
 * This is used to unpin a buffer on which we hold no lock.  It is assumed
 * that the buffer is not dirty.
 */
void
_hash_dropbuf(Relation rel, Buffer buf)
{
	ReleaseBuffer(buf);
}

/*
 *	_hash_wrtbuf() -- write a hash page to disk.
 *
 *		This routine releases the lock held on the buffer and our refcount
 *		for it.  It is an error to call _hash_wrtbuf() without a write lock
 *		and a pin on the buffer.
 *
 * NOTE: actually, the buffer manager just marks the shared buffer page
 * dirty here; the real I/O happens later.  This is okay since we are not
 * relying on write ordering anyway.  The WAL mechanism is responsible for
 * guaranteeing correctness after a crash.
 */
void
_hash_wrtbuf(Relation rel, Buffer buf)
{
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	WriteBuffer(buf);
}

/*
 *	_hash_wrtnorelbuf() -- write a hash page to disk, but do not release
 *						   our reference or lock.
 *
 *		It is an error to call _hash_wrtnorelbuf() without a write lock
 *		and a pin on the buffer.
 *
 * See above NOTE.
 */
void
_hash_wrtnorelbuf(Relation rel, Buffer buf)
{
	WriteNoReleaseBuffer(buf);
}

/*
 * _hash_chgbufaccess() -- Change the lock type on a buffer, without
 *			dropping our pin on it.
 *
 * from_access and to_access may be HASH_READ, HASH_WRITE, or HASH_NOLOCK,
 * the last indicating that no buffer-level lock is held or wanted.
 *
 * When from_access == HASH_WRITE, we assume the buffer is dirty and tell
 * bufmgr it must be written out.  If the caller wants to release a write
 * lock on a page that's not been modified, it's okay to pass from_access
 * as HASH_READ (a bit ugly, but handy in some places).
 */
void
_hash_chgbufaccess(Relation rel,
				   Buffer buf,
				   int from_access,
				   int to_access)
{
	if (from_access != HASH_NOLOCK)
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	if (from_access == HASH_WRITE)
		WriteNoReleaseBuffer(buf);

	if (to_access != HASH_NOLOCK)
		LockBuffer(buf, to_access);
}
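/*
 * Illustrative sketch (an addition for exposition, not one of the original
 * functions): trading lock strength with _hash_chgbufaccess() while the
 * pin is retained.  Note the exchange is not atomic -- between the unlock
 * and the relock another backend may modify the page, so any pointers into
 * the page must be recomputed afterwards.
 */
#ifdef NOT_USED
static void
example_relock_for_write(Relation rel, Buffer buf)
{
	/* caller holds a pin and a read lock on buf */

	/* release the read lock but keep the pin */
	_hash_chgbufaccess(rel, buf, HASH_READ, HASH_NOLOCK);

	/* ... do work that must not hold a buffer lock ... */

	/* re-lock the still-pinned buffer, this time for update */
	_hash_chgbufaccess(rel, buf, HASH_NOLOCK, HASH_WRITE);
}
#endif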
/*
 *	_hash_metapinit() -- Initialize the metadata page of a hash index,
 *				the two buckets that we begin with and the initial
 *				bitmap page.
 *
 * We are fairly cavalier about locking here, since we know that no one else
 * could be accessing this index.  In particular the rule about not holding
 * multiple buffer locks is ignored.
 */
void
_hash_metapinit(Relation rel)
{
	HashMetaPage metap;
	HashPageOpaque pageopaque;
	Buffer		metabuf;
	Buffer		buf;
	Page		pg;
	int32		data_width;
	int32		item_width;
	int32		ffactor;
	uint16		i;

	/* safety check */
	if (RelationGetNumberOfBlocks(rel) != 0)
		elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
			 RelationGetRelationName(rel));

	/*
	 * Determine the target fill factor (tuples per bucket) for this index.
	 * The idea is to make the fill factor correspond to pages about 3/4ths
	 * full.  We can compute it exactly if the index datatype is fixed-width,
	 * but for var-width there's some guessing involved.
	 */
	data_width = get_typavgwidth(RelationGetDescr(rel)->attrs[0]->atttypid,
								 RelationGetDescr(rel)->attrs[0]->atttypmod);
	item_width = MAXALIGN(sizeof(HashItemData)) + MAXALIGN(data_width) +
		sizeof(ItemIdData);		/* include the line pointer */
	ffactor = (BLCKSZ * 3 / 4) / item_width;
	/* keep to a sane range */
	if (ffactor < 10)
		ffactor = 10;

	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
	pg = BufferGetPage(metabuf);
	_hash_pageinit(pg, BufferGetPageSize(metabuf));

	pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
	pageopaque->hasho_prevblkno = InvalidBlockNumber;
	pageopaque->hasho_nextblkno = InvalidBlockNumber;
	pageopaque->hasho_bucket = -1;
	pageopaque->hasho_flag = LH_META_PAGE;
	pageopaque->hasho_filler = HASHO_FILL;

	metap = (HashMetaPage) pg;

	metap->hashm_magic = HASH_MAGIC;
	metap->hashm_version = HASH_VERSION;
	metap->hashm_ntuples = 0;
	metap->hashm_nmaps = 0;
	metap->hashm_ffactor = ffactor;
	metap->hashm_bsize = BufferGetPageSize(metabuf);
	/* find largest bitmap array size that will fit in page size */
	for (i = _hash_log2(metap->hashm_bsize); i > 0; --i)
	{
		if ((1 << i) <= (metap->hashm_bsize -
						 (MAXALIGN(sizeof(PageHeaderData)) +
						  MAXALIGN(sizeof(HashPageOpaqueData)))))
			break;
	}
	Assert(i > 0);
	metap->hashm_bmsize = 1 << i;
	metap->hashm_bmshift = i + BYTE_TO_BIT;
	Assert((1 << BMPG_SHIFT(metap)) == (BMPG_MASK(metap) + 1));

	metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);

	/*
	 * We initialize the index with two buckets, 0 and 1, occupying physical
	 * blocks 1 and 2.  The first freespace bitmap page is in block 3.
	 */
	metap->hashm_maxbucket = metap->hashm_lowmask = 1;	/* nbuckets - 1 */
	metap->hashm_highmask = 3;	/* (nbuckets << 1) - 1 */

	MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
	MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

	metap->hashm_spares[1] = 1; /* the first bitmap page is only spare */
	metap->hashm_ovflpoint = 1;
	metap->hashm_firstfree = 0;

	/*
	 * Initialize the first two buckets
	 */
	for (i = 0; i <= 1; i++)
	{
		buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(metap, i), HASH_WRITE);
		pg = BufferGetPage(buf);
		_hash_pageinit(pg, BufferGetPageSize(buf));
		pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
		pageopaque->hasho_prevblkno = InvalidBlockNumber;
		pageopaque->hasho_nextblkno = InvalidBlockNumber;
		pageopaque->hasho_bucket = i;
		pageopaque->hasho_flag = LH_BUCKET_PAGE;
		pageopaque->hasho_filler = HASHO_FILL;
		_hash_wrtbuf(rel, buf);
	}

	/*
	 * Initialize first bitmap page.  Can't do this until we create the first
	 * two buckets, else smgr will complain.
	 */
	_hash_initbitmap(rel, metap, 3);

	/* all done */
	_hash_wrtbuf(rel, metabuf);
}

/*
 *	_hash_pageinit() -- Initialize a new hash index page.
 */
void
_hash_pageinit(Page page, Size size)
{
	Assert(PageIsNew(page));
	PageInit(page, size, sizeof(HashPageOpaqueData));
}
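/*
 * Worked example (hypothetical numbers, added for exposition) of the
 * fill-factor computation in _hash_metapinit() above: with the default
 * BLCKSZ of 8192, the target payload per bucket page is
 *
 *		8192 * 3 / 4 = 6144 bytes.
 *
 * If item_width came out to, say, 24 bytes, that would give
 *
 *		ffactor = 6144 / 24 = 256 tuples per bucket.
 *
 * The clamp to a minimum of 10 only bites for very wide entries
 * (item_width of 615 bytes or more on an 8K page, since 6144 / 615 < 10).
 */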
/*
 * Attempt to expand the hash table by creating one new bucket.
 *
 * This will silently do nothing if it cannot get the needed locks.
 *