📄 multixact.c
字号:
/*------------------------------------------------------------------------- * * multixact.c * PostgreSQL multi-transaction-log manager * * The pg_multixact manager is a pg_clog-like manager that stores an array * of TransactionIds for each MultiXactId. It is a fundamental part of the * shared-row-lock implementation. A share-locked tuple stores a * MultiXactId in its Xmax, and a transaction that needs to wait for the * tuple to be unlocked can sleep on the potentially-several TransactionIds * that compose the MultiXactId. * * We use two SLRU areas, one for storing the offsets at which the data * starts for each MultiXactId in the other one. This trick allows us to * store variable length arrays of TransactionIds. (We could alternatively * use one area containing counts and TransactionIds, with valid MultiXactId * values pointing at slots containing counts; but that way seems less robust * since it would get completely confused if someone inquired about a bogus * MultiXactId that pointed to an intermediate slot containing an XID.) * * XLOG interactions: this module generates an XLOG record whenever a new * OFFSETs or MEMBERs page is initialized to zeroes, as well as an XLOG record * whenever a new MultiXactId is defined. This allows us to completely * rebuild the data entered since the last checkpoint during XLOG replay. * Because this is possible, we need not follow the normal rule of * "write WAL before data"; the only correctness guarantee needed is that * we flush and sync all dirty OFFSETs and MEMBERs pages to disk before a * checkpoint is considered complete. If a page does make it to disk ahead * of corresponding WAL records, it will be forcibly zeroed before use anyway. * Therefore, we don't need to mark our pages with LSN information; we have * enough synchronization already. * * Like clog.c, and unlike subtrans.c, we have to preserve state across * crashes and ensure that MXID and offset numbering increases monotonically * across a crash. 
We do this in the same way as it's done for transaction * IDs: the WAL record is guaranteed to contain evidence of every MXID we * could need to worry about, and we just make sure that at the end of * replay, the next-MXID and next-offset counters are at least as large as * anything we saw during replay. * * * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.27 2008/01/01 19:45:46 momjian Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include "access/multixact.h"#include "access/slru.h"#include "access/transam.h"#include "access/xact.h"#include "miscadmin.h"#include "storage/backendid.h"#include "storage/lmgr.h"#include "utils/memutils.h"#include "storage/procarray.h"/* * Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is * used everywhere else in Postgres. * * Note: because both MultiXactOffsets and TransactionIds are 32 bits and * wrap around at 0xFFFFFFFF, MultiXact page numbering also wraps around at * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE, and segment numbering at * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need take no * explicit notice of that fact in this module, except when comparing segment * and page numbers in TruncateMultiXact * (see MultiXact{Offset,Member}PagePrecedes). 
*//* We need four bytes per offset and also four bytes per member */#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))#define MULTIXACT_MEMBERS_PER_PAGE (BLCKSZ / sizeof(TransactionId))#define MultiXactIdToOffsetPage(xid) \ ((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)#define MultiXactIdToOffsetEntry(xid) \ ((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)#define MXOffsetToMemberPage(xid) \ ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)#define MXOffsetToMemberEntry(xid) \ ((xid) % (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)/* * Links to shared-memory data structures for MultiXact control */static SlruCtlData MultiXactOffsetCtlData;static SlruCtlData MultiXactMemberCtlData;#define MultiXactOffsetCtl (&MultiXactOffsetCtlData)#define MultiXactMemberCtl (&MultiXactMemberCtlData)/* * MultiXact state shared across all backends. All this state is protected * by MultiXactGenLock. (We also use MultiXactOffsetControlLock and * MultiXactMemberControlLock to guard accesses to the two sets of SLRU * buffers. For concurrency's sake, we avoid holding more than one of these * locks at a time.) */typedef struct MultiXactStateData{ /* next-to-be-assigned MultiXactId */ MultiXactId nextMXact; /* next-to-be-assigned offset */ MultiXactOffset nextOffset; /* the Offset SLRU area was last truncated at this MultiXactId */ MultiXactId lastTruncationPoint; /* * Per-backend data starts here. We have two arrays stored in the area * immediately following the MultiXactStateData struct. Each is indexed by * BackendId. (Note: valid BackendIds run from 1 to MaxBackends; element * zero of each array is never used.) * * OldestMemberMXactId[k] is the oldest MultiXactId each backend's current * transaction(s) could possibly be a member of, or InvalidMultiXactId * when the backend has no live transaction that could possibly be a * member of a MultiXact. 
Each backend sets its entry to the current * nextMXact counter just before first acquiring a shared lock in a given * transaction, and clears it at transaction end. (This works because only * during or after acquiring a shared lock could an XID possibly become a * member of a MultiXact, and that MultiXact would have to be created * during or after the lock acquisition.) * * OldestVisibleMXactId[k] is the oldest MultiXactId each backend's * current transaction(s) think is potentially live, or InvalidMultiXactId * when not in a transaction or not in a transaction that's paid any * attention to MultiXacts yet. This is computed when first needed in a * given transaction, and cleared at transaction end. We can compute it * as the minimum of the valid OldestMemberMXactId[] entries at the time * we compute it (using nextMXact if none are valid). Each backend is * required not to attempt to access any SLRU data for MultiXactIds older * than its own OldestVisibleMXactId[] setting; this is necessary because * the checkpointer could truncate away such data at any instant. * * The checkpointer can compute the safe truncation point as the oldest * valid value among all the OldestMemberMXactId[] and * OldestVisibleMXactId[] entries, or nextMXact if none are valid. * Clearly, it is not possible for any later-computed OldestVisibleMXactId * value to be older than this, and so there is no risk of truncating data * that is still needed. */ MultiXactId perBackendXactIds[1]; /* VARIABLE LENGTH ARRAY */} MultiXactStateData;/* Pointers to the state data in shared memory */static MultiXactStateData *MultiXactState;static MultiXactId *OldestMemberMXactId;static MultiXactId *OldestVisibleMXactId;/* * Definitions for the backend-local MultiXactId cache. * * We use this cache to store known MultiXacts, so we don't need to go to * SLRU areas everytime. 
* * The cache lasts for the duration of a single transaction, the rationale * for this being that most entries will contain our own TransactionId and * so they will be uninteresting by the time our next transaction starts. * (XXX not clear that this is correct --- other members of the MultiXact * could hang around longer than we did. However, it's not clear what a * better policy for flushing old cache entries would be.) * * We allocate the cache entries in a memory context that is deleted at * transaction end, so we don't need to do retail freeing of entries. */typedef struct mXactCacheEnt{ struct mXactCacheEnt *next; MultiXactId multi; int nxids; TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */} mXactCacheEnt;static mXactCacheEnt *MXactCache = NULL;static MemoryContext MXactContext = NULL;#ifdef MULTIXACT_DEBUG#define debug_elog2(a,b) elog(a,b)#define debug_elog3(a,b,c) elog(a,b,c)#define debug_elog4(a,b,c,d) elog(a,b,c,d)#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)#else#define debug_elog2(a,b)#define debug_elog3(a,b,c)#define debug_elog4(a,b,c,d)#define debug_elog5(a,b,c,d,e)#endif/* internal MultiXactId management */static void MultiXactIdSetOldestVisible(void);static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids);static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nxids, TransactionId *xids);static MultiXactId GetNewMultiXactId(int nxids, MultiXactOffset *offset);/* MultiXact cache management */static MultiXactId mXactCacheGetBySet(int nxids, TransactionId *xids);static int mXactCacheGetById(MultiXactId multi, TransactionId **xids);static void mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids);static int xidComparator(const void *arg1, const void *arg2);#ifdef MULTIXACT_DEBUGstatic char *mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids);#endif/* management of SLRU infrastructure */static int ZeroMultiXactOffsetPage(int pageno, bool writeXlog);static int ZeroMultiXactMemberPage(int 
pageno, bool writeXlog);static bool MultiXactOffsetPagePrecedes(int page1, int page2);static bool MultiXactMemberPagePrecedes(int page1, int page2);static bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2);static void ExtendMultiXactOffset(MultiXactId multi);static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);static void TruncateMultiXact(void);static void WriteMZeroPageXlogRec(int pageno, uint8 info);/* * MultiXactIdCreate * Construct a MultiXactId representing two TransactionIds. * * The two XIDs must be different. * * NB - we don't worry about our local MultiXactId cache here, because that * is handled by the lower-level routines. */MultiXactIdMultiXactIdCreate(TransactionId xid1, TransactionId xid2){ MultiXactId newMulti; TransactionId xids[2]; AssertArg(TransactionIdIsValid(xid1)); AssertArg(TransactionIdIsValid(xid2)); Assert(!TransactionIdEquals(xid1, xid2)); /* * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs * are still running. In typical usage, xid2 will be our own XID and the * caller just did a check on xid1, so it'd be wasted effort. */ xids[0] = xid1; xids[1] = xid2; newMulti = CreateMultiXactId(2, xids); debug_elog5(DEBUG2, "Create: returning %u for %u, %u", newMulti, xid1, xid2); return newMulti;}/* * MultiXactIdExpand * Add a TransactionId to a pre-existing MultiXactId. * * If the TransactionId is already a member of the passed MultiXactId, * just return it as-is. * * Note that we do NOT actually modify the membership of a pre-existing * MultiXactId; instead we create a new one. This is necessary to avoid * a race condition against MultiXactIdWait (see notes there). * * NB - we don't worry about our local MultiXactId cache here, because that * is handled by the lower-level routines. 
*/
MultiXactId
MultiXactIdExpand(MultiXactId multi, TransactionId xid)
{
	MultiXactId newMulti;
	TransactionId *members;
	TransactionId *newMembers;
	int			nmembers;
	int			i;
	int			j;

	AssertArg(MultiXactIdIsValid(multi));
	AssertArg(TransactionIdIsValid(xid));

	debug_elog4(DEBUG2, "Expand: received multi %u, xid %u",
				multi, xid);

	nmembers = GetMultiXactIdMembers(multi, &members);

	if (nmembers < 0)
	{
		/*
		 * The MultiXactId is obsolete.  This can only happen if all the
		 * MultiXactId members stop running between the caller checking and
		 * passing it to us.  It would be better to return that fact to the
		 * caller, but it would complicate the API and it's unlikely to happen
		 * too often, so just deal with it by creating a singleton MultiXact.
		 *
		 * NOTE(review): "members" is not freed on this path; presumably
		 * GetMultiXactIdMembers allocates nothing when it reports a negative
		 * count -- confirm against its definition.
		 */
		newMulti = CreateMultiXactId(1, &xid);

		debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
					multi, newMulti);
		return newMulti;
	}

	/*
	 * If the TransactionId is already a member of the MultiXactId, just
	 * return the existing MultiXactId.
	 */
	for (i = 0; i < nmembers; i++)
	{
		if (TransactionIdEquals(members[i], xid))
		{
			debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
						xid, multi);
			pfree(members);
			return multi;
		}
	}

	/*
	 * Determine which of the members of the MultiXactId are still running,
	 * and use them to create a new one.  (Removing dead members is just an
	 * optimization, but a useful one.	Note we have the same race condition
	 * here as above: j could be 0 at the end of the loop.)
	 *
	 * The new array is sized for the worst case: every old member survives,
	 * plus one slot for xid itself.
	 */
	newMembers = (TransactionId *)
		palloc(sizeof(TransactionId) * (nmembers + 1));

	for (i = 0, j = 0; i < nmembers; i++)
	{
		if (TransactionIdIsInProgress(members[i]))
			newMembers[j++] = members[i];
	}

	/* xid always goes last; j is now the final member count */
	newMembers[j++] = xid;
	newMulti = CreateMultiXactId(j, newMembers);

	pfree(members);
	pfree(newMembers);

	debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti);

	return newMulti;
}

/*
 * MultiXactIdIsRunning
 *		Returns whether a MultiXactId is "running".
 *
 * We return true if at least one member of the given MultiXactId is still
 * running.
Note that a "false" result is certain not to change,
 * because it is not legal to add members to an existing MultiXactId.
 *
 * A negative member count from GetMultiXactIdMembers() is reported as
 * "not running".
 */
bool
MultiXactIdIsRunning(MultiXactId multi)
{
	TransactionId *members;
	int			nmembers;
	int			i;

	debug_elog3(DEBUG2, "IsRunning %u?", multi);

	nmembers = GetMultiXactIdMembers(multi, &members);

	if (nmembers < 0)
	{
		debug_elog2(DEBUG2, "IsRunning: no members");
		return false;
	}

	/*
	 * Checking for myself is cheap compared to looking in shared memory, so
	 * first do the equivalent of MultiXactIdIsCurrent().  This is not needed
	 * for correctness, it's just a fast path.
	 */
	for (i = 0; i < nmembers; i++)
	{
		if (TransactionIdIsCurrentTransactionId(members[i]))
		{
			debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
			pfree(members);
			return true;
		}
	}

	/*
	 * This could be made faster by having another entry point in procarray.c,
	 * walking the PGPROC array only once for all the members.	But in most
	 * cases nmembers should be small enough that it doesn't much matter.
	 */
	for (i = 0; i < nmembers; i++)
	{
		if (TransactionIdIsInProgress(members[i]))
		{
			debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
						i, members[i]);
			pfree(members);
			return true;
		}
	}

	pfree(members);

	debug_elog3(DEBUG2, "IsRunning: %u is not running", multi);

	return false;
}

/*
 * MultiXactIdIsCurrent
 *		Returns true if the current transaction is a member of the MultiXactId.
 *
 * We return true if any live subtransaction of the current top-level
 * transaction is a member.  This is appropriate for the same reason that a
 * lock held by any such subtransaction is globally equivalent to a lock
 * held by the current subtransaction: no such lock could be released without
 * aborting this subtransaction, and hence releasing its locks.  So it's not
 * necessary to add the current subxact to the MultiXact separately.
*/boolMultiXactIdIsCurrent(MultiXactId multi){ bool result = false; TransactionId *members; int nmembers; int i; nmembers = GetMultiXactIdMembers(multi, &members); if (nmembers < 0) return false; for (i = 0; i < nmembers; i++) { if (TransactionIdIsCurrentTransactionId(members[i])) { result = true; break; } } pfree(members); return result;}/* * MultiXactIdSetOldestMember * Save the oldest MultiXactId this transaction could be a member of. * * We set the OldestMemberMXactId for a given transaction the first time * it's going to acquire a shared lock. We need to do this even if we end * up using a TransactionId instead of a MultiXactId, because there is a * chance that another transaction would add our XID to a MultiXactId. * * The value to set is the next-to-be-assigned MultiXactId, so this is meant * to be called just before acquiring a shared lock. */voidMultiXactIdSetOldestMember(void){ if (!MultiXactIdIsValid(OldestMemberMXactId[MyBackendId])) { MultiXactId nextMXact; /* * You might think we don't need to acquire a lock here, since * fetching and storing of TransactionIds is probably atomic, but in * fact we do: suppose we pick up nextMXact and then lose the CPU for * a long time. Someone else could advance nextMXact, and then * another someone else could compute an OldestVisibleMXactId that * would be after the value we are going to store when we get control * back. Which would be wrong. */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); /* * We have to beware of the possibility that nextMXact is in the * wrapped-around state. We don't fix the counter itself here, but we * must be sure to store a valid value in our array entry.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -