📄 nbtree.h
字号:
/*------------------------------------------------------------------------- * * nbtree.h * header file for postgres btree access method implementation. * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.87 2005/10/15 02:49:42 momjian Exp $ * *------------------------------------------------------------------------- */#ifndef NBTREE_H#define NBTREE_H#include "access/itup.h"#include "access/relscan.h"#include "access/sdir.h"#include "access/xlogutils.h"/* * BTPageOpaqueData -- At the end of every page, we store a pointer * to both siblings in the tree. This is used to do forward/backward * index scans. The next-page link is also critical for recovery when * a search has navigated to the wrong page due to concurrent page splits * or deletions; see src/backend/access/nbtree/README for more info. * * In addition, we store the page's btree level (counting upwards from * zero at a leaf page) as well as some flag bits indicating the page type * and status. If the page is deleted, we replace the level with the * next-transaction-ID value indicating when it is safe to reclaim the page. * * NOTE: the BTP_LEAF flag bit is redundant since level==0 could be tested * instead. */typedef struct BTPageOpaqueData{ BlockNumber btpo_prev; /* left sibling, or P_NONE if leftmost */ BlockNumber btpo_next; /* right sibling, or P_NONE if rightmost */ union { uint32 level; /* tree level --- zero for leaf pages */ TransactionId xact; /* next transaction ID, if deleted */ } btpo; uint16 btpo_flags; /* flag bits, see below */} BTPageOpaqueData;typedef BTPageOpaqueData *BTPageOpaque;/* Bits defined in btpo_flags */#define BTP_LEAF (1 << 0) /* leaf page, i.e. not internal page */#define BTP_ROOT (1 << 1) /* root page (has no parent) */#define BTP_DELETED (1 << 2) /* page has been deleted from tree */#define BTP_META (1 << 3) /* meta-page */#define BTP_HALF_DEAD (1 << 4) /* empty, but still in tree *//* * The Meta page is always the first page in the btree index. * Its primary purpose is to point to the location of the btree root page. * We also point to the "fast" root, which is the current effective root; * see README for discussion. */typedef struct BTMetaPageData{ uint32 btm_magic; /* should contain BTREE_MAGIC */ uint32 btm_version; /* should contain BTREE_VERSION */ BlockNumber btm_root; /* current root location */ uint32 btm_level; /* tree level of the root page */ BlockNumber btm_fastroot; /* current "fast" root location */ uint32 btm_fastlevel; /* tree level of the "fast" root page */} BTMetaPageData;#define BTPageGetMeta(p) \ ((BTMetaPageData *) PageGetContents(p))#define BTREE_METAPAGE 0 /* first page is meta */#define BTREE_MAGIC 0x053162 /* magic number of btree pages */#define BTREE_VERSION 2 /* current version number *//* * We actually need to be able to fit three items on every page, * so restrict any one item to 1/3 the per-page available space. */#define BTMaxItemSize(page) \ ((PageGetPageSize(page) - \ sizeof(PageHeaderData) - \ MAXALIGN(sizeof(BTPageOpaqueData))) / 3 - sizeof(ItemIdData))/* * BTItems are what we store in the btree. Each item is an index tuple, * including key and pointer values. (In some cases either the key or the * pointer may go unused, see backend/access/nbtree/README for details.) * * Old comments: * In addition, we must guarantee that all tuples in the index are unique, * in order to satisfy some assumptions in Lehman and Yao. The way that we * do this is by generating a new OID for every insertion that we do in the * tree. This adds eight bytes to the size of btree index tuples. Note * that we do not use the OID as part of a composite key; the OID only * serves as a unique identifier for a given index tuple (logical position * within a page). * * New comments: * actually, we must guarantee that all tuples in A LEVEL * are unique, not in ALL INDEX. So, we can use bti_itup->t_tid * as unique identifier for a given index tuple (logical position * within a level). - vadim 04/09/97 */typedef struct BTItemData{ IndexTupleData bti_itup;} BTItemData;typedef BTItemData *BTItem;#define CopyBTItem(btitem) ((BTItem) CopyIndexTuple((IndexTuple) (btitem)))/* * For XLOG: size without alignment. Sizeof works as long as * IndexTupleData has exactly 8 bytes. */#define SizeOfBTItem sizeof(BTItemData)/* Test whether items are the "same" per the above notes */#define BTTidSame(i1, i2) \ ( (i1).ip_blkid.bi_hi == (i2).ip_blkid.bi_hi && \ (i1).ip_blkid.bi_lo == (i2).ip_blkid.bi_lo && \ (i1).ip_posid == (i2).ip_posid )#define BTItemSame(i1, i2) \ BTTidSame((i1)->bti_itup.t_tid, (i2)->bti_itup.t_tid)/* * In general, the btree code tries to localize its knowledge about * page layout to a couple of routines. However, we need a special * value to indicate "no page number" in those places where we expect * page numbers. We can use zero for this because we never need to * make a pointer to the metadata page. */#define P_NONE 0/* * Macros to test whether a page is leftmost or rightmost on its tree level, * as well as other state info kept in the opaque data. */#define P_LEFTMOST(opaque) ((opaque)->btpo_prev == P_NONE)#define P_RIGHTMOST(opaque) ((opaque)->btpo_next == P_NONE)#define P_ISLEAF(opaque) ((opaque)->btpo_flags & BTP_LEAF)#define P_ISROOT(opaque) ((opaque)->btpo_flags & BTP_ROOT)#define P_ISDELETED(opaque) ((opaque)->btpo_flags & BTP_DELETED)#define P_IGNORE(opaque) ((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD))/* * Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost * page. The high key is not a data key, but gives info about what range of * keys is supposed to be on this page. The high key on a page is required * to be greater than or equal to any data key that appears on the page. * If we find ourselves trying to insert a key > high key, we know we need * to move right (this should only happen if the page was split since we * examined the parent page). * * Our insertion algorithm guarantees that we can use the initial least key * on our right sibling as the high key. Once a page is created, its high * key changes only if the page is split. * * On a non-rightmost page, the high key lives in item 1 and data items * start in item 2. Rightmost pages have no high key, so we store data * items beginning in item 1. */#define P_HIKEY ((OffsetNumber) 1)#define P_FIRSTKEY ((OffsetNumber) 2)#define P_FIRSTDATAKEY(opaque) (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)/* * XLOG records for btree operations * * XLOG allows to store some information in high 4 bits of log * record xl_info field */#define XLOG_BTREE_INSERT_LEAF 0x00 /* add btitem without split */#define XLOG_BTREE_INSERT_UPPER 0x10 /* same, on a non-leaf page */#define XLOG_BTREE_INSERT_META 0x20 /* same, plus update metapage */#define XLOG_BTREE_SPLIT_L 0x30 /* add btitem with split */#define XLOG_BTREE_SPLIT_R 0x40 /* as above, new item on right */#define XLOG_BTREE_SPLIT_L_ROOT 0x50 /* add btitem with split of root */#define XLOG_BTREE_SPLIT_R_ROOT 0x60 /* as above, new item on right */#define XLOG_BTREE_DELETE 0x70 /* delete leaf btitem */#define XLOG_BTREE_DELETE_PAGE 0x80 /* delete an entire page */#define XLOG_BTREE_DELETE_PAGE_META 0x90 /* same, plus update metapage */#define XLOG_BTREE_NEWROOT 0xA0 /* new root page */#define XLOG_BTREE_NEWMETA 0xB0 /* update metadata page *//* * All that we need to find changed index tuple */typedef struct xl_btreetid{ RelFileNode node; ItemPointerData tid; /* changed tuple id */} xl_btreetid;/* * All that we need to regenerate the meta-data page */typedef struct xl_btree_metadata{ BlockNumber root; uint32 level; BlockNumber fastroot; uint32 fastlevel;} xl_btree_metadata;/* * This is what we need to know about simple (without split) insert. * * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META. * Note that INSERT_META implies it's not a leaf page. */typedef struct xl_btree_insert{ xl_btreetid target; /* inserted tuple id */ /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */ /* BTITEM FOLLOWS AT END OF STRUCT */} xl_btree_insert;#define SizeOfBtreeInsert (offsetof(xl_btreetid, tid) + SizeOfIptrData)/* * On insert with split we save items of both left and right siblings * and restore content of both pages from log record. This way takes less * xlog space than the normal approach, because if we did it standardly, * XLogInsert would almost always think the right page is new and store its * whole page image. * * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record. * The _L and _R variants indicate whether the inserted btitem went into the * left or right split page (and thus, whether otherblk is the right or left * page of the split pair). The _ROOT variants indicate that we are splitting * the root page, and thus that a newroot record rather than an insert or
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -