📄 pager.c
字号:
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
** 
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.485 2008/08/28 02:26:07 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"

/*
** Macros for troubleshooting.  Normally turned off.
**
** While the conditional below reads "#if 0" every PAGERTRACEn() macro
** expands to nothing.  Changing it to a true condition maps
** sqlite3DebugPrintf to printf and makes each PAGERTRACEn() forward its
** n arguments to it.
*/
#if 0
#define sqlite3DebugPrintf printf
#define PAGERTRACE1(X) sqlite3DebugPrintf(X)
#define PAGERTRACE2(X,Y) sqlite3DebugPrintf(X,Y)
#define PAGERTRACE3(X,Y,Z) sqlite3DebugPrintf(X,Y,Z)
#define PAGERTRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
#define PAGERTRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
#else
#define PAGERTRACE1(X)
#define PAGERTRACE2(X,Y)
#define PAGERTRACE3(X,Y,Z)
#define PAGERTRACE4(X,Y,Z,W)
#define PAGERTRACE5(X,Y,Z,W,V)
#endif

/*
** The following two macros are used within the PAGERTRACEX() macros above
** to print out file-descriptors. 
**
** PAGERID() takes a pointer to a Pager struct as its argument. The
** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file
** struct as its argument.
**
** NOTE(review): both macros simply cast a pointer value to int, and
** neither parenthesizes its argument, so pass only a plain identifier.
** Presumably the result is used purely as an identifier in trace output;
** it may be truncated where pointers are wider than int.
*/
#define PAGERID(p) ((int)(p->fd))
#define FILEHANDLEID(fd) ((int)fd)

/*
** The page cache as a whole is always in one of the following
** states:
**
**   PAGER_UNLOCK        The page cache is not currently reading or 
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   PAGER_SHARED        The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   PAGER_RESERVED      This process has reserved the database for writing
**                       but has not yet made any changes.  Only one process
**                       at a time can reserve the database.  The original
**                       database file has not been modified so other
**                       processes may still be reading the on-disk
**                       database file.
**
**   PAGER_EXCLUSIVE     The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
**   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
**                       after all dirty pages have been written to the
**                       database file and the file has been synced to
**                       disk. All that remains to do is to remove or
**                       truncate the journal file and the transaction 
**                       will be committed.
**
** The page cache comes up in PAGER_UNLOCK.  The first time a
** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to PAGER_UNLOCK.  The first time
** that sqlite3PagerWrite() is called, the state transitions to
** PAGER_RESERVED.  (Note that sqlite3PagerWrite() can only be
** called on an outstanding page which means that the pager must
** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** PAGER_RESERVED means that there is an open rollback journal.
** The transition to PAGER_EXCLUSIVE occurs before any changes
** are made to the database file, though writes to the rollback
** journal occur with just PAGER_RESERVED.  After an sqlite3PagerRollback()
** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
*/
#define PAGER_UNLOCK 0
#define PAGER_SHARED 1 /* same as SHARED_LOCK */
#define PAGER_RESERVED 2 /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE 4 /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED 5

/*
** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
** then failed attempts to get a reserved lock will invoke the busy callback.
** This is off by default.  To see why, consider the following scenario:
** 
** Suppose thread A already has a shared lock and wants a reserved lock.
** Thread B already has a reserved lock and wants an exclusive lock.  If
** both threads are using their busy callbacks, it might be a long time
** before one of the threads gives up and allows the other to proceed.
** But if the thread trying to get the reserved lock gives up quickly
** (if it never invokes its busy callback) then the contention will be
** resolved quickly.
*/
#ifndef SQLITE_BUSY_RESERVED_LOCK
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif

/*
** This macro rounds values up so that if the value is an address it
** is guaranteed to be an address that is aligned to an 8-byte boundary.
** In other words, X is rounded up to the next multiple of 8.
*/
#define FORCE_ALIGNMENT(X) (((X)+7)&~7)

/*
** A macro used for invoking the codec if there is one.
**
** With SQLITE_HAS_CODEC defined, CODEC1() calls P->xCodec (when it is
** non-NULL) and discards the result, while CODEC2() evaluates to the
** value returned by P->xCodec, or to D itself when no codec is installed,
** cast to char*.  Without SQLITE_HAS_CODEC, CODEC1() expands to nothing
** and CODEC2() is just D cast to char*.
**
** NOTE(review): CODEC1() expands to a bare if-statement and neither macro
** parenthesizes P or D; take care when using CODEC1() as the body of an
** unbraced if/else, and pass only simple expressions.
*/
#ifdef SQLITE_HAS_CODEC
# define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); }
# define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D))
#else
# define CODEC1(P,D,N,X) /* NO-OP */
# define CODEC2(P,D,N,X) ((char*)D)
#endif

/*
** An open page cache is an instance of the following structure.
**
** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or
** SQLITE_FULL.  Once one of the errors other than SQLITE_FULL occurs, it
** persists and is returned as the result of every major pager API call.
** The SQLITE_FULL return code is slightly different. It persists only
** until the next successful rollback is performed on the pager cache. Also,
** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
** APIs, they may still be used successfully.
*/
struct Pager {
  sqlite3_vfs *pVfs;          /* OS functions to use for IO */
  u8 journalOpen;             /* True if journal file descriptor is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 noReadlock;              /* Do not bother to obtain readlocks */
  u8 stmtOpen;                /* True if the statement subjournal is open */
  u8 stmtInUse;               /* True if we are in a statement subtransaction */
  u8 stmtAutoopen;            /* Open stmt journal when main journal is opened */
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 sync_flags;              /* One of SYNC_NORMAL or SYNC_FULL */
  u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyCache;              /* True if cached pages have changed */
  u8 alwaysRollback;          /* Disable DontRollback() for all pages */
  u8 memDb;                   /* True to inhibit all file I/O */
  u8 setMaster;               /* True if a m-j name has been written to jrnl */
  u8 doNotSync;               /* Boolean. While true, do not spill the cache */
  u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
  u8 journalMode;             /* One of the PAGER_JOURNALMODE_* values */
  u8 dbModified;              /* True if there are any changes to the Db */
  u8 changeCountDone;         /* Set after incrementing the change-counter */
  u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
  int errCode;                /* One of several kinds of errors */
  int dbSize;                 /* Number of pages in the file */
  int origDbSize;             /* dbSize before the current change */
  int stmtSize;               /* Size of database (in pages) at stmt_begin() */
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int stmtNRec;               /* Number of records in stmt subjournal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  int pageSize;               /* Number of bytes in a page */
  int nPage;                  /* Total number of in-memory pages */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  Pgno mxPgno;                /* Maximum allowed size of the database */
  Bitvec *pInJournal;         /* One bit for each page in the database file */
  Bitvec *pInStmt;            /* One bit for each page in the database */
  Bitvec *pAlwaysRollback;    /* One bit for each page marked always-rollback */
  char *zFilename;            /* Name of the database file */
  char *zJournal;             /* Name of the journal file */
  char *zDirectory;           /* Directory holding database and journal files */
  sqlite3_file *fd, *jfd;     /* File descriptors for database and journal */
  sqlite3_file *stfd;         /* File descriptor for the statement subjournal */
  BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
  i64 journalOff;             /* Current byte offset in the journal file */
  i64 journalHdr;             /* Byte offset to previous journal header */
  i64 stmtHdrOff;             /* First journal header written this statement */
  i64 stmtCksum;              /* cksumInit when statement was started */
  i64 stmtJSize;              /* Size of journal at stmt_begin() */
  int sectorSize;             /* Assumed sector size during rollback */
#ifdef SQLITE_TEST
  int nHit, nMiss;            /* Cache hits and misses */
  int nRead, nWrite;          /* Database pages read/written */
#endif
  void (*xDestructor)(DbPage*,int); /* Call this routine when freeing pages */
  void (*xReiniter)(DbPage*,int);   /* Call this routine when reloading pages */
#ifdef SQLITE_HAS_CODEC
  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
  char dbFileVers[16];        /* Changes whenever database file changes */
  i64 journalSizeLimit;       /* Size limit for persistent journal files */
  PCache *pPCache;            /* Pointer to page cache object */
};

/*
** The following global variables hold counters used for
** testing purposes only.  These variables do not exist in
** a non-testing build.  These variables are not thread-safe.
**
** PAGER_INCR(v) increments v in a testing build and expands to nothing
** otherwise.
*/
#ifdef SQLITE_TEST
int sqlite3_pager_readdb_count = 0; /* Number of full pages read from DB */
int sqlite3_pager_writedb_count = 0; /* Number of full pages written to DB */
int sqlite3_pager_writej_count = 0; /* Number of pages written to journal */
# define PAGER_INCR(v) v++
#else
# define PAGER_INCR(v)
#endif

/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** Since version 2.8.0, the journal format contains additional sanity
** checking information.  If the power fails while the journal is being
** written, semi-random garbage data might appear in the journal
** file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the new journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the pPager->pageSize bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to a random value which
** is different for every journal, we minimize that risk.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
};

/*
** The size of the header and of each page in the journal is determined
** by the following macros.
**
** JOURNAL_PG_SZ() is the page size plus 8 bytes.
** NOTE(review): the extra 8 bytes are presumably the 4-byte page number
** and the 4-byte checksum described above -- TODO confirm against the
** journal-writing code.
*/
#define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8)

/*
** The journal header size for this pager. In the future, this could be
** set to some value read from the disk controller. The important
** characteristic is that it is the same size as a disk sector.
*/
#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)

/*
** The macro MEMDB is true if we are dealing with an in-memory database.
** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
** the value of MEMDB will be a constant and the compiler will optimize
** out code that would never execute.
**
** Because MEMDB expands to pPager->memDb, it can only be used where a
** variable named pPager is in scope.
*/
#ifdef SQLITE_OMIT_MEMORYDB
# define MEMDB 0
#else
# define MEMDB pPager->memDb
#endif

/*
** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
** reserved for working around a windows/posix incompatibility). It is
** used in the journal to signify that the remainder of the journal file 
** is devoted to storing a master journal name - there are no more pages to
** roll back.
See comments for function writeMasterJournal() for details.*//* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */#define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)/*** The maximum legal page number is (2^31 - 1).*/#define PAGER_MAX_PGNO 2147483647/*** Return true if page *pPg has already been written to the statement** journal (or statement snapshot has been created, if *pPg is part** of an in-memory database).*/static int pageInStatement(PgHdr *pPg){ Pager *pPager = pPg->pPager; if( MEMDB ){ return pPg->apSave[1]!=0; }else{ return sqlite3BitvecTest(pPager->pInStmt, pPg->pgno); }}/*** Read a 32-bit integer from the given file descriptor. Store the integer** that is read in *pRes. Return SQLITE_OK if everything worked, or an** error code is something goes wrong.**** All values are stored on disk as big-endian.*/static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){ unsigned char ac[4]; int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset); if( rc==SQLITE_OK ){ *pRes = sqlite3Get4byte(ac); } return rc;}/*** Write a 32-bit integer into a string buffer in big-endian byte order.*/#define put32bits(A,B) sqlite3Put4byte((u8*)A,B)/*** Write a 32-bit integer into the given file descriptor. Return SQLITE_OK** on success or an error code is something goes wrong.*/static int write32bits(sqlite3_file *fd, i64 offset, u32 val){ char ac[4]; put32bits(ac, val); return sqlite3OsWrite(fd, ac, 4, offset);}/*** If file pFd is open, call sqlite3OsUnlock() on it.*/static int osUnlock(sqlite3_file *pFd, int eLock){ if( !pFd->pMethods ){ return SQLITE_OK; } return sqlite3OsUnlock(pFd, eLock);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -