pager.c
来自「SQLite 2.8.6 源代码,用来在Linux/Unix/Windows上编」· C语言 代码 · 共 2,008 行 · 第 1/5 页
C
2,008 行
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
**
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.87 2003/07/27 18:59:43 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

/*
** Macros for troubleshooting.  Normally turned off.
**
** When enabled, SET_PAGER() remembers the first pager it is handed in
** mainPager, CLR_PAGER() forgets it again, and the TRACEn() macros print
** to stderr only while the local variable named "pPager" at the point of
** use equals mainPager.
**
** Each enabled macro is wrapped in do{...}while(0) so that it behaves as
** a single statement and cannot capture a following "else".
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  do{ if( mainPager==0 ) mainPager = (X); }while(0)
#define CLR_PAGER(X)  do{ if( mainPager==(X) ) mainPager = 0; }while(0)
#define TRACE1(X)     do{ if( pPager==mainPager ) fprintf(stderr,X); }while(0)
#define TRACE2(X,Y)   do{ if( pPager==mainPager ) fprintf(stderr,X,Y); }while(0)
#define TRACE3(X,Y,Z) do{ if( pPager==mainPager ) fprintf(stderr,X,Y,Z); }while(0)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif

/*
** The page cache as a whole is always in one of the following
** states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   SQLITE_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive.
No other processes or** threads can be reading or writing while one** process is writing.**** The page cache comes up in SQLITE_UNLOCK. The first time a** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.** After all pages have been released using sqlite_page_unref(),** the state transitions back to SQLITE_UNLOCK. The first time** that sqlite_page_write() is called, the state transitions to** SQLITE_WRITELOCK. (Note that sqlite_page_write() can only be** called on an outstanding page which means that the pager must** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)** The sqlite_page_rollback() and sqlite_page_commit() functions ** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.*/#define SQLITE_UNLOCK 0#define SQLITE_READLOCK 1#define SQLITE_WRITELOCK 2/*** Each in-memory image of a page begins with the following header.** This header is only visible to this pager module. The client** code that calls pager sees only the data that follows the header.*/typedef struct PgHdr PgHdr;struct PgHdr { Pager *pPager; /* The pager to which this page belongs */ Pgno pgno; /* The page number for this page */ PgHdr *pNextHash, *pPrevHash; /* Hash collision chain for PgHdr.pgno */ int nRef; /* Number of users of this page */ PgHdr *pNextFree, *pPrevFree; /* Freelist of pages where nRef==0 */ PgHdr *pNextAll, *pPrevAll; /* A list of all pages */ PgHdr *pNextCkpt, *pPrevCkpt; /* List of pages in the checkpoint journal */ u8 inJournal; /* TRUE if has been written to journal */ u8 inCkpt; /* TRUE if written to the checkpoint journal */ u8 dirty; /* TRUE if we need to write back changes */ u8 needSync; /* Sync journal before writing this page */ u8 alwaysRollback; /* Disable dont_rollback() for this page */ PgHdr *pDirty; /* Dirty pages sorted by PgHdr.pgno */ /* SQLITE_PAGE_SIZE bytes of page data follow this header */ /* Pager.nExtra bytes of local data follow the page data */};/*** Convert a pointer to a PgHdr into a pointer to 
its data** and back again.*/#define PGHDR_TO_DATA(P) ((void*)(&(P)[1]))#define DATA_TO_PGHDR(D) (&((PgHdr*)(D))[-1])#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE])/*** How big to make the hash table used for locating in-memory pages** by page number.*/#define N_PG_HASH 2048/*** Hash a page number*/#define pager_hash(PN) ((PN)&(N_PG_HASH-1))/*** A open page cache is an instance of the following structure.*/struct Pager { char *zFilename; /* Name of the database file */ char *zJournal; /* Name of the journal file */ char *zDirectory; /* Directory hold database and journal files */ OsFile fd, jfd; /* File descriptors for database and journal */ OsFile cpfd; /* File descriptor for the checkpoint journal */ int dbSize; /* Number of pages in the file */ int origDbSize; /* dbSize before the current change */ int ckptSize; /* Size of database (in pages) at ckpt_begin() */ off_t ckptJSize; /* Size of journal at ckpt_begin() */ int nRec; /* Number of pages written to the journal */ u32 cksumInit; /* Quasi-random value added to every checksum */ int ckptNRec; /* Number of records in the checkpoint journal */ int nExtra; /* Add this many bytes to each in-memory page */ void (*xDestructor)(void*); /* Call this routine when freeing pages */ int nPage; /* Total number of in-memory pages */ int nRef; /* Number of in-memory pages with PgHdr.nRef>0 */ int mxPage; /* Maximum number of pages to hold in cache */ int nHit, nMiss, nOvfl; /* Cache hits, missing, and LRU overflows */ u8 journalOpen; /* True if journal file descriptors is valid */ u8 journalStarted; /* True if initial magic of journal is synced */ u8 useJournal; /* Do not use a rollback journal on this file */ u8 ckptOpen; /* True if the checkpoint journal is open */ u8 ckptInUse; /* True we are in a checkpoint */ u8 ckptAutoopen; /* Open ckpt journal when main journal is opened*/ u8 noSync; /* Do not sync the journal if true */ u8 fullSync; /* Do extra syncs of the journal for robustness */ u8 
state; /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */ u8 errMask; /* One of several kinds of errors */ u8 tempFile; /* zFilename is a temporary file */ u8 readOnly; /* True for a read-only database */ u8 needSync; /* True if an fsync() is needed on the journal */ u8 dirtyFile; /* True if database file has changed in any way */ u8 alwaysRollback; /* Disable dont_rollback() for all pages */ u8 *aInJournal; /* One bit for each page in the database file */ u8 *aInCkpt; /* One bit for each page in the database */ PgHdr *pFirst, *pLast; /* List of free pages */ PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */ PgHdr *pAll; /* List of all pages */ PgHdr *pCkpt; /* List of pages in the checkpoint journal */ PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number of PgHdr */};/*** These are bits that can be set in Pager.errMask.*/#define PAGER_ERR_FULL 0x01 /* a write() failed */#define PAGER_ERR_MEM 0x02 /* malloc() failed */#define PAGER_ERR_LOCK 0x04 /* error in the locking protocol */#define PAGER_ERR_CORRUPT 0x08 /* database or journal corruption */#define PAGER_ERR_DISK 0x10 /* general disk I/O error - bad hard drive? *//*** The journal file contains page records in the following** format.**** Actually, this structure is the complete page record for pager** formats less than 3. Beginning with format 3, this record is surrounded** by two checksums.*/typedef struct PageRecord PageRecord;struct PageRecord { Pgno pgno; /* The page number */ char aData[SQLITE_PAGE_SIZE]; /* Original data for page pgno */};/*** Journal files begin with the following magic string. The data** was obtained from /dev/random. It is used only as a sanity check.**** There are three journal formats (so far). The 1st journal format writes** 32-bit integers in the byte-order of the host machine. New** formats writes integers as big-endian. 
All new journals use the** new format, but we have to be able to read an older journal in order** to rollback journals created by older versions of the library.**** The 3rd journal format (added for 2.8.0) adds additional sanity** checking information to the journal. If the power fails while the** journal is being written, semi-random garbage data might appear in** the journal file after power is restored. If an attempt is then made** to roll the journal back, the database could be corrupted. The additional** sanity checking data is an attempt to discover the garbage in the** journal and ignore it.**** The sanity checking information for the 3rd journal format consists** of a 32-bit checksum on each page of data. The checksum covers both** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.** This cksum is initialized to a 32-bit random value that appears in the** journal file right after the header. The random initializer is important,** because garbage data that appears at the end of a journal is likely** data that was once in other files that have now been deleted. If the** garbage data came from an obsolete journal file, the checksums might** be correct. But by initializing the checksum to random value which** is different for every journal, we minimize that risk.*/static const unsigned char aJournalMagic1[] = { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,};static const unsigned char aJournalMagic2[] = { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,};static const unsigned char aJournalMagic3[] = { 0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6,};#define JOURNAL_FORMAT_1 1#define JOURNAL_FORMAT_2 2#define JOURNAL_FORMAT_3 3/*** The following integer determines what format to use when creating** new primary journal files. 
By default we always use format 3.** When testing, we can set this value to older journal formats in order to** make sure that newer versions of the library are able to rollback older** journal files.**** Note that checkpoint journals always use format 2 and omit the header.*/#ifdef SQLITE_TESTint journal_format = 3;#else# define journal_format 3#endif/*** The size of the header and of each page in the journal varies according** to which journal format is being used. The following macros figure out** the sizes based on format numbers.*/#define JOURNAL_HDR_SZ(X) \ (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))#define JOURNAL_PG_SZ(X) \ (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))/*** Enable reference count tracking here:*/#ifdef SQLITE_TEST int pager_refinfo_enable = 0; static void pager_refinfo(PgHdr *p){ static int cnt = 0; if( !pager_refinfo_enable ) return; printf( "REFCNT: %4d addr=0x%08x nRef=%d\n", p->pgno, (int)PGHDR_TO_DATA(p), p->nRef ); cnt++; /* Something to set a breakpoint on */ }# define REFINFO(X) pager_refinfo(X)#else# define REFINFO(X)#endif/*** Read a 32-bit integer from the given file descriptor*/static int read32bits(int format, OsFile *fd, u32 *pRes){ u32 res; int rc; rc = sqliteOsRead(fd, &res, sizeof(res)); if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){ unsigned char ac[4]; memcpy(ac, &res, 4); res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3]; } *pRes = res; return rc;}/*** Write a 32-bit integer into the given file descriptor. Writing** is always done using the new journal format.*/static int write32bits(OsFile *fd, u32 val){ unsigned char ac[4]; if( journal_format<=1 ){ return sqliteOsWrite(fd, &val, 4); } ac[0] = (val>>24) & 0xff; ac[1] = (val>>16) & 0xff; ac[2] = (val>>8) & 0xff; ac[3] = val & 0xff; return sqliteOsWrite(fd, ac, 4);}/*** Write a 32-bit integer into a page header right before the** page data. 
This will overwrite the PgHdr.pDirty pointer.*/static void store32bits(u32 val, PgHdr *p, int offset){ unsigned char *ac; ac = &((char*)PGHDR_TO_DATA(p))[offset]; if( journal_format<=1 ){ memcpy(ac, &val, 4); }else{ ac[0] = (val>>24) & 0xff; ac[1] = (val>>16) & 0xff; ac[2] = (val>>8) & 0xff; ac[3] = val & 0xff; }}/*** Convert the bits in the pPager->errMask into an approprate** return code.*/static int pager_errcode(Pager *pPager){ int rc = SQLITE_OK; if( pPager->errMask & PAGER_ERR_LOCK ) rc = SQLITE_PROTOCOL; if( pPager->errMask & PAGER_ERR_DISK ) rc = SQLITE_IOERR; if( pPager->errMask & PAGER_ERR_FULL ) rc = SQLITE_FULL; if( pPager->errMask & PAGER_ERR_MEM ) rc = SQLITE_NOMEM; if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT; return rc;}/*** Add or remove a page from the list of all pages that are in the** checkpoint journal.**** The Pager keeps a separate list of pages that are currently in** the checkpoint journal. This helps the sqlitepager_ckpt_commit()** routine run MUCH faster for the common case where there are many** pages in memory but only a few are in the checkpoint journal.*/static void page_add_to_ckpt_list(PgHdr *pPg){ Pager *pPager = pPg->pPager; if( pPg->inCkpt ) return; assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 ); pPg->pPrevCkpt = 0; if( pPager->pCkpt ){ pPager->pCkpt->pPrevCkpt = pPg; } pPg->pNextCkpt = pPager->pCkpt; pPager->pCkpt = pPg; pPg->inCkpt = 1;}static void page_remove_from_ckpt_list(PgHdr *pPg){ if( !pPg->inCkpt ) return; if( pPg->pPrevCkpt ){ assert( pPg->pPrevCkpt->pNextCkpt==pPg ); pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt; }else{ assert( pPg->pPager->pCkpt==pPg ); pPg->pPager->pCkpt = pPg->pNextCkpt; } if( pPg->pNextCkpt ){ assert( pPg->pNextCkpt->pPrevCkpt==pPg ); pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt; } pPg->pNextCkpt = 0; pPg->pPrevCkpt = 0; pPg->inCkpt = 0;}/*** Find a page in the hash table given its page number. 
Return** a pointer to the page or NULL if not found.*/static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){ PgHdr *p = pPager->aHash[pager_hash(pgno)]; while( p && p->pgno!=pgno ){ p = p->pNextHash; } return p;}/*** Unlock the database and clear the in-memory cache. This routine** sets the state of the pager back to what it was when it was first** opened. Any outstanding pages are invalidated and subsequent attempts** to access those pages will likely result in a coredump.*/static void pager_reset(Pager *pPager){
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?