pager.c

来自「SQLite 2.8.6 源代码,用来在Linux/Unix/Windows上编」· C语言 代码 · 共 2,008 行 · 第 1/5 页

C
2,008
字号
/*
** 2001 September 15
**
** The author disclaims copyright to this source code.  In place of
** a legal notice, here is a blessing:
**
**    May you do good and not evil.
**    May you find forgiveness for yourself and forgive others.
**    May you share freely, never taking more than you give.
**
*************************************************************************
** This is the implementation of the page cache subsystem or "pager".
**
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.87 2003/07/27 18:59:43 drh Exp $
*/
#include "os.h"         /* Must be first to enable large file support */
#include "sqliteInt.h"
#include "pager.h"
#include <assert.h>
#include <string.h>

/*
** Macros for troubleshooting.  Normally turned off.
**
** When enabled, the first pager passed to SET_PAGER() becomes the
** "main" pager and the TRACEn() macros print to stderr only for it.
** Each TRACEn() macro expects a variable named pPager to be in scope.
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  if( mainPager==0 ) mainPager = (X)
#define CLR_PAGER(X)  if( mainPager==(X) ) mainPager = 0
#define TRACE1(X)     if( pPager==mainPager ) fprintf(stderr,X)
#define TRACE2(X,Y)   if( pPager==mainPager ) fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z) if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif

/*
** The page cache as a whole is always in one of the following
** states:
**
**   SQLITE_UNLOCK       The page cache is not currently reading or
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   SQLITE_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.
There can be**                       multiple readers accessing the same database**                       file at the same time.****   SQLITE_WRITELOCK    The page cache is writing the database.**                       Access is exclusive.  No other processes or**                       threads can be reading or writing while one**                       process is writing.**** The page cache comes up in SQLITE_UNLOCK.  The first time a** sqlite_page_get() occurs, the state transitions to SQLITE_READLOCK.** After all pages have been released using sqlite_page_unref(),** the state transitions back to SQLITE_UNLOCK.  The first time** that sqlite_page_write() is called, the state transitions to** SQLITE_WRITELOCK.  (Note that sqlite_page_write() can only be** called on an outstanding page which means that the pager must** be in SQLITE_READLOCK before it transitions to SQLITE_WRITELOCK.)** The sqlite_page_rollback() and sqlite_page_commit() functions ** transition the state from SQLITE_WRITELOCK back to SQLITE_READLOCK.*/#define SQLITE_UNLOCK      0#define SQLITE_READLOCK    1#define SQLITE_WRITELOCK   2/*** Each in-memory image of a page begins with the following header.** This header is only visible to this pager module.  
The client** code that calls pager sees only the data that follows the header.*/typedef struct PgHdr PgHdr;struct PgHdr {  Pager *pPager;                 /* The pager to which this page belongs */  Pgno pgno;                     /* The page number for this page */  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */  int nRef;                      /* Number of users of this page */  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */  PgHdr *pNextAll, *pPrevAll;    /* A list of all pages */  PgHdr *pNextCkpt, *pPrevCkpt;  /* List of pages in the checkpoint journal */  u8 inJournal;                  /* TRUE if has been written to journal */  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */  u8 dirty;                      /* TRUE if we need to write back changes */  u8 needSync;                   /* Sync journal before writing this page */  u8 alwaysRollback;             /* Disable dont_rollback() for this page */  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */  /* SQLITE_PAGE_SIZE bytes of page data follow this header */  /* Pager.nExtra bytes of local data follow the page data */};/*** Convert a pointer to a PgHdr into a pointer to its data** and back again.*/#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[SQLITE_PAGE_SIZE])/*** How big to make the hash table used for locating in-memory pages** by page number.*/#define N_PG_HASH 2048/*** Hash a page number*/#define pager_hash(PN)  ((PN)&(N_PG_HASH-1))/*** A open page cache is an instance of the following structure.*/struct Pager {  char *zFilename;            /* Name of the database file */  char *zJournal;             /* Name of the journal file */  char *zDirectory;           /* Directory hold database and journal files */  OsFile fd, jfd;             /* File descriptors for database and journal */  OsFile cpfd;                /* 
File descriptor for the checkpoint journal */  int dbSize;                 /* Number of pages in the file */  int origDbSize;             /* dbSize before the current change */  int ckptSize;               /* Size of database (in pages) at ckpt_begin() */  off_t ckptJSize;            /* Size of journal at ckpt_begin() */  int nRec;                   /* Number of pages written to the journal */  u32 cksumInit;              /* Quasi-random value added to every checksum */  int ckptNRec;               /* Number of records in the checkpoint journal */  int nExtra;                 /* Add this many bytes to each in-memory page */  void (*xDestructor)(void*); /* Call this routine when freeing pages */  int nPage;                  /* Total number of in-memory pages */  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */  int mxPage;                 /* Maximum number of pages to hold in cache */  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */  u8 journalOpen;             /* True if journal file descriptors is valid */  u8 journalStarted;          /* True if initial magic of journal is synced */  u8 useJournal;              /* Do not use a rollback journal on this file */  u8 ckptOpen;                /* True if the checkpoint journal is open */  u8 ckptInUse;               /* True we are in a checkpoint */  u8 ckptAutoopen;            /* Open ckpt journal when main journal is opened*/  u8 noSync;                  /* Do not sync the journal if true */  u8 fullSync;                /* Do extra syncs of the journal for robustness */  u8 state;                   /* SQLITE_UNLOCK, _READLOCK or _WRITELOCK */  u8 errMask;                 /* One of several kinds of errors */  u8 tempFile;                /* zFilename is a temporary file */  u8 readOnly;                /* True for a read-only database */  u8 needSync;                /* True if an fsync() is needed on the journal */  u8 dirtyFile;               /* True if database file 
has changed in any way */  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */  u8 *aInJournal;             /* One bit for each page in the database file */  u8 *aInCkpt;                /* One bit for each page in the database */  PgHdr *pFirst, *pLast;      /* List of free pages */  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */  PgHdr *pAll;                /* List of all pages */  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */};/*** These are bits that can be set in Pager.errMask.*/#define PAGER_ERR_FULL     0x01  /* a write() failed */#define PAGER_ERR_MEM      0x02  /* malloc() failed */#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? *//*** The journal file contains page records in the following** format.**** Actually, this structure is the complete page record for pager** formats less than 3.  Beginning with format 3, this record is surrounded** by two checksums.*/typedef struct PageRecord PageRecord;struct PageRecord {  Pgno pgno;                     /* The page number */  char aData[SQLITE_PAGE_SIZE];  /* Original data for page pgno */};/*** Journal files begin with the following magic string.  The data** was obtained from /dev/random.  It is used only as a sanity check.**** There are three journal formats (so far). The 1st journal format writes** 32-bit integers in the byte-order of the host machine.  New** formats writes integers as big-endian.  All new journals use the** new format, but we have to be able to read an older journal in order** to rollback journals created by older versions of the library.**** The 3rd journal format (added for 2.8.0) adds additional sanity** checking information to the journal.  
If the power fails while the** journal is being written, semi-random garbage data might appear in** the journal file after power is restored.  If an attempt is then made** to roll the journal back, the database could be corrupted.  The additional** sanity checking data is an attempt to discover the garbage in the** journal and ignore it.**** The sanity checking information for the 3rd journal format consists** of a 32-bit checksum on each page of data.  The checksum covers both** the page number and the SQLITE_PAGE_SIZE bytes of data for the page.** This cksum is initialized to a 32-bit random value that appears in the** journal file right after the header.  The random initializer is important,** because garbage data that appears at the end of a journal is likely** data that was once in other files that have now been deleted.  If the** garbage data came from an obsolete journal file, the checksums might** be correct.  But by initializing the checksum to random value which** is different for every journal, we minimize that risk.*/static const unsigned char aJournalMagic1[] = {  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,};static const unsigned char aJournalMagic2[] = {  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,};static const unsigned char aJournalMagic3[] = {  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6,};#define JOURNAL_FORMAT_1 1#define JOURNAL_FORMAT_2 2#define JOURNAL_FORMAT_3 3/*** The following integer determines what format to use when creating** new primary journal files.  By default we always use format 3.** When testing, we can set this value to older journal formats in order to** make sure that newer versions of the library are able to rollback older** journal files.**** Note that checkpoint journals always use format 2 and omit the header.*/#ifdef SQLITE_TESTint journal_format = 3;#else# define journal_format 3#endif/*** The size of the header and of each page in the journal varies according** to which journal format is being used.  
The following macros figure out** the sizes based on format numbers.*/#define JOURNAL_HDR_SZ(X) \   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))#define JOURNAL_PG_SZ(X) \   (SQLITE_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))/*** Enable reference count tracking here:*/#ifdef SQLITE_TEST  int pager_refinfo_enable = 0;  static void pager_refinfo(PgHdr *p){    static int cnt = 0;    if( !pager_refinfo_enable ) return;    printf(       "REFCNT: %4d addr=0x%08x nRef=%d\n",       p->pgno, (int)PGHDR_TO_DATA(p), p->nRef    );    cnt++;   /* Something to set a breakpoint on */  }# define REFINFO(X)  pager_refinfo(X)#else# define REFINFO(X)#endif/*** Read a 32-bit integer from the given file descriptor*/static int read32bits(int format, OsFile *fd, u32 *pRes){  u32 res;  int rc;  rc = sqliteOsRead(fd, &res, sizeof(res));  if( rc==SQLITE_OK && format>JOURNAL_FORMAT_1 ){    unsigned char ac[4];    memcpy(ac, &res, 4);    res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];  }  *pRes = res;  return rc;}/*** Write a 32-bit integer into the given file descriptor.  Writing** is always done using the new journal format.*/static int write32bits(OsFile *fd, u32 val){  unsigned char ac[4];  if( journal_format<=1 ){    return sqliteOsWrite(fd, &val, 4);  }  ac[0] = (val>>24) & 0xff;  ac[1] = (val>>16) & 0xff;  ac[2] = (val>>8) & 0xff;  ac[3] = val & 0xff;  return sqliteOsWrite(fd, ac, 4);}/*** Write a 32-bit integer into a page header right before the** page data.  
This will overwrite the PgHdr.pDirty pointer.*/static void store32bits(u32 val, PgHdr *p, int offset){  unsigned char *ac;  ac = &((char*)PGHDR_TO_DATA(p))[offset];  if( journal_format<=1 ){    memcpy(ac, &val, 4);  }else{    ac[0] = (val>>24) & 0xff;    ac[1] = (val>>16) & 0xff;    ac[2] = (val>>8) & 0xff;    ac[3] = val & 0xff;  }}/*** Convert the bits in the pPager->errMask into an approprate** return code.*/static int pager_errcode(Pager *pPager){  int rc = SQLITE_OK;  if( pPager->errMask & PAGER_ERR_LOCK )    rc = SQLITE_PROTOCOL;  if( pPager->errMask & PAGER_ERR_DISK )    rc = SQLITE_IOERR;  if( pPager->errMask & PAGER_ERR_FULL )    rc = SQLITE_FULL;  if( pPager->errMask & PAGER_ERR_MEM )     rc = SQLITE_NOMEM;  if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = SQLITE_CORRUPT;  return rc;}/*** Add or remove a page from the list of all pages that are in the** checkpoint journal.**** The Pager keeps a separate list of pages that are currently in** the checkpoint journal.  This helps the sqlitepager_ckpt_commit()** routine run MUCH faster for the common case where there are many** pages in memory but only a few are in the checkpoint journal.*/static void page_add_to_ckpt_list(PgHdr *pPg){  Pager *pPager = pPg->pPager;  if( pPg->inCkpt ) return;  assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );  pPg->pPrevCkpt = 0;  if( pPager->pCkpt ){    pPager->pCkpt->pPrevCkpt = pPg;  }  pPg->pNextCkpt = pPager->pCkpt;  pPager->pCkpt = pPg;  pPg->inCkpt = 1;}static void page_remove_from_ckpt_list(PgHdr *pPg){  if( !pPg->inCkpt ) return;  if( pPg->pPrevCkpt ){    assert( pPg->pPrevCkpt->pNextCkpt==pPg );    pPg->pPrevCkpt->pNextCkpt = pPg->pNextCkpt;  }else{    assert( pPg->pPager->pCkpt==pPg );    pPg->pPager->pCkpt = pPg->pNextCkpt;  }  if( pPg->pNextCkpt ){    assert( pPg->pNextCkpt->pPrevCkpt==pPg );    pPg->pNextCkpt->pPrevCkpt = pPg->pPrevCkpt;  }  pPg->pNextCkpt = 0;  pPg->pPrevCkpt = 0;  pPg->inCkpt = 0;}/*** Find a page in the hash table given its page number.  
Return** a pointer to the page or NULL if not found.*/static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){  PgHdr *p = pPager->aHash[pager_hash(pgno)];  while( p && p->pgno!=pgno ){    p = p->pNextHash;  }  return p;}/*** Unlock the database and clear the in-memory cache.  This routine** sets the state of the pager back to what it was when it was first** opened.  Any outstanding pages are invalidated and subsequent attempts** to access those pages will likely result in a coredump.*/static void pager_reset(Pager *pPager){

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?