⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pager.c

📁 在VC6环境下开发
💻 C
📖 第 1 页 / 共 5 页
字号:
/*
** This is the implementation of the page cache subsystem or "pager".
** 
** The pager is used to access a database disk file.  It implements
** atomic commit and rollback through the use of a journal file that
** is separate from the database file.  The pager also implements file
** locking to prevent two processes from writing the same database
** file simultaneously, or one process from reading the database while
** another is writing.
*/
#include "eDbInit.h"
/*
** Macros for troubleshooting.  Normally turned off
*/
#if 0
static Pager *mainPager = 0;
#define SET_PAGER(X)  if( mainPager==0 ) mainPager = (X)
#define CLR_PAGER(X)  if( mainPager==(X) ) mainPager = 0
#define TRACE1(X)     if( pPager==mainPager ) fprintf(stderr,X)
#define TRACE2(X,Y)   if( pPager==mainPager ) fprintf(stderr,X,Y)
#define TRACE3(X,Y,Z) if( pPager==mainPager ) fprintf(stderr,X,Y,Z)
#else
#define SET_PAGER(X)
#define CLR_PAGER(X)
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#endif


/*
** The page cache as a whole is always in one of the following
** states:
**
**   eDb_UNLOCK       The page cache is not currently reading or 
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   eDb_READLOCK     The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   eDb_WRITELOCK    The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
** The page cache comes up in eDb_UNLOCK.  The first time a
** eDb_page_get() occurs, the state transitions to eDb_READLOCK.
** After all pages have been released using eDb_page_unref(),
** the state transitions back to eDb_UNLOCK.  The first time
** that eDb_page_write() is called, the state transitions to
** eDb_WRITELOCK.  (Note that eDb_page_write() can only be
** called on an outstanding page which means that the pager must
** be in eDb_READLOCK before it transitions to eDb_WRITELOCK.)
** The eDb_page_rollback() and eDb_page_commit() functions 
** transition the state from eDb_WRITELOCK back to eDb_READLOCK.
*/
#define eDb_UNLOCK      0
#define eDb_READLOCK    1
#define eDb_WRITELOCK   2


/*
** Each in-memory image of a page begins with the following header.
** This header is only visible to this pager module.  The client
** code that calls pager sees only the data that follows the header.
**
** Client code should call eDbpager_write() on a page prior to making
** any modifications to that page.  The first time eDbpager_write()
** is called, the original page contents are written into the rollback
** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
** the journal page has made it onto the disk surface, PgHdr.needSync
** is cleared.  The modified page cannot be written back into the original
** database file until the journal pages has been synced to disk and the
** PgHdr.needSync has been cleared.
**
** The PgHdr.dirty flag is set when eDbpager_write() is called and
** is cleared again when the page content is written back to the original
** database file.
*/
typedef struct PgHdr PgHdr;
struct PgHdr {
  Pager *pPager;                 /* The pager to which this page belongs */
  Pgno pgno;                     /* The page number for this page */
  PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
  int nRef;                      /* Number of users of this page */
  PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
  PgHdr *pNextAll, *pPrevAll;    /* A list of all pages */
  PgHdr *pNextCkpt, *pPrevCkpt;  /* List of pages in the checkpoint journal */
  u8 inJournal;                  /* TRUE if has been written to journal */
  u8 inCkpt;                     /* TRUE if written to the checkpoint journal */
  u8 dirty;                      /* TRUE if we need to write back changes */
  u8 needSync;                   /* Sync journal before writing this page */
  u8 alwaysRollback;             /* Disable dont_rollback() for this page */
  PgHdr *pDirty;                 /* Dirty pages sorted by PgHdr.pgno */
  /* eDb_PAGE_SIZE bytes of page data follow this header */
  /* Pager.nExtra bytes of local data follow the page data */
};


/*
** A macro used for invoking the codec if there is one
*/
#ifdef eDb_HAS_CODEC
# define CODEC(P,D,N,X) if( P->xCodec ){ P->xCodec(P->pCodecArg,D,N,X); }
#else
# define CODEC(P,D,N,X)
#endif

/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(P) ((void*)&((char*)(&(P)[1]))[eDb_PAGE_SIZE])

/*
** How big to make the hash table used for locating in-memory pages
** by page number.
*/
#define N_PG_HASH MAX_PAGES

/*
** Hash a page number
*/
#define pager_hash(PN)  ((PN)&(N_PG_HASH-1))

/*
** A open page cache is an instance of the following structure.
*/
struct Pager {
  char *zFilename;            /* Name of the database file */
  char *zJournal;             /* Name of the journal file */
  char *zDirectory;           /* Directory hold database and journal files */
  OsFile fd, jfd;             /* File descriptors for database and journal */
  OsFile cpfd;                /* File descriptor for the checkpoint journal */
  int dbSize;                 /* Number of pages in the file */
  int origDbSize;             /* dbSize before the current change */
  int ckptSize;               /* Size of database (in pages) at ckpt_begin() */
  off_t ckptJSize;            /* Size of journal at ckpt_begin() */
  int nRec;                   /* Number of pages written to the journal */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  int ckptNRec;               /* Number of records in the checkpoint journal */
  int nExtra;                 /* Add this many bytes to each in-memory page */
  void (*xDestructor)(void*); /* Call this routine when freeing pages */
  int nPage;                  /* Total number of in-memory pages */
  int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
  int mxPage;                 /* Maximum number of pages to hold in cache */
  int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
  void (*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void *pCodecArg;            /* First argument to xCodec() */
  u8 journalOpen;             /* True if journal file descriptors is valid */
  u8 journalStarted;          /* True if header of journal is synced */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 ckptOpen;                /* True if the checkpoint journal is open */
  u8 ckptInUse;               /* True we are in a checkpoint */
  u8 ckptAutoopen;            /* Open ckpt journal when main journal is opened*/
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 state;                   /* eDb_UNLOCK, _READLOCK or _WRITELOCK */
  u8 errMask;                 /* One of several kinds of errors */
  u8 tempFile;                /* zFilename is a temporary file */
  u8 readOnly;                /* True for a read-only database */
  u8 needSync;                /* True if an fsync() is needed on the journal */
  u8 dirtyFile;               /* True if database file has changed in any way */
  u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
  u8 *aInJournal;             /* One bit for each page in the database file */
  u8 *aInCkpt;                /* One bit for each page in the database */
  PgHdr *pFirst, *pLast;      /* List of free pages */
  PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
  PgHdr *pAll;                /* List of all pages */
  PgHdr *pCkpt;               /* List of pages in the checkpoint journal */
  PgHdr *aHash[N_PG_HASH];    /* Hash table to map page number of PgHdr */
};

/*
** These are bits that can be set in Pager.errMask.
*/
#define PAGER_ERR_FULL     0x01  /* a write() failed */
#define PAGER_ERR_MEM      0x02  /* malloc() failed */
#define PAGER_ERR_LOCK     0x04  /* error in the locking protocol */
#define PAGER_ERR_CORRUPT  0x08  /* database or journal corruption */
#define PAGER_ERR_DISK     0x10  /* general disk I/O error - bad hard drive? */

/*
** The journal file contains page records in the following
** format.
**
** Actually, this structure is the complete page record for pager
** formats less than 3.  Beginning with format 3, this record is surrounded
** by two checksums.
*/
typedef struct PageRecord PageRecord;
struct PageRecord {
  Pgno pgno;                      /* The page number */
  char aData[eDb_PAGE_SIZE];   /* Original data for page pgno */
};

/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** There are three journal formats (so far). The 1st journal format writes
** 32-bit integers in the byte-order of the host machine.  New
** formats writes integers as big-endian.  All new journals use the
** new format, but we have to be able to read an older journal in order
** to rollback journals created by older versions of the library.
**
** The 3rd journal format (added for 2.8.0) adds additional sanity
** checking information to the journal.  If the power fails while the
** journal is being written, semi-random garbage data might appear in
** the journal file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the 3rd journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the eDb_PAGE_SIZE bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to random value which
** is different for every journal, we minimize that risk.
*/
static const unsigned char aJournalMagic1[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd4,
};
static const unsigned char aJournalMagic2[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd5,
};
static const unsigned char aJournalMagic3[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd6,
};
#define JOURNAL_FORMAT_1 1
#define JOURNAL_FORMAT_2 2
#define JOURNAL_FORMAT_3 3

/*
** The following integer determines what format to use when creating
** new primary journal files.  By default we always use format 3.
** When testing, we can set this value to older journal formats in order to
** make sure that newer versions of the library are able to rollback older
** journal files.
**
** Note that checkpoint journals always use format 2 and omit the header.
*/
#ifdef eDb_TEST
int journal_format = 3;
#else
# define journal_format 3
#endif

/*
** The size of the header and of each page in the journal varies according
** to which journal format is being used.  The following macros figure out
** the sizes based on format numbers.
*/
#define JOURNAL_HDR_SZ(X) \
   (sizeof(aJournalMagic1) + sizeof(Pgno) + ((X)>=3)*2*sizeof(u32))
#define JOURNAL_PG_SZ(X) \
   (eDb_PAGE_SIZE + sizeof(Pgno) + ((X)>=3)*sizeof(u32))

/*
** Enable reference count tracking here:
*/
#ifdef eDb_TEST
  int pager_refinfo_enable = 0;
  static void pager_refinfo(PgHdr *p){
    static int cnt = 0;
    if( !pager_refinfo_enable ) return;
    printf(
       "REFCNT: %4d addr=0x%08x nRef=%d\n",
       p->pgno, (int)PGHDR_TO_DATA(p), p->nRef
    );
    cnt++;   /* Something to set a breakpoint on */
  }
# define REFINFO(X)  pager_refinfo(X)
#else
# define REFINFO(X)
#endif

/*
** Read a 32-bit integer from the given file descriptor.  Store the integer
** that is read in *pRes.  Return eDb_OK if everything worked, or an
** error code is something goes wrong.
**
** If the journal format is 2 or 3, read a big-endian integer.  If the
** journal format is 1, read an integer in the native byte-order of the
** host machine.
*/
static int read32bits(int format, OsFile *fd, u32 *pRes){
  u32 res;
  int rc;
  rc = eDbOsRead(fd, &res, sizeof(res));
  if( rc==eDb_OK && format>JOURNAL_FORMAT_1 ){
    unsigned char ac[4];
    memcpy(ac, &res, 4);
    res = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
  }
  *pRes = res;
  return rc;
}

/*
** Write a 32-bit integer into the given file descriptor.  Return eDb_OK
** on success or an error code is something goes wrong.
**
** If the journal format is 2 or 3, write the integer as 4 big-endian
** bytes.  If the journal format is 1, write the integer in the native
** byte order.  In normal operation, only formats 2 and 3 are used.
** Journal format 1 is only used for testing.
*/
static int write32bits(OsFile *fd, u32 val){
  unsigned char ac[4];
  if( journal_format<=1 ){
    return eDbOsWrite(fd, &val, 4);
  }
  ac[0] = (val>>24) & 0xff;
  ac[1] = (val>>16) & 0xff;
  ac[2] = (val>>8) & 0xff;
  ac[3] = val & 0xff;
  return eDbOsWrite(fd, ac, 4);
}

/*
** Write a 32-bit integer into a page header right before the
** page data.  This will overwrite the PgHdr.pDirty pointer.
**
** The integer is big-endian for formats 2 and 3 and native byte order
** for journal format 1.
*/
static void store32bits(u32 val, PgHdr *p, int offset){
  unsigned char *ac;
  ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
  if( journal_format<=1 ){
    memcpy(ac, &val, 4);
  }else{
    ac[0] = (val>>24) & 0xff;
    ac[1] = (val>>16) & 0xff;
    ac[2] = (val>>8) & 0xff;
    ac[3] = val & 0xff;
  }
}


/*
** Convert the bits in the pPager->errMask into an approprate
** return code.
*/
static int pager_errcode(Pager *pPager){
	int rc = eDb_OK;
	if( pPager->errMask & PAGER_ERR_LOCK )    rc = eDb_PROTOCOL;
	if( pPager->errMask & PAGER_ERR_DISK )    rc = eDb_IOERR;
	if( pPager->errMask & PAGER_ERR_FULL )    rc = eDb_FULL;
	if( pPager->errMask & PAGER_ERR_MEM )     rc = eDb_NOMEM;
	if( pPager->errMask & PAGER_ERR_CORRUPT ) rc = eDb_CORRUPT;
	return rc;
}

/*
** Add or remove a page from the list of all pages that are in the
** checkpoint journal.
**
** The Pager keeps a separate list of pages that are currently in
** the checkpoint journal.  This helps the eDbpager_ckpt_commit()
** routine run MUCH faster for the common case where there are many
** pages in memory but only a few are in the checkpoint journal.
*/
static void page_add_to_ckpt_list(PgHdr *pPg){
	Pager *pPager = pPg->pPager;
	if( pPg->inCkpt ) return;
	assert( pPg->pPrevCkpt==0 && pPg->pNextCkpt==0 );
	pPg->pPrevCkpt = 0;
	if( pPager->pCkpt ){
		pPager->pCkpt->pPrevCkpt = pPg;
	}
	pPg->pNextCkpt = pPager->pCkpt;
	pPager->pCkpt = pPg;
	pPg->inCkpt = 1;
}
static void page_remove_from_ckpt_list(PgHdr *pPg){
	if( !pPg->inCkpt ) return;
	if( pPg->pPrevCkpt ){
		assert( pPg->pPrevCkpt->pNextCkpt==pPg );

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -