📄 pager.c
字号:
rc = read32bits(pPager->jfd, pDbSize);
if( rc ) return rc;
/* Update the assumed sector-size to match the value used by
** the process that created this journal. If this journal was
** created by a process other than this one, then this routine
** is being called from within pager_playback(). The local value
** of Pager.sectorSize is restored at the end of that routine.
*/
rc = read32bits(pPager->jfd, (u32 *)&pPager->sectorSize);
if( rc ) return rc;
pPager->journalOff += JOURNAL_HDR_SZ(pPager);
rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff);
return rc;
}
/*
** Write the supplied master journal name into the journal file for pager
** pPager at the current location. The master journal name must be the last
** thing written to a journal file. If the pager is in full-sync mode, the
** journal file descriptor is advanced to the next sector boundary before
** anything is written. The format is:
**
** + 4 bytes: PAGER_MJ_PGNO.
** + N bytes: length of master journal name.
** + 4 bytes: N
** + 4 bytes: Master journal name checksum.
** + 8 bytes: aJournalMagic[].
**
** The master journal page checksum is the sum of the bytes in the master
** journal name.
**
** If zMaster is a NULL pointer (occurs for a single database transaction),
** this call is a no-op.
*/
static int writeMasterJournal(Pager *pPager, const char *zMaster){
int rc;
int len;
int i;
u32 cksum = 0;
char zBuf[sizeof(aJournalMagic)+2*4];
if( !zMaster || pPager->setMaster) return SQLITE_OK;
pPager->setMaster = 1;
len = strlen(zMaster);
for(i=0; i<len; i++){
cksum += zMaster[i];
}
/* If in full-sync mode, advance to the next disk sector before writing
** the master journal name. This is in case the previous page written to
** the journal has already been synced.
*/
if( pPager->fullSync ){
rc = seekJournalHdr(pPager);
if( rc!=SQLITE_OK ) return rc;
}
pPager->journalOff += (len+20);
rc = write32bits(pPager->jfd, PAGER_MJ_PGNO(pPager));
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3OsWrite(pPager->jfd, zMaster, len);
if( rc!=SQLITE_OK ) return rc;
put32bits(zBuf, len);
put32bits(&zBuf[4], cksum);
memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic));
pPager->needSync = !pPager->noSync;
return rc;
}
/*
** Add or remove a page from the list of all pages that are in the
** statement journal.
**
** The Pager keeps a separate list of pages that are currently in
** the statement journal. This helps the sqlite3pager_stmt_commit()
** routine run MUCH faster for the common case where there are many
** pages in memory but only a few are in the statement journal.
*/
static void page_add_to_stmt_list(PgHdr *pPg){
Pager *pPager = pPg->pPager;
if( pPg->inStmt ) return;
assert( pPg->pPrevStmt==0 && pPg->pNextStmt==0 );
pPg->pPrevStmt = 0;
if( pPager->pStmt ){
pPager->pStmt->pPrevStmt = pPg;
}
pPg->pNextStmt = pPager->pStmt;
pPager->pStmt = pPg;
pPg->inStmt = 1;
}
static void page_remove_from_stmt_list(PgHdr *pPg){
if( !pPg->inStmt ) return;
if( pPg->pPrevStmt ){
assert( pPg->pPrevStmt->pNextStmt==pPg );
pPg->pPrevStmt->pNextStmt = pPg->pNextStmt;
}else{
assert( pPg->pPager->pStmt==pPg );
pPg->pPager->pStmt = pPg->pNextStmt;
}
if( pPg->pNextStmt ){
assert( pPg->pNextStmt->pPrevStmt==pPg );
pPg->pNextStmt->pPrevStmt = pPg->pPrevStmt;
}
pPg->pNextStmt = 0;
pPg->pPrevStmt = 0;
pPg->inStmt = 0;
}
/*
** Find a page in the hash table given its page number. Return
** a pointer to the page or NULL if not found.
*/
static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
PgHdr *p;
if( pPager->aHash==0 ) return 0;
p = pPager->aHash[pgno & (pPager->nHash-1)];
while( p && p->pgno!=pgno ){
p = p->pNextHash;
}
return p;
}
/*
** Unlock the database and clear the in-memory cache. This routine
** sets the state of the pager back to what it was when it was first
** opened. Any outstanding pages are invalidated and subsequent attempts
** to access those pages will likely result in a coredump.
*/
static void pager_reset(Pager *pPager){
PgHdr *pPg, *pNext;
if( pPager->errCode ) return;
for(pPg=pPager->pAll; pPg; pPg=pNext){
pNext = pPg->pNextAll;
sqliteFree(pPg);
}
pPager->pFirst = 0;
pPager->pFirstSynced = 0;
pPager->pLast = 0;
pPager->pAll = 0;
pPager->nHash = 0;
sqliteFree(pPager->aHash);
pPager->nPage = 0;
pPager->aHash = 0;
if( pPager->state>=PAGER_RESERVED ){
sqlite3pager_rollback(pPager);
}
sqlite3OsUnlock(pPager->fd, NO_LOCK);
pPager->state = PAGER_UNLOCK;
pPager->dbSize = -1;
pPager->nRef = 0;
assert( pPager->journalOpen==0 );
}
/*
** When this routine is called, the pager has the journal file open and
** a RESERVED or EXCLUSIVE lock on the database. This routine releases
** the database lock and acquires a SHARED lock in its place. The journal
** file is deleted and closed.
**
** TODO: Consider keeping the journal file open for temporary databases.
** This might give a performance improvement on windows where opening
** a file is an expensive operation.
*/
static int pager_unwritelock(Pager *pPager){
PgHdr *pPg;
int rc;
assert( !MEMDB );
if( pPager->state<PAGER_RESERVED ){
return SQLITE_OK;
}
sqlite3pager_stmt_commit(pPager);
if( pPager->stmtOpen ){
sqlite3OsClose(&pPager->stfd);
pPager->stmtOpen = 0;
}
if( pPager->journalOpen ){
sqlite3OsClose(&pPager->jfd);
pPager->journalOpen = 0;
sqlite3OsDelete(pPager->zJournal);
sqliteFree( pPager->aInJournal );
pPager->aInJournal = 0;
for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
pPg->inJournal = 0;
pPg->dirty = 0;
pPg->needSync = 0;
#ifdef SQLITE_CHECK_PAGES
pPg->pageHash = pager_pagehash(pPg);
#endif
}
pPager->pDirty = 0;
pPager->dirtyCache = 0;
pPager->nRec = 0;
}else{
assert( pPager->aInJournal==0 );
assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
}
rc = sqlite3OsUnlock(pPager->fd, SHARED_LOCK);
pPager->state = PAGER_SHARED;
pPager->origDbSize = 0;
pPager->setMaster = 0;
pPager->needSync = 0;
pPager->pFirstSynced = pPager->pFirst;
return rc;
}
/*
** Compute and return a checksum for the page of data.
**
** This is not a real checksum. It is really just the sum of the
** random initial value and the page number. We experimented with
** a checksum of the entire data, but that was found to be too slow.
**
** Note that the page number is stored at the beginning of data and
** the checksum is stored at the end. This is important. If journal
** corruption occurs due to a power failure, the most likely scenario
** is that one end or the other of the record will be changed. It is
** much less likely that the two ends of the journal record will be
** correct and the middle be corrupt. Thus, this "checksum" scheme,
** though fast and simple, catches the mostly likely kind of corruption.
**
** FIX ME: Consider adding every 200th (or so) byte of the data to the
** checksum. That way if a single page spans 3 or more disk sectors and
** only the middle sector is corrupt, we will still have a reasonable
** chance of failing the checksum and thus detecting the problem.
*/
static u32 pager_cksum(Pager *pPager, const u8 *aData){
u32 cksum = pPager->cksumInit;
int i = pPager->pageSize-200;
while( i>0 ){
cksum += aData[i];
i -= 200;
}
return cksum;
}
/* Forward declaration */
static void makeClean(PgHdr*);
/*
** Read a single page from the journal file opened on file descriptor
** jfd. Playback this one page.
**
** If useCksum==0 it means this journal does not use checksums. Checksums
** are not used in statement journals because statement journals do not
** need to survive power failures.
*/
static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
int rc;
PgHdr *pPg; /* An existing page in the cache */
Pgno pgno; /* The page number of a page in journal */
u32 cksum; /* Checksum used for sanity checking */
u8 aData[SQLITE_MAX_PAGE_SIZE]; /* Temp storage for a page */
/* useCksum should be true for the main journal and false for
** statement journals. Verify that this is always the case
*/
assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) );
rc = read32bits(jfd, &pgno);
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3OsRead(jfd, &aData, pPager->pageSize);
if( rc!=SQLITE_OK ) return rc;
pPager->journalOff += pPager->pageSize + 4;
/* Sanity checking on the page. This is more important that I originally
** thought. If a power failure occurs while the journal is being written,
** it could cause invalid data to be written into the journal. We need to
** detect this invalid data (with high probability) and ignore it.
*/
if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
return SQLITE_DONE;
}
if( pgno>(unsigned)pPager->dbSize ){
return SQLITE_OK;
}
if( useCksum ){
rc = read32bits(jfd, &cksum);
if( rc ) return rc;
pPager->journalOff += 4;
if( pager_cksum(pPager, aData)!=cksum ){
return SQLITE_DONE;
}
}
assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
/* If the pager is in RESERVED state, then there must be a copy of this
** page in the pager cache. In this case just update the pager cache,
** not the database file. The page is left marked dirty in this case.
**
** If in EXCLUSIVE state, then we update the pager cache if it exists
** and the main file. The page is then marked not dirty.
**
** Ticket #1171: The statement journal might contain page content that is
** different from the page content at the start of the transaction.
** This occurs when a page is changed prior to the start of a statement
** then changed again within the statement. When rolling back such a
** statement we must not write to the original database unless we know
** for certain that original page contents are in the main rollback
** journal. Otherwise, if a full ROLLBACK occurs after the statement
** rollback the full ROLLBACK will not restore the page to its original
** content. Two conditions must be met before writing to the database
** files. (1) the database must be locked. (2) we know that the original
** page content is in the main journal either because the page is not in
** cache or else it is marked as needSync==0.
*/
pPg = pager_lookup(pPager, pgno);
assert( pPager->state>=PAGER_EXCLUSIVE || pPg!=0 );
TRACE3("PLAYBACK %d page %d\n", PAGERID(pPager), pgno);
if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){
rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize);
if( rc==SQLITE_OK ){
rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize);
}
if( pPg ){
makeClean(pPg);
}
}
if( pPg ){
/* No page should ever be explicitly rolled back that is in use, except
** for page 1 which is held in use in order to keep the lock on the
** database active. However such a page may be rolled back as a result
** of an internal error resulting in an automatic call to
** sqlite3pager_rollback().
*/
void *pData;
/* assert( pPg->nRef==0 || pPg->pgno==1 ); */
pData = PGHDR_TO_DATA(pPg);
memcpy(pData, aData, pPager->pageSize);
if( pPager->xDestructor ){ /*** FIX ME: Should this be xReinit? ***/
pPager->xDestructor(pData, pPager->pageSize);
}
#ifdef SQLITE_CHECK_PAGES
pPg->pageHash = pager_pagehash(pPg);
#endif
CODEC1(pPager, pData, pPg->pgno, 3);
}
return rc;
}
/*
** Parameter zMaster is the name of a master journal file. A single journal
** file that referred to the master journal file has just been rolled back.
** This routine checks if it is possible to delete the master journal file,
** and does so if it is.
**
** The master journal file contains the names of all child journals.
** To tell if a master journal can be deleted, check to each of the
** children. If all children are either missing or do not refer to
** a different master journal, then this master journal can be deleted.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -