📄 pager.c
字号:
**** Note that the page number is stored at the beginning of data and** the checksum is stored at the end. This is important. If journal** corruption occurs due to a power failure, the most likely scenario** is that one end or the other of the record will be changed. It is** much less likely that the two ends of the journal record will be** correct and the middle be corrupt. Thus, this "checksum" scheme,** though fast and simple, catches the mostly likely kind of corruption.**** FIX ME: Consider adding every 200th (or so) byte of the data to the** checksum. That way if a single page spans 3 or more disk sectors and** only the middle sector is corrupt, we will still have a reasonable** chance of failing the checksum and thus detecting the problem.*/static u32 pager_cksum(Pager *pPager, const u8 *aData){ u32 cksum = pPager->cksumInit; int i = pPager->pageSize-200; while( i>0 ){ cksum += aData[i]; i -= 200; } return cksum;}/* Forward declaration */static void makeClean(PgHdr*);/*** Read a single page from the journal file opened on file descriptor** jfd. Playback this one page.**** If useCksum==0 it means this journal does not use checksums. Checksums** are not used in statement journals because statement journals do not** need to survive power failures.*/static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){ int rc; PgHdr *pPg; /* An existing page in the cache */ Pgno pgno; /* The page number of a page in journal */ u32 cksum; /* Checksum used for sanity checking */ u8 *aData = (u8 *)pPager->pTmpSpace; /* Temp storage for a page */ /* useCksum should be true for the main journal and false for ** statement journals. Verify that this is always the case */ assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) ); assert( aData ); rc = read32bits(jfd, &pgno); if( rc!=SQLITE_OK ) return rc; rc = sqlite3OsRead(jfd, aData, pPager->pageSize); if( rc!=SQLITE_OK ) return rc; pPager->journalOff += pPager->pageSize + 4; /* Sanity checking on the page. This is more important that I originally ** thought. If a power failure occurs while the journal is being written, ** it could cause invalid data to be written into the journal. We need to ** detect this invalid data (with high probability) and ignore it. */ if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){ return SQLITE_DONE; } if( pgno>(unsigned)pPager->dbSize ){ return SQLITE_OK; } if( useCksum ){ rc = read32bits(jfd, &cksum); if( rc ) return rc; pPager->journalOff += 4; if( pager_cksum(pPager, aData)!=cksum ){ return SQLITE_DONE; } } assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE ); /* If the pager is in RESERVED state, then there must be a copy of this ** page in the pager cache. In this case just update the pager cache, ** not the database file. The page is left marked dirty in this case. ** ** An exception to the above rule: If the database is in no-sync mode ** and a page is moved during an incremental vacuum then the page may ** not be in the pager cache. Later: if a malloc() or IO error occurs ** during a Movepage() call, then the page may not be in the cache ** either. So the condition described in the above paragraph is not ** assert()able. ** ** If in EXCLUSIVE state, then we update the pager cache if it exists ** and the main file. The page is then marked not dirty. ** ** Ticket #1171: The statement journal might contain page content that is ** different from the page content at the start of the transaction. ** This occurs when a page is changed prior to the start of a statement ** then changed again within the statement. When rolling back such a ** statement we must not write to the original database unless we know ** for certain that original page contents are synced into the main rollback ** journal. Otherwise, a power loss might leave modified data in the ** database file without an entry in the rollback journal that can ** restore the database to its original form. Two conditions must be ** met before writing to the database files. (1) the database must be ** locked. (2) we know that the original page content is fully synced ** in the main journal either because the page is not in cache or else ** the page is marked as needSync==0. */ pPg = pager_lookup(pPager, pgno); PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n", PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData)); if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){ rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize); if( rc==SQLITE_OK ){ rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize); } if( pPg ){ makeClean(pPg); } } if( pPg ){ /* No page should ever be explicitly rolled back that is in use, except ** for page 1 which is held in use in order to keep the lock on the ** database active. However such a page may be rolled back as a result ** of an internal error resulting in an automatic call to ** sqlite3PagerRollback(). */ void *pData; /* assert( pPg->nRef==0 || pPg->pgno==1 ); */ pData = PGHDR_TO_DATA(pPg); memcpy(pData, aData, pPager->pageSize); if( pPager->xReiniter ){ pPager->xReiniter(pPg, pPager->pageSize); }#ifdef SQLITE_CHECK_PAGES pPg->pageHash = pager_pagehash(pPg);#endif /* If this was page 1, then restore the value of Pager.dbFileVers. ** Do this before any decoding. */ if( pgno==1 ){ memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers)); } /* Decode the page just read from disk */ CODEC1(pPager, pData, pPg->pgno, 3); } return rc;}/*** Parameter zMaster is the name of a master journal file. A single journal** file that referred to the master journal file has just been rolled back.** This routine checks if it is possible to delete the master journal file,** and does so if it is.**** The master journal file contains the names of all child journals.** To tell if a master journal can be deleted, check to each of the** children. If all children are either missing or do not refer to** a different master journal, then this master journal can be deleted.*/static int pager_delmaster(const char *zMaster){ int rc; int master_open = 0; OsFile *master = 0; char *zMasterJournal = 0; /* Contents of master journal file */ i64 nMasterJournal; /* Size of master journal file */ /* Open the master journal file exclusively in case some other process ** is running this routine also. Not that it makes too much difference. */ rc = sqlite3OsOpenReadOnly(zMaster, &master); assert( rc!=SQLITE_OK || master ); if( rc!=SQLITE_OK ) goto delmaster_out; master_open = 1; rc = sqlite3OsFileSize(master, &nMasterJournal); if( rc!=SQLITE_OK ) goto delmaster_out; if( nMasterJournal>0 ){ char *zJournal; char *zMasterPtr = 0; /* Load the entire master journal file into space obtained from ** sqliteMalloc() and pointed to by zMasterJournal. */ zMasterJournal = (char *)sqliteMalloc(nMasterJournal); if( !zMasterJournal ){ rc = SQLITE_NOMEM; goto delmaster_out; } rc = sqlite3OsRead(master, zMasterJournal, nMasterJournal); if( rc!=SQLITE_OK ) goto delmaster_out; zJournal = zMasterJournal; while( (zJournal-zMasterJournal)<nMasterJournal ){ if( sqlite3OsFileExists(zJournal) ){ /* One of the journals pointed to by the master journal exists. ** Open it and check if it points at the master journal. If ** so, return without deleting the master journal file. */ OsFile *journal = 0; int c; rc = sqlite3OsOpenReadOnly(zJournal, &journal); assert( rc!=SQLITE_OK || journal ); if( rc!=SQLITE_OK ){ goto delmaster_out; } rc = readMasterJournal(journal, &zMasterPtr); sqlite3OsClose(&journal); if( rc!=SQLITE_OK ){ goto delmaster_out; } c = zMasterPtr!=0 && strcmp(zMasterPtr, zMaster)==0; sqliteFree(zMasterPtr); if( c ){ /* We have a match. Do not delete the master journal file. */ goto delmaster_out; } } zJournal += (strlen(zJournal)+1); } } rc = sqlite3OsDelete(zMaster);delmaster_out: if( zMasterJournal ){ sqliteFree(zMasterJournal); } if( master_open ){ sqlite3OsClose(&master); } return rc;}static void pager_truncate_cache(Pager *pPager);/*** Truncate the main file of the given pager to the number of pages** indicated. Also truncate the cached representation of the file.*/static int pager_truncate(Pager *pPager, int nPage){ int rc = SQLITE_OK; if( pPager->state>=PAGER_EXCLUSIVE ){ rc = sqlite3OsTruncate(pPager->fd, pPager->pageSize*(i64)nPage); } if( rc==SQLITE_OK ){ pPager->dbSize = nPage; pager_truncate_cache(pPager); } return rc;}/*** Set the sectorSize for the given pager.**** The sector size is the larger of the sector size reported** by sqlite3OsSectorSize() and the pageSize.*/static void setSectorSize(Pager *pPager){ pPager->sectorSize = sqlite3OsSectorSize(pPager->fd); if( pPager->sectorSize<pPager->pageSize ){ pPager->sectorSize = pPager->pageSize; }}/*** Playback the journal and thus restore the database file to** the state it was in before we started making changes. **** The journal file format is as follows: **** (1) 8 byte prefix. A copy of aJournalMagic[].** (2) 4 byte big-endian integer which is the number of valid page records** in the journal. If this value is 0xffffffff, then compute the** number of page records from the journal size.** (3) 4 byte big-endian integer which is the initial value for the ** sanity checksum.** (4) 4 byte integer which is the number of pages to truncate the** database to during a rollback.** (5) 4 byte integer which is the number of bytes in the master journal** name. The value may be zero (indicate that there is no master** journal.)** (6) N bytes of the master journal name. The name will be nul-terminated** and might be shorter than the value read from (5). If the first byte** of the name is \000 then there is no master journal. The master** journal name is stored in UTF-8.** (7) Zero or more pages instances, each as follows:** + 4 byte page number.** + pPager->pageSize bytes of data.** + 4 byte checksum**** When we speak of the journal header, we mean the first 6 items above.** Each entry in the journal is an instance of the 7th item.**** Call the value from the second bullet "nRec". nRec is the number of** valid page entries in the journal. In most cases, you can compute the** value of nRec from the size of the journal file. But if a power** failure occurred while the journal was being written, it could be the** case that the size of the journal file had already been increased but** the extra entries had not yet made it safely to disk. In such a case,** the value of nRec computed from the file size would be too large. For** that reason, we always use the nRec value in the header.**** If the nRec value is 0xffffffff it means that nRec should be computed** from the file size. This value is used when the user selects the** no-sync option for the journal. A power failure could lead to corruption** in this case. But for things like temporary table (which will be** deleted when the power is restored) we don't care. **** If the file opened as the journal file is not a well-formed** journal file then all pages up to the first corrupted page are rolled** back (or no pages if the journal header is corrupted). The journal file** is then deleted and SQLITE_OK returned, just as if no corruption had** been encountered.**** If an I/O or malloc() error occurs, the journal-file is not deleted** and an error code is returned.*/static int pager_playback(Pager *pPager, int isHot){ i64 szJ; /* Size of the journal file in bytes */ u32 nRec; /* Number of Records in the journal */ int i; /* Loop counter */ Pgno mxPg = 0; /* Size of the original file in pages */ int rc; /* Result code of a subroutine */ char *zMaster = 0; /* Name of master journal file if any */ /* Figure out how many records are in the journal. Abort early if ** the journal is empty. */ assert( pPager->journalOpen ); rc = sqlite3OsFileSize(pPager->jfd, &szJ); if( rc!=SQLITE_OK || szJ==0 ){ goto end_playback; } /* Read the master journal name from the journal, if it is present. ** If a master journal file name is specified, but the file is not ** present on disk, then the journal is not hot and does not need to be ** played back. */ rc = readMasterJournal(pPager->jfd, &zMaster); assert( rc!=SQLITE_DONE ); if( rc!=SQLITE_OK || (zMaster && !sqlite3OsFileExists(zMaster)) ){ sqliteFree(zMaster); zMaster = 0; if( rc==SQLITE_DONE ) rc = SQLITE_OK; goto end_playback; } sqlite3OsSeek(pPager->jfd, 0); pPager->journalOff = 0; /* This loop terminates either when the readJournalHdr() call returns ** SQLITE_DONE or an IO error occurs. */ while( 1 ){ /* Read the next journal header from the journal file. If there are ** not enough bytes left in the journal file for a complete header, or ** it is corrupted, then a process must of failed while writing it.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -