📄 btree.c
字号:
/* This routine should never be called on an overfull page. The
** following asserts verify that constraint. */
assert( Addr(pCell) > Addr(pPage) );
assert( Addr(pCell) < Addr(pPage) + eDb_USABLE_SIZE );
n = cellSize(pBt, pCell);
pCell->h.iNext = SWAB16(pBt, (u16)(pc + n));
memcpy(&newPage[pc], pCell, n);
pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc];
pc += n;
}
assert( pPage->nFree==eDb_USABLE_SIZE-pc );
memcpy(pPage->u.aDisk, newPage, pc);
if( pPage->nCell>0 ){
pPage->apCell[pPage->nCell-1]->h.iNext = 0;
}
pFBlk = (FreeBlk*)&pPage->u.aDisk[pc];
pFBlk->iSize = SWAB16(pBt, (u16)(eDb_USABLE_SIZE - pc));
pFBlk->iNext = 0;
pPage->u.hdr.firstFree = SWAB16(pBt, pc);
memset(&pFBlk[1], 0, eDb_USABLE_SIZE - pc - sizeof(FreeBlk));
}
/*
** Allocate nByte bytes of space on a page. nByte must be a
** multiple of 4.
**
** Return the index into pPage->u.aDisk[] of the first byte of
** the new allocation. Or return 0 if there is not enough free
** space on the page to satisfy the allocation request.
**
** If the page contains nBytes of free space but does not contain
** nBytes of contiguous free space, then this routine automatically
** calls defragementPage() to consolidate all free space before
** allocating the new chunk.
*/
static int allocateSpace(Btree *pBt, MemPage *pPage, int nByte){
FreeBlk *p;
u16 *pIdx;
int start;
int iSize;
#ifndef NDEBUG
int cnt = 0;
#endif
assert( eDbpager_iswriteable(pPage) );
assert( nByte==ROUNDUP(nByte) );
assert( pPage->isInit );
if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
pIdx = &pPage->u.hdr.firstFree;
p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
while( (iSize = SWAB16(pBt, p->iSize))<nByte ){
assert( cnt++ < eDb_USABLE_SIZE/4 );
if( p->iNext==0 ){
defragmentPage(pBt, pPage);
pIdx = &pPage->u.hdr.firstFree;
}else{
pIdx = &p->iNext;
}
p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
}
if( iSize==nByte ){
start = SWAB16(pBt, *pIdx);
*pIdx = p->iNext;
}else{
FreeBlk *pNew;
start = SWAB16(pBt, *pIdx);
pNew = (FreeBlk*)&pPage->u.aDisk[start + nByte];
pNew->iNext = p->iNext;
pNew->iSize = SWAB16(pBt, (u16)(iSize - nByte));
*pIdx = SWAB16(pBt, (u16)(start + nByte));
}
pPage->nFree -= nByte;
return start;
}
/*
** Return a section of the MemPage.u.aDisk[] to the freelist.
** The first byte of the new free block is pPage->u.aDisk[start]
** and the size of the block is "size" bytes. Size must be
** a multiple of 4.
**
** Most of the effort here is involved in coalesing adjacent
** free blocks into a single big free block.
*/
static void freeSpace(Btree *pBt, MemPage *pPage, int start, int size){
int end = start + size;
u16 *pIdx, idx;
FreeBlk *pFBlk;
FreeBlk *pNew;
FreeBlk *pNext;
int iSize;
assert( eDbpager_iswriteable(pPage) );
assert( size == ROUNDUP(size) );
assert( start == ROUNDUP(start) );
assert( pPage->isInit );
pIdx = &pPage->u.hdr.firstFree;
idx = SWAB16(pBt, *pIdx);
while( idx!=0 && idx<start ){
pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
iSize = SWAB16(pBt, pFBlk->iSize);
if( idx + iSize == start ){
pFBlk->iSize = SWAB16(pBt, (u16)(iSize + size));
if( idx + iSize + size == SWAB16(pBt, pFBlk->iNext) ){
pNext = (FreeBlk*)&pPage->u.aDisk[idx + iSize + size];
if( pBt->needSwab ){
pFBlk->iSize = swab16((u16)(swab16(pNext->iSize) + iSize+size));
}else{
pFBlk->iSize += pNext->iSize;
}
pFBlk->iNext = pNext->iNext;
}
pPage->nFree += size;
return;
}
pIdx = &pFBlk->iNext;
idx = SWAB16(pBt, *pIdx);
}
pNew = (FreeBlk*)&pPage->u.aDisk[start];
if( idx != end ){
pNew->iSize = SWAB16(pBt, size);
pNew->iNext = SWAB16(pBt, idx);
}else{
pNext = (FreeBlk*)&pPage->u.aDisk[idx];
pNew->iSize = SWAB16(pBt, (u16)(size + SWAB16(pBt, pNext->iSize)));
pNew->iNext = pNext->iNext;
}
*pIdx = SWAB16(pBt, start);
pPage->nFree += size;
}
/*
** Initialize the auxiliary information for a disk block.
**
** The pParent parameter must be a pointer to the MemPage which
** is the parent of the page being initialized. The root of the
** BTree (usually page 2) has no parent and so for that page,
** pParent==NULL.
**
** Return eDb_OK on success. If we see that the page does
** not contain a well-formed database page, then return
** eDb_CORRUPT. Note that a return of eDb_OK does not
** guarantee that the page is well-formed. It only shows that
** we failed to detect any corruption.
*/
static int initPage(Bt *pBt, MemPage *pPage, Pgno pgnoThis, MemPage *pParent){
int idx; /* An index into pPage->u.aDisk[] */
Cell *pCell; /* A pointer to a Cell in pPage->u.aDisk[] */
FreeBlk *pFBlk; /* A pointer to a free block in pPage->u.aDisk[] */
int sz; /* The size of a Cell in bytes */
int freeSpace; /* Amount of free space on the page */
if( pPage->pParent ){
assert( pPage->pParent==pParent );
return eDb_OK;
}
if( pParent ){
pPage->pParent = pParent;
eDbpager_ref(pParent);
}
if( pPage->isInit ) return eDb_OK;
pPage->isInit = 1;
pPage->nCell = 0;
freeSpace = USABLE_SPACE;
idx = SWAB16(pBt, pPage->u.hdr.firstCell);
while( idx!=0 ){
if( idx>eDb_USABLE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
if( idx<sizeof(PageHdr) ) goto page_format_error;
if( idx!=ROUNDUP(idx) ) goto page_format_error;
pCell = (Cell*)&pPage->u.aDisk[idx];
sz = cellSize(pBt, pCell);
if( idx+sz > eDb_USABLE_SIZE ) goto page_format_error;
freeSpace -= sz;
pPage->apCell[pPage->nCell++] = pCell;
idx = SWAB16(pBt, pCell->h.iNext);
}
pPage->nFree = 0;
idx = SWAB16(pBt, pPage->u.hdr.firstFree);
while( idx!=0 ){
int iNext;
if( idx>eDb_USABLE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
if( idx<sizeof(PageHdr) ) goto page_format_error;
pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
pPage->nFree += SWAB16(pBt, pFBlk->iSize);
iNext = SWAB16(pBt, pFBlk->iNext);
if( iNext>0 && iNext <= idx ) goto page_format_error;
idx = iNext;
}
if( pPage->nCell==0 && pPage->nFree==0 ){
/* As a special case, an uninitialized root page appears to be
** an empty database */
return eDb_OK;
}
if( pPage->nFree!=freeSpace ) goto page_format_error;
return eDb_OK;
page_format_error:
return eDb_CORRUPT;
}
/*
** Set up a raw page so that it looks like a database page holding
** no entries.
*/
static void zeroPage(Btree *pBt, MemPage *pPage){
PageHdr *pHdr;
FreeBlk *pFBlk;
assert( eDbpager_iswriteable(pPage) );
memset(pPage, 0, eDb_USABLE_SIZE);
pHdr = &pPage->u.hdr;
pHdr->firstCell = 0;
pHdr->firstFree = SWAB16(pBt, sizeof(*pHdr));
pFBlk = (FreeBlk*)&pHdr[1];
pFBlk->iNext = 0;
pPage->nFree = eDb_USABLE_SIZE - sizeof(*pHdr);
pFBlk->iSize = SWAB16(pBt, pPage->nFree);
pPage->nCell = 0;
pPage->isOverfull = 0;
}
/*
** This routine is called when the reference count for a page
** reaches zero. We need to unref the pParent pointer when that
** happens.
*/
static void pageDestructor(void *pData){
MemPage *pPage = (MemPage*)pData;
if( pPage->pParent ){
MemPage *pParent = pPage->pParent;
pPage->pParent = 0;
eDbpager_unref(pParent);
}
}
/*
** Open a new database.
**
** Actually, this routine just sets up the internal data structures
** for accessing the database. We do not open the database file
** until the first page is loaded.
**
** zFilename is the name of the database file. If zFilename is NULL
** a new database with a random name is created. This randomly named
** database file will be deleted when eDbBtreeClose() is called.
*/
int eDbBtreeOpen(
const char *zFilename, /* Name of the file containing the BTree database */
int omitJournal, /* if TRUE then do not journal this file */
int nCache, /* How many pages in the page cache */
Btree **ppBtree /* Pointer to new Btree object written here */
){
Btree *pBt;
int rc;
/*
** The following asserts make sure that structures used by the btree are
** the right size. This is to guard against size changes that result
** when compiling on a different architecture.
*/
assert( sizeof(u32)==4 );
assert( sizeof(u16)==2 );
assert( sizeof(Pgno)==4 );
assert( sizeof(PageHdr)==8 );
assert( sizeof(CellHdr)==12 );
assert( sizeof(FreeBlk)==4 );
assert( sizeof(OverflowPage)==eDb_USABLE_SIZE );
assert( sizeof(FreelistInfo)==OVERFLOW_SIZE );
assert( sizeof(ptr)==sizeof(char*) );
assert( sizeof(uptr)==sizeof(ptr) );
pBt = eDbMalloc( sizeof(*pBt) );
if( pBt==0 ){
*ppBtree = 0;
return eDb_NOMEM;
}
if( nCache<10 ) nCache = 10;
rc = eDbpager_open(&pBt->pPager, zFilename, nCache, EXTRA_SIZE,
!omitJournal);
if( rc!=eDb_OK ){
if( pBt->pPager ) eDbpager_close(pBt->pPager);
eDbFree(pBt);
*ppBtree = 0;
return rc;
}
eDbpager_set_destructor(pBt->pPager, pageDestructor);
pBt->pCursor = 0;
pBt->page1 = 0;
pBt->readOnly = eDbpager_isreadonly(pBt->pPager);
pBt->pOps = &eDbBtreeOps;
*ppBtree = pBt;
return eDb_OK;
}
/*
** Close an open database and invalidate all cursors.
*/
static int fileBtreeClose(Btree *pBt){
while( pBt->pCursor ){
fileBtreeCloseCursor(pBt->pCursor);
}
eDbpager_close(pBt->pPager);
eDbFree(pBt);
return eDb_OK;
}
/*
** Change the limit on the number of pages allowed in the cache.
**
** The maximum number of cache pages is set to the absolute
** value of mxPage. If mxPage is negative, the pager will
** operate asynchronously - it will not stop to do fsync()s
** to insure data is written to the disk surface before
** continuing. Transactions still work if synchronous is off,
** and the database cannot be corrupted if this program
** crashes. But if the operating system crashes or there is
** an abrupt power failure when synchronous is off, the database
** could be left in an inconsistent and unrecoverable state.
** Synchronous is on by default so database corruption is not
** normally a worry.
*/
static int fileBtreeSetCacheSize(Btree *pBt, int mxPage){
eDbpager_set_cachesize(pBt->pPager, mxPage);
return eDb_OK;
}
/*
** Change the way data is synced to disk in order to increase or decrease
** how well the database resists damage due to OS crashes and power
** failures. Level 1 is the same as asynchronous (no syncs() occur and
** there is a high probability of damage) Level 2 is the default. There
** is a very low but non-zero probability of damage. Level 3 reduces the
** probability of damage to near zero but with a write performance reduction.
*/
static int fileBtreeSetSafetyLevel(Btree *pBt, int level){
eDbpager_set_safety_level(pBt->pPager, level);
return eDb_OK;
}
/*
** Get a reference to page1 of the database file. This will
** also acquire a readlock on that file.
**
** eDb_OK is returned on success. If the file is not a
** well-formed database file, then eDb_CORRUPT is returned.
** eDb_BUSY is returned if the database is locked. eDb_NOMEM
** is returned if we run out of memory. eDb_PROTOCOL is returned
** if there is a locking protocol violation.
*/
static int lockBtree(Btree *pBt){
int rc;
if( pBt->page1 ) return eDb_OK;
rc = eDbpager_get(pBt->pPager, 1, (void**)&pBt->page1);
if( rc!=eDb_OK ) return rc;
/* Do some checking to help insure the file we opened really is
** a valid database file.
*/
if( eDbpager_pagecount(pBt->pPager)>0 ){
PageOne *pP1 = pBt->page1;
if( strcmp(pP1->zMagic,zMagicHeader)!=0 ||
(pP1->iMagic!=MAGIC && swab32(pP1->iMagic)!=MAGIC) ){
rc = eDb_NOTADB;
goto page1_init_failed;
}
pBt->needSwab = pP1->iMagic!=MAGIC;
}
return rc;
page1_init_failed:
eDbpager_unref(pBt->page1);
pBt->page1 = 0;
return rc;
}
/*
** If there are no outstanding cursors and we are not in the middle
** of a transaction but there is a read lock on the database, then
** this routine unrefs the first page of the database file which
** has the effect of releasing the read lock.
**
** If there are any outstanding cursors, this routine is a no-op.
**
** If there is a transaction in progress, this routine is a no-op.
*/
static void unlockBtreeIfUnused(Btree *pBt){
if( pBt->inTrans==0 && pBt->pCursor==0 && pBt->page1!=0 ){
eDbpager_unref(pBt->page1);
pBt->page1 = 0;
pBt->inTrans = 0;
pBt->inCkpt = 0;
}
}
/*
** Create a new database by initializing the first two pages of the
** file.
*/
static int newDatabase(Btree *pBt){
MemPage *pRoot;
PageOne *pP1;
int rc;
if( eDbpager_pagecount(pBt->pPager)>1 ) return eDb_OK;
pP1 = pBt->page1;
rc = eDbpager_write(pBt->page1);
if( rc ) return rc;
rc = eDbpager_get(pBt->pPager, 2, (void**)&pRoot);
if( rc ) return rc;
rc = eDbpager_write(pRoot);
if( rc ){
eDbpager_unref(pRoot);
return rc;
}
strcpy(pP1->zMagic, zMagicHeader);
if( btree_native_byte_order ){
pP1->iMagic = MAGIC;
pBt->needSwab = 0;
}else{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -