📄 bt_cursor.c
字号:
/*- * See the file LICENSE for redistribution information. * * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */#include "db_config.h"#ifndef lintstatic const char revid[] = "$Id: bt_cursor.c,v 11.147 2002/08/13 20:46:07 ubell Exp $";#endif /* not lint */#ifndef NO_SYSTEM_INCLUDES#include <sys/types.h>#include <string.h>#endif#include "db_int.h"#include "dbinc/db_page.h"#include "dbinc/db_shash.h"#include "dbinc/btree.h"#include "dbinc/lock.h"static int __bam_bulk __P((DBC *, DBT *, u_int32_t));static int __bam_c_close __P((DBC *, db_pgno_t, int *));static int __bam_c_del __P((DBC *));static int __bam_c_destroy __P((DBC *));static int __bam_c_first __P((DBC *));static int __bam_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));static int __bam_c_getstack __P((DBC *));static int __bam_c_last __P((DBC *));static int __bam_c_next __P((DBC *, int, int));static int __bam_c_physdel __P((DBC *));static int __bam_c_prev __P((DBC *));static int __bam_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));static int __bam_c_search __P((DBC *, db_pgno_t, const DBT *, u_int32_t, int *));static int __bam_c_writelock __P((DBC *));static int __bam_getboth_finddatum __P((DBC *, DBT *, u_int32_t));static int __bam_getbothc __P((DBC *, DBT *));static int __bam_get_prev __P((DBC *));static int __bam_isopd __P((DBC *, db_pgno_t *));/* * Acquire a new page/lock. If we hold a page/lock, discard the page, and * lock-couple the lock. * * !!! * We have to handle both where we have a lock to lock-couple and where we * don't -- we don't duplicate locks when we duplicate cursors if we are * running in a transaction environment as there's no point if locks are * never discarded. This means that the cursor may or may not hold a lock. * In the case where we are decending the tree we always want to * unlock the held interior page so we use ACQUIRE_COUPLE. */#undef ACQUIRE#define ACQUIRE(dbc, mode, lpgno, lock, fpgno, pagep, ret) { \ DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ if ((pagep) != NULL) { \ ret = __mpf->put(__mpf, pagep, 0); \ pagep = NULL; \ } else \ ret = 0; \ if ((ret) == 0 && STD_LOCKING(dbc)) \ ret = __db_lget(dbc, LCK_COUPLE, lpgno, mode, 0, &(lock));\ if ((ret) == 0) \ ret = __mpf->get(__mpf, &(fpgno), 0, &(pagep)); \}#undef ACQUIRE_COUPLE#define ACQUIRE_COUPLE(dbc, mode, lpgno, lock, fpgno, pagep, ret) { \ DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ if ((pagep) != NULL) { \ ret = __mpf->put(__mpf, pagep, 0); \ pagep = NULL; \ } else \ ret = 0; \ if ((ret) == 0 && STD_LOCKING(dbc)) \ ret = __db_lget(dbc, \ LCK_COUPLE_ALWAYS, lpgno, mode, 0, &(lock)); \ if ((ret) == 0) \ ret = __mpf->get(__mpf, &(fpgno), 0, &(pagep)); \}/* Acquire a new page/lock for a cursor. */#undef ACQUIRE_CUR#define ACQUIRE_CUR(dbc, mode, p, ret) { \ BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ ACQUIRE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \ if ((ret) == 0) { \ __cp->pgno = p; \ __cp->lock_mode = (mode); \ } \}/* * Acquire a new page/lock for a cursor and release the previous. * This is typically used when decending a tree and we do not * want to hold the interior nodes locked. */#undef ACQUIRE_CUR_COUPLE#define ACQUIRE_CUR_COUPLE(dbc, mode, p, ret) { \ BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ ACQUIRE_COUPLE(dbc, mode, p, __cp->lock, p, __cp->page, ret); \ if ((ret) == 0) { \ __cp->pgno = p; \ __cp->lock_mode = (mode); \ } \}/* * Acquire a write lock if we don't already have one. * * !!! * See ACQUIRE macro on why we handle cursors that don't have locks. */#undef ACQUIRE_WRITE_LOCK#define ACQUIRE_WRITE_LOCK(dbc, ret) { \ BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ ret = 0; \ if (STD_LOCKING(dbc) && \ __cp->lock_mode != DB_LOCK_WRITE && \ ((ret) = __db_lget(dbc, \ LOCK_ISSET(__cp->lock) ? LCK_COUPLE : 0, \ __cp->pgno, DB_LOCK_WRITE, 0, &__cp->lock)) == 0) \ __cp->lock_mode = DB_LOCK_WRITE; \}/* Discard the current page/lock. */#undef DISCARD#define DISCARD(dbc, ldiscard, lock, pagep, ret) { \ DB_MPOOLFILE *__mpf = (dbc)->dbp->mpf; \ int __t_ret; \ if ((pagep) != NULL) { \ ret = __mpf->put(__mpf, pagep, 0); \ pagep = NULL; \ } else \ ret = 0; \ if (ldiscard) \ __t_ret = __LPUT((dbc), lock); \ else \ __t_ret = __TLPUT((dbc), lock); \ if (__t_ret != 0 && (ret) == 0) \ ret = __t_ret; \}/* Discard the current page/lock for a cursor. */#undef DISCARD_CUR#define DISCARD_CUR(dbc, ret) { \ BTREE_CURSOR *__cp = (BTREE_CURSOR *)(dbc)->internal; \ DISCARD(dbc, 0, __cp->lock, __cp->page, ret); \ if ((ret) == 0) \ __cp->lock_mode = DB_LOCK_NG; \}/* If on-page item is a deleted record. */#undef IS_DELETED#define IS_DELETED(dbp, page, indx) \ B_DISSET(GET_BKEYDATA(dbp, page, \ (indx) + (TYPE(page) == P_LBTREE ? O_INDX : 0))->type)#undef IS_CUR_DELETED#define IS_CUR_DELETED(dbc) \ IS_DELETED((dbc)->dbp, (dbc)->internal->page, (dbc)->internal->indx)/* * Test to see if two cursors could point to duplicates of the same key. * In the case of off-page duplicates they are they same, as the cursors * will be in the same off-page duplicate tree. In the case of on-page * duplicates, the key index offsets must be the same. For the last test, * as the original cursor may not have a valid page pointer, we use the * current cursor's. */#undef IS_DUPLICATE#define IS_DUPLICATE(dbc, i1, i2) \ (P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i1] == \ P_INP((dbc)->dbp,((PAGE *)(dbc)->internal->page))[i2])#undef IS_CUR_DUPLICATE#define IS_CUR_DUPLICATE(dbc, orig_pgno, orig_indx) \ (F_ISSET(dbc, DBC_OPD) || \ (orig_pgno == (dbc)->internal->pgno && \ IS_DUPLICATE(dbc, (dbc)->internal->indx, orig_indx)))/* * __bam_c_init -- * Initialize the access private portion of a cursor * * PUBLIC: int __bam_c_init __P((DBC *, DBTYPE)); */int__bam_c_init(dbc, dbtype) DBC *dbc; DBTYPE dbtype;{ DB_ENV *dbenv; int ret; dbenv = dbc->dbp->dbenv; /* Allocate/initialize the internal structure. */ if (dbc->internal == NULL && (ret = __os_malloc(dbenv, sizeof(BTREE_CURSOR), &dbc->internal)) != 0) return (ret); /* Initialize methods. */ dbc->c_close = __db_c_close; dbc->c_count = __db_c_count; dbc->c_del = __db_c_del; dbc->c_dup = __db_c_dup; dbc->c_get = dbc->c_real_get = __db_c_get; dbc->c_pget = __db_c_pget; dbc->c_put = __db_c_put; if (dbtype == DB_BTREE) { dbc->c_am_bulk = __bam_bulk; dbc->c_am_close = __bam_c_close; dbc->c_am_del = __bam_c_del; dbc->c_am_destroy = __bam_c_destroy; dbc->c_am_get = __bam_c_get; dbc->c_am_put = __bam_c_put; dbc->c_am_writelock = __bam_c_writelock; } else { dbc->c_am_bulk = __bam_bulk; dbc->c_am_close = __bam_c_close; dbc->c_am_del = __ram_c_del; dbc->c_am_destroy = __bam_c_destroy; dbc->c_am_get = __ram_c_get; dbc->c_am_put = __ram_c_put; dbc->c_am_writelock = __bam_c_writelock; } return (0);}/* * __bam_c_refresh * Set things up properly for cursor re-use. * * PUBLIC: int __bam_c_refresh __P((DBC *)); */int__bam_c_refresh(dbc) DBC *dbc;{ BTREE *t; BTREE_CURSOR *cp; DB *dbp; dbp = dbc->dbp; t = dbp->bt_internal; cp = (BTREE_CURSOR *)dbc->internal; /* * If our caller set the root page number, it's because the root was * known. This is always the case for off page dup cursors. Else, * pull it out of our internal information. */ if (cp->root == PGNO_INVALID) cp->root = t->bt_root; LOCK_INIT(cp->lock); cp->lock_mode = DB_LOCK_NG; cp->sp = cp->csp = cp->stack; cp->esp = cp->stack + sizeof(cp->stack) / sizeof(cp->stack[0]); /* * The btree leaf page data structures require that two key/data pairs * (or four items) fit on a page, but other than that there's no fixed * requirement. The btree off-page duplicates only require two items, * to be exact, but requiring four for them as well seems reasonable. * * Recno uses the btree bt_ovflsize value -- it's close enough. */ cp->ovflsize = B_MINKEY_TO_OVFLSIZE( dbp, F_ISSET(dbc, DBC_OPD) ? 2 : t->bt_minkey, dbp->pgsize); cp->recno = RECNO_OOB; cp->order = INVALID_ORDER; cp->flags = 0; /* Initialize for record numbers. */ if (F_ISSET(dbc, DBC_OPD) || dbc->dbtype == DB_RECNO || F_ISSET(dbp, DB_AM_RECNUM)) { F_SET(cp, C_RECNUM); /* * All btrees that support record numbers, optionally standard * recno trees, and all off-page duplicate recno trees have * mutable record numbers. */ if ((F_ISSET(dbc, DBC_OPD) && dbc->dbtype == DB_RECNO) || F_ISSET(dbp, DB_AM_RECNUM | DB_AM_RENUMBER)) F_SET(cp, C_RENUMBER); } return (0);}/* * __bam_c_close -- * Close down the cursor. */static int__bam_c_close(dbc, root_pgno, rmroot) DBC *dbc; db_pgno_t root_pgno; int *rmroot;{ BTREE_CURSOR *cp, *cp_opd, *cp_c; DB *dbp; DBC *dbc_opd, *dbc_c; DB_MPOOLFILE *mpf; PAGE *h; int cdb_lock, ret, t_ret; dbp = dbc->dbp; mpf = dbp->mpf; cp = (BTREE_CURSOR *)dbc->internal; cp_opd = (dbc_opd = cp->opd) == NULL ? NULL : (BTREE_CURSOR *)dbc_opd->internal; cdb_lock = ret = 0; /* * There are 3 ways this function is called: * * 1. Closing a primary cursor: we get called with a pointer to a * primary cursor that has a NULL opd field. This happens when * closing a btree/recno database cursor without an associated * off-page duplicate tree. * * 2. Closing a primary and an off-page duplicate cursor stack: we * get called with a pointer to the primary cursor which has a * non-NULL opd field. This happens when closing a btree cursor * into database with an associated off-page btree/recno duplicate * tree. (It can't be a primary recno database, recno databases * don't support duplicates.) * * 3. Closing an off-page duplicate cursor stack: we get called with * a pointer to the off-page duplicate cursor. This happens when * closing a non-btree database that has an associated off-page * btree/recno duplicate tree or for a btree database when the * opd tree is not empty (root_pgno == PGNO_INVALID). * * If either the primary or off-page duplicate cursor deleted a btree * key/data pair, check to see if the item is still referenced by a * different cursor. If it is, confirm that cursor's delete flag is * set and leave it to that cursor to do the delete. * * NB: The test for == 0 below is correct. Our caller already removed * our cursor argument from the active queue, we won't find it when we * search the queue in __bam_ca_delete(). * NB: It can't be true that both the primary and off-page duplicate * cursors have deleted a btree key/data pair. Either the primary * cursor may have deleted an item and there's no off-page duplicate * cursor, or there's an off-page duplicate cursor and it may have * deleted an item. * * Primary recno databases aren't an issue here. Recno keys are either * deleted immediately or never deleted, and do not have to be handled * here. * * Off-page duplicate recno databases are an issue here, cases #2 and * #3 above can both be off-page recno databases. The problem is the * same as the final problem for off-page duplicate btree databases. * If we no longer need the off-page duplicate tree, we want to remove * it. For off-page duplicate btrees, we are done with the tree when * we delete the last item it contains, i.e., there can be no further * references to it when it's empty. For off-page duplicate recnos, * we remove items from the tree as the application calls the remove * function, so we are done with the tree when we close the last cursor * that references it. * * We optionally take the root page number from our caller. If the * primary database is a btree, we can get it ourselves because dbc * is the primary cursor. If the primary database is not a btree, * the problem is that we may be dealing with a stack of pages. The * cursor we're using to do the delete points at the bottom of that * stack and we need the top of the stack. */ if (F_ISSET(cp, C_DELETED)) { dbc_c = dbc; switch (dbc->dbtype) { case DB_BTREE: /* Case #1, #3. */ if (__bam_ca_delete(dbp, cp->pgno, cp->indx, 1) == 0) goto lock; goto done; case DB_RECNO: if (!F_ISSET(dbc, DBC_OPD)) /* Case #1. */ goto done; /* Case #3. */ if (__ram_ca_delete(dbp, cp->root) == 0) goto lock; goto done; default: return (__db_unknown_type(dbp->dbenv, "__bam_c_close", dbc->dbtype)); } } if (dbc_opd == NULL) goto done; if (F_ISSET(cp_opd, C_DELETED)) { /* Case #2. */ /* * We will not have been provided a root page number. Acquire * one from the primary database. */ if ((ret = mpf->get(mpf, &cp->pgno, 0, &h)) != 0) goto err; root_pgno = GET_BOVERFLOW(dbp, h, cp->indx + O_INDX)->pgno; if ((ret = mpf->put(mpf, h, 0)) != 0) goto err; dbc_c = dbc_opd; switch (dbc_opd->dbtype) { case DB_BTREE: if (__bam_ca_delete( dbp, cp_opd->pgno, cp_opd->indx, 1) == 0) goto lock; goto done; case DB_RECNO: if (__ram_ca_delete(dbp, cp_opd->root) == 0) goto lock; goto done; default: return (__db_unknown_type(dbp->dbenv, "__bam_c_close", dbc->dbtype)); } } goto done;lock: cp_c = (BTREE_CURSOR *)dbc_c->internal; /* * If this is CDB, upgrade the lock if necessary. While we acquired * the write lock to logically delete the record, we released it when * we returned from that call, and so may not be holding a write lock * at the moment. NB: to get here in CDB we must either be holding a * write lock or be the only cursor that is permitted to acquire write * locks. The reason is that there can never be more than a single CDB * write cursor (that cursor cannot be dup'd), and so that cursor must * be closed and the item therefore deleted before any other cursor * could acquire a reference to this item. * * Note that dbc may be an off-page dup cursor; this is the sole * instance in which an OPD cursor does any locking, but it's necessary * because we may be closed by ourselves without a parent cursor * handy, and we have to do a lock upgrade on behalf of somebody. * If this is the case, the OPD has been given the parent's locking * info in __db_c_get--the OPD is also a WRITEDUP. */ if (CDB_LOCKING(dbp->dbenv)) { if (F_ISSET(dbc, DBC_WRITEDUP | DBC_WRITECURSOR)) { if ((ret = dbp->dbenv->lock_get( dbp->dbenv, dbc->locker, DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, &dbc->mylock)) != 0) goto err; cdb_lock = 1; } if ((ret = mpf->get(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0) goto err; goto delete; } /* * The variable dbc_c has been initialized to reference the cursor in * which we're going to do the delete. Initialize the cursor's page * and lock structures as necessary. * * First, we may not need to acquire any locks. If we're in case #3, * that is, the primary database isn't a btree database, our caller * is responsible for acquiring any necessary locks before calling us. */ if (F_ISSET(dbc, DBC_OPD)) { if ((ret = mpf->get(mpf, &cp_c->pgno, 0, &cp_c->page)) != 0) goto err; goto delete; } /* * Otherwise, acquire a write lock. If the cursor that did the initial * logical deletion (and which had a write lock) is not the same as the * cursor doing the physical deletion (which may have only ever had a * read lock on the item), we need to upgrade. The confusion comes as * follows: * * C1 created, acquires item read lock * C2 dup C1, create C2, also has item read lock. * C1 acquire write lock, delete item * C1 close * C2 close, needs a write lock to physically delete item. * * If we're in a TXN, we know that C2 will be able to acquire the write * lock, because no locker other than the one shared by C1 and C2 can * acquire a write lock -- the original write lock C1 acquire was never * discarded. * * If we're not in a TXN, it's nastier. Other cursors might acquire * read locks on the item after C1 closed, discarding its write lock, * and such locks would prevent C2 from acquiring a read lock. That's * OK, though, we'll simply wait until we can acquire a read lock, or * we'll deadlock. (Which better not happen, since we're not in a TXN.)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -